xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1e8d8bef9SDimitry Andric //===-- X86InstCombineIntrinsic.cpp - X86 specific InstCombine pass -------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric /// \file
9e8d8bef9SDimitry Andric /// This file implements a TargetTransformInfo analysis pass specific to the
10e8d8bef9SDimitry Andric /// X86 target machine. It uses the target's detailed information to provide
11e8d8bef9SDimitry Andric /// more precise answers to certain TTI queries, while letting the target
12e8d8bef9SDimitry Andric /// independent and default TTI implementations handle the rest.
13e8d8bef9SDimitry Andric ///
14e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
15e8d8bef9SDimitry Andric 
16e8d8bef9SDimitry Andric #include "X86TargetTransformInfo.h"
17e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
18e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsX86.h"
19e8d8bef9SDimitry Andric #include "llvm/Support/KnownBits.h"
20e8d8bef9SDimitry Andric #include "llvm/Transforms/InstCombine/InstCombiner.h"
21bdd1243dSDimitry Andric #include <optional>
22e8d8bef9SDimitry Andric 
23e8d8bef9SDimitry Andric using namespace llvm;
24e8d8bef9SDimitry Andric 
25e8d8bef9SDimitry Andric #define DEBUG_TYPE "x86tti"
26e8d8bef9SDimitry Andric 
27e8d8bef9SDimitry Andric /// Return a constant boolean vector that has true elements in all positions
28e8d8bef9SDimitry Andric /// where the input constant data vector has an element with the sign bit set.
29e8d8bef9SDimitry Andric static Constant *getNegativeIsTrueBoolVec(Constant *V) {
30e8d8bef9SDimitry Andric   VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
31e8d8bef9SDimitry Andric   V = ConstantExpr::getBitCast(V, IntTy);
32e8d8bef9SDimitry Andric   V = ConstantExpr::getICmp(CmpInst::ICMP_SGT, Constant::getNullValue(IntTy),
33e8d8bef9SDimitry Andric                             V);
34e8d8bef9SDimitry Andric   return V;
35e8d8bef9SDimitry Andric }
36e8d8bef9SDimitry Andric 
37e8d8bef9SDimitry Andric /// Convert the x86 XMM integer vector mask to a vector of bools based on
38e8d8bef9SDimitry Andric /// each element's most significant bit (the sign bit).
39e8d8bef9SDimitry Andric static Value *getBoolVecFromMask(Value *Mask) {
40e8d8bef9SDimitry Andric   // Fold Constant Mask.
41e8d8bef9SDimitry Andric   if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
42e8d8bef9SDimitry Andric     return getNegativeIsTrueBoolVec(ConstantMask);
43e8d8bef9SDimitry Andric 
44e8d8bef9SDimitry Andric   // Mask was extended from a boolean vector.
45e8d8bef9SDimitry Andric   Value *ExtMask;
46e8d8bef9SDimitry Andric   if (PatternMatch::match(
47e8d8bef9SDimitry Andric           Mask, PatternMatch::m_SExt(PatternMatch::m_Value(ExtMask))) &&
48e8d8bef9SDimitry Andric       ExtMask->getType()->isIntOrIntVectorTy(1))
49e8d8bef9SDimitry Andric     return ExtMask;
50e8d8bef9SDimitry Andric 
51e8d8bef9SDimitry Andric   return nullptr;
52e8d8bef9SDimitry Andric }
53e8d8bef9SDimitry Andric 
54e8d8bef9SDimitry Andric // TODO: If the x86 backend knew how to convert a bool vector mask back to an
55e8d8bef9SDimitry Andric // XMM register mask efficiently, we could transform all x86 masked intrinsics
56e8d8bef9SDimitry Andric // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
57e8d8bef9SDimitry Andric static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
58e8d8bef9SDimitry Andric   Value *Ptr = II.getOperand(0);
59e8d8bef9SDimitry Andric   Value *Mask = II.getOperand(1);
60e8d8bef9SDimitry Andric   Constant *ZeroVec = Constant::getNullValue(II.getType());
61e8d8bef9SDimitry Andric 
62e8d8bef9SDimitry Andric   // Zero Mask - masked load instruction creates a zero vector.
63e8d8bef9SDimitry Andric   if (isa<ConstantAggregateZero>(Mask))
64e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, ZeroVec);
65e8d8bef9SDimitry Andric 
66e8d8bef9SDimitry Andric   // The mask is constant or extended from a bool vector. Convert this x86
67e8d8bef9SDimitry Andric   // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
68e8d8bef9SDimitry Andric   if (Value *BoolMask = getBoolVecFromMask(Mask)) {
69e8d8bef9SDimitry Andric     // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
70e8d8bef9SDimitry Andric     // the LLVM intrinsic definition for the pointer argument.
71e8d8bef9SDimitry Andric     unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
72e8d8bef9SDimitry Andric     PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
73e8d8bef9SDimitry Andric     Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
74e8d8bef9SDimitry Andric 
75e8d8bef9SDimitry Andric     // The pass-through vector for an x86 masked load is a zero vector.
76fe6060f1SDimitry Andric     CallInst *NewMaskedLoad = IC.Builder.CreateMaskedLoad(
77fe6060f1SDimitry Andric         II.getType(), PtrCast, Align(1), BoolMask, ZeroVec);
78e8d8bef9SDimitry Andric     return IC.replaceInstUsesWith(II, NewMaskedLoad);
79e8d8bef9SDimitry Andric   }
80e8d8bef9SDimitry Andric 
81e8d8bef9SDimitry Andric   return nullptr;
82e8d8bef9SDimitry Andric }
83e8d8bef9SDimitry Andric 
84e8d8bef9SDimitry Andric // TODO: If the x86 backend knew how to convert a bool vector mask back to an
85e8d8bef9SDimitry Andric // XMM register mask efficiently, we could transform all x86 masked intrinsics
86e8d8bef9SDimitry Andric // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
87e8d8bef9SDimitry Andric static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
88e8d8bef9SDimitry Andric   Value *Ptr = II.getOperand(0);
89e8d8bef9SDimitry Andric   Value *Mask = II.getOperand(1);
90e8d8bef9SDimitry Andric   Value *Vec = II.getOperand(2);
91e8d8bef9SDimitry Andric 
92e8d8bef9SDimitry Andric   // Zero Mask - this masked store instruction does nothing.
93e8d8bef9SDimitry Andric   if (isa<ConstantAggregateZero>(Mask)) {
94e8d8bef9SDimitry Andric     IC.eraseInstFromFunction(II);
95e8d8bef9SDimitry Andric     return true;
96e8d8bef9SDimitry Andric   }
97e8d8bef9SDimitry Andric 
98e8d8bef9SDimitry Andric   // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
99e8d8bef9SDimitry Andric   // anything else at this level.
100e8d8bef9SDimitry Andric   if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
101e8d8bef9SDimitry Andric     return false;
102e8d8bef9SDimitry Andric 
103e8d8bef9SDimitry Andric   // The mask is constant or extended from a bool vector. Convert this x86
104e8d8bef9SDimitry Andric   // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
105e8d8bef9SDimitry Andric   if (Value *BoolMask = getBoolVecFromMask(Mask)) {
106e8d8bef9SDimitry Andric     unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
107e8d8bef9SDimitry Andric     PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
108e8d8bef9SDimitry Andric     Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
109e8d8bef9SDimitry Andric 
110e8d8bef9SDimitry Andric     IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);
111e8d8bef9SDimitry Andric 
112e8d8bef9SDimitry Andric     // 'Replace uses' doesn't work for stores. Erase the original masked store.
113e8d8bef9SDimitry Andric     IC.eraseInstFromFunction(II);
114e8d8bef9SDimitry Andric     return true;
115e8d8bef9SDimitry Andric   }
116e8d8bef9SDimitry Andric 
117e8d8bef9SDimitry Andric   return false;
118e8d8bef9SDimitry Andric }
119e8d8bef9SDimitry Andric 
// Attempt to simplify x86 vector shift-by-scalar/shift-by-immediate
// intrinsics (PSRA*/PSRL*/PSLL* and their *i immediate forms) into generic IR
// shifts. Unlike generic IR shifts, these intrinsics have defined behaviour
// for out-of-range shift amounts: logical shifts produce zero and arithmetic
// shifts are clamped to (BitWidth - 1). Returns the replacement value, or
// nullptr if no simplification applies.
static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;
  bool IsImm = false;

  // Classify the intrinsic: arithmetic vs logical, left vs right, and whether
  // the amount operand is an i32 immediate (IsImm) or a 128-bit vector whose
  // low 64 bits hold the scalar amount.
  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  Type *AmtVT = Amt->getType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift. If its guaranteed to be out of range, logical shifts combine
  // to zero and arithmetic shifts are clamped to (BitWidth - 1).
  if (IsImm) {
    assert(AmtVT->isIntegerTy(32) && "Unexpected shift-by-immediate type");
    KnownBits KnownAmtBits =
        llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
    if (KnownAmtBits.getMaxValue().ult(BitWidth)) {
      // In-range amount: splat it across all lanes and emit a generic shift.
      Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
      Amt = Builder.CreateVectorSplat(VWidth, Amt);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
    if (KnownAmtBits.getMinValue().uge(BitWidth)) {
      // Provably out-of-range amount: apply the x86 saturation semantics.
      if (LogicalShift)
        return ConstantAggregateZero::get(VT);
      Amt = ConstantInt::get(SVT, BitWidth - 1);
      return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
    }
  } else {
    // Ensure the first element has an in-range value and the rest of the
    // elements in the bottom 64 bits are zero.
    assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
           cast<VectorType>(AmtVT)->getElementType() == SVT &&
           "Unexpected shift-by-scalar type");
    unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
    APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
    APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
    KnownBits KnownLowerBits = llvm::computeKnownBits(
        Amt, DemandedLower, II.getModule()->getDataLayout());
    KnownBits KnownUpperBits = llvm::computeKnownBits(
        Amt, DemandedUpper, II.getModule()->getDataLayout());
    if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
        (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
      // Broadcast element 0 of the amount vector to every lane and emit a
      // generic shift.
      SmallVector<int, 16> ZeroSplat(VWidth, 0);
      Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
  }

  // Simplify if count is constant vector.
  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
  if (!CDV)
    return nullptr;

  // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
  // operand to compute the shift amount.
  assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");

  // Concatenate the sub-elements to create the 64-bit value.
  APInt Count(64, 0);
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count <<= BitWidth;
    Count |= SubElt->getValue().zextOrTrunc(64);
  }

  // If shift-by-zero then just return the original value.
  if (Count.isZero())
    return Vec;

  // Handle cases when Shift >= BitWidth.
  if (Count.uge(BitWidth)) {
    // If LogicalShift - just return zero.
    if (LogicalShift)
      return ConstantAggregateZero::get(VT);

    // If ArithmeticShift - clamp Shift to (BitWidth - 1).
    Count = APInt(64, BitWidth - 1);
  }

  // Get a constant vector of the same type as the first operand.
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
298e8d8bef9SDimitry Andric 
// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
// Returns the replacement value, or nullptr if no simplification applies.
static Value *simplifyX86varShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;

  // Classify the intrinsic: arithmetic vs logical, left vs right.
  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(II.getType());
  Type *SVT = VT->getElementType();
  int NumElts = VT->getNumElements();
  int BitWidth = SVT->getIntegerBitWidth();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift.
  KnownBits KnownAmt =
      llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
  if (KnownAmt.getMaxValue().ult(BitWidth)) {
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
  }

  // Simplify if all shift amounts are constant/undef.
  auto *CShift = dyn_cast<Constant>(Amt);
  if (!CShift)
    return nullptr;

  // Collect each element's shift amount.
  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
  bool AnyOutOfRange = false;
  SmallVector<int, 8> ShiftAmts;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (isa_and_nonnull<UndefValue>(CElt)) {
      ShiftAmts.push_back(-1);
      continue;
    }

    // Bail out on anything that is neither undef nor a constant int
    // (e.g. a constant expression element).
    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    if (!COp)
      return nullptr;

    // Handle out of range shifts.
    // If LogicalShift - set to BitWidth (special case).
    // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
    APInt ShiftVal = COp->getValue();
    if (ShiftVal.uge(BitWidth)) {
      AnyOutOfRange = LogicalShift;
      ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
      continue;
    }

    ShiftAmts.push_back((int)ShiftVal.getZExtValue());
  }

  // If all elements out of range or UNDEF, return vector of zeros/undefs.
  // ArithmeticShift should only hit this if they are all UNDEF.
  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
  if (llvm::all_of(ShiftAmts, OutOfRange)) {
    SmallVector<Constant *, 8> ConstantVec;
    for (int Idx : ShiftAmts) {
      if (Idx < 0) {
        ConstantVec.push_back(UndefValue::get(SVT));
      } else {
        assert(LogicalShift && "Logical shift expected");
        ConstantVec.push_back(ConstantInt::getNullValue(SVT));
      }
    }
    return ConstantVector::get(ConstantVec);
  }

  // We can't handle only some out of range values with generic logical shifts.
  if (AnyOutOfRange)
    return nullptr;

  // Build the shift amount constant vector.
  SmallVector<Constant *, 8> ShiftVecAmts;
  for (int Idx : ShiftAmts) {
    if (Idx < 0)
      ShiftVecAmts.push_back(UndefValue::get(SVT));
    else
      ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
  }
  auto ShiftVec = ConstantVector::get(ShiftVecAmts);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
437e8d8bef9SDimitry Andric 
438e8d8bef9SDimitry Andric static Value *simplifyX86pack(IntrinsicInst &II,
439e8d8bef9SDimitry Andric                               InstCombiner::BuilderTy &Builder, bool IsSigned) {
440e8d8bef9SDimitry Andric   Value *Arg0 = II.getArgOperand(0);
441e8d8bef9SDimitry Andric   Value *Arg1 = II.getArgOperand(1);
442e8d8bef9SDimitry Andric   Type *ResTy = II.getType();
443e8d8bef9SDimitry Andric 
444e8d8bef9SDimitry Andric   // Fast all undef handling.
445e8d8bef9SDimitry Andric   if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
446e8d8bef9SDimitry Andric     return UndefValue::get(ResTy);
447e8d8bef9SDimitry Andric 
448e8d8bef9SDimitry Andric   auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
449e8d8bef9SDimitry Andric   unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
450e8d8bef9SDimitry Andric   unsigned NumSrcElts = ArgTy->getNumElements();
451e8d8bef9SDimitry Andric   assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
452e8d8bef9SDimitry Andric          "Unexpected packing types");
453e8d8bef9SDimitry Andric 
454e8d8bef9SDimitry Andric   unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
455e8d8bef9SDimitry Andric   unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
456e8d8bef9SDimitry Andric   unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
457e8d8bef9SDimitry Andric   assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
458e8d8bef9SDimitry Andric          "Unexpected packing types");
459e8d8bef9SDimitry Andric 
460e8d8bef9SDimitry Andric   // Constant folding.
461e8d8bef9SDimitry Andric   if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
462e8d8bef9SDimitry Andric     return nullptr;
463e8d8bef9SDimitry Andric 
464e8d8bef9SDimitry Andric   // Clamp Values - signed/unsigned both use signed clamp values, but they
465e8d8bef9SDimitry Andric   // differ on the min/max values.
466e8d8bef9SDimitry Andric   APInt MinValue, MaxValue;
467e8d8bef9SDimitry Andric   if (IsSigned) {
468e8d8bef9SDimitry Andric     // PACKSS: Truncate signed value with signed saturation.
469e8d8bef9SDimitry Andric     // Source values less than dst minint are saturated to minint.
470e8d8bef9SDimitry Andric     // Source values greater than dst maxint are saturated to maxint.
471e8d8bef9SDimitry Andric     MinValue =
472e8d8bef9SDimitry Andric         APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
473e8d8bef9SDimitry Andric     MaxValue =
474e8d8bef9SDimitry Andric         APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
475e8d8bef9SDimitry Andric   } else {
476e8d8bef9SDimitry Andric     // PACKUS: Truncate signed value with unsigned saturation.
477e8d8bef9SDimitry Andric     // Source values less than zero are saturated to zero.
478e8d8bef9SDimitry Andric     // Source values greater than dst maxuint are saturated to maxuint.
479349cc55cSDimitry Andric     MinValue = APInt::getZero(SrcScalarSizeInBits);
480e8d8bef9SDimitry Andric     MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
481e8d8bef9SDimitry Andric   }
482e8d8bef9SDimitry Andric 
483e8d8bef9SDimitry Andric   auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
484e8d8bef9SDimitry Andric   auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
485e8d8bef9SDimitry Andric   Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
486e8d8bef9SDimitry Andric   Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
487e8d8bef9SDimitry Andric   Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
488e8d8bef9SDimitry Andric   Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
489e8d8bef9SDimitry Andric 
490e8d8bef9SDimitry Andric   // Shuffle clamped args together at the lane level.
491e8d8bef9SDimitry Andric   SmallVector<int, 32> PackMask;
492e8d8bef9SDimitry Andric   for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
493e8d8bef9SDimitry Andric     for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
494e8d8bef9SDimitry Andric       PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
495e8d8bef9SDimitry Andric     for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
496e8d8bef9SDimitry Andric       PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
497e8d8bef9SDimitry Andric   }
498e8d8bef9SDimitry Andric   auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
499e8d8bef9SDimitry Andric 
500e8d8bef9SDimitry Andric   // Truncate to dst size.
501e8d8bef9SDimitry Andric   return Builder.CreateTrunc(Shuffle, ResTy);
502e8d8bef9SDimitry Andric }
503e8d8bef9SDimitry Andric 
504e8d8bef9SDimitry Andric static Value *simplifyX86movmsk(const IntrinsicInst &II,
505e8d8bef9SDimitry Andric                                 InstCombiner::BuilderTy &Builder) {
506e8d8bef9SDimitry Andric   Value *Arg = II.getArgOperand(0);
507e8d8bef9SDimitry Andric   Type *ResTy = II.getType();
508e8d8bef9SDimitry Andric 
509e8d8bef9SDimitry Andric   // movmsk(undef) -> zero as we must ensure the upper bits are zero.
510e8d8bef9SDimitry Andric   if (isa<UndefValue>(Arg))
511e8d8bef9SDimitry Andric     return Constant::getNullValue(ResTy);
512e8d8bef9SDimitry Andric 
513e8d8bef9SDimitry Andric   auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
514e8d8bef9SDimitry Andric   // We can't easily peek through x86_mmx types.
515e8d8bef9SDimitry Andric   if (!ArgTy)
516e8d8bef9SDimitry Andric     return nullptr;
517e8d8bef9SDimitry Andric 
518e8d8bef9SDimitry Andric   // Expand MOVMSK to compare/bitcast/zext:
519e8d8bef9SDimitry Andric   // e.g. PMOVMSKB(v16i8 x):
520e8d8bef9SDimitry Andric   // %cmp = icmp slt <16 x i8> %x, zeroinitializer
521e8d8bef9SDimitry Andric   // %int = bitcast <16 x i1> %cmp to i16
522e8d8bef9SDimitry Andric   // %res = zext i16 %int to i32
523e8d8bef9SDimitry Andric   unsigned NumElts = ArgTy->getNumElements();
524e8d8bef9SDimitry Andric   Type *IntegerTy = Builder.getIntNTy(NumElts);
525e8d8bef9SDimitry Andric 
52681ad6265SDimitry Andric   Value *Res = Builder.CreateBitCast(Arg, VectorType::getInteger(ArgTy));
52781ad6265SDimitry Andric   Res = Builder.CreateIsNeg(Res);
528e8d8bef9SDimitry Andric   Res = Builder.CreateBitCast(Res, IntegerTy);
529e8d8bef9SDimitry Andric   Res = Builder.CreateZExtOrTrunc(Res, ResTy);
530e8d8bef9SDimitry Andric   return Res;
531e8d8bef9SDimitry Andric }
532e8d8bef9SDimitry Andric 
533e8d8bef9SDimitry Andric static Value *simplifyX86addcarry(const IntrinsicInst &II,
534e8d8bef9SDimitry Andric                                   InstCombiner::BuilderTy &Builder) {
535e8d8bef9SDimitry Andric   Value *CarryIn = II.getArgOperand(0);
536e8d8bef9SDimitry Andric   Value *Op1 = II.getArgOperand(1);
537e8d8bef9SDimitry Andric   Value *Op2 = II.getArgOperand(2);
538e8d8bef9SDimitry Andric   Type *RetTy = II.getType();
539e8d8bef9SDimitry Andric   Type *OpTy = Op1->getType();
540e8d8bef9SDimitry Andric   assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
541e8d8bef9SDimitry Andric          RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
542e8d8bef9SDimitry Andric          "Unexpected types for x86 addcarry");
543e8d8bef9SDimitry Andric 
544e8d8bef9SDimitry Andric   // If carry-in is zero, this is just an unsigned add with overflow.
545e8d8bef9SDimitry Andric   if (match(CarryIn, PatternMatch::m_ZeroInt())) {
546e8d8bef9SDimitry Andric     Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
547e8d8bef9SDimitry Andric                                           {Op1, Op2});
548e8d8bef9SDimitry Andric     // The types have to be adjusted to match the x86 call types.
549e8d8bef9SDimitry Andric     Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
550e8d8bef9SDimitry Andric     Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
551e8d8bef9SDimitry Andric                                        Builder.getInt8Ty());
552bdd1243dSDimitry Andric     Value *Res = PoisonValue::get(RetTy);
553e8d8bef9SDimitry Andric     Res = Builder.CreateInsertValue(Res, UAddOV, 0);
554e8d8bef9SDimitry Andric     return Builder.CreateInsertValue(Res, UAddResult, 1);
555e8d8bef9SDimitry Andric   }
556e8d8bef9SDimitry Andric 
557e8d8bef9SDimitry Andric   return nullptr;
558e8d8bef9SDimitry Andric }
559e8d8bef9SDimitry Andric 
560*06c3fb27SDimitry Andric static Value *simplifyTernarylogic(const IntrinsicInst &II,
561*06c3fb27SDimitry Andric                                    InstCombiner::BuilderTy &Builder) {
562*06c3fb27SDimitry Andric 
563*06c3fb27SDimitry Andric   auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
564*06c3fb27SDimitry Andric   if (!ArgImm || ArgImm->getValue().uge(256))
565*06c3fb27SDimitry Andric     return nullptr;
566*06c3fb27SDimitry Andric 
567*06c3fb27SDimitry Andric   Value *ArgA = II.getArgOperand(0);
568*06c3fb27SDimitry Andric   Value *ArgB = II.getArgOperand(1);
569*06c3fb27SDimitry Andric   Value *ArgC = II.getArgOperand(2);
570*06c3fb27SDimitry Andric 
571*06c3fb27SDimitry Andric   Type *Ty = II.getType();
572*06c3fb27SDimitry Andric 
573*06c3fb27SDimitry Andric   auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
574*06c3fb27SDimitry Andric     return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
575*06c3fb27SDimitry Andric   };
576*06c3fb27SDimitry Andric   auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
577*06c3fb27SDimitry Andric     return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
578*06c3fb27SDimitry Andric   };
579*06c3fb27SDimitry Andric   auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
580*06c3fb27SDimitry Andric     return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
581*06c3fb27SDimitry Andric   };
582*06c3fb27SDimitry Andric   auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
583*06c3fb27SDimitry Andric     return {Builder.CreateNot(V.first), ~V.second};
584*06c3fb27SDimitry Andric   };
585*06c3fb27SDimitry Andric   auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
586*06c3fb27SDimitry Andric   auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
587*06c3fb27SDimitry Andric   auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };
588*06c3fb27SDimitry Andric 
589*06c3fb27SDimitry Andric   bool AIsConst = match(ArgA, PatternMatch::m_ImmConstant());
590*06c3fb27SDimitry Andric   bool BIsConst = match(ArgB, PatternMatch::m_ImmConstant());
591*06c3fb27SDimitry Andric   bool CIsConst = match(ArgC, PatternMatch::m_ImmConstant());
592*06c3fb27SDimitry Andric 
593*06c3fb27SDimitry Andric   bool ABIsConst = AIsConst && BIsConst;
594*06c3fb27SDimitry Andric   bool ACIsConst = AIsConst && CIsConst;
595*06c3fb27SDimitry Andric   bool BCIsConst = BIsConst && CIsConst;
596*06c3fb27SDimitry Andric   bool ABCIsConst = AIsConst && BIsConst && CIsConst;
597*06c3fb27SDimitry Andric 
598*06c3fb27SDimitry Andric   // Use for verification. Its a big table. Its difficult to go from Imm ->
599*06c3fb27SDimitry Andric   // logic ops, but easy to verify that a set of logic ops is correct. We track
600*06c3fb27SDimitry Andric   // the logic ops through the second value in the pair. At the end it should
601*06c3fb27SDimitry Andric   // equal Imm.
602*06c3fb27SDimitry Andric   std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
603*06c3fb27SDimitry Andric   std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
604*06c3fb27SDimitry Andric   std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
605*06c3fb27SDimitry Andric   std::pair<Value *, uint8_t> Res = {nullptr, 0};
606*06c3fb27SDimitry Andric 
607*06c3fb27SDimitry Andric   // Currently we only handle cases that convert directly to another instruction
608*06c3fb27SDimitry Andric   // or cases where all the ops are constant.  This is because we don't properly
609*06c3fb27SDimitry Andric   // handle creating ternary ops in the backend, so splitting them here may
610*06c3fb27SDimitry Andric   // cause regressions. As the backend improves, uncomment more cases.
611*06c3fb27SDimitry Andric 
612*06c3fb27SDimitry Andric   uint8_t Imm = ArgImm->getValue().getZExtValue();
613*06c3fb27SDimitry Andric   switch (Imm) {
614*06c3fb27SDimitry Andric   case 0x0:
615*06c3fb27SDimitry Andric     Res = {Constant::getNullValue(Ty), 0};
616*06c3fb27SDimitry Andric     break;
617*06c3fb27SDimitry Andric   case 0x1:
618*06c3fb27SDimitry Andric     if (ABCIsConst)
619*06c3fb27SDimitry Andric       Res = Nor(Or(A, B), C);
620*06c3fb27SDimitry Andric     break;
621*06c3fb27SDimitry Andric   case 0x2:
622*06c3fb27SDimitry Andric     if (ABCIsConst)
623*06c3fb27SDimitry Andric       Res = And(Nor(A, B), C);
624*06c3fb27SDimitry Andric     break;
625*06c3fb27SDimitry Andric   case 0x3:
626*06c3fb27SDimitry Andric     if (ABIsConst)
627*06c3fb27SDimitry Andric       Res = Nor(A, B);
628*06c3fb27SDimitry Andric     break;
629*06c3fb27SDimitry Andric   case 0x4:
630*06c3fb27SDimitry Andric     if (ABCIsConst)
631*06c3fb27SDimitry Andric       Res = And(Nor(A, C), B);
632*06c3fb27SDimitry Andric     break;
633*06c3fb27SDimitry Andric   case 0x5:
634*06c3fb27SDimitry Andric     if (ACIsConst)
635*06c3fb27SDimitry Andric       Res = Nor(A, C);
636*06c3fb27SDimitry Andric     break;
637*06c3fb27SDimitry Andric   case 0x6:
638*06c3fb27SDimitry Andric     if (ABCIsConst)
639*06c3fb27SDimitry Andric       Res = Nor(A, Xnor(B, C));
640*06c3fb27SDimitry Andric     break;
641*06c3fb27SDimitry Andric   case 0x7:
642*06c3fb27SDimitry Andric     if (ABCIsConst)
643*06c3fb27SDimitry Andric       Res = Nor(A, And(B, C));
644*06c3fb27SDimitry Andric     break;
645*06c3fb27SDimitry Andric   case 0x8:
646*06c3fb27SDimitry Andric     if (ABCIsConst)
647*06c3fb27SDimitry Andric       Res = Nor(A, Nand(B, C));
648*06c3fb27SDimitry Andric     break;
649*06c3fb27SDimitry Andric   case 0x9:
650*06c3fb27SDimitry Andric     if (ABCIsConst)
651*06c3fb27SDimitry Andric       Res = Nor(A, Xor(B, C));
652*06c3fb27SDimitry Andric     break;
653*06c3fb27SDimitry Andric   case 0xa:
654*06c3fb27SDimitry Andric     if (ACIsConst)
655*06c3fb27SDimitry Andric       Res = Nor(A, Not(C));
656*06c3fb27SDimitry Andric     break;
657*06c3fb27SDimitry Andric   case 0xb:
658*06c3fb27SDimitry Andric     if (ABCIsConst)
659*06c3fb27SDimitry Andric       Res = Nor(A, Nor(C, Not(B)));
660*06c3fb27SDimitry Andric     break;
661*06c3fb27SDimitry Andric   case 0xc:
662*06c3fb27SDimitry Andric     if (ABIsConst)
663*06c3fb27SDimitry Andric       Res = Nor(A, Not(B));
664*06c3fb27SDimitry Andric     break;
665*06c3fb27SDimitry Andric   case 0xd:
666*06c3fb27SDimitry Andric     if (ABCIsConst)
667*06c3fb27SDimitry Andric       Res = Nor(A, Nor(B, Not(C)));
668*06c3fb27SDimitry Andric     break;
669*06c3fb27SDimitry Andric   case 0xe:
670*06c3fb27SDimitry Andric     if (ABCIsConst)
671*06c3fb27SDimitry Andric       Res = Nor(A, Nor(B, C));
672*06c3fb27SDimitry Andric     break;
673*06c3fb27SDimitry Andric   case 0xf:
674*06c3fb27SDimitry Andric     Res = Not(A);
675*06c3fb27SDimitry Andric     break;
676*06c3fb27SDimitry Andric   case 0x10:
677*06c3fb27SDimitry Andric     if (ABCIsConst)
678*06c3fb27SDimitry Andric       Res = And(A, Nor(B, C));
679*06c3fb27SDimitry Andric     break;
680*06c3fb27SDimitry Andric   case 0x11:
681*06c3fb27SDimitry Andric     if (BCIsConst)
682*06c3fb27SDimitry Andric       Res = Nor(B, C);
683*06c3fb27SDimitry Andric     break;
684*06c3fb27SDimitry Andric   case 0x12:
685*06c3fb27SDimitry Andric     if (ABCIsConst)
686*06c3fb27SDimitry Andric       Res = Nor(Xnor(A, C), B);
687*06c3fb27SDimitry Andric     break;
688*06c3fb27SDimitry Andric   case 0x13:
689*06c3fb27SDimitry Andric     if (ABCIsConst)
690*06c3fb27SDimitry Andric       Res = Nor(And(A, C), B);
691*06c3fb27SDimitry Andric     break;
692*06c3fb27SDimitry Andric   case 0x14:
693*06c3fb27SDimitry Andric     if (ABCIsConst)
694*06c3fb27SDimitry Andric       Res = Nor(Xnor(A, B), C);
695*06c3fb27SDimitry Andric     break;
696*06c3fb27SDimitry Andric   case 0x15:
697*06c3fb27SDimitry Andric     if (ABCIsConst)
698*06c3fb27SDimitry Andric       Res = Nor(And(A, B), C);
699*06c3fb27SDimitry Andric     break;
700*06c3fb27SDimitry Andric   case 0x16:
701*06c3fb27SDimitry Andric     if (ABCIsConst)
702*06c3fb27SDimitry Andric       Res = Xor(Xor(A, B), And(Nand(A, B), C));
703*06c3fb27SDimitry Andric     break;
704*06c3fb27SDimitry Andric   case 0x17:
705*06c3fb27SDimitry Andric     if (ABCIsConst)
706*06c3fb27SDimitry Andric       Res = Xor(Or(A, B), Or(Xnor(A, B), C));
707*06c3fb27SDimitry Andric     break;
708*06c3fb27SDimitry Andric   case 0x18:
709*06c3fb27SDimitry Andric     if (ABCIsConst)
710*06c3fb27SDimitry Andric       Res = Nor(Xnor(A, B), Xnor(A, C));
711*06c3fb27SDimitry Andric     break;
712*06c3fb27SDimitry Andric   case 0x19:
713*06c3fb27SDimitry Andric     if (ABCIsConst)
714*06c3fb27SDimitry Andric       Res = And(Nand(A, B), Xnor(B, C));
715*06c3fb27SDimitry Andric     break;
716*06c3fb27SDimitry Andric   case 0x1a:
717*06c3fb27SDimitry Andric     if (ABCIsConst)
718*06c3fb27SDimitry Andric       Res = Xor(A, Or(And(A, B), C));
719*06c3fb27SDimitry Andric     break;
720*06c3fb27SDimitry Andric   case 0x1b:
721*06c3fb27SDimitry Andric     if (ABCIsConst)
722*06c3fb27SDimitry Andric       Res = Xor(A, Or(Xnor(A, B), C));
723*06c3fb27SDimitry Andric     break;
724*06c3fb27SDimitry Andric   case 0x1c:
725*06c3fb27SDimitry Andric     if (ABCIsConst)
726*06c3fb27SDimitry Andric       Res = Xor(A, Or(And(A, C), B));
727*06c3fb27SDimitry Andric     break;
728*06c3fb27SDimitry Andric   case 0x1d:
729*06c3fb27SDimitry Andric     if (ABCIsConst)
730*06c3fb27SDimitry Andric       Res = Xor(A, Or(Xnor(A, C), B));
731*06c3fb27SDimitry Andric     break;
732*06c3fb27SDimitry Andric   case 0x1e:
733*06c3fb27SDimitry Andric     if (ABCIsConst)
734*06c3fb27SDimitry Andric       Res = Xor(A, Or(B, C));
735*06c3fb27SDimitry Andric     break;
736*06c3fb27SDimitry Andric   case 0x1f:
737*06c3fb27SDimitry Andric     if (ABCIsConst)
738*06c3fb27SDimitry Andric       Res = Nand(A, Or(B, C));
739*06c3fb27SDimitry Andric     break;
740*06c3fb27SDimitry Andric   case 0x20:
741*06c3fb27SDimitry Andric     if (ABCIsConst)
742*06c3fb27SDimitry Andric       Res = Nor(Nand(A, C), B);
743*06c3fb27SDimitry Andric     break;
744*06c3fb27SDimitry Andric   case 0x21:
745*06c3fb27SDimitry Andric     if (ABCIsConst)
746*06c3fb27SDimitry Andric       Res = Nor(Xor(A, C), B);
747*06c3fb27SDimitry Andric     break;
748*06c3fb27SDimitry Andric   case 0x22:
749*06c3fb27SDimitry Andric     if (BCIsConst)
750*06c3fb27SDimitry Andric       Res = Nor(B, Not(C));
751*06c3fb27SDimitry Andric     break;
752*06c3fb27SDimitry Andric   case 0x23:
753*06c3fb27SDimitry Andric     if (ABCIsConst)
754*06c3fb27SDimitry Andric       Res = Nor(B, Nor(C, Not(A)));
755*06c3fb27SDimitry Andric     break;
756*06c3fb27SDimitry Andric   case 0x24:
757*06c3fb27SDimitry Andric     if (ABCIsConst)
758*06c3fb27SDimitry Andric       Res = Nor(Xnor(A, B), Xor(A, C));
759*06c3fb27SDimitry Andric     break;
760*06c3fb27SDimitry Andric   case 0x25:
761*06c3fb27SDimitry Andric     if (ABCIsConst)
762*06c3fb27SDimitry Andric       Res = Xor(A, Nand(Nand(A, B), C));
763*06c3fb27SDimitry Andric     break;
764*06c3fb27SDimitry Andric   case 0x26:
765*06c3fb27SDimitry Andric     if (ABCIsConst)
766*06c3fb27SDimitry Andric       Res = And(Nand(A, B), Xor(B, C));
767*06c3fb27SDimitry Andric     break;
768*06c3fb27SDimitry Andric   case 0x27:
769*06c3fb27SDimitry Andric     if (ABCIsConst)
770*06c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), C), B);
771*06c3fb27SDimitry Andric     break;
772*06c3fb27SDimitry Andric   case 0x28:
773*06c3fb27SDimitry Andric     if (ABCIsConst)
774*06c3fb27SDimitry Andric       Res = And(Xor(A, B), C);
775*06c3fb27SDimitry Andric     break;
776*06c3fb27SDimitry Andric   case 0x29:
777*06c3fb27SDimitry Andric     if (ABCIsConst)
778*06c3fb27SDimitry Andric       Res = Xor(Xor(A, B), Nor(And(A, B), C));
779*06c3fb27SDimitry Andric     break;
780*06c3fb27SDimitry Andric   case 0x2a:
781*06c3fb27SDimitry Andric     if (ABCIsConst)
782*06c3fb27SDimitry Andric       Res = And(Nand(A, B), C);
783*06c3fb27SDimitry Andric     break;
784*06c3fb27SDimitry Andric   case 0x2b:
785*06c3fb27SDimitry Andric     if (ABCIsConst)
786*06c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), Xor(A, C)), A);
787*06c3fb27SDimitry Andric     break;
788*06c3fb27SDimitry Andric   case 0x2c:
789*06c3fb27SDimitry Andric     if (ABCIsConst)
790*06c3fb27SDimitry Andric       Res = Nor(Xnor(A, B), Nor(B, C));
791*06c3fb27SDimitry Andric     break;
792*06c3fb27SDimitry Andric   case 0x2d:
793*06c3fb27SDimitry Andric     if (ABCIsConst)
794*06c3fb27SDimitry Andric       Res = Xor(A, Or(B, Not(C)));
795*06c3fb27SDimitry Andric     break;
796*06c3fb27SDimitry Andric   case 0x2e:
797*06c3fb27SDimitry Andric     if (ABCIsConst)
798*06c3fb27SDimitry Andric       Res = Xor(A, Or(Xor(A, C), B));
799*06c3fb27SDimitry Andric     break;
800*06c3fb27SDimitry Andric   case 0x2f:
801*06c3fb27SDimitry Andric     if (ABCIsConst)
802*06c3fb27SDimitry Andric       Res = Nand(A, Or(B, Not(C)));
803*06c3fb27SDimitry Andric     break;
804*06c3fb27SDimitry Andric   case 0x30:
805*06c3fb27SDimitry Andric     if (ABIsConst)
806*06c3fb27SDimitry Andric       Res = Nor(B, Not(A));
807*06c3fb27SDimitry Andric     break;
808*06c3fb27SDimitry Andric   case 0x31:
809*06c3fb27SDimitry Andric     if (ABCIsConst)
810*06c3fb27SDimitry Andric       Res = Nor(Nor(A, Not(C)), B);
811*06c3fb27SDimitry Andric     break;
812*06c3fb27SDimitry Andric   case 0x32:
813*06c3fb27SDimitry Andric     if (ABCIsConst)
814*06c3fb27SDimitry Andric       Res = Nor(Nor(A, C), B);
815*06c3fb27SDimitry Andric     break;
816*06c3fb27SDimitry Andric   case 0x33:
817*06c3fb27SDimitry Andric     Res = Not(B);
818*06c3fb27SDimitry Andric     break;
819*06c3fb27SDimitry Andric   case 0x34:
820*06c3fb27SDimitry Andric     if (ABCIsConst)
821*06c3fb27SDimitry Andric       Res = And(Xor(A, B), Nand(B, C));
822*06c3fb27SDimitry Andric     break;
823*06c3fb27SDimitry Andric   case 0x35:
824*06c3fb27SDimitry Andric     if (ABCIsConst)
825*06c3fb27SDimitry Andric       Res = Xor(B, Or(A, Xnor(B, C)));
826*06c3fb27SDimitry Andric     break;
827*06c3fb27SDimitry Andric   case 0x36:
828*06c3fb27SDimitry Andric     if (ABCIsConst)
829*06c3fb27SDimitry Andric       Res = Xor(Or(A, C), B);
830*06c3fb27SDimitry Andric     break;
831*06c3fb27SDimitry Andric   case 0x37:
832*06c3fb27SDimitry Andric     if (ABCIsConst)
833*06c3fb27SDimitry Andric       Res = Nand(Or(A, C), B);
834*06c3fb27SDimitry Andric     break;
835*06c3fb27SDimitry Andric   case 0x38:
836*06c3fb27SDimitry Andric     if (ABCIsConst)
837*06c3fb27SDimitry Andric       Res = Nor(Xnor(A, B), Nor(A, C));
838*06c3fb27SDimitry Andric     break;
839*06c3fb27SDimitry Andric   case 0x39:
840*06c3fb27SDimitry Andric     if (ABCIsConst)
841*06c3fb27SDimitry Andric       Res = Xor(Or(A, Not(C)), B);
842*06c3fb27SDimitry Andric     break;
843*06c3fb27SDimitry Andric   case 0x3a:
844*06c3fb27SDimitry Andric     if (ABCIsConst)
845*06c3fb27SDimitry Andric       Res = Xor(B, Or(A, Xor(B, C)));
846*06c3fb27SDimitry Andric     break;
847*06c3fb27SDimitry Andric   case 0x3b:
848*06c3fb27SDimitry Andric     if (ABCIsConst)
849*06c3fb27SDimitry Andric       Res = Nand(Or(A, Not(C)), B);
850*06c3fb27SDimitry Andric     break;
851*06c3fb27SDimitry Andric   case 0x3c:
852*06c3fb27SDimitry Andric     Res = Xor(A, B);
853*06c3fb27SDimitry Andric     break;
854*06c3fb27SDimitry Andric   case 0x3d:
855*06c3fb27SDimitry Andric     if (ABCIsConst)
856*06c3fb27SDimitry Andric       Res = Xor(A, Or(Nor(A, C), B));
857*06c3fb27SDimitry Andric     break;
858*06c3fb27SDimitry Andric   case 0x3e:
859*06c3fb27SDimitry Andric     if (ABCIsConst)
860*06c3fb27SDimitry Andric       Res = Xor(A, Or(Nor(A, Not(C)), B));
861*06c3fb27SDimitry Andric     break;
862*06c3fb27SDimitry Andric   case 0x3f:
863*06c3fb27SDimitry Andric     if (ABIsConst)
864*06c3fb27SDimitry Andric       Res = Nand(A, B);
865*06c3fb27SDimitry Andric     break;
866*06c3fb27SDimitry Andric   case 0x40:
867*06c3fb27SDimitry Andric     if (ABCIsConst)
868*06c3fb27SDimitry Andric       Res = Nor(Nand(A, B), C);
869*06c3fb27SDimitry Andric     break;
870*06c3fb27SDimitry Andric   case 0x41:
871*06c3fb27SDimitry Andric     if (ABCIsConst)
872*06c3fb27SDimitry Andric       Res = Nor(Xor(A, B), C);
873*06c3fb27SDimitry Andric     break;
874*06c3fb27SDimitry Andric   case 0x42:
875*06c3fb27SDimitry Andric     if (ABCIsConst)
876*06c3fb27SDimitry Andric       Res = Nor(Xor(A, B), Xnor(A, C));
877*06c3fb27SDimitry Andric     break;
878*06c3fb27SDimitry Andric   case 0x43:
879*06c3fb27SDimitry Andric     if (ABCIsConst)
880*06c3fb27SDimitry Andric       Res = Xor(A, Nand(Nand(A, C), B));
881*06c3fb27SDimitry Andric     break;
882*06c3fb27SDimitry Andric   case 0x44:
883*06c3fb27SDimitry Andric     if (BCIsConst)
884*06c3fb27SDimitry Andric       Res = Nor(C, Not(B));
885*06c3fb27SDimitry Andric     break;
886*06c3fb27SDimitry Andric   case 0x45:
887*06c3fb27SDimitry Andric     if (ABCIsConst)
888*06c3fb27SDimitry Andric       Res = Nor(Nor(B, Not(A)), C);
889*06c3fb27SDimitry Andric     break;
890*06c3fb27SDimitry Andric   case 0x46:
891*06c3fb27SDimitry Andric     if (ABCIsConst)
892*06c3fb27SDimitry Andric       Res = Xor(Or(And(A, C), B), C);
893*06c3fb27SDimitry Andric     break;
894*06c3fb27SDimitry Andric   case 0x47:
895*06c3fb27SDimitry Andric     if (ABCIsConst)
896*06c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, C), B), C);
897*06c3fb27SDimitry Andric     break;
898*06c3fb27SDimitry Andric   case 0x48:
899*06c3fb27SDimitry Andric     if (ABCIsConst)
900*06c3fb27SDimitry Andric       Res = And(Xor(A, C), B);
901*06c3fb27SDimitry Andric     break;
902*06c3fb27SDimitry Andric   case 0x49:
903*06c3fb27SDimitry Andric     if (ABCIsConst)
904*06c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), And(A, C)), C);
905*06c3fb27SDimitry Andric     break;
906*06c3fb27SDimitry Andric   case 0x4a:
907*06c3fb27SDimitry Andric     if (ABCIsConst)
908*06c3fb27SDimitry Andric       Res = Nor(Xnor(A, C), Nor(B, C));
909*06c3fb27SDimitry Andric     break;
910*06c3fb27SDimitry Andric   case 0x4b:
911*06c3fb27SDimitry Andric     if (ABCIsConst)
912*06c3fb27SDimitry Andric       Res = Xor(A, Or(C, Not(B)));
913*06c3fb27SDimitry Andric     break;
914*06c3fb27SDimitry Andric   case 0x4c:
915*06c3fb27SDimitry Andric     if (ABCIsConst)
916*06c3fb27SDimitry Andric       Res = And(Nand(A, C), B);
917*06c3fb27SDimitry Andric     break;
918*06c3fb27SDimitry Andric   case 0x4d:
919*06c3fb27SDimitry Andric     if (ABCIsConst)
920*06c3fb27SDimitry Andric       Res = Xor(Or(Xor(A, B), Xnor(A, C)), A);
921*06c3fb27SDimitry Andric     break;
922*06c3fb27SDimitry Andric   case 0x4e:
923*06c3fb27SDimitry Andric     if (ABCIsConst)
924*06c3fb27SDimitry Andric       Res = Xor(A, Or(Xor(A, B), C));
925*06c3fb27SDimitry Andric     break;
926*06c3fb27SDimitry Andric   case 0x4f:
927*06c3fb27SDimitry Andric     if (ABCIsConst)
928*06c3fb27SDimitry Andric       Res = Nand(A, Nand(B, Not(C)));
929*06c3fb27SDimitry Andric     break;
930*06c3fb27SDimitry Andric   case 0x50:
931*06c3fb27SDimitry Andric     if (ACIsConst)
932*06c3fb27SDimitry Andric       Res = Nor(C, Not(A));
933*06c3fb27SDimitry Andric     break;
934*06c3fb27SDimitry Andric   case 0x51:
935*06c3fb27SDimitry Andric     if (ABCIsConst)
936*06c3fb27SDimitry Andric       Res = Nor(Nor(A, Not(B)), C);
937*06c3fb27SDimitry Andric     break;
938*06c3fb27SDimitry Andric   case 0x52:
939*06c3fb27SDimitry Andric     if (ABCIsConst)
940*06c3fb27SDimitry Andric       Res = And(Xor(A, C), Nand(B, C));
941*06c3fb27SDimitry Andric     break;
942*06c3fb27SDimitry Andric   case 0x53:
943*06c3fb27SDimitry Andric     if (ABCIsConst)
944*06c3fb27SDimitry Andric       Res = Xor(Or(Xnor(B, C), A), C);
945*06c3fb27SDimitry Andric     break;
946*06c3fb27SDimitry Andric   case 0x54:
947*06c3fb27SDimitry Andric     if (ABCIsConst)
948*06c3fb27SDimitry Andric       Res = Nor(Nor(A, B), C);
949*06c3fb27SDimitry Andric     break;
950*06c3fb27SDimitry Andric   case 0x55:
951*06c3fb27SDimitry Andric     Res = Not(C);
952*06c3fb27SDimitry Andric     break;
953*06c3fb27SDimitry Andric   case 0x56:
954*06c3fb27SDimitry Andric     if (ABCIsConst)
955*06c3fb27SDimitry Andric       Res = Xor(Or(A, B), C);
956*06c3fb27SDimitry Andric     break;
957*06c3fb27SDimitry Andric   case 0x57:
958*06c3fb27SDimitry Andric     if (ABCIsConst)
959*06c3fb27SDimitry Andric       Res = Nand(Or(A, B), C);
960*06c3fb27SDimitry Andric     break;
961*06c3fb27SDimitry Andric   case 0x58:
962*06c3fb27SDimitry Andric     if (ABCIsConst)
963*06c3fb27SDimitry Andric       Res = Nor(Nor(A, B), Xnor(A, C));
964*06c3fb27SDimitry Andric     break;
965*06c3fb27SDimitry Andric   case 0x59:
966*06c3fb27SDimitry Andric     if (ABCIsConst)
967*06c3fb27SDimitry Andric       Res = Xor(Or(A, Not(B)), C);
968*06c3fb27SDimitry Andric     break;
969*06c3fb27SDimitry Andric   case 0x5a:
970*06c3fb27SDimitry Andric     Res = Xor(A, C);
971*06c3fb27SDimitry Andric     break;
972*06c3fb27SDimitry Andric   case 0x5b:
973*06c3fb27SDimitry Andric     if (ABCIsConst)
974*06c3fb27SDimitry Andric       Res = Xor(A, Or(Nor(A, B), C));
975*06c3fb27SDimitry Andric     break;
976*06c3fb27SDimitry Andric   case 0x5c:
977*06c3fb27SDimitry Andric     if (ABCIsConst)
978*06c3fb27SDimitry Andric       Res = Xor(Or(Xor(B, C), A), C);
979*06c3fb27SDimitry Andric     break;
980*06c3fb27SDimitry Andric   case 0x5d:
981*06c3fb27SDimitry Andric     if (ABCIsConst)
982*06c3fb27SDimitry Andric       Res = Nand(Or(A, Not(B)), C);
983*06c3fb27SDimitry Andric     break;
984*06c3fb27SDimitry Andric   case 0x5e:
985*06c3fb27SDimitry Andric     if (ABCIsConst)
986*06c3fb27SDimitry Andric       Res = Xor(A, Or(Nor(A, Not(B)), C));
987*06c3fb27SDimitry Andric     break;
988*06c3fb27SDimitry Andric   case 0x5f:
989*06c3fb27SDimitry Andric     if (ACIsConst)
990*06c3fb27SDimitry Andric       Res = Nand(A, C);
991*06c3fb27SDimitry Andric     break;
992*06c3fb27SDimitry Andric   case 0x60:
993*06c3fb27SDimitry Andric     if (ABCIsConst)
994*06c3fb27SDimitry Andric       Res = And(A, Xor(B, C));
995*06c3fb27SDimitry Andric     break;
996*06c3fb27SDimitry Andric   case 0x61:
997*06c3fb27SDimitry Andric     if (ABCIsConst)
998*06c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), And(B, C)), C);
999*06c3fb27SDimitry Andric     break;
1000*06c3fb27SDimitry Andric   case 0x62:
1001*06c3fb27SDimitry Andric     if (ABCIsConst)
1002*06c3fb27SDimitry Andric       Res = Nor(Nor(A, C), Xnor(B, C));
1003*06c3fb27SDimitry Andric     break;
1004*06c3fb27SDimitry Andric   case 0x63:
1005*06c3fb27SDimitry Andric     if (ABCIsConst)
1006*06c3fb27SDimitry Andric       Res = Xor(B, Or(C, Not(A)));
1007*06c3fb27SDimitry Andric     break;
1008*06c3fb27SDimitry Andric   case 0x64:
1009*06c3fb27SDimitry Andric     if (ABCIsConst)
1010*06c3fb27SDimitry Andric       Res = Nor(Nor(A, B), Xnor(B, C));
1011*06c3fb27SDimitry Andric     break;
1012*06c3fb27SDimitry Andric   case 0x65:
1013*06c3fb27SDimitry Andric     if (ABCIsConst)
1014*06c3fb27SDimitry Andric       Res = Xor(Or(B, Not(A)), C);
1015*06c3fb27SDimitry Andric     break;
1016*06c3fb27SDimitry Andric   case 0x66:
1017*06c3fb27SDimitry Andric     Res = Xor(B, C);
1018*06c3fb27SDimitry Andric     break;
1019*06c3fb27SDimitry Andric   case 0x67:
1020*06c3fb27SDimitry Andric     if (ABCIsConst)
1021*06c3fb27SDimitry Andric       Res = Or(Nor(A, B), Xor(B, C));
1022*06c3fb27SDimitry Andric     break;
1023*06c3fb27SDimitry Andric   case 0x68:
1024*06c3fb27SDimitry Andric     if (ABCIsConst)
1025*06c3fb27SDimitry Andric       Res = Xor(Xor(A, B), Nor(Nor(A, B), C));
1026*06c3fb27SDimitry Andric     break;
1027*06c3fb27SDimitry Andric   case 0x69:
1028*06c3fb27SDimitry Andric     if (ABCIsConst)
1029*06c3fb27SDimitry Andric       Res = Xor(Xnor(A, B), C);
1030*06c3fb27SDimitry Andric     break;
1031*06c3fb27SDimitry Andric   case 0x6a:
1032*06c3fb27SDimitry Andric     if (ABCIsConst)
1033*06c3fb27SDimitry Andric       Res = Xor(And(A, B), C);
1034*06c3fb27SDimitry Andric     break;
1035*06c3fb27SDimitry Andric   case 0x6b:
1036*06c3fb27SDimitry Andric     if (ABCIsConst)
1037*06c3fb27SDimitry Andric       Res = Or(Nor(A, B), Xor(Xnor(A, B), C));
1038*06c3fb27SDimitry Andric     break;
1039*06c3fb27SDimitry Andric   case 0x6c:
1040*06c3fb27SDimitry Andric     if (ABCIsConst)
1041*06c3fb27SDimitry Andric       Res = Xor(And(A, C), B);
1042*06c3fb27SDimitry Andric     break;
1043*06c3fb27SDimitry Andric   case 0x6d:
1044*06c3fb27SDimitry Andric     if (ABCIsConst)
1045*06c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), Nor(A, C)), C);
1046*06c3fb27SDimitry Andric     break;
1047*06c3fb27SDimitry Andric   case 0x6e:
1048*06c3fb27SDimitry Andric     if (ABCIsConst)
1049*06c3fb27SDimitry Andric       Res = Or(Nor(A, Not(B)), Xor(B, C));
1050*06c3fb27SDimitry Andric     break;
1051*06c3fb27SDimitry Andric   case 0x6f:
1052*06c3fb27SDimitry Andric     if (ABCIsConst)
1053*06c3fb27SDimitry Andric       Res = Nand(A, Xnor(B, C));
1054*06c3fb27SDimitry Andric     break;
1055*06c3fb27SDimitry Andric   case 0x70:
1056*06c3fb27SDimitry Andric     if (ABCIsConst)
1057*06c3fb27SDimitry Andric       Res = And(A, Nand(B, C));
1058*06c3fb27SDimitry Andric     break;
1059*06c3fb27SDimitry Andric   case 0x71:
1060*06c3fb27SDimitry Andric     if (ABCIsConst)
1061*06c3fb27SDimitry Andric       Res = Xor(Nor(Xor(A, B), Xor(A, C)), A);
1062*06c3fb27SDimitry Andric     break;
1063*06c3fb27SDimitry Andric   case 0x72:
1064*06c3fb27SDimitry Andric     if (ABCIsConst)
1065*06c3fb27SDimitry Andric       Res = Xor(Or(Xor(A, B), C), B);
1066*06c3fb27SDimitry Andric     break;
1067*06c3fb27SDimitry Andric   case 0x73:
1068*06c3fb27SDimitry Andric     if (ABCIsConst)
1069*06c3fb27SDimitry Andric       Res = Nand(Nand(A, Not(C)), B);
1070*06c3fb27SDimitry Andric     break;
1071*06c3fb27SDimitry Andric   case 0x74:
1072*06c3fb27SDimitry Andric     if (ABCIsConst)
1073*06c3fb27SDimitry Andric       Res = Xor(Or(Xor(A, C), B), C);
1074*06c3fb27SDimitry Andric     break;
1075*06c3fb27SDimitry Andric   case 0x75:
1076*06c3fb27SDimitry Andric     if (ABCIsConst)
1077*06c3fb27SDimitry Andric       Res = Nand(Nand(A, Not(B)), C);
1078*06c3fb27SDimitry Andric     break;
1079*06c3fb27SDimitry Andric   case 0x76:
1080*06c3fb27SDimitry Andric     if (ABCIsConst)
1081*06c3fb27SDimitry Andric       Res = Xor(B, Or(Nor(B, Not(A)), C));
1082*06c3fb27SDimitry Andric     break;
1083*06c3fb27SDimitry Andric   case 0x77:
1084*06c3fb27SDimitry Andric     if (BCIsConst)
1085*06c3fb27SDimitry Andric       Res = Nand(B, C);
1086*06c3fb27SDimitry Andric     break;
1087*06c3fb27SDimitry Andric   case 0x78:
1088*06c3fb27SDimitry Andric     if (ABCIsConst)
1089*06c3fb27SDimitry Andric       Res = Xor(A, And(B, C));
1090*06c3fb27SDimitry Andric     break;
1091*06c3fb27SDimitry Andric   case 0x79:
1092*06c3fb27SDimitry Andric     if (ABCIsConst)
1093*06c3fb27SDimitry Andric       Res = Xor(Or(Xnor(A, B), Nor(B, C)), C);
1094*06c3fb27SDimitry Andric     break;
1095*06c3fb27SDimitry Andric   case 0x7a:
1096*06c3fb27SDimitry Andric     if (ABCIsConst)
1097*06c3fb27SDimitry Andric       Res = Or(Xor(A, C), Nor(B, Not(A)));
1098*06c3fb27SDimitry Andric     break;
1099*06c3fb27SDimitry Andric   case 0x7b:
1100*06c3fb27SDimitry Andric     if (ABCIsConst)
1101*06c3fb27SDimitry Andric       Res = Nand(Xnor(A, C), B);
1102*06c3fb27SDimitry Andric     break;
1103*06c3fb27SDimitry Andric   case 0x7c:
1104*06c3fb27SDimitry Andric     if (ABCIsConst)
1105*06c3fb27SDimitry Andric       Res = Or(Xor(A, B), Nor(C, Not(A)));
1106*06c3fb27SDimitry Andric     break;
1107*06c3fb27SDimitry Andric   case 0x7d:
1108*06c3fb27SDimitry Andric     if (ABCIsConst)
1109*06c3fb27SDimitry Andric       Res = Nand(Xnor(A, B), C);
1110*06c3fb27SDimitry Andric     break;
1111*06c3fb27SDimitry Andric   case 0x7e:
1112*06c3fb27SDimitry Andric     if (ABCIsConst)
1113*06c3fb27SDimitry Andric       Res = Or(Xor(A, B), Xor(A, C));
1114*06c3fb27SDimitry Andric     break;
1115*06c3fb27SDimitry Andric   case 0x7f:
1116*06c3fb27SDimitry Andric     if (ABCIsConst)
1117*06c3fb27SDimitry Andric       Res = Nand(And(A, B), C);
1118*06c3fb27SDimitry Andric     break;
1119*06c3fb27SDimitry Andric   case 0x80:
1120*06c3fb27SDimitry Andric     if (ABCIsConst)
1121*06c3fb27SDimitry Andric       Res = And(And(A, B), C);
1122*06c3fb27SDimitry Andric     break;
1123*06c3fb27SDimitry Andric   case 0x81:
1124*06c3fb27SDimitry Andric     if (ABCIsConst)
1125*06c3fb27SDimitry Andric       Res = Nor(Xor(A, B), Xor(A, C));
1126*06c3fb27SDimitry Andric     break;
1127*06c3fb27SDimitry Andric   case 0x82:
1128*06c3fb27SDimitry Andric     if (ABCIsConst)
1129*06c3fb27SDimitry Andric       Res = And(Xnor(A, B), C);
1130*06c3fb27SDimitry Andric     break;
1131*06c3fb27SDimitry Andric   case 0x83:
1132*06c3fb27SDimitry Andric     if (ABCIsConst)
1133*06c3fb27SDimitry Andric       Res = Nor(Xor(A, B), Nor(C, Not(A)));
1134*06c3fb27SDimitry Andric     break;
1135*06c3fb27SDimitry Andric   case 0x84:
1136*06c3fb27SDimitry Andric     if (ABCIsConst)
1137*06c3fb27SDimitry Andric       Res = And(Xnor(A, C), B);
1138*06c3fb27SDimitry Andric     break;
1139*06c3fb27SDimitry Andric   case 0x85:
1140*06c3fb27SDimitry Andric     if (ABCIsConst)
1141*06c3fb27SDimitry Andric       Res = Nor(Xor(A, C), Nor(B, Not(A)));
1142*06c3fb27SDimitry Andric     break;
1143*06c3fb27SDimitry Andric   case 0x86:
1144*06c3fb27SDimitry Andric     if (ABCIsConst)
1145*06c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
1146*06c3fb27SDimitry Andric     break;
1147*06c3fb27SDimitry Andric   case 0x87:
1148*06c3fb27SDimitry Andric     if (ABCIsConst)
1149*06c3fb27SDimitry Andric       Res = Xor(A, Nand(B, C));
1150*06c3fb27SDimitry Andric     break;
1151*06c3fb27SDimitry Andric   case 0x88:
1152*06c3fb27SDimitry Andric     Res = And(B, C);
1153*06c3fb27SDimitry Andric     break;
1154*06c3fb27SDimitry Andric   case 0x89:
1155*06c3fb27SDimitry Andric     if (ABCIsConst)
1156*06c3fb27SDimitry Andric       Res = Xor(B, Nor(Nor(B, Not(A)), C));
1157*06c3fb27SDimitry Andric     break;
1158*06c3fb27SDimitry Andric   case 0x8a:
1159*06c3fb27SDimitry Andric     if (ABCIsConst)
1160*06c3fb27SDimitry Andric       Res = And(Nand(A, Not(B)), C);
1161*06c3fb27SDimitry Andric     break;
1162*06c3fb27SDimitry Andric   case 0x8b:
1163*06c3fb27SDimitry Andric     if (ABCIsConst)
1164*06c3fb27SDimitry Andric       Res = Xor(Nor(Xor(A, C), B), C);
1165*06c3fb27SDimitry Andric     break;
1166*06c3fb27SDimitry Andric   case 0x8c:
1167*06c3fb27SDimitry Andric     if (ABCIsConst)
1168*06c3fb27SDimitry Andric       Res = And(Nand(A, Not(C)), B);
1169*06c3fb27SDimitry Andric     break;
1170*06c3fb27SDimitry Andric   case 0x8d:
1171*06c3fb27SDimitry Andric     if (ABCIsConst)
1172*06c3fb27SDimitry Andric       Res = Xor(Nor(Xor(A, B), C), B);
1173*06c3fb27SDimitry Andric     break;
1174*06c3fb27SDimitry Andric   case 0x8e:
1175*06c3fb27SDimitry Andric     if (ABCIsConst)
1176*06c3fb27SDimitry Andric       Res = Xor(Or(Xor(A, B), Xor(A, C)), A);
1177*06c3fb27SDimitry Andric     break;
1178*06c3fb27SDimitry Andric   case 0x8f:
1179*06c3fb27SDimitry Andric     if (ABCIsConst)
1180*06c3fb27SDimitry Andric       Res = Nand(A, Nand(B, C));
1181*06c3fb27SDimitry Andric     break;
1182*06c3fb27SDimitry Andric   case 0x90:
1183*06c3fb27SDimitry Andric     if (ABCIsConst)
1184*06c3fb27SDimitry Andric       Res = And(A, Xnor(B, C));
1185*06c3fb27SDimitry Andric     break;
1186*06c3fb27SDimitry Andric   case 0x91:
1187*06c3fb27SDimitry Andric     if (ABCIsConst)
1188*06c3fb27SDimitry Andric       Res = Nor(Nor(A, Not(B)), Xor(B, C));
1189*06c3fb27SDimitry Andric     break;
1190*06c3fb27SDimitry Andric   case 0x92:
1191*06c3fb27SDimitry Andric     if (ABCIsConst)
1192*06c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
1193*06c3fb27SDimitry Andric     break;
1194*06c3fb27SDimitry Andric   case 0x93:
1195*06c3fb27SDimitry Andric     if (ABCIsConst)
1196*06c3fb27SDimitry Andric       Res = Xor(Nand(A, C), B);
1197*06c3fb27SDimitry Andric     break;
1198*06c3fb27SDimitry Andric   case 0x94:
1199*06c3fb27SDimitry Andric     if (ABCIsConst)
1200*06c3fb27SDimitry Andric       Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
1201*06c3fb27SDimitry Andric     break;
1202*06c3fb27SDimitry Andric   case 0x95:
1203*06c3fb27SDimitry Andric     if (ABCIsConst)
1204*06c3fb27SDimitry Andric       Res = Xor(Nand(A, B), C);
1205*06c3fb27SDimitry Andric     break;
1206*06c3fb27SDimitry Andric   case 0x96:
1207*06c3fb27SDimitry Andric     if (ABCIsConst)
1208*06c3fb27SDimitry Andric       Res = Xor(Xor(A, B), C);
1209*06c3fb27SDimitry Andric     break;
1210*06c3fb27SDimitry Andric   case 0x97:
1211*06c3fb27SDimitry Andric     if (ABCIsConst)
1212*06c3fb27SDimitry Andric       Res = Xor(Xor(A, B), Or(Nor(A, B), C));
1213*06c3fb27SDimitry Andric     break;
1214*06c3fb27SDimitry Andric   case 0x98:
1215*06c3fb27SDimitry Andric     if (ABCIsConst)
1216*06c3fb27SDimitry Andric       Res = Nor(Nor(A, B), Xor(B, C));
1217*06c3fb27SDimitry Andric     break;
1218*06c3fb27SDimitry Andric   case 0x99:
1219*06c3fb27SDimitry Andric     if (BCIsConst)
1220*06c3fb27SDimitry Andric       Res = Xnor(B, C);
1221*06c3fb27SDimitry Andric     break;
1222*06c3fb27SDimitry Andric   case 0x9a:
1223*06c3fb27SDimitry Andric     if (ABCIsConst)
1224*06c3fb27SDimitry Andric       Res = Xor(Nor(B, Not(A)), C);
1225*06c3fb27SDimitry Andric     break;
1226*06c3fb27SDimitry Andric   case 0x9b:
1227*06c3fb27SDimitry Andric     if (ABCIsConst)
1228*06c3fb27SDimitry Andric       Res = Or(Nor(A, B), Xnor(B, C));
1229*06c3fb27SDimitry Andric     break;
1230*06c3fb27SDimitry Andric   case 0x9c:
1231*06c3fb27SDimitry Andric     if (ABCIsConst)
1232*06c3fb27SDimitry Andric       Res = Xor(B, Nor(C, Not(A)));
1233*06c3fb27SDimitry Andric     break;
1234*06c3fb27SDimitry Andric   case 0x9d:
1235*06c3fb27SDimitry Andric     if (ABCIsConst)
1236*06c3fb27SDimitry Andric       Res = Or(Nor(A, C), Xnor(B, C));
1237*06c3fb27SDimitry Andric     break;
1238*06c3fb27SDimitry Andric   case 0x9e:
1239*06c3fb27SDimitry Andric     if (ABCIsConst)
1240*06c3fb27SDimitry Andric       Res = Xor(And(Xor(A, B), Nand(B, C)), C);
1241*06c3fb27SDimitry Andric     break;
1242*06c3fb27SDimitry Andric   case 0x9f:
1243*06c3fb27SDimitry Andric     if (ABCIsConst)
1244*06c3fb27SDimitry Andric       Res = Nand(A, Xor(B, C));
1245*06c3fb27SDimitry Andric     break;
1246*06c3fb27SDimitry Andric   case 0xa0:
1247*06c3fb27SDimitry Andric     Res = And(A, C);
1248*06c3fb27SDimitry Andric     break;
1249*06c3fb27SDimitry Andric   case 0xa1:
1250*06c3fb27SDimitry Andric     if (ABCIsConst)
1251*06c3fb27SDimitry Andric       Res = Xor(A, Nor(Nor(A, Not(B)), C));
1252*06c3fb27SDimitry Andric     break;
1253*06c3fb27SDimitry Andric   case 0xa2:
1254*06c3fb27SDimitry Andric     if (ABCIsConst)
1255*06c3fb27SDimitry Andric       Res = And(Or(A, Not(B)), C);
1256*06c3fb27SDimitry Andric     break;
1257*06c3fb27SDimitry Andric   case 0xa3:
1258*06c3fb27SDimitry Andric     if (ABCIsConst)
1259*06c3fb27SDimitry Andric       Res = Xor(Nor(Xor(B, C), A), C);
1260*06c3fb27SDimitry Andric     break;
1261*06c3fb27SDimitry Andric   case 0xa4:
1262*06c3fb27SDimitry Andric     if (ABCIsConst)
1263*06c3fb27SDimitry Andric       Res = Xor(A, Nor(Nor(A, B), C));
1264*06c3fb27SDimitry Andric     break;
1265*06c3fb27SDimitry Andric   case 0xa5:
1266*06c3fb27SDimitry Andric     if (ACIsConst)
1267*06c3fb27SDimitry Andric       Res = Xnor(A, C);
1268*06c3fb27SDimitry Andric     break;
1269*06c3fb27SDimitry Andric   case 0xa6:
1270*06c3fb27SDimitry Andric     if (ABCIsConst)
1271*06c3fb27SDimitry Andric       Res = Xor(Nor(A, Not(B)), C);
1272*06c3fb27SDimitry Andric     break;
1273*06c3fb27SDimitry Andric   case 0xa7:
1274*06c3fb27SDimitry Andric     if (ABCIsConst)
1275*06c3fb27SDimitry Andric       Res = Or(Nor(A, B), Xnor(A, C));
1276*06c3fb27SDimitry Andric     break;
1277*06c3fb27SDimitry Andric   case 0xa8:
1278*06c3fb27SDimitry Andric     if (ABCIsConst)
1279*06c3fb27SDimitry Andric       Res = And(Or(A, B), C);
1280*06c3fb27SDimitry Andric     break;
1281*06c3fb27SDimitry Andric   case 0xa9:
1282*06c3fb27SDimitry Andric     if (ABCIsConst)
1283*06c3fb27SDimitry Andric       Res = Xor(Nor(A, B), C);
1284*06c3fb27SDimitry Andric     break;
1285*06c3fb27SDimitry Andric   case 0xaa:
1286*06c3fb27SDimitry Andric     Res = C;
1287*06c3fb27SDimitry Andric     break;
1288*06c3fb27SDimitry Andric   case 0xab:
1289*06c3fb27SDimitry Andric     if (ABCIsConst)
1290*06c3fb27SDimitry Andric       Res = Or(Nor(A, B), C);
1291*06c3fb27SDimitry Andric     break;
1292*06c3fb27SDimitry Andric   case 0xac:
1293*06c3fb27SDimitry Andric     if (ABCIsConst)
1294*06c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(B, C), A), C);
1295*06c3fb27SDimitry Andric     break;
1296*06c3fb27SDimitry Andric   case 0xad:
1297*06c3fb27SDimitry Andric     if (ABCIsConst)
1298*06c3fb27SDimitry Andric       Res = Or(Xnor(A, C), And(B, C));
1299*06c3fb27SDimitry Andric     break;
1300*06c3fb27SDimitry Andric   case 0xae:
1301*06c3fb27SDimitry Andric     if (ABCIsConst)
1302*06c3fb27SDimitry Andric       Res = Or(Nor(A, Not(B)), C);
1303*06c3fb27SDimitry Andric     break;
1304*06c3fb27SDimitry Andric   case 0xaf:
1305*06c3fb27SDimitry Andric     if (ACIsConst)
1306*06c3fb27SDimitry Andric       Res = Or(C, Not(A));
1307*06c3fb27SDimitry Andric     break;
1308*06c3fb27SDimitry Andric   case 0xb0:
1309*06c3fb27SDimitry Andric     if (ABCIsConst)
1310*06c3fb27SDimitry Andric       Res = And(A, Nand(B, Not(C)));
1311*06c3fb27SDimitry Andric     break;
1312*06c3fb27SDimitry Andric   case 0xb1:
1313*06c3fb27SDimitry Andric     if (ABCIsConst)
1314*06c3fb27SDimitry Andric       Res = Xor(A, Nor(Xor(A, B), C));
1315*06c3fb27SDimitry Andric     break;
1316*06c3fb27SDimitry Andric   case 0xb2:
1317*06c3fb27SDimitry Andric     if (ABCIsConst)
1318*06c3fb27SDimitry Andric       Res = Xor(Nor(Xor(A, B), Xnor(A, C)), A);
1319*06c3fb27SDimitry Andric     break;
1320*06c3fb27SDimitry Andric   case 0xb3:
1321*06c3fb27SDimitry Andric     if (ABCIsConst)
1322*06c3fb27SDimitry Andric       Res = Nand(Nand(A, C), B);
1323*06c3fb27SDimitry Andric     break;
1324*06c3fb27SDimitry Andric   case 0xb4:
1325*06c3fb27SDimitry Andric     if (ABCIsConst)
1326*06c3fb27SDimitry Andric       Res = Xor(A, Nor(C, Not(B)));
1327*06c3fb27SDimitry Andric     break;
1328*06c3fb27SDimitry Andric   case 0xb5:
1329*06c3fb27SDimitry Andric     if (ABCIsConst)
1330*06c3fb27SDimitry Andric       Res = Or(Xnor(A, C), Nor(B, C));
1331*06c3fb27SDimitry Andric     break;
1332*06c3fb27SDimitry Andric   case 0xb6:
1333*06c3fb27SDimitry Andric     if (ABCIsConst)
1334*06c3fb27SDimitry Andric       Res = Xor(And(Xor(A, B), Nand(A, C)), C);
1335*06c3fb27SDimitry Andric     break;
1336*06c3fb27SDimitry Andric   case 0xb7:
1337*06c3fb27SDimitry Andric     if (ABCIsConst)
1338*06c3fb27SDimitry Andric       Res = Nand(Xor(A, C), B);
1339*06c3fb27SDimitry Andric     break;
1340*06c3fb27SDimitry Andric   case 0xb8:
1341*06c3fb27SDimitry Andric     if (ABCIsConst)
1342*06c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(A, C), B), C);
1343*06c3fb27SDimitry Andric     break;
1344*06c3fb27SDimitry Andric   case 0xb9:
1345*06c3fb27SDimitry Andric     if (ABCIsConst)
1346*06c3fb27SDimitry Andric       Res = Xor(Nor(And(A, C), B), C);
1347*06c3fb27SDimitry Andric     break;
1348*06c3fb27SDimitry Andric   case 0xba:
1349*06c3fb27SDimitry Andric     if (ABCIsConst)
1350*06c3fb27SDimitry Andric       Res = Or(Nor(B, Not(A)), C);
1351*06c3fb27SDimitry Andric     break;
1352*06c3fb27SDimitry Andric   case 0xbb:
1353*06c3fb27SDimitry Andric     if (BCIsConst)
1354*06c3fb27SDimitry Andric       Res = Or(C, Not(B));
1355*06c3fb27SDimitry Andric     break;
1356*06c3fb27SDimitry Andric   case 0xbc:
1357*06c3fb27SDimitry Andric     if (ABCIsConst)
1358*06c3fb27SDimitry Andric       Res = Xor(A, And(Nand(A, C), B));
1359*06c3fb27SDimitry Andric     break;
1360*06c3fb27SDimitry Andric   case 0xbd:
1361*06c3fb27SDimitry Andric     if (ABCIsConst)
1362*06c3fb27SDimitry Andric       Res = Or(Xor(A, B), Xnor(A, C));
1363*06c3fb27SDimitry Andric     break;
1364*06c3fb27SDimitry Andric   case 0xbe:
1365*06c3fb27SDimitry Andric     if (ABCIsConst)
1366*06c3fb27SDimitry Andric       Res = Or(Xor(A, B), C);
1367*06c3fb27SDimitry Andric     break;
1368*06c3fb27SDimitry Andric   case 0xbf:
1369*06c3fb27SDimitry Andric     if (ABCIsConst)
1370*06c3fb27SDimitry Andric       Res = Or(Nand(A, B), C);
1371*06c3fb27SDimitry Andric     break;
1372*06c3fb27SDimitry Andric   case 0xc0:
1373*06c3fb27SDimitry Andric     Res = And(A, B);
1374*06c3fb27SDimitry Andric     break;
1375*06c3fb27SDimitry Andric   case 0xc1:
1376*06c3fb27SDimitry Andric     if (ABCIsConst)
1377*06c3fb27SDimitry Andric       Res = Xor(A, Nor(Nor(A, Not(C)), B));
1378*06c3fb27SDimitry Andric     break;
1379*06c3fb27SDimitry Andric   case 0xc2:
1380*06c3fb27SDimitry Andric     if (ABCIsConst)
1381*06c3fb27SDimitry Andric       Res = Xor(A, Nor(Nor(A, C), B));
1382*06c3fb27SDimitry Andric     break;
1383*06c3fb27SDimitry Andric   case 0xc3:
1384*06c3fb27SDimitry Andric     if (ABIsConst)
1385*06c3fb27SDimitry Andric       Res = Xnor(A, B);
1386*06c3fb27SDimitry Andric     break;
1387*06c3fb27SDimitry Andric   case 0xc4:
1388*06c3fb27SDimitry Andric     if (ABCIsConst)
1389*06c3fb27SDimitry Andric       Res = And(Or(A, Not(C)), B);
1390*06c3fb27SDimitry Andric     break;
1391*06c3fb27SDimitry Andric   case 0xc5:
1392*06c3fb27SDimitry Andric     if (ABCIsConst)
1393*06c3fb27SDimitry Andric       Res = Xor(B, Nor(A, Xor(B, C)));
1394*06c3fb27SDimitry Andric     break;
1395*06c3fb27SDimitry Andric   case 0xc6:
1396*06c3fb27SDimitry Andric     if (ABCIsConst)
1397*06c3fb27SDimitry Andric       Res = Xor(Nor(A, Not(C)), B);
1398*06c3fb27SDimitry Andric     break;
1399*06c3fb27SDimitry Andric   case 0xc7:
1400*06c3fb27SDimitry Andric     if (ABCIsConst)
1401*06c3fb27SDimitry Andric       Res = Or(Xnor(A, B), Nor(A, C));
1402*06c3fb27SDimitry Andric     break;
1403*06c3fb27SDimitry Andric   case 0xc8:
1404*06c3fb27SDimitry Andric     if (ABCIsConst)
1405*06c3fb27SDimitry Andric       Res = And(Or(A, C), B);
1406*06c3fb27SDimitry Andric     break;
1407*06c3fb27SDimitry Andric   case 0xc9:
1408*06c3fb27SDimitry Andric     if (ABCIsConst)
1409*06c3fb27SDimitry Andric       Res = Xor(Nor(A, C), B);
1410*06c3fb27SDimitry Andric     break;
1411*06c3fb27SDimitry Andric   case 0xca:
1412*06c3fb27SDimitry Andric     if (ABCIsConst)
1413*06c3fb27SDimitry Andric       Res = Xor(B, Nor(A, Xnor(B, C)));
1414*06c3fb27SDimitry Andric     break;
1415*06c3fb27SDimitry Andric   case 0xcb:
1416*06c3fb27SDimitry Andric     if (ABCIsConst)
1417*06c3fb27SDimitry Andric       Res = Or(Xnor(A, B), And(B, C));
1418*06c3fb27SDimitry Andric     break;
1419*06c3fb27SDimitry Andric   case 0xcc:
1420*06c3fb27SDimitry Andric     Res = B;
1421*06c3fb27SDimitry Andric     break;
1422*06c3fb27SDimitry Andric   case 0xcd:
1423*06c3fb27SDimitry Andric     if (ABCIsConst)
1424*06c3fb27SDimitry Andric       Res = Or(Nor(A, C), B);
1425*06c3fb27SDimitry Andric     break;
1426*06c3fb27SDimitry Andric   case 0xce:
1427*06c3fb27SDimitry Andric     if (ABCIsConst)
1428*06c3fb27SDimitry Andric       Res = Or(Nor(A, Not(C)), B);
1429*06c3fb27SDimitry Andric     break;
1430*06c3fb27SDimitry Andric   case 0xcf:
1431*06c3fb27SDimitry Andric     if (ABIsConst)
1432*06c3fb27SDimitry Andric       Res = Or(B, Not(A));
1433*06c3fb27SDimitry Andric     break;
1434*06c3fb27SDimitry Andric   case 0xd0:
1435*06c3fb27SDimitry Andric     if (ABCIsConst)
1436*06c3fb27SDimitry Andric       Res = And(A, Or(B, Not(C)));
1437*06c3fb27SDimitry Andric     break;
1438*06c3fb27SDimitry Andric   case 0xd1:
1439*06c3fb27SDimitry Andric     if (ABCIsConst)
1440*06c3fb27SDimitry Andric       Res = Xor(A, Nor(Xor(A, C), B));
1441*06c3fb27SDimitry Andric     break;
1442*06c3fb27SDimitry Andric   case 0xd2:
1443*06c3fb27SDimitry Andric     if (ABCIsConst)
1444*06c3fb27SDimitry Andric       Res = Xor(A, Nor(B, Not(C)));
1445*06c3fb27SDimitry Andric     break;
1446*06c3fb27SDimitry Andric   case 0xd3:
1447*06c3fb27SDimitry Andric     if (ABCIsConst)
1448*06c3fb27SDimitry Andric       Res = Or(Xnor(A, B), Nor(B, C));
1449*06c3fb27SDimitry Andric     break;
1450*06c3fb27SDimitry Andric   case 0xd4:
1451*06c3fb27SDimitry Andric     if (ABCIsConst)
1452*06c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(A, B), Xor(A, C)), A);
1453*06c3fb27SDimitry Andric     break;
1454*06c3fb27SDimitry Andric   case 0xd5:
1455*06c3fb27SDimitry Andric     if (ABCIsConst)
1456*06c3fb27SDimitry Andric       Res = Nand(Nand(A, B), C);
1457*06c3fb27SDimitry Andric     break;
1458*06c3fb27SDimitry Andric   case 0xd6:
1459*06c3fb27SDimitry Andric     if (ABCIsConst)
1460*06c3fb27SDimitry Andric       Res = Xor(Xor(A, B), Or(And(A, B), C));
1461*06c3fb27SDimitry Andric     break;
1462*06c3fb27SDimitry Andric   case 0xd7:
1463*06c3fb27SDimitry Andric     if (ABCIsConst)
1464*06c3fb27SDimitry Andric       Res = Nand(Xor(A, B), C);
1465*06c3fb27SDimitry Andric     break;
1466*06c3fb27SDimitry Andric   case 0xd8:
1467*06c3fb27SDimitry Andric     if (ABCIsConst)
1468*06c3fb27SDimitry Andric       Res = Xor(Nor(Xnor(A, B), C), B);
1469*06c3fb27SDimitry Andric     break;
1470*06c3fb27SDimitry Andric   case 0xd9:
1471*06c3fb27SDimitry Andric     if (ABCIsConst)
1472*06c3fb27SDimitry Andric       Res = Or(And(A, B), Xnor(B, C));
1473*06c3fb27SDimitry Andric     break;
1474*06c3fb27SDimitry Andric   case 0xda:
1475*06c3fb27SDimitry Andric     if (ABCIsConst)
1476*06c3fb27SDimitry Andric       Res = Xor(A, And(Nand(A, B), C));
1477*06c3fb27SDimitry Andric     break;
1478*06c3fb27SDimitry Andric   case 0xdb:
1479*06c3fb27SDimitry Andric     if (ABCIsConst)
1480*06c3fb27SDimitry Andric       Res = Or(Xnor(A, B), Xor(A, C));
1481*06c3fb27SDimitry Andric     break;
1482*06c3fb27SDimitry Andric   case 0xdc:
1483*06c3fb27SDimitry Andric     if (ABCIsConst)
1484*06c3fb27SDimitry Andric       Res = Or(B, Nor(C, Not(A)));
1485*06c3fb27SDimitry Andric     break;
1486*06c3fb27SDimitry Andric   case 0xdd:
1487*06c3fb27SDimitry Andric     if (BCIsConst)
1488*06c3fb27SDimitry Andric       Res = Or(B, Not(C));
1489*06c3fb27SDimitry Andric     break;
1490*06c3fb27SDimitry Andric   case 0xde:
1491*06c3fb27SDimitry Andric     if (ABCIsConst)
1492*06c3fb27SDimitry Andric       Res = Or(Xor(A, C), B);
1493*06c3fb27SDimitry Andric     break;
1494*06c3fb27SDimitry Andric   case 0xdf:
1495*06c3fb27SDimitry Andric     if (ABCIsConst)
1496*06c3fb27SDimitry Andric       Res = Or(Nand(A, C), B);
1497*06c3fb27SDimitry Andric     break;
1498*06c3fb27SDimitry Andric   case 0xe0:
1499*06c3fb27SDimitry Andric     if (ABCIsConst)
1500*06c3fb27SDimitry Andric       Res = And(A, Or(B, C));
1501*06c3fb27SDimitry Andric     break;
1502*06c3fb27SDimitry Andric   case 0xe1:
1503*06c3fb27SDimitry Andric     if (ABCIsConst)
1504*06c3fb27SDimitry Andric       Res = Xor(A, Nor(B, C));
1505*06c3fb27SDimitry Andric     break;
1506*06c3fb27SDimitry Andric   case 0xe2:
1507*06c3fb27SDimitry Andric     if (ABCIsConst)
1508*06c3fb27SDimitry Andric       Res = Xor(A, Nor(Xnor(A, C), B));
1509*06c3fb27SDimitry Andric     break;
1510*06c3fb27SDimitry Andric   case 0xe3:
1511*06c3fb27SDimitry Andric     if (ABCIsConst)
1512*06c3fb27SDimitry Andric       Res = Xor(A, Nor(And(A, C), B));
1513*06c3fb27SDimitry Andric     break;
1514*06c3fb27SDimitry Andric   case 0xe4:
1515*06c3fb27SDimitry Andric     if (ABCIsConst)
1516*06c3fb27SDimitry Andric       Res = Xor(A, Nor(Xnor(A, B), C));
1517*06c3fb27SDimitry Andric     break;
1518*06c3fb27SDimitry Andric   case 0xe5:
1519*06c3fb27SDimitry Andric     if (ABCIsConst)
1520*06c3fb27SDimitry Andric       Res = Xor(A, Nor(And(A, B), C));
1521*06c3fb27SDimitry Andric     break;
1522*06c3fb27SDimitry Andric   case 0xe6:
1523*06c3fb27SDimitry Andric     if (ABCIsConst)
1524*06c3fb27SDimitry Andric       Res = Or(And(A, B), Xor(B, C));
1525*06c3fb27SDimitry Andric     break;
1526*06c3fb27SDimitry Andric   case 0xe7:
1527*06c3fb27SDimitry Andric     if (ABCIsConst)
1528*06c3fb27SDimitry Andric       Res = Or(Xnor(A, B), Xnor(A, C));
1529*06c3fb27SDimitry Andric     break;
1530*06c3fb27SDimitry Andric   case 0xe8:
1531*06c3fb27SDimitry Andric     if (ABCIsConst)
1532*06c3fb27SDimitry Andric       Res = Xor(Or(A, B), Nor(Xnor(A, B), C));
1533*06c3fb27SDimitry Andric     break;
1534*06c3fb27SDimitry Andric   case 0xe9:
1535*06c3fb27SDimitry Andric     if (ABCIsConst)
1536*06c3fb27SDimitry Andric       Res = Xor(Xor(A, B), Nand(Nand(A, B), C));
1537*06c3fb27SDimitry Andric     break;
1538*06c3fb27SDimitry Andric   case 0xea:
1539*06c3fb27SDimitry Andric     if (ABCIsConst)
1540*06c3fb27SDimitry Andric       Res = Or(And(A, B), C);
1541*06c3fb27SDimitry Andric     break;
1542*06c3fb27SDimitry Andric   case 0xeb:
1543*06c3fb27SDimitry Andric     if (ABCIsConst)
1544*06c3fb27SDimitry Andric       Res = Or(Xnor(A, B), C);
1545*06c3fb27SDimitry Andric     break;
1546*06c3fb27SDimitry Andric   case 0xec:
1547*06c3fb27SDimitry Andric     if (ABCIsConst)
1548*06c3fb27SDimitry Andric       Res = Or(And(A, C), B);
1549*06c3fb27SDimitry Andric     break;
1550*06c3fb27SDimitry Andric   case 0xed:
1551*06c3fb27SDimitry Andric     if (ABCIsConst)
1552*06c3fb27SDimitry Andric       Res = Or(Xnor(A, C), B);
1553*06c3fb27SDimitry Andric     break;
1554*06c3fb27SDimitry Andric   case 0xee:
1555*06c3fb27SDimitry Andric     Res = Or(B, C);
1556*06c3fb27SDimitry Andric     break;
1557*06c3fb27SDimitry Andric   case 0xef:
1558*06c3fb27SDimitry Andric     if (ABCIsConst)
1559*06c3fb27SDimitry Andric       Res = Nand(A, Nor(B, C));
1560*06c3fb27SDimitry Andric     break;
1561*06c3fb27SDimitry Andric   case 0xf0:
1562*06c3fb27SDimitry Andric     Res = A;
1563*06c3fb27SDimitry Andric     break;
1564*06c3fb27SDimitry Andric   case 0xf1:
1565*06c3fb27SDimitry Andric     if (ABCIsConst)
1566*06c3fb27SDimitry Andric       Res = Or(A, Nor(B, C));
1567*06c3fb27SDimitry Andric     break;
1568*06c3fb27SDimitry Andric   case 0xf2:
1569*06c3fb27SDimitry Andric     if (ABCIsConst)
1570*06c3fb27SDimitry Andric       Res = Or(A, Nor(B, Not(C)));
1571*06c3fb27SDimitry Andric     break;
1572*06c3fb27SDimitry Andric   case 0xf3:
1573*06c3fb27SDimitry Andric     if (ABIsConst)
1574*06c3fb27SDimitry Andric       Res = Or(A, Not(B));
1575*06c3fb27SDimitry Andric     break;
1576*06c3fb27SDimitry Andric   case 0xf4:
1577*06c3fb27SDimitry Andric     if (ABCIsConst)
1578*06c3fb27SDimitry Andric       Res = Or(A, Nor(C, Not(B)));
1579*06c3fb27SDimitry Andric     break;
1580*06c3fb27SDimitry Andric   case 0xf5:
1581*06c3fb27SDimitry Andric     if (ACIsConst)
1582*06c3fb27SDimitry Andric       Res = Or(A, Not(C));
1583*06c3fb27SDimitry Andric     break;
1584*06c3fb27SDimitry Andric   case 0xf6:
1585*06c3fb27SDimitry Andric     if (ABCIsConst)
1586*06c3fb27SDimitry Andric       Res = Or(A, Xor(B, C));
1587*06c3fb27SDimitry Andric     break;
1588*06c3fb27SDimitry Andric   case 0xf7:
1589*06c3fb27SDimitry Andric     if (ABCIsConst)
1590*06c3fb27SDimitry Andric       Res = Or(A, Nand(B, C));
1591*06c3fb27SDimitry Andric     break;
1592*06c3fb27SDimitry Andric   case 0xf8:
1593*06c3fb27SDimitry Andric     if (ABCIsConst)
1594*06c3fb27SDimitry Andric       Res = Or(A, And(B, C));
1595*06c3fb27SDimitry Andric     break;
1596*06c3fb27SDimitry Andric   case 0xf9:
1597*06c3fb27SDimitry Andric     if (ABCIsConst)
1598*06c3fb27SDimitry Andric       Res = Or(A, Xnor(B, C));
1599*06c3fb27SDimitry Andric     break;
1600*06c3fb27SDimitry Andric   case 0xfa:
1601*06c3fb27SDimitry Andric     Res = Or(A, C);
1602*06c3fb27SDimitry Andric     break;
1603*06c3fb27SDimitry Andric   case 0xfb:
1604*06c3fb27SDimitry Andric     if (ABCIsConst)
1605*06c3fb27SDimitry Andric       Res = Nand(Nor(A, C), B);
1606*06c3fb27SDimitry Andric     break;
1607*06c3fb27SDimitry Andric   case 0xfc:
1608*06c3fb27SDimitry Andric     Res = Or(A, B);
1609*06c3fb27SDimitry Andric     break;
1610*06c3fb27SDimitry Andric   case 0xfd:
1611*06c3fb27SDimitry Andric     if (ABCIsConst)
1612*06c3fb27SDimitry Andric       Res = Nand(Nor(A, B), C);
1613*06c3fb27SDimitry Andric     break;
1614*06c3fb27SDimitry Andric   case 0xfe:
1615*06c3fb27SDimitry Andric     if (ABCIsConst)
1616*06c3fb27SDimitry Andric       Res = Or(Or(A, B), C);
1617*06c3fb27SDimitry Andric     break;
1618*06c3fb27SDimitry Andric   case 0xff:
1619*06c3fb27SDimitry Andric     Res = {Constant::getAllOnesValue(Ty), 0xff};
1620*06c3fb27SDimitry Andric     break;
1621*06c3fb27SDimitry Andric   }
1622*06c3fb27SDimitry Andric 
1623*06c3fb27SDimitry Andric   assert((Res.first == nullptr || Res.second == Imm) &&
1624*06c3fb27SDimitry Andric          "Simplification of ternary logic does not verify!");
1625*06c3fb27SDimitry Andric   return Res.first;
1626*06c3fb27SDimitry Andric }
1627*06c3fb27SDimitry Andric 
1628e8d8bef9SDimitry Andric static Value *simplifyX86insertps(const IntrinsicInst &II,
1629e8d8bef9SDimitry Andric                                   InstCombiner::BuilderTy &Builder) {
1630e8d8bef9SDimitry Andric   auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
1631e8d8bef9SDimitry Andric   if (!CInt)
1632e8d8bef9SDimitry Andric     return nullptr;
1633e8d8bef9SDimitry Andric 
1634e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
1635e8d8bef9SDimitry Andric   assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
1636e8d8bef9SDimitry Andric 
1637e8d8bef9SDimitry Andric   // The immediate permute control byte looks like this:
1638e8d8bef9SDimitry Andric   //    [3:0] - zero mask for each 32-bit lane
1639e8d8bef9SDimitry Andric   //    [5:4] - select one 32-bit destination lane
1640e8d8bef9SDimitry Andric   //    [7:6] - select one 32-bit source lane
1641e8d8bef9SDimitry Andric 
1642e8d8bef9SDimitry Andric   uint8_t Imm = CInt->getZExtValue();
1643e8d8bef9SDimitry Andric   uint8_t ZMask = Imm & 0xf;
1644e8d8bef9SDimitry Andric   uint8_t DestLane = (Imm >> 4) & 0x3;
1645e8d8bef9SDimitry Andric   uint8_t SourceLane = (Imm >> 6) & 0x3;
1646e8d8bef9SDimitry Andric 
1647e8d8bef9SDimitry Andric   ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
1648e8d8bef9SDimitry Andric 
1649e8d8bef9SDimitry Andric   // If all zero mask bits are set, this was just a weird way to
1650e8d8bef9SDimitry Andric   // generate a zero vector.
1651e8d8bef9SDimitry Andric   if (ZMask == 0xf)
1652e8d8bef9SDimitry Andric     return ZeroVector;
1653e8d8bef9SDimitry Andric 
1654e8d8bef9SDimitry Andric   // Initialize by passing all of the first source bits through.
1655e8d8bef9SDimitry Andric   int ShuffleMask[4] = {0, 1, 2, 3};
1656e8d8bef9SDimitry Andric 
1657e8d8bef9SDimitry Andric   // We may replace the second operand with the zero vector.
1658e8d8bef9SDimitry Andric   Value *V1 = II.getArgOperand(1);
1659e8d8bef9SDimitry Andric 
1660e8d8bef9SDimitry Andric   if (ZMask) {
1661e8d8bef9SDimitry Andric     // If the zero mask is being used with a single input or the zero mask
1662e8d8bef9SDimitry Andric     // overrides the destination lane, this is a shuffle with the zero vector.
1663e8d8bef9SDimitry Andric     if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
1664e8d8bef9SDimitry Andric         (ZMask & (1 << DestLane))) {
1665e8d8bef9SDimitry Andric       V1 = ZeroVector;
1666e8d8bef9SDimitry Andric       // We may still move 32-bits of the first source vector from one lane
1667e8d8bef9SDimitry Andric       // to another.
1668e8d8bef9SDimitry Andric       ShuffleMask[DestLane] = SourceLane;
1669e8d8bef9SDimitry Andric       // The zero mask may override the previous insert operation.
1670e8d8bef9SDimitry Andric       for (unsigned i = 0; i < 4; ++i)
1671e8d8bef9SDimitry Andric         if ((ZMask >> i) & 0x1)
1672e8d8bef9SDimitry Andric           ShuffleMask[i] = i + 4;
1673e8d8bef9SDimitry Andric     } else {
1674e8d8bef9SDimitry Andric       // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
1675e8d8bef9SDimitry Andric       return nullptr;
1676e8d8bef9SDimitry Andric     }
1677e8d8bef9SDimitry Andric   } else {
1678e8d8bef9SDimitry Andric     // Replace the selected destination lane with the selected source lane.
1679e8d8bef9SDimitry Andric     ShuffleMask[DestLane] = SourceLane + 4;
1680e8d8bef9SDimitry Andric   }
1681e8d8bef9SDimitry Andric 
1682e8d8bef9SDimitry Andric   return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
1683e8d8bef9SDimitry Andric }
1684e8d8bef9SDimitry Andric 
1685e8d8bef9SDimitry Andric /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
1686e8d8bef9SDimitry Andric /// or conversion to a shuffle vector.
1687e8d8bef9SDimitry Andric static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
1688e8d8bef9SDimitry Andric                                ConstantInt *CILength, ConstantInt *CIIndex,
1689e8d8bef9SDimitry Andric                                InstCombiner::BuilderTy &Builder) {
1690e8d8bef9SDimitry Andric   auto LowConstantHighUndef = [&](uint64_t Val) {
1691e8d8bef9SDimitry Andric     Type *IntTy64 = Type::getInt64Ty(II.getContext());
1692e8d8bef9SDimitry Andric     Constant *Args[] = {ConstantInt::get(IntTy64, Val),
1693e8d8bef9SDimitry Andric                         UndefValue::get(IntTy64)};
1694e8d8bef9SDimitry Andric     return ConstantVector::get(Args);
1695e8d8bef9SDimitry Andric   };
1696e8d8bef9SDimitry Andric 
1697e8d8bef9SDimitry Andric   // See if we're dealing with constant values.
1698fe6060f1SDimitry Andric   auto *C0 = dyn_cast<Constant>(Op0);
1699fe6060f1SDimitry Andric   auto *CI0 =
1700e8d8bef9SDimitry Andric       C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
1701e8d8bef9SDimitry Andric          : nullptr;
1702e8d8bef9SDimitry Andric 
1703e8d8bef9SDimitry Andric   // Attempt to constant fold.
1704e8d8bef9SDimitry Andric   if (CILength && CIIndex) {
1705e8d8bef9SDimitry Andric     // From AMD documentation: "The bit index and field length are each six
1706e8d8bef9SDimitry Andric     // bits in length other bits of the field are ignored."
1707e8d8bef9SDimitry Andric     APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
1708e8d8bef9SDimitry Andric     APInt APLength = CILength->getValue().zextOrTrunc(6);
1709e8d8bef9SDimitry Andric 
1710e8d8bef9SDimitry Andric     unsigned Index = APIndex.getZExtValue();
1711e8d8bef9SDimitry Andric 
1712e8d8bef9SDimitry Andric     // From AMD documentation: "a value of zero in the field length is
1713e8d8bef9SDimitry Andric     // defined as length of 64".
1714e8d8bef9SDimitry Andric     unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
1715e8d8bef9SDimitry Andric 
1716e8d8bef9SDimitry Andric     // From AMD documentation: "If the sum of the bit index + length field
1717e8d8bef9SDimitry Andric     // is greater than 64, the results are undefined".
1718e8d8bef9SDimitry Andric     unsigned End = Index + Length;
1719e8d8bef9SDimitry Andric 
1720e8d8bef9SDimitry Andric     // Note that both field index and field length are 8-bit quantities.
1721e8d8bef9SDimitry Andric     // Since variables 'Index' and 'Length' are unsigned values
1722e8d8bef9SDimitry Andric     // obtained from zero-extending field index and field length
1723e8d8bef9SDimitry Andric     // respectively, their sum should never wrap around.
1724e8d8bef9SDimitry Andric     if (End > 64)
1725e8d8bef9SDimitry Andric       return UndefValue::get(II.getType());
1726e8d8bef9SDimitry Andric 
1727e8d8bef9SDimitry Andric     // If we are inserting whole bytes, we can convert this to a shuffle.
1728e8d8bef9SDimitry Andric     // Lowering can recognize EXTRQI shuffle masks.
1729e8d8bef9SDimitry Andric     if ((Length % 8) == 0 && (Index % 8) == 0) {
1730e8d8bef9SDimitry Andric       // Convert bit indices to byte indices.
1731e8d8bef9SDimitry Andric       Length /= 8;
1732e8d8bef9SDimitry Andric       Index /= 8;
1733e8d8bef9SDimitry Andric 
1734e8d8bef9SDimitry Andric       Type *IntTy8 = Type::getInt8Ty(II.getContext());
1735e8d8bef9SDimitry Andric       auto *ShufTy = FixedVectorType::get(IntTy8, 16);
1736e8d8bef9SDimitry Andric 
1737e8d8bef9SDimitry Andric       SmallVector<int, 16> ShuffleMask;
1738e8d8bef9SDimitry Andric       for (int i = 0; i != (int)Length; ++i)
1739e8d8bef9SDimitry Andric         ShuffleMask.push_back(i + Index);
1740e8d8bef9SDimitry Andric       for (int i = Length; i != 8; ++i)
1741e8d8bef9SDimitry Andric         ShuffleMask.push_back(i + 16);
1742e8d8bef9SDimitry Andric       for (int i = 8; i != 16; ++i)
1743e8d8bef9SDimitry Andric         ShuffleMask.push_back(-1);
1744e8d8bef9SDimitry Andric 
1745e8d8bef9SDimitry Andric       Value *SV = Builder.CreateShuffleVector(
1746e8d8bef9SDimitry Andric           Builder.CreateBitCast(Op0, ShufTy),
1747e8d8bef9SDimitry Andric           ConstantAggregateZero::get(ShufTy), ShuffleMask);
1748e8d8bef9SDimitry Andric       return Builder.CreateBitCast(SV, II.getType());
1749e8d8bef9SDimitry Andric     }
1750e8d8bef9SDimitry Andric 
1751e8d8bef9SDimitry Andric     // Constant Fold - shift Index'th bit to lowest position and mask off
1752e8d8bef9SDimitry Andric     // Length bits.
1753e8d8bef9SDimitry Andric     if (CI0) {
1754e8d8bef9SDimitry Andric       APInt Elt = CI0->getValue();
1755e8d8bef9SDimitry Andric       Elt.lshrInPlace(Index);
1756e8d8bef9SDimitry Andric       Elt = Elt.zextOrTrunc(Length);
1757e8d8bef9SDimitry Andric       return LowConstantHighUndef(Elt.getZExtValue());
1758e8d8bef9SDimitry Andric     }
1759e8d8bef9SDimitry Andric 
1760e8d8bef9SDimitry Andric     // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
1761e8d8bef9SDimitry Andric     if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
1762e8d8bef9SDimitry Andric       Value *Args[] = {Op0, CILength, CIIndex};
1763e8d8bef9SDimitry Andric       Module *M = II.getModule();
1764e8d8bef9SDimitry Andric       Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
1765e8d8bef9SDimitry Andric       return Builder.CreateCall(F, Args);
1766e8d8bef9SDimitry Andric     }
1767e8d8bef9SDimitry Andric   }
1768e8d8bef9SDimitry Andric 
1769e8d8bef9SDimitry Andric   // Constant Fold - extraction from zero is always {zero, undef}.
1770e8d8bef9SDimitry Andric   if (CI0 && CI0->isZero())
1771e8d8bef9SDimitry Andric     return LowConstantHighUndef(0);
1772e8d8bef9SDimitry Andric 
1773e8d8bef9SDimitry Andric   return nullptr;
1774e8d8bef9SDimitry Andric }
1775e8d8bef9SDimitry Andric 
/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
/// folding or conversion to a shuffle vector.
///
/// \p Op0 is the vector receiving the inserted field, \p Op1 carries the
/// field to insert in its low bits, and \p APLength / \p APIndex are the
/// (possibly over-wide) field-length and bit-index operands. Returns a
/// replacement Value, or nullptr if no simplification applies.
static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
                                 APInt APLength, APInt APIndex,
                                 InstCombiner::BuilderTy &Builder) {
  // From AMD documentation: "The bit index and field length are each six bits
  // in length other bits of the field are ignored."
  APIndex = APIndex.zextOrTrunc(6);
  APLength = APLength.zextOrTrunc(6);

  // Attempt to constant fold.
  unsigned Index = APIndex.getZExtValue();

  // From AMD documentation: "a value of zero in the field length is
  // defined as length of 64".
  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();

  // From AMD documentation: "If the sum of the bit index + length field
  // is greater than 64, the results are undefined".
  unsigned End = Index + Length;

  // Note that both field index and field length are 8-bit quantities.
  // Since variables 'Index' and 'Length' are unsigned values
  // obtained from zero-extending field index and field length
  // respectively, their sum should never wrap around.
  if (End > 64)
    return UndefValue::get(II.getType());

  // If we are inserting whole bytes, we can convert this to a shuffle.
  // Lowering can recognize INSERTQI shuffle masks.
  if ((Length % 8) == 0 && (Index % 8) == 0) {
    // Convert bit indices to byte indices.
    Length /= 8;
    Index /= 8;

    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    auto *ShufTy = FixedVectorType::get(IntTy8, 16);

    // Build a v16i8 mask: Op0's low bytes up to the insertion point, then
    // 'Length' bytes taken from the bottom of Op1 (second shuffle operand,
    // hence the +16), then the rest of Op0's low 64 bits. The upper 64 bits
    // of the result are left undefined (-1).
    SmallVector<int, 16> ShuffleMask;
    for (int i = 0; i != (int)Index; ++i)
      ShuffleMask.push_back(i);
    for (int i = 0; i != (int)Length; ++i)
      ShuffleMask.push_back(i + 16);
    for (int i = Index + Length; i != 8; ++i)
      ShuffleMask.push_back(i);
    for (int i = 8; i != 16; ++i)
      ShuffleMask.push_back(-1);

    Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
                                            Builder.CreateBitCast(Op1, ShufTy),
                                            ShuffleMask);
    return Builder.CreateBitCast(SV, II.getType());
  }

  // See if we're dealing with constant values.
  auto *C0 = dyn_cast<Constant>(Op0);
  auto *C1 = dyn_cast<Constant>(Op1);
  auto *CI00 =
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
         : nullptr;
  auto *CI10 =
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
         : nullptr;

  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
  if (CI00 && CI10) {
    APInt V00 = CI00->getValue();
    APInt V10 = CI10->getValue();
    APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
    // Clear the destination field in element 0 of Op0.
    V00 = V00 & ~Mask;
    // Isolate the low Length bits of Op1's element 0 and shift them into
    // position.
    V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
    APInt Val = V00 | V10;
    Type *IntTy64 = Type::getInt64Ty(II.getContext());
    // Result is {inserted value, undef} as a <2 x i64>.
    Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
                        UndefValue::get(IntTy64)};
    return ConstantVector::get(Args);
  }

  // If we were an INSERTQ call, we'll save demanded elements if we convert to
  // INSERTQI.
  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
    Type *IntTy8 = Type::getInt8Ty(II.getContext());
    Constant *CILength = ConstantInt::get(IntTy8, Length, false);
    Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);

    Value *Args[] = {Op0, Op1, CILength, CIIndex};
    Module *M = II.getModule();
    Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
    return Builder.CreateCall(F, Args);
  }

  return nullptr;
}
1869e8d8bef9SDimitry Andric 
1870e8d8bef9SDimitry Andric /// Attempt to convert pshufb* to shufflevector if the mask is constant.
1871e8d8bef9SDimitry Andric static Value *simplifyX86pshufb(const IntrinsicInst &II,
1872e8d8bef9SDimitry Andric                                 InstCombiner::BuilderTy &Builder) {
1873fe6060f1SDimitry Andric   auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1874e8d8bef9SDimitry Andric   if (!V)
1875e8d8bef9SDimitry Andric     return nullptr;
1876e8d8bef9SDimitry Andric 
1877e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
1878e8d8bef9SDimitry Andric   unsigned NumElts = VecTy->getNumElements();
1879e8d8bef9SDimitry Andric   assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
1880e8d8bef9SDimitry Andric          "Unexpected number of elements in shuffle mask!");
1881e8d8bef9SDimitry Andric 
1882e8d8bef9SDimitry Andric   // Construct a shuffle mask from constant integers or UNDEFs.
1883e8d8bef9SDimitry Andric   int Indexes[64];
1884e8d8bef9SDimitry Andric 
1885e8d8bef9SDimitry Andric   // Each byte in the shuffle control mask forms an index to permute the
1886e8d8bef9SDimitry Andric   // corresponding byte in the destination operand.
1887e8d8bef9SDimitry Andric   for (unsigned I = 0; I < NumElts; ++I) {
1888e8d8bef9SDimitry Andric     Constant *COp = V->getAggregateElement(I);
1889e8d8bef9SDimitry Andric     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1890e8d8bef9SDimitry Andric       return nullptr;
1891e8d8bef9SDimitry Andric 
1892e8d8bef9SDimitry Andric     if (isa<UndefValue>(COp)) {
1893e8d8bef9SDimitry Andric       Indexes[I] = -1;
1894e8d8bef9SDimitry Andric       continue;
1895e8d8bef9SDimitry Andric     }
1896e8d8bef9SDimitry Andric 
1897e8d8bef9SDimitry Andric     int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
1898e8d8bef9SDimitry Andric 
1899e8d8bef9SDimitry Andric     // If the most significant bit (bit[7]) of each byte of the shuffle
1900e8d8bef9SDimitry Andric     // control mask is set, then zero is written in the result byte.
1901e8d8bef9SDimitry Andric     // The zero vector is in the right-hand side of the resulting
1902e8d8bef9SDimitry Andric     // shufflevector.
1903e8d8bef9SDimitry Andric 
1904e8d8bef9SDimitry Andric     // The value of each index for the high 128-bit lane is the least
1905e8d8bef9SDimitry Andric     // significant 4 bits of the respective shuffle control byte.
1906e8d8bef9SDimitry Andric     Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
1907e8d8bef9SDimitry Andric     Indexes[I] = Index;
1908e8d8bef9SDimitry Andric   }
1909e8d8bef9SDimitry Andric 
1910e8d8bef9SDimitry Andric   auto V1 = II.getArgOperand(0);
1911e8d8bef9SDimitry Andric   auto V2 = Constant::getNullValue(VecTy);
1912bdd1243dSDimitry Andric   return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
1913e8d8bef9SDimitry Andric }
1914e8d8bef9SDimitry Andric 
1915e8d8bef9SDimitry Andric /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
1916e8d8bef9SDimitry Andric static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
1917e8d8bef9SDimitry Andric                                     InstCombiner::BuilderTy &Builder) {
1918fe6060f1SDimitry Andric   auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1919e8d8bef9SDimitry Andric   if (!V)
1920e8d8bef9SDimitry Andric     return nullptr;
1921e8d8bef9SDimitry Andric 
1922e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
1923e8d8bef9SDimitry Andric   unsigned NumElts = VecTy->getNumElements();
1924e8d8bef9SDimitry Andric   bool IsPD = VecTy->getScalarType()->isDoubleTy();
1925e8d8bef9SDimitry Andric   unsigned NumLaneElts = IsPD ? 2 : 4;
1926e8d8bef9SDimitry Andric   assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1927e8d8bef9SDimitry Andric 
1928e8d8bef9SDimitry Andric   // Construct a shuffle mask from constant integers or UNDEFs.
1929e8d8bef9SDimitry Andric   int Indexes[16];
1930e8d8bef9SDimitry Andric 
1931e8d8bef9SDimitry Andric   // The intrinsics only read one or two bits, clear the rest.
1932e8d8bef9SDimitry Andric   for (unsigned I = 0; I < NumElts; ++I) {
1933e8d8bef9SDimitry Andric     Constant *COp = V->getAggregateElement(I);
1934e8d8bef9SDimitry Andric     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1935e8d8bef9SDimitry Andric       return nullptr;
1936e8d8bef9SDimitry Andric 
1937e8d8bef9SDimitry Andric     if (isa<UndefValue>(COp)) {
1938e8d8bef9SDimitry Andric       Indexes[I] = -1;
1939e8d8bef9SDimitry Andric       continue;
1940e8d8bef9SDimitry Andric     }
1941e8d8bef9SDimitry Andric 
1942e8d8bef9SDimitry Andric     APInt Index = cast<ConstantInt>(COp)->getValue();
1943e8d8bef9SDimitry Andric     Index = Index.zextOrTrunc(32).getLoBits(2);
1944e8d8bef9SDimitry Andric 
1945e8d8bef9SDimitry Andric     // The PD variants uses bit 1 to select per-lane element index, so
1946e8d8bef9SDimitry Andric     // shift down to convert to generic shuffle mask index.
1947e8d8bef9SDimitry Andric     if (IsPD)
1948e8d8bef9SDimitry Andric       Index.lshrInPlace(1);
1949e8d8bef9SDimitry Andric 
1950e8d8bef9SDimitry Andric     // The _256 variants are a bit trickier since the mask bits always index
1951e8d8bef9SDimitry Andric     // into the corresponding 128 half. In order to convert to a generic
1952e8d8bef9SDimitry Andric     // shuffle, we have to make that explicit.
1953e8d8bef9SDimitry Andric     Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
1954e8d8bef9SDimitry Andric 
1955e8d8bef9SDimitry Andric     Indexes[I] = Index.getZExtValue();
1956e8d8bef9SDimitry Andric   }
1957e8d8bef9SDimitry Andric 
1958e8d8bef9SDimitry Andric   auto V1 = II.getArgOperand(0);
1959bdd1243dSDimitry Andric   return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, NumElts));
1960e8d8bef9SDimitry Andric }
1961e8d8bef9SDimitry Andric 
1962e8d8bef9SDimitry Andric /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
1963e8d8bef9SDimitry Andric static Value *simplifyX86vpermv(const IntrinsicInst &II,
1964e8d8bef9SDimitry Andric                                 InstCombiner::BuilderTy &Builder) {
1965e8d8bef9SDimitry Andric   auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1966e8d8bef9SDimitry Andric   if (!V)
1967e8d8bef9SDimitry Andric     return nullptr;
1968e8d8bef9SDimitry Andric 
1969e8d8bef9SDimitry Andric   auto *VecTy = cast<FixedVectorType>(II.getType());
1970e8d8bef9SDimitry Andric   unsigned Size = VecTy->getNumElements();
1971e8d8bef9SDimitry Andric   assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
1972e8d8bef9SDimitry Andric          "Unexpected shuffle mask size");
1973e8d8bef9SDimitry Andric 
1974e8d8bef9SDimitry Andric   // Construct a shuffle mask from constant integers or UNDEFs.
1975e8d8bef9SDimitry Andric   int Indexes[64];
1976e8d8bef9SDimitry Andric 
1977e8d8bef9SDimitry Andric   for (unsigned I = 0; I < Size; ++I) {
1978e8d8bef9SDimitry Andric     Constant *COp = V->getAggregateElement(I);
1979e8d8bef9SDimitry Andric     if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1980e8d8bef9SDimitry Andric       return nullptr;
1981e8d8bef9SDimitry Andric 
1982e8d8bef9SDimitry Andric     if (isa<UndefValue>(COp)) {
1983e8d8bef9SDimitry Andric       Indexes[I] = -1;
1984e8d8bef9SDimitry Andric       continue;
1985e8d8bef9SDimitry Andric     }
1986e8d8bef9SDimitry Andric 
1987e8d8bef9SDimitry Andric     uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
1988e8d8bef9SDimitry Andric     Index &= Size - 1;
1989e8d8bef9SDimitry Andric     Indexes[I] = Index;
1990e8d8bef9SDimitry Andric   }
1991e8d8bef9SDimitry Andric 
1992e8d8bef9SDimitry Andric   auto V1 = II.getArgOperand(0);
1993bdd1243dSDimitry Andric   return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, Size));
1994e8d8bef9SDimitry Andric }
1995e8d8bef9SDimitry Andric 
1996bdd1243dSDimitry Andric std::optional<Instruction *>
1997e8d8bef9SDimitry Andric X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
1998e8d8bef9SDimitry Andric   auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
1999e8d8bef9SDimitry Andric                                              unsigned DemandedWidth) {
2000e8d8bef9SDimitry Andric     APInt UndefElts(Width, 0);
2001e8d8bef9SDimitry Andric     APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
2002e8d8bef9SDimitry Andric     return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
2003e8d8bef9SDimitry Andric   };
2004e8d8bef9SDimitry Andric 
2005e8d8bef9SDimitry Andric   Intrinsic::ID IID = II.getIntrinsicID();
2006e8d8bef9SDimitry Andric   switch (IID) {
2007e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bextr_32:
2008e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bextr_64:
2009e8d8bef9SDimitry Andric   case Intrinsic::x86_tbm_bextri_u32:
2010e8d8bef9SDimitry Andric   case Intrinsic::x86_tbm_bextri_u64:
2011e8d8bef9SDimitry Andric     // If the RHS is a constant we can try some simplifications.
2012e8d8bef9SDimitry Andric     if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2013e8d8bef9SDimitry Andric       uint64_t Shift = C->getZExtValue();
2014e8d8bef9SDimitry Andric       uint64_t Length = (Shift >> 8) & 0xff;
2015e8d8bef9SDimitry Andric       Shift &= 0xff;
2016e8d8bef9SDimitry Andric       unsigned BitWidth = II.getType()->getIntegerBitWidth();
2017e8d8bef9SDimitry Andric       // If the length is 0 or the shift is out of range, replace with zero.
2018e8d8bef9SDimitry Andric       if (Length == 0 || Shift >= BitWidth) {
2019e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2020e8d8bef9SDimitry Andric       }
2021e8d8bef9SDimitry Andric       // If the LHS is also a constant, we can completely constant fold this.
2022e8d8bef9SDimitry Andric       if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2023e8d8bef9SDimitry Andric         uint64_t Result = InC->getZExtValue() >> Shift;
2024e8d8bef9SDimitry Andric         if (Length > BitWidth)
2025e8d8bef9SDimitry Andric           Length = BitWidth;
2026e8d8bef9SDimitry Andric         Result &= maskTrailingOnes<uint64_t>(Length);
2027e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
2028e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
2029e8d8bef9SDimitry Andric       }
2030e8d8bef9SDimitry Andric       // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
2031e8d8bef9SDimitry Andric       // are only masking bits that a shift already cleared?
2032e8d8bef9SDimitry Andric     }
2033e8d8bef9SDimitry Andric     break;
2034e8d8bef9SDimitry Andric 
2035e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bzhi_32:
2036e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_bzhi_64:
2037e8d8bef9SDimitry Andric     // If the RHS is a constant we can try some simplifications.
2038e8d8bef9SDimitry Andric     if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2039e8d8bef9SDimitry Andric       uint64_t Index = C->getZExtValue() & 0xff;
2040e8d8bef9SDimitry Andric       unsigned BitWidth = II.getType()->getIntegerBitWidth();
2041e8d8bef9SDimitry Andric       if (Index >= BitWidth) {
2042e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
2043e8d8bef9SDimitry Andric       }
2044e8d8bef9SDimitry Andric       if (Index == 0) {
2045e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2046e8d8bef9SDimitry Andric       }
2047e8d8bef9SDimitry Andric       // If the LHS is also a constant, we can completely constant fold this.
2048e8d8bef9SDimitry Andric       if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2049e8d8bef9SDimitry Andric         uint64_t Result = InC->getZExtValue();
2050e8d8bef9SDimitry Andric         Result &= maskTrailingOnes<uint64_t>(Index);
2051e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
2052e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
2053e8d8bef9SDimitry Andric       }
2054e8d8bef9SDimitry Andric       // TODO should we convert this to an AND if the RHS is constant?
2055e8d8bef9SDimitry Andric     }
2056e8d8bef9SDimitry Andric     break;
2057e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pext_32:
2058e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pext_64:
2059e8d8bef9SDimitry Andric     if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2060e8d8bef9SDimitry Andric       if (MaskC->isNullValue()) {
2061e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2062e8d8bef9SDimitry Andric       }
2063e8d8bef9SDimitry Andric       if (MaskC->isAllOnesValue()) {
2064e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
2065e8d8bef9SDimitry Andric       }
2066e8d8bef9SDimitry Andric 
206781ad6265SDimitry Andric       unsigned MaskIdx, MaskLen;
206881ad6265SDimitry Andric       if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // Any single contiguous sequence of 1s anywhere in the mask simply
        // describes a subset of the input bits shifted to the appropriate
        // position.  Replace with the straightforward IR.
2072e8d8bef9SDimitry Andric         Value *Input = II.getArgOperand(0);
2073e8d8bef9SDimitry Andric         Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1));
207481ad6265SDimitry Andric         Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
207581ad6265SDimitry Andric         Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt);
2076e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Shifted);
2077e8d8bef9SDimitry Andric       }
2078e8d8bef9SDimitry Andric 
2079e8d8bef9SDimitry Andric       if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2080e8d8bef9SDimitry Andric         uint64_t Src = SrcC->getZExtValue();
2081e8d8bef9SDimitry Andric         uint64_t Mask = MaskC->getZExtValue();
2082e8d8bef9SDimitry Andric         uint64_t Result = 0;
2083e8d8bef9SDimitry Andric         uint64_t BitToSet = 1;
2084e8d8bef9SDimitry Andric 
2085e8d8bef9SDimitry Andric         while (Mask) {
2086e8d8bef9SDimitry Andric           // Isolate lowest set bit.
2087e8d8bef9SDimitry Andric           uint64_t BitToTest = Mask & -Mask;
2088e8d8bef9SDimitry Andric           if (BitToTest & Src)
2089e8d8bef9SDimitry Andric             Result |= BitToSet;
2090e8d8bef9SDimitry Andric 
2091e8d8bef9SDimitry Andric           BitToSet <<= 1;
2092e8d8bef9SDimitry Andric           // Clear lowest set bit.
2093e8d8bef9SDimitry Andric           Mask &= Mask - 1;
2094e8d8bef9SDimitry Andric         }
2095e8d8bef9SDimitry Andric 
2096e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
2097e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
2098e8d8bef9SDimitry Andric       }
2099e8d8bef9SDimitry Andric     }
2100e8d8bef9SDimitry Andric     break;
2101e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pdep_32:
2102e8d8bef9SDimitry Andric   case Intrinsic::x86_bmi_pdep_64:
2103e8d8bef9SDimitry Andric     if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2104e8d8bef9SDimitry Andric       if (MaskC->isNullValue()) {
2105e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2106e8d8bef9SDimitry Andric       }
2107e8d8bef9SDimitry Andric       if (MaskC->isAllOnesValue()) {
2108e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, II.getArgOperand(0));
2109e8d8bef9SDimitry Andric       }
211081ad6265SDimitry Andric 
211181ad6265SDimitry Andric       unsigned MaskIdx, MaskLen;
211281ad6265SDimitry Andric       if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // Any single contiguous sequence of 1s anywhere in the mask simply
        // describes a subset of the input bits shifted to the appropriate
        // position.  Replace with the straightforward IR.
2116e8d8bef9SDimitry Andric         Value *Input = II.getArgOperand(0);
211781ad6265SDimitry Andric         Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
211881ad6265SDimitry Andric         Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt);
2119e8d8bef9SDimitry Andric         Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1));
2120e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II, Masked);
2121e8d8bef9SDimitry Andric       }
2122e8d8bef9SDimitry Andric 
2123e8d8bef9SDimitry Andric       if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2124e8d8bef9SDimitry Andric         uint64_t Src = SrcC->getZExtValue();
2125e8d8bef9SDimitry Andric         uint64_t Mask = MaskC->getZExtValue();
2126e8d8bef9SDimitry Andric         uint64_t Result = 0;
2127e8d8bef9SDimitry Andric         uint64_t BitToTest = 1;
2128e8d8bef9SDimitry Andric 
2129e8d8bef9SDimitry Andric         while (Mask) {
2130e8d8bef9SDimitry Andric           // Isolate lowest set bit.
2131e8d8bef9SDimitry Andric           uint64_t BitToSet = Mask & -Mask;
2132e8d8bef9SDimitry Andric           if (BitToTest & Src)
2133e8d8bef9SDimitry Andric             Result |= BitToSet;
2134e8d8bef9SDimitry Andric 
2135e8d8bef9SDimitry Andric           BitToTest <<= 1;
2136e8d8bef9SDimitry Andric           // Clear lowest set bit;
2137e8d8bef9SDimitry Andric           Mask &= Mask - 1;
2138e8d8bef9SDimitry Andric         }
2139e8d8bef9SDimitry Andric 
2140e8d8bef9SDimitry Andric         return IC.replaceInstUsesWith(II,
2141e8d8bef9SDimitry Andric                                       ConstantInt::get(II.getType(), Result));
2142e8d8bef9SDimitry Andric       }
2143e8d8bef9SDimitry Andric     }
2144e8d8bef9SDimitry Andric     break;
2145e8d8bef9SDimitry Andric 
2146e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvtss2si:
2147e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvtss2si64:
2148e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvttss2si:
2149e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cvttss2si64:
2150e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvtsd2si:
2151e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvtsd2si64:
2152e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvttsd2si:
2153e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cvttsd2si64:
2154e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2si32:
2155e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2si64:
2156e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2usi32:
2157e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtss2usi64:
2158e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2si32:
2159e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2si64:
2160e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2usi32:
2161e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vcvtsd2usi64:
2162e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2si:
2163e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2si64:
2164e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2usi:
2165e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttss2usi64:
2166e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2si:
2167e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2si64:
2168e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2usi:
2169e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_cvttsd2usi64: {
2170e8d8bef9SDimitry Andric     // These intrinsics only demand the 0th element of their input vectors. If
2171e8d8bef9SDimitry Andric     // we can simplify the input based on that, do so now.
2172e8d8bef9SDimitry Andric     Value *Arg = II.getArgOperand(0);
2173e8d8bef9SDimitry Andric     unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
2174e8d8bef9SDimitry Andric     if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2175e8d8bef9SDimitry Andric       return IC.replaceOperand(II, 0, V);
2176e8d8bef9SDimitry Andric     }
2177e8d8bef9SDimitry Andric     break;
2178e8d8bef9SDimitry Andric   }
2179e8d8bef9SDimitry Andric 
2180e8d8bef9SDimitry Andric   case Intrinsic::x86_mmx_pmovmskb:
2181e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_movmsk_ps:
2182e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_movmsk_pd:
2183e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pmovmskb_128:
2184e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_pd_256:
2185e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_ps_256:
2186e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pmovmskb:
2187e8d8bef9SDimitry Andric     if (Value *V = simplifyX86movmsk(II, IC.Builder)) {
2188e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2189e8d8bef9SDimitry Andric     }
2190e8d8bef9SDimitry Andric     break;
2191e8d8bef9SDimitry Andric 
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    bool MadeChange = false;
    Value *Arg0 = II.getArgOperand(0);
    Value *Arg1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    // Demand only element 0 of each operand; if the operand simplifies under
    // that constraint, swap it in place.
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    // Returning the instruction itself signals InstCombine that it was
    // modified in place and should be revisited.
    if (MadeChange) {
      return &II;
    }
    break;
  }
2239e8d8bef9SDimitry Andric 
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
    // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
    // IR operations.
    // (4 == _MM_FROUND_CUR_DIRECTION, i.e. "use MXCSR", which matches the
    // default FP environment that plain IR fadd/fsub/fmul/fdiv assume.)
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);

        Value *V;
        switch (IID) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_add_ps_512:
        case Intrinsic::x86_avx512_add_pd_512:
          V = IC.Builder.CreateFAdd(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_sub_ps_512:
        case Intrinsic::x86_avx512_sub_pd_512:
          V = IC.Builder.CreateFSub(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_mul_ps_512:
        case Intrinsic::x86_avx512_mul_pd_512:
          V = IC.Builder.CreateFMul(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_div_ps_512:
        case Intrinsic::x86_avx512_div_pd_512:
          V = IC.Builder.CreateFDiv(Arg0, Arg1);
          break;
        }

        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;
2281e8d8bef9SDimitry Andric 
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
    // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
    // IR operations.
    // These are masked scalar ops: only lane 0 is computed; the remaining
    // lanes of the result are taken from operand 0.
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
      if (R->getValue() == 4) {
        // Extract the element as scalars.
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);
        Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
        Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);

        Value *V;
        switch (IID) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_mask_add_ss_round:
        case Intrinsic::x86_avx512_mask_add_sd_round:
          V = IC.Builder.CreateFAdd(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_sub_ss_round:
        case Intrinsic::x86_avx512_mask_sub_sd_round:
          V = IC.Builder.CreateFSub(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_mul_ss_round:
        case Intrinsic::x86_avx512_mask_mul_sd_round:
          V = IC.Builder.CreateFMul(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_div_ss_round:
        case Intrinsic::x86_avx512_mask_div_sd_round:
          V = IC.Builder.CreateFDiv(LHS, RHS);
          break;
        }

        // Handle the masking aspect of the intrinsic.
        Value *Mask = II.getArgOperand(3);
        auto *C = dyn_cast<ConstantInt>(Mask);
        // We don't need a select if we know the mask bit is a 1.
        if (!C || !C->getValue()[0]) {
          // Cast the mask to an i1 vector and then extract the lowest element.
          auto *MaskTy = FixedVectorType::get(
              IC.Builder.getInt1Ty(),
              cast<IntegerType>(Mask->getType())->getBitWidth());
          Mask = IC.Builder.CreateBitCast(Mask, MaskTy);
          Mask = IC.Builder.CreateExtractElement(Mask, (uint64_t)0);
          // Extract the lowest element from the passthru operand.
          Value *Passthru =
              IC.Builder.CreateExtractElement(II.getArgOperand(2), (uint64_t)0);
          // Mask bit clear -> result lane comes from the passthru operand.
          V = IC.Builder.CreateSelect(Mask, V, Passthru);
        }

        // Insert the result back into the original argument 0.
        V = IC.Builder.CreateInsertElement(Arg0, V, (uint64_t)0);

        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;
2346e8d8bef9SDimitry Andric 
  // Constant fold ashr( <A x Bi>, Ci ).
  // Constant fold lshr( <A x Bi>, Ci ).
  // Constant fold shl( <A x Bi>, Ci ).
  // These take the shift count as an immediate scalar operand.
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
2381e8d8bef9SDimitry Andric 
  // These take the shift count in the low 64 bits of a 128-bit vector operand
  // (one count applied to all lanes).
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
    // operand to compute the shift amount.
    Value *Arg1 = II.getArgOperand(1);
    assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();

    // Demand only the low half of the count vector's elements (the low 64
    // bits); the upper elements are ignored by the instruction.
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
      return IC.replaceOperand(II, 1, V);
    }
    break;
  }
2425e8d8bef9SDimitry Andric 
  // Per-element variable shifts (VPSLLV/VPSRAV/VPSRLV): each lane is shifted
  // by the corresponding lane of the second operand.
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    if (Value *V = simplifyX86varShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
2457e8d8bef9SDimitry Andric 
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
    // PACKSS*: pack with signed saturation ('true' selects the signed path in
    // the helper).
    if (Value *V = simplifyX86pack(II, IC.Builder, true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
2468e8d8bef9SDimitry Andric 
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:
    // PACKUS*: pack with unsigned saturation ('false' selects the unsigned
    // path in the helper).
    if (Value *V = simplifyX86pack(II, IC.Builder, false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
2479e8d8bef9SDimitry Andric 
  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
    // Carry-less multiply. The immediate selects which 64-bit half of each
    // 128-bit lane is used: bit 0 for operand 0, bit 4 for operand 1.
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      unsigned Imm = C->getZExtValue();

      bool MadeChange = false;
      Value *Arg0 = II.getArgOperand(0);
      Value *Arg1 = II.getArgOperand(1);
      unsigned VWidth =
          cast<FixedVectorType>(Arg0->getType())->getNumElements();

      // Demand only the selected half of each 128-bit lane: splatting the
      // 2-bit pattern 0b10 demands odd elements, 0b01 demands even elements.
      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));
      if (Value *V =
              IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) {
        IC.replaceOperand(II, 0, V);
        MadeChange = true;
      }

      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
      if (Value *V =
              IC.SimplifyDemandedVectorElts(Arg1, DemandedElts2, UndefElts2)) {
        IC.replaceOperand(II, 1, V);
        MadeChange = true;
      }

      // If either input elements are undef, the result is zero.
      if (DemandedElts1.isSubsetOf(UndefElts1) ||
          DemandedElts2.isSubsetOf(UndefElts2)) {
        return IC.replaceInstUsesWith(II,
                                      ConstantAggregateZero::get(II.getType()));
      }

      if (MadeChange) {
        return &II;
      }
    }
    break;
  }
2523e8d8bef9SDimitry Andric 
  case Intrinsic::x86_sse41_insertps:
    // INSERTPS with a constant immediate can often be folded to a shuffle or
    // constant; the helper handles the immediate decoding.
    if (Value *V = simplifyX86insertps(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
2529e8d8bef9SDimitry Andric 
  case Intrinsic::x86_sse4a_extrq: {
    // SSE4a EXTRQ: bit-field extract. Op1 is a <16 x i8> whose bytes 0 and 1
    // hold the length and index respectively.
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 16 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    auto *C1 = dyn_cast<Constant>(Op1);
    auto *CILength =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
           : nullptr;
    auto *CIIndex =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
           : nullptr;

    // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
    if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
    // operands and the lowest 16-bits of the second.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    // Return the modified instruction so InstCombine re-queues it.
    if (MadeChange) {
      return &II;
    }
    break;
  }
2569e8d8bef9SDimitry Andric 
  case Intrinsic::x86_sse4a_extrqi: {
    // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
    // bits of the lower 64-bits. The upper 64-bits are undefined.
    Value *Op0 = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // See if we're dealing with constant values.
    // Unlike EXTRQ, the length and index are separate immediate operands.
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2));

    // Attempt to simplify to a constant or shuffle vector.
    if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
    // operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
2594e8d8bef9SDimitry Andric 
  case Intrinsic::x86_sse4a_insertq: {
    // SSE4a INSERTQ: bit-field insert. Element 1 of Op1 encodes both the
    // length (bits [5:0]) and the index (bits [13:8]).
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
           "Unexpected operand size");

    // See if we're dealing with constant values.
    auto *C1 = dyn_cast<Constant>(Op1);
    auto *CI11 =
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
           : nullptr;

    // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
    if (CI11) {
      const APInt &V11 = CI11->getValue();
      // Decode length from bits [5:0] and index from bits [13:8].
      APInt Len = V11.zextOrTrunc(6);
      APInt Idx = V11.lshr(8).zextOrTrunc(6);
      if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
        return IC.replaceInstUsesWith(II, V);
      }
    }

    // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
    // operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
2627e8d8bef9SDimitry Andric 
  case Intrinsic::x86_sse4a_insertqi: {
    // INSERTQI: Extract lowest Length bits from lower half of second source and
    // insert over first source starting at Index bit. The upper 64-bits are
    // undefined.
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 2 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    // Unlike INSERTQ, the length and index are separate immediate operands.
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));

    // Attempt to simplify to a constant or shuffle vector.
    if (CILength && CIIndex) {
      // Both fields are 6 bits wide.
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);
      if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
        return IC.replaceInstUsesWith(II, V);
      }
    }

    // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
    // operands.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
2669e8d8bef9SDimitry Andric 
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    // BLENDV selects per lane between Op0 and Op1 based on the top bit of the
    // corresponding mask lane.
    // fold (blend A, A, Mask) -> A
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Mask = II.getArgOperand(2);
    if (Op0 == Op1) {
      return IC.replaceInstUsesWith(II, Op0);
    }

    // Zero Mask - select 1st argument.
    if (isa<ConstantAggregateZero>(Mask)) {
      return IC.replaceInstUsesWith(II, Op0);
    }

    // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
    if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
      Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
      return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
    }

    // Convert to a vector select if we can bypass casts and find a boolean
    // vector condition value.
    Value *BoolVec;
    Mask = InstCombiner::peekThroughBitcast(Mask);
    if (match(Mask, PatternMatch::m_SExt(PatternMatch::m_Value(BoolVec))) &&
        BoolVec->getType()->isVectorTy() &&
        BoolVec->getType()->getScalarSizeInBits() == 1) {
      // A sign-extended i1 vector has its top bit equal to the i1 value, so
      // the i1 vector is an exact replacement for the blend condition.
      assert(Mask->getType()->getPrimitiveSizeInBits() ==
                 II.getType()->getPrimitiveSizeInBits() &&
             "Not expecting mask and operands with different sizes");

      unsigned NumMaskElts =
          cast<FixedVectorType>(Mask->getType())->getNumElements();
      unsigned NumOperandElts =
          cast<FixedVectorType>(II.getType())->getNumElements();
      if (NumMaskElts == NumOperandElts) {
        return SelectInst::Create(BoolVec, Op1, Op0);
      }

      // If the mask has less elements than the operands, each mask bit maps to
      // multiple elements of the operands. Bitcast back and forth.
      if (NumMaskElts < NumOperandElts) {
        Value *CastOp0 = IC.Builder.CreateBitCast(Op0, Mask->getType());
        Value *CastOp1 = IC.Builder.CreateBitCast(Op1, Mask->getType());
        Value *Sel = IC.Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
        return new BitCastInst(Sel, II.getType());
      }
    }

    break;
  }
2726e8d8bef9SDimitry Andric 
2727e8d8bef9SDimitry Andric   case Intrinsic::x86_ssse3_pshuf_b_128:
2728e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pshuf_b:
2729e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pshuf_b_512:
2730e8d8bef9SDimitry Andric     if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
2731e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2732e8d8bef9SDimitry Andric     }
2733e8d8bef9SDimitry Andric     break;
2734e8d8bef9SDimitry Andric 
2735e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps:
2736e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps_256:
2737e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_ps_512:
2738e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd:
2739e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd_256:
2740e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_pd_512:
2741e8d8bef9SDimitry Andric     if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
2742e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2743e8d8bef9SDimitry Andric     }
2744e8d8bef9SDimitry Andric     break;
2745e8d8bef9SDimitry Andric 
2746e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permd:
2747e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permps:
2748e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_df_256:
2749e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_df_512:
2750e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_di_256:
2751e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_di_512:
2752e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_hi_128:
2753e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_hi_256:
2754e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_hi_512:
2755e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_qi_128:
2756e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_qi_256:
2757e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_qi_512:
2758e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_sf_512:
2759e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_permvar_si_512:
2760e8d8bef9SDimitry Andric     if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
2761e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2762e8d8bef9SDimitry Andric     }
2763e8d8bef9SDimitry Andric     break;
2764e8d8bef9SDimitry Andric 
2765e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_ps:
2766e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_pd:
2767e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_ps_256:
2768e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskload_pd_256:
2769e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_d:
2770e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_q:
2771e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_d_256:
2772e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskload_q_256:
2773e8d8bef9SDimitry Andric     if (Instruction *I = simplifyX86MaskedLoad(II, IC)) {
2774e8d8bef9SDimitry Andric       return I;
2775e8d8bef9SDimitry Andric     }
2776e8d8bef9SDimitry Andric     break;
2777e8d8bef9SDimitry Andric 
2778e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_maskmov_dqu:
2779e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_ps:
2780e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_pd:
2781e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_ps_256:
2782e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_maskstore_pd_256:
2783e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_d:
2784e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_q:
2785e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_d_256:
2786e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_maskstore_q_256:
2787e8d8bef9SDimitry Andric     if (simplifyX86MaskedStore(II, IC)) {
2788e8d8bef9SDimitry Andric       return nullptr;
2789e8d8bef9SDimitry Andric     }
2790e8d8bef9SDimitry Andric     break;
2791e8d8bef9SDimitry Andric 
2792e8d8bef9SDimitry Andric   case Intrinsic::x86_addcarry_32:
2793e8d8bef9SDimitry Andric   case Intrinsic::x86_addcarry_64:
2794e8d8bef9SDimitry Andric     if (Value *V = simplifyX86addcarry(II, IC.Builder)) {
2795e8d8bef9SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2796e8d8bef9SDimitry Andric     }
2797e8d8bef9SDimitry Andric     break;
2798e8d8bef9SDimitry Andric 
2799*06c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_d_128:
2800*06c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_d_256:
2801*06c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_d_512:
2802*06c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_q_128:
2803*06c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_q_256:
2804*06c3fb27SDimitry Andric   case Intrinsic::x86_avx512_pternlog_q_512:
2805*06c3fb27SDimitry Andric     if (Value *V = simplifyTernarylogic(II, IC.Builder)) {
2806*06c3fb27SDimitry Andric       return IC.replaceInstUsesWith(II, V);
2807*06c3fb27SDimitry Andric     }
2808*06c3fb27SDimitry Andric     break;
2809e8d8bef9SDimitry Andric   default:
2810e8d8bef9SDimitry Andric     break;
2811e8d8bef9SDimitry Andric   }
2812bdd1243dSDimitry Andric   return std::nullopt;
2813e8d8bef9SDimitry Andric }
2814e8d8bef9SDimitry Andric 
2815bdd1243dSDimitry Andric std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
2816e8d8bef9SDimitry Andric     InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
2817e8d8bef9SDimitry Andric     bool &KnownBitsComputed) const {
2818e8d8bef9SDimitry Andric   switch (II.getIntrinsicID()) {
2819e8d8bef9SDimitry Andric   default:
2820e8d8bef9SDimitry Andric     break;
2821e8d8bef9SDimitry Andric   case Intrinsic::x86_mmx_pmovmskb:
2822e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_movmsk_ps:
2823e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_movmsk_pd:
2824e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_pmovmskb_128:
2825e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_ps_256:
2826e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_movmsk_pd_256:
2827e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pmovmskb: {
2828e8d8bef9SDimitry Andric     // MOVMSK copies the vector elements' sign bits to the low bits
2829e8d8bef9SDimitry Andric     // and zeros the high bits.
2830e8d8bef9SDimitry Andric     unsigned ArgWidth;
2831e8d8bef9SDimitry Andric     if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
2832e8d8bef9SDimitry Andric       ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
2833e8d8bef9SDimitry Andric     } else {
2834fe6060f1SDimitry Andric       auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType());
2835e8d8bef9SDimitry Andric       ArgWidth = ArgType->getNumElements();
2836e8d8bef9SDimitry Andric     }
2837e8d8bef9SDimitry Andric 
2838e8d8bef9SDimitry Andric     // If we don't need any of low bits then return zero,
2839e8d8bef9SDimitry Andric     // we know that DemandedMask is non-zero already.
2840e8d8bef9SDimitry Andric     APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
2841e8d8bef9SDimitry Andric     Type *VTy = II.getType();
2842349cc55cSDimitry Andric     if (DemandedElts.isZero()) {
2843e8d8bef9SDimitry Andric       return ConstantInt::getNullValue(VTy);
2844e8d8bef9SDimitry Andric     }
2845e8d8bef9SDimitry Andric 
2846e8d8bef9SDimitry Andric     // We know that the upper bits are set to zero.
2847e8d8bef9SDimitry Andric     Known.Zero.setBitsFrom(ArgWidth);
2848e8d8bef9SDimitry Andric     KnownBitsComputed = true;
2849e8d8bef9SDimitry Andric     break;
2850e8d8bef9SDimitry Andric   }
2851e8d8bef9SDimitry Andric   }
2852bdd1243dSDimitry Andric   return std::nullopt;
2853e8d8bef9SDimitry Andric }
2854e8d8bef9SDimitry Andric 
2855bdd1243dSDimitry Andric std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
2856e8d8bef9SDimitry Andric     InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2857e8d8bef9SDimitry Andric     APInt &UndefElts2, APInt &UndefElts3,
2858e8d8bef9SDimitry Andric     std::function<void(Instruction *, unsigned, APInt, APInt &)>
2859e8d8bef9SDimitry Andric         simplifyAndSetOp) const {
2860e8d8bef9SDimitry Andric   unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
2861e8d8bef9SDimitry Andric   switch (II.getIntrinsicID()) {
2862e8d8bef9SDimitry Andric   default:
2863e8d8bef9SDimitry Andric     break;
2864e8d8bef9SDimitry Andric   case Intrinsic::x86_xop_vfrcz_ss:
2865e8d8bef9SDimitry Andric   case Intrinsic::x86_xop_vfrcz_sd:
2866e8d8bef9SDimitry Andric     // The instructions for these intrinsics are speced to zero upper bits not
2867e8d8bef9SDimitry Andric     // pass them through like other scalar intrinsics. So we shouldn't just
2868e8d8bef9SDimitry Andric     // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
2869e8d8bef9SDimitry Andric     // Instead we should return a zero vector.
2870e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
2871e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
2872e8d8bef9SDimitry Andric       return ConstantAggregateZero::get(II.getType());
2873e8d8bef9SDimitry Andric     }
2874e8d8bef9SDimitry Andric 
2875e8d8bef9SDimitry Andric     // Only the lower element is used.
2876e8d8bef9SDimitry Andric     DemandedElts = 1;
2877e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
2878e8d8bef9SDimitry Andric 
2879e8d8bef9SDimitry Andric     // Only the lower element is undefined. The high elements are zero.
2880e8d8bef9SDimitry Andric     UndefElts = UndefElts[0];
2881e8d8bef9SDimitry Andric     break;
2882e8d8bef9SDimitry Andric 
2883e8d8bef9SDimitry Andric   // Unary scalar-as-vector operations that work column-wise.
2884e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_rcp_ss:
2885e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_rsqrt_ss:
2886e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
2887e8d8bef9SDimitry Andric 
2888e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
2889e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
2890e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
2891e8d8bef9SDimitry Andric       return II.getArgOperand(0);
2892e8d8bef9SDimitry Andric     }
2893e8d8bef9SDimitry Andric     // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
2894e8d8bef9SDimitry Andric     // checks).
2895e8d8bef9SDimitry Andric     break;
2896e8d8bef9SDimitry Andric 
2897e8d8bef9SDimitry Andric   // Binary scalar-as-vector operations that work column-wise. The high
2898e8d8bef9SDimitry Andric   // elements come from operand 0. The low element is a function of both
2899e8d8bef9SDimitry Andric   // operands.
2900e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_min_ss:
2901e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_max_ss:
2902e8d8bef9SDimitry Andric   case Intrinsic::x86_sse_cmp_ss:
2903e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_min_sd:
2904e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_max_sd:
2905e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_cmp_sd: {
2906e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
2907e8d8bef9SDimitry Andric 
2908e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
2909e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
2910e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
2911e8d8bef9SDimitry Andric       return II.getArgOperand(0);
2912e8d8bef9SDimitry Andric     }
2913e8d8bef9SDimitry Andric 
2914e8d8bef9SDimitry Andric     // Only lower element is used for operand 1.
2915e8d8bef9SDimitry Andric     DemandedElts = 1;
2916e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
2917e8d8bef9SDimitry Andric 
2918e8d8bef9SDimitry Andric     // Lower element is undefined if both lower elements are undefined.
2919e8d8bef9SDimitry Andric     // Consider things like undef&0.  The result is known zero, not undef.
2920e8d8bef9SDimitry Andric     if (!UndefElts2[0])
2921e8d8bef9SDimitry Andric       UndefElts.clearBit(0);
2922e8d8bef9SDimitry Andric 
2923e8d8bef9SDimitry Andric     break;
2924e8d8bef9SDimitry Andric   }
2925e8d8bef9SDimitry Andric 
2926e8d8bef9SDimitry Andric   // Binary scalar-as-vector operations that work column-wise. The high
2927e8d8bef9SDimitry Andric   // elements come from operand 0 and the low element comes from operand 1.
2928e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_round_ss:
2929e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_round_sd: {
2930e8d8bef9SDimitry Andric     // Don't use the low element of operand 0.
2931e8d8bef9SDimitry Andric     APInt DemandedElts2 = DemandedElts;
2932e8d8bef9SDimitry Andric     DemandedElts2.clearBit(0);
2933e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
2934e8d8bef9SDimitry Andric 
2935e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
2936e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
2937e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
2938e8d8bef9SDimitry Andric       return II.getArgOperand(0);
2939e8d8bef9SDimitry Andric     }
2940e8d8bef9SDimitry Andric 
2941e8d8bef9SDimitry Andric     // Only lower element is used for operand 1.
2942e8d8bef9SDimitry Andric     DemandedElts = 1;
2943e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
2944e8d8bef9SDimitry Andric 
2945e8d8bef9SDimitry Andric     // Take the high undef elements from operand 0 and take the lower element
2946e8d8bef9SDimitry Andric     // from operand 1.
2947e8d8bef9SDimitry Andric     UndefElts.clearBit(0);
2948e8d8bef9SDimitry Andric     UndefElts |= UndefElts2[0];
2949e8d8bef9SDimitry Andric     break;
2950e8d8bef9SDimitry Andric   }
2951e8d8bef9SDimitry Andric 
2952e8d8bef9SDimitry Andric   // Three input scalar-as-vector operations that work column-wise. The high
2953e8d8bef9SDimitry Andric   // elements come from operand 0 and the low element is a function of all
2954e8d8bef9SDimitry Andric   // three inputs.
2955e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_add_ss_round:
2956e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_div_ss_round:
2957e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_mul_ss_round:
2958e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_sub_ss_round:
2959e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_max_ss_round:
2960e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_min_ss_round:
2961e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_add_sd_round:
2962e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_div_sd_round:
2963e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_mul_sd_round:
2964e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_sub_sd_round:
2965e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_max_sd_round:
2966e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_mask_min_sd_round:
2967e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
2968e8d8bef9SDimitry Andric 
2969e8d8bef9SDimitry Andric     // If lowest element of a scalar op isn't used then use Arg0.
2970e8d8bef9SDimitry Andric     if (!DemandedElts[0]) {
2971e8d8bef9SDimitry Andric       IC.addToWorklist(&II);
2972e8d8bef9SDimitry Andric       return II.getArgOperand(0);
2973e8d8bef9SDimitry Andric     }
2974e8d8bef9SDimitry Andric 
2975e8d8bef9SDimitry Andric     // Only lower element is used for operand 1 and 2.
2976e8d8bef9SDimitry Andric     DemandedElts = 1;
2977e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
2978e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
2979e8d8bef9SDimitry Andric 
2980e8d8bef9SDimitry Andric     // Lower element is undefined if all three lower elements are undefined.
2981e8d8bef9SDimitry Andric     // Consider things like undef&0.  The result is known zero, not undef.
2982e8d8bef9SDimitry Andric     if (!UndefElts2[0] || !UndefElts3[0])
2983e8d8bef9SDimitry Andric       UndefElts.clearBit(0);
2984e8d8bef9SDimitry Andric     break;
2985e8d8bef9SDimitry Andric 
2986e8d8bef9SDimitry Andric   // TODO: Add fmaddsub support?
2987e8d8bef9SDimitry Andric   case Intrinsic::x86_sse3_addsub_pd:
2988e8d8bef9SDimitry Andric   case Intrinsic::x86_sse3_addsub_ps:
2989e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_addsub_pd_256:
2990e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_addsub_ps_256: {
2991e8d8bef9SDimitry Andric     // If none of the even or none of the odd lanes are required, turn this
2992e8d8bef9SDimitry Andric     // into a generic FP math instruction.
2993e8d8bef9SDimitry Andric     APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
2994e8d8bef9SDimitry Andric     APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
2995e8d8bef9SDimitry Andric     bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
2996e8d8bef9SDimitry Andric     bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
2997e8d8bef9SDimitry Andric     if (IsSubOnly || IsAddOnly) {
2998e8d8bef9SDimitry Andric       assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
2999e8d8bef9SDimitry Andric       IRBuilderBase::InsertPointGuard Guard(IC.Builder);
3000e8d8bef9SDimitry Andric       IC.Builder.SetInsertPoint(&II);
3001e8d8bef9SDimitry Andric       Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
3002e8d8bef9SDimitry Andric       return IC.Builder.CreateBinOp(
3003e8d8bef9SDimitry Andric           IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
3004e8d8bef9SDimitry Andric     }
3005e8d8bef9SDimitry Andric 
3006e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3007e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3008e8d8bef9SDimitry Andric     UndefElts &= UndefElts2;
3009e8d8bef9SDimitry Andric     break;
3010e8d8bef9SDimitry Andric   }
3011e8d8bef9SDimitry Andric 
301281ad6265SDimitry Andric   // General per-element vector operations.
301381ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_d:
301481ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_d_256:
301581ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_q:
301681ad6265SDimitry Andric   case Intrinsic::x86_avx2_psllv_q_256:
301781ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_d:
301881ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_d_256:
301981ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_q:
302081ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrlv_q_256:
302181ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrav_d:
302281ad6265SDimitry Andric   case Intrinsic::x86_avx2_psrav_d_256: {
302381ad6265SDimitry Andric     simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
302481ad6265SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
302581ad6265SDimitry Andric     UndefElts &= UndefElts2;
302681ad6265SDimitry Andric     break;
302781ad6265SDimitry Andric   }
302881ad6265SDimitry Andric 
3029e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packssdw_128:
3030e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packsswb_128:
3031e8d8bef9SDimitry Andric   case Intrinsic::x86_sse2_packuswb_128:
3032e8d8bef9SDimitry Andric   case Intrinsic::x86_sse41_packusdw:
3033e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packssdw:
3034e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packsswb:
3035e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packusdw:
3036e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_packuswb:
3037e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packssdw_512:
3038e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packsswb_512:
3039e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packusdw_512:
3040e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_packuswb_512: {
3041e8d8bef9SDimitry Andric     auto *Ty0 = II.getArgOperand(0)->getType();
3042e8d8bef9SDimitry Andric     unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
3043e8d8bef9SDimitry Andric     assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
3044e8d8bef9SDimitry Andric 
3045e8d8bef9SDimitry Andric     unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
3046e8d8bef9SDimitry Andric     unsigned VWidthPerLane = VWidth / NumLanes;
3047e8d8bef9SDimitry Andric     unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
3048e8d8bef9SDimitry Andric 
3049e8d8bef9SDimitry Andric     // Per lane, pack the elements of the first input and then the second.
3050e8d8bef9SDimitry Andric     // e.g.
3051e8d8bef9SDimitry Andric     // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
3052e8d8bef9SDimitry Andric     // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
3053e8d8bef9SDimitry Andric     for (int OpNum = 0; OpNum != 2; ++OpNum) {
3054e8d8bef9SDimitry Andric       APInt OpDemandedElts(InnerVWidth, 0);
3055e8d8bef9SDimitry Andric       for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3056e8d8bef9SDimitry Andric         unsigned LaneIdx = Lane * VWidthPerLane;
3057e8d8bef9SDimitry Andric         for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
3058e8d8bef9SDimitry Andric           unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
3059e8d8bef9SDimitry Andric           if (DemandedElts[Idx])
3060e8d8bef9SDimitry Andric             OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
3061e8d8bef9SDimitry Andric         }
3062e8d8bef9SDimitry Andric       }
3063e8d8bef9SDimitry Andric 
3064e8d8bef9SDimitry Andric       // Demand elements from the operand.
3065e8d8bef9SDimitry Andric       APInt OpUndefElts(InnerVWidth, 0);
3066e8d8bef9SDimitry Andric       simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);
3067e8d8bef9SDimitry Andric 
3068e8d8bef9SDimitry Andric       // Pack the operand's UNDEF elements, one lane at a time.
3069e8d8bef9SDimitry Andric       OpUndefElts = OpUndefElts.zext(VWidth);
3070e8d8bef9SDimitry Andric       for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3071e8d8bef9SDimitry Andric         APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
3072e8d8bef9SDimitry Andric         LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
3073e8d8bef9SDimitry Andric         LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
3074e8d8bef9SDimitry Andric         UndefElts |= LaneElts;
3075e8d8bef9SDimitry Andric       }
3076e8d8bef9SDimitry Andric     }
3077e8d8bef9SDimitry Andric     break;
3078e8d8bef9SDimitry Andric   }
3079e8d8bef9SDimitry Andric 
3080e8d8bef9SDimitry Andric   // PSHUFB
3081e8d8bef9SDimitry Andric   case Intrinsic::x86_ssse3_pshuf_b_128:
3082e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_pshuf_b:
3083e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_pshuf_b_512:
3084e8d8bef9SDimitry Andric   // PERMILVAR
3085e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps:
3086e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_ps_256:
3087e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_ps_512:
3088e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd:
3089e8d8bef9SDimitry Andric   case Intrinsic::x86_avx_vpermilvar_pd_256:
3090e8d8bef9SDimitry Andric   case Intrinsic::x86_avx512_vpermilvar_pd_512:
3091e8d8bef9SDimitry Andric   // PERMV
3092e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permd:
3093e8d8bef9SDimitry Andric   case Intrinsic::x86_avx2_permps: {
3094e8d8bef9SDimitry Andric     simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
3095e8d8bef9SDimitry Andric     break;
3096e8d8bef9SDimitry Andric   }
3097e8d8bef9SDimitry Andric 
3098e8d8bef9SDimitry Andric   // SSE4A instructions leave the upper 64-bits of the 128-bit result
3099e8d8bef9SDimitry Andric   // in an undefined state.
3100e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_extrq:
3101e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_extrqi:
3102e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_insertq:
3103e8d8bef9SDimitry Andric   case Intrinsic::x86_sse4a_insertqi:
3104e8d8bef9SDimitry Andric     UndefElts.setHighBits(VWidth / 2);
3105e8d8bef9SDimitry Andric     break;
3106e8d8bef9SDimitry Andric   }
3107bdd1243dSDimitry Andric   return std::nullopt;
3108e8d8bef9SDimitry Andric }
3109