//===-- X86InstCombineIntrinsic.cpp - X86 specific InstCombine pass -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// X86 target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#include "X86TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "x86tti"

/// Return a constant boolean vector that has true elements in all positions
/// where the input constant data vector has an element with the sign bit set.
static Constant *getNegativeIsTrueBoolVec(Constant *V) {
  VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
  V = ConstantExpr::getBitCast(V, IntTy);
  V = ConstantExpr::getICmp(CmpInst::ICMP_SGT, Constant::getNullValue(IntTy),
                            V);
  return V;
}

/// Convert the x86 XMM integer vector mask to a vector of bools based on
/// each element's most significant bit (the sign bit).
static Value *getBoolVecFromMask(Value *Mask) {
  // Fold Constant Mask.
  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
    return getNegativeIsTrueBoolVec(ConstantMask);

  // Mask was extended from a boolean vector.
  Value *ExtMask;
  if (PatternMatch::match(
          Mask, PatternMatch::m_SExt(PatternMatch::m_Value(ExtMask))) &&
      ExtMask->getType()->isIntOrIntVectorTy(1))
    return ExtMask;

  return nullptr;
}
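
// Illustrative sketch (values assumed, not taken from the source): a constant
// mask <4 x i32> <i32 -1, i32 7, i32 -8, i32 0> has the sign bit set only in
// elements 0 and 2, so getNegativeIsTrueBoolVec folds it to
// <4 x i1> <i1 true, i1 false, i1 true, i1 false>.
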
// TODO: If the x86 backend knew how to convert a bool vector mask back to an
// XMM register mask efficiently, we could transform all x86 masked intrinsics
// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Constant *ZeroVec = Constant::getNullValue(II.getType());

  // Zero Mask - masked load instruction creates a zero vector.
  if (isa<ConstantAggregateZero>(Mask))
    return IC.replaceInstUsesWith(II, ZeroVec);

  // The mask is constant or extended from a bool vector. Convert this x86
  // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
  if (Value *BoolMask = getBoolVecFromMask(Mask)) {
    // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
    // the LLVM intrinsic definition for the pointer argument.
    unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
    PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
    Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");

    // The pass-through vector for an x86 masked load is a zero vector.
    CallInst *NewMaskedLoad = IC.Builder.CreateMaskedLoad(
        II.getType(), PtrCast, Align(1), BoolMask, ZeroVec);
    return IC.replaceInstUsesWith(II, NewMaskedLoad);
  }

  return nullptr;
}
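
// Hedged IR sketch of the rewrite above (intrinsic names and mangling may vary
// by LLVM version):
//   %v = call <4 x float> @llvm.x86.avx.maskload.ps(ptr %p, <4 x i32> %m)
// with %m = sext <4 x i1> %b to <4 x i32> becomes:
//   %v = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 1,
//            <4 x i1> %b, <4 x float> zeroinitializer)
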
// TODO: If the x86 backend knew how to convert a bool vector mask back to an
// XMM register mask efficiently, we could transform all x86 masked intrinsics
// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
  Value *Ptr = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *Vec = II.getOperand(2);

  // Zero Mask - this masked store instruction does nothing.
  if (isa<ConstantAggregateZero>(Mask)) {
    IC.eraseInstFromFunction(II);
    return true;
  }

  // The SSE2 version is too weird (e.g., unaligned but non-temporal) to do
  // anything else at this level.
  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
    return false;

  // The mask is constant or extended from a bool vector. Convert this x86
  // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
  if (Value *BoolMask = getBoolVecFromMask(Mask)) {
    unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
    PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
    Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");

    IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);

    // 'Replace uses' doesn't work for stores. Erase the original masked store.
    IC.eraseInstFromFunction(II);
    return true;
  }

  return false;
}

static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;
  bool IsImm = false;

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    IsImm = true;
    [[fallthrough]];
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  Type *AmtVT = Amt->getType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift. If it's guaranteed to be out of range, logical shifts
  // combine to zero and arithmetic shifts are clamped to (BitWidth - 1).
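  // For instance (illustrative constants): a psrai.d by a known amount of 3
  // becomes 'ashr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>', a psrli.d by
  // 32 folds to zeroinitializer, and a psrai.d by 32 is clamped to an ashr
  // by 31.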
  if (IsImm) {
    assert(AmtVT->isIntegerTy(32) && "Unexpected shift-by-immediate type");
    KnownBits KnownAmtBits =
        llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
    if (KnownAmtBits.getMaxValue().ult(BitWidth)) {
      Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
      Amt = Builder.CreateVectorSplat(VWidth, Amt);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
    if (KnownAmtBits.getMinValue().uge(BitWidth)) {
      if (LogicalShift)
        return ConstantAggregateZero::get(VT);
      Amt = ConstantInt::get(SVT, BitWidth - 1);
      return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
    }
  } else {
    // Ensure the first element has an in-range value and the rest of the
    // elements in the bottom 64 bits are zero.
    assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
           cast<VectorType>(AmtVT)->getElementType() == SVT &&
           "Unexpected shift-by-scalar type");
    unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
    APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
    APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
    KnownBits KnownLowerBits = llvm::computeKnownBits(
        Amt, DemandedLower, II.getModule()->getDataLayout());
    KnownBits KnownUpperBits = llvm::computeKnownBits(
        Amt, DemandedUpper, II.getModule()->getDataLayout());
    if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
        (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
      SmallVector<int, 16> ZeroSplat(VWidth, 0);
      Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
      return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                        : Builder.CreateLShr(Vec, Amt))
                           : Builder.CreateAShr(Vec, Amt));
    }
  }

  // Simplify if count is constant vector.
  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
  if (!CDV)
    return nullptr;

  // SSE2/AVX2 uses only the first 64 bits of the 128-bit vector
  // operand to compute the shift amount.
  assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");
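
  // Worked example with assumed constants: for psrl.w (BitWidth == 16), an
  // amount vector <8 x i16> <i16 3, i16 0, i16 0, i16 0, ...> packs its low
  // four elements into a 64-bit count of 3 in the loop below.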
  // Concatenate the sub-elements to create the 64-bit value.
  APInt Count(64, 0);
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count <<= BitWidth;
    Count |= SubElt->getValue().zextOrTrunc(64);
  }

  // If shift-by-zero then just return the original value.
  if (Count.isZero())
    return Vec;

  // Handle cases when Shift >= BitWidth.
  if (Count.uge(BitWidth)) {
    // If LogicalShift - just return zero.
    if (LogicalShift)
      return ConstantAggregateZero::get(VT);

    // If ArithmeticShift - clamp Shift to (BitWidth - 1).
    Count = APInt(64, BitWidth - 1);
  }

  // Get a constant vector of the same type as the first operand.
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}

// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
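// For example (illustrative): psrlv.d with amounts <i32 1, i32 32> keeps
// lane 0 as a lshr by 1 but forces lane 1 to zero, whereas a generic IR
// 'lshr' by 32 would yield poison for that lane.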
static Value *simplifyX86varShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
  default:
    llvm_unreachable("Unexpected intrinsic!");
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(II.getType());
  Type *SVT = VT->getElementType();
  int NumElts = VT->getNumElements();
  int BitWidth = SVT->getIntegerBitWidth();

  // If the shift amount is guaranteed to be in-range we can replace it with a
  // generic shift.
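  // E.g. (sketch): if every element of Amt is known to be less than 32 for a
  // v8i32 psllv, the whole call becomes a single 'shl <8 x i32> %v, %amt'.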
  KnownBits KnownAmt =
      llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
  if (KnownAmt.getMaxValue().ult(BitWidth)) {
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
  }

  // Simplify if all shift amounts are constant/undef.
  auto *CShift = dyn_cast<Constant>(Amt);
  if (!CShift)
    return nullptr;

  // Collect each element's shift amount.
  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
  bool AnyOutOfRange = false;
  SmallVector<int, 8> ShiftAmts;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (isa_and_nonnull<UndefValue>(CElt)) {
      ShiftAmts.push_back(-1);
      continue;
    }

    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    if (!COp)
      return nullptr;

    // Handle out of range shifts.
    // If LogicalShift - set to BitWidth (special case).
    // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
    APInt ShiftVal = COp->getValue();
    if (ShiftVal.uge(BitWidth)) {
      AnyOutOfRange = LogicalShift;
      ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
      continue;
    }

    ShiftAmts.push_back((int)ShiftVal.getZExtValue());
  }

  // If all elements out of range or UNDEF, return vector of zeros/undefs.
  // ArithmeticShift should only hit this if they are all UNDEF.
  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
  if (llvm::all_of(ShiftAmts, OutOfRange)) {
    SmallVector<Constant *, 8> ConstantVec;
    for (int Idx : ShiftAmts) {
      if (Idx < 0) {
        ConstantVec.push_back(UndefValue::get(SVT));
      } else {
        assert(LogicalShift && "Logical shift expected");
        ConstantVec.push_back(ConstantInt::getNullValue(SVT));
      }
    }
    return ConstantVector::get(ConstantVec);
  }

  // We can't handle only some out of range values with generic logical shifts.
  if (AnyOutOfRange)
    return nullptr;
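
  // Worked example with assumed constants: for psrav.d with amounts
  // <i32 1, i32 undef, i32 40, i32 2>, the out-of-range 40 clamps to 31 (sign
  // splat), undef stays undef, and the vector built below is
  // <i32 1, i32 undef, i32 31, i32 2>.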
  // Build the shift amount constant vector.
  SmallVector<Constant *, 8> ShiftVecAmts;
  for (int Idx : ShiftAmts) {
    if (Idx < 0)
      ShiftVecAmts.push_back(UndefValue::get(SVT));
    else
      ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
  }
  auto ShiftVec = ConstantVector::get(ShiftVecAmts);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}

static Value *simplifyX86pack(IntrinsicInst &II,
                              InstCombiner::BuilderTy &Builder, bool IsSigned) {
  Value *Arg0 = II.getArgOperand(0);
  Value *Arg1 = II.getArgOperand(1);
  Type *ResTy = II.getType();

  // Fast all undef handling.
  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
    return UndefValue::get(ResTy);

  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
  unsigned NumSrcElts = ArgTy->getNumElements();
  assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
         "Unexpected packing types");

  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
  unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
  assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
         "Unexpected packing types");

  // Constant folding.
  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
    return nullptr;

  // Clamp Values - signed/unsigned both use signed clamp values, but they
  // differ on the min/max values.
  APInt MinValue, MaxValue;
  if (IsSigned) {
    // PACKSS: Truncate signed value with signed saturation.
    // Source values less than dst minint are saturated to minint.
    // Source values greater than dst maxint are saturated to maxint.
    MinValue =
        APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
    MaxValue =
        APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
  } else {
    // PACKUS: Truncate signed value with unsigned saturation.
    // Source values less than zero are saturated to zero.
    // Source values greater than dst maxuint are saturated to maxuint.
    MinValue = APInt::getZero(SrcScalarSizeInBits);
    MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
  }

  auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
  auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
  Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
  Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
  Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
  Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);

  // Shuffle clamped args together at the lane level.
  SmallVector<int, 32> PackMask;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
  }
  auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);

  // Truncate to dst size.
  return Builder.CreateTrunc(Shuffle, ResTy);
}
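
// Hedged example of the constant fold above: packsswb of a v8i16 element
// holding 300 first clamps it to the i8 signed max 127, so the truncating
// shuffle preserves PACKSS saturation semantics.
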
static Value *simplifyX86movmsk(const IntrinsicInst &II,
                                InstCombiner::BuilderTy &Builder) {
  Value *Arg = II.getArgOperand(0);
  Type *ResTy = II.getType();

  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
  if (isa<UndefValue>(Arg))
    return Constant::getNullValue(ResTy);

  auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
  // We can't easily peek through x86_mmx types.
  if (!ArgTy)
    return nullptr;

  // Expand MOVMSK to compare/bitcast/zext:
  // e.g. PMOVMSKB(v16i8 x):
  // %cmp = icmp slt <16 x i8> %x, zeroinitializer
  // %int = bitcast <16 x i1> %cmp to i16
  // %res = zext i16 %int to i32
  unsigned NumElts = ArgTy->getNumElements();
  Type *IntegerTy = Builder.getIntNTy(NumElts);

  Value *Res = Builder.CreateBitCast(Arg, VectorType::getInteger(ArgTy));
  Res = Builder.CreateIsNeg(Res);
  Res = Builder.CreateBitCast(Res, IntegerTy);
  Res = Builder.CreateZExtOrTrunc(Res, ResTy);
  return Res;
}

static Value *simplifyX86addcarry(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  Value *CarryIn = II.getArgOperand(0);
  Value *Op1 = II.getArgOperand(1);
  Value *Op2 = II.getArgOperand(2);
  Type *RetTy = II.getType();
  Type *OpTy = Op1->getType();
  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
         "Unexpected types for x86 addcarry");

  // If carry-in is zero, this is just an unsigned add with overflow.
  if (match(CarryIn, PatternMatch::m_ZeroInt())) {
    Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
                                          {Op1, Op2});
    // The types have to be adjusted to match the x86 call types.
    Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
    Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
                                       Builder.getInt8Ty());
    Value *Res = PoisonValue::get(RetTy);
    Res = Builder.CreateInsertValue(Res, UAddOV, 0);
    return Builder.CreateInsertValue(Res, UAddResult, 1);
  }

  return nullptr;
}
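
// Sketch of the rewrite above (illustrative IR): with a zero carry-in,
//   {i8, i64} @llvm.x86.addcarry.64(i8 0, i64 %a, i64 %b)
// becomes a @llvm.uadd.with.overflow.i64 call whose i1 overflow flag is
// zero-extended to i8 and reinserted to rebuild the {i8, i64} struct.
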
static Value *simplifyTernarylogic(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {

  auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
  if (!ArgImm || ArgImm->getValue().uge(256))
    return nullptr;

  Value *ArgA = II.getArgOperand(0);
  Value *ArgB = II.getArgOperand(1);
  Value *ArgC = II.getArgOperand(2);

  Type *Ty = II.getType();

  auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
  };
  auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
  };
  auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
  };
  auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
    return {Builder.CreateNot(V.first), ~V.second};
  };
  auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
  auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
  auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };

  bool AIsConst = match(ArgA, PatternMatch::m_ImmConstant());
  bool BIsConst = match(ArgB, PatternMatch::m_ImmConstant());
  bool CIsConst = match(ArgC, PatternMatch::m_ImmConstant());

  bool ABIsConst = AIsConst && BIsConst;
  bool ACIsConst = AIsConst && CIsConst;
  bool BCIsConst = BIsConst && CIsConst;
  bool ABCIsConst = AIsConst && BIsConst && CIsConst;

  // Use for verification. It's a big table. It's difficult to go from Imm ->
  // logic ops, but easy to verify that a set of logic ops is correct. We track
  // the logic ops through the second value in the pair. At the end it should
  // equal Imm.
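  // For example, Imm 0x80 selects A & B & C: And(And(A, B), C) tracks
  // (0xf0 & 0xcc) & 0xaa == 0x80, matching the immediate.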
  std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
  std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
  std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
  std::pair<Value *, uint8_t> Res = {nullptr, 0};

  // Currently we only handle cases that convert directly to another instruction
  // or cases where all the ops are constant. This is because we don't properly
  // handle creating ternary ops in the backend, so splitting them here may
  // cause regressions. As the backend improves, uncomment more cases.

  uint8_t Imm = ArgImm->getValue().getZExtValue();
  switch (Imm) {
  case 0x0:
    Res = {Constant::getNullValue(Ty), 0};
    break;
  case 0x1:
    if (ABCIsConst)
      Res = Nor(Or(A, B), C);
    break;
  case 0x2:
    if (ABCIsConst)
      Res = And(Nor(A, B), C);
    break;
  case 0x3:
    if (ABIsConst)
      Res = Nor(A, B);
    break;
  case 0x4:
    if (ABCIsConst)
      Res = And(Nor(A, C), B);
    break;
  case 0x5:
    if (ACIsConst)
      Res = Nor(A, C);
    break;
  case 0x6:
    if (ABCIsConst)
      Res = Nor(A, Xnor(B, C));
    break;
  case 0x7:
    if (ABCIsConst)
      Res = Nor(A, And(B, C));
    break;
  case 0x8:
    if (ABCIsConst)
      Res = Nor(A, Nand(B, C));
    break;
  case 0x9:
    if (ABCIsConst)
      Res = Nor(A, Xor(B, C));
    break;
  case 0xa:
    if (ACIsConst)
      Res = Nor(A, Not(C));
    break;
  case 0xb:
    if (ABCIsConst)
      Res = Nor(A, Nor(C, Not(B)));
    break;
  case 0xc:
    if (ABIsConst)
      Res = Nor(A, Not(B));
    break;
  case 0xd:
    if (ABCIsConst)
      Res = Nor(A, Nor(B, Not(C)));
    break;
  case 0xe:
    if (ABCIsConst)
      Res = Nor(A, Nor(B, C));
    break;
  case 0xf:
    Res = Not(A);
    break;
  case 0x10:
    if (ABCIsConst)
      Res = And(A, Nor(B, C));
    break;
  case 0x11:
    if (BCIsConst)
      Res = Nor(B, C);
    break;
  case 0x12:
    if (ABCIsConst)
      Res = Nor(Xnor(A, C), B);
    break;
  case 0x13:
    if (ABCIsConst)
      Res = Nor(And(A, C), B);
    break;
  case 0x14:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), C);
    break;
  case 0x15:
    if (ABCIsConst)
      Res = Nor(And(A, B), C);
    break;
  case 0x16:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), And(Nand(A, B), C));
    break;
  case 0x17:
    if (ABCIsConst)
      Res = Xor(Or(A, B), Or(Xnor(A, B), C));
    break;
  case 0x18:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Xnor(A, C));
    break;
  case 0x19:
    if (ABCIsConst)
      Res = And(Nand(A, B), Xnor(B, C));
    break;
  case 0x1a:
    if (ABCIsConst)
      Res = Xor(A, Or(And(A, B), C));
    break;
  case 0x1b:
    if (ABCIsConst)
      Res = Xor(A, Or(Xnor(A, B), C));
    break;
  case 0x1c:
    if (ABCIsConst)
      Res = Xor(A, Or(And(A, C), B));
    break;
  case 0x1d:
    if (ABCIsConst)
      Res = Xor(A, Or(Xnor(A, C), B));
    break;
  case 0x1e:
    if (ABCIsConst)
      Res = Xor(A, Or(B, C));
    break;
  case 0x1f:
    if (ABCIsConst)
      Res = Nand(A, Or(B, C));
    break;
  case 0x20:
    if (ABCIsConst)
      Res = Nor(Nand(A, C), B);
    break;
  case 0x21:
    if (ABCIsConst)
      Res = Nor(Xor(A, C), B);
    break;
  case 0x22:
    if (BCIsConst)
      Res = Nor(B, Not(C));
    break;
  case 0x23:
    if (ABCIsConst)
      Res = Nor(B, Nor(C, Not(A)));
    break;
  case 0x24:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Xor(A, C));
    break;
  case 0x25:
    if (ABCIsConst)
      Res = Xor(A, Nand(Nand(A, B), C));
    break;
  case 0x26:
    if (ABCIsConst)
      Res = And(Nand(A, B), Xor(B, C));
    break;
  case 0x27:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), C), B);
    break;
  case 0x28:
    if (ABCIsConst)
      Res = And(Xor(A, B), C);
    break;
  case 0x29:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), Nor(And(A, B), C));
    break;
  case 0x2a:
    if (ABCIsConst)
      Res = And(Nand(A, B), C);
    break;
  case 0x2b:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), Xor(A, C)), A);
    break;
  case 0x2c:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Nor(B, C));
    break;
  case 0x2d:
    if (ABCIsConst)
      Res = Xor(A, Or(B, Not(C)));
    break;
  case 0x2e:
    if (ABCIsConst)
      Res = Xor(A, Or(Xor(A, C), B));
    break;
  case 0x2f:
    if (ABCIsConst)
      Res = Nand(A, Or(B, Not(C)));
    break;
  case 0x30:
    if (ABIsConst)
      Res = Nor(B, Not(A));
    break;
  case 0x31:
    if (ABCIsConst)
      Res = Nor(Nor(A, Not(C)), B);
    break;
  case 0x32:
    if (ABCIsConst)
      Res = Nor(Nor(A, C), B);
    break;
  case 0x33:
    Res = Not(B);
    break;
  case 0x34:
    if (ABCIsConst)
      Res = And(Xor(A, B), Nand(B, C));
    break;
  case 0x35:
    if (ABCIsConst)
      Res = Xor(B, Or(A, Xnor(B, C)));
    break;
  case 0x36:
    if (ABCIsConst)
      Res = Xor(Or(A, C), B);
    break;
  case 0x37:
    if (ABCIsConst)
      Res = Nand(Or(A, C), B);
    break;
  case 0x38:
    if (ABCIsConst)
      Res = Nor(Xnor(A, B), Nor(A, C));
    break;
  case 0x39:
    if (ABCIsConst)
      Res = Xor(Or(A, Not(C)), B);
    break;
  case 0x3a:
    if (ABCIsConst)
      Res = Xor(B, Or(A, Xor(B, C)));
    break;
  case 0x3b:
    if (ABCIsConst)
      Res = Nand(Or(A, Not(C)), B);
    break;
  case 0x3c:
    Res = Xor(A, B);
    break;
  case 0x3d:
    if (ABCIsConst)
      Res = Xor(A, Or(Nor(A, C), B));
    break;
  case 0x3e:
    if (ABCIsConst)
      Res = Xor(A, Or(Nor(A, Not(C)), B));
    break;
  case 0x3f:
    if (ABIsConst)
      Res = Nand(A, B);
    break;
  case 0x40:
    if (ABCIsConst)
      Res = Nor(Nand(A, B), C);
    break;
  case 0x41:
    if (ABCIsConst)
      Res = Nor(Xor(A, B), C);
    break;
  case 0x42:
    if (ABCIsConst)
      Res = Nor(Xor(A, B), Xnor(A, C));
    break;
  case 0x43:
    if (ABCIsConst)
      Res = Xor(A, Nand(Nand(A, C), B));
    break;
  case 0x44:
    if (BCIsConst)
      Res = Nor(C, Not(B));
    break;
  case 0x45:
    if (ABCIsConst)
      Res = Nor(Nor(B, Not(A)), C);
    break;
  case 0x46:
    if (ABCIsConst)
      Res = Xor(Or(And(A, C), B), C);
    break;
  case 0x47:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, C), B), C);
    break;
  case 0x48:
    if (ABCIsConst)
      Res = And(Xor(A, C), B);
    break;
  case 0x49:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), And(A, C)), C);
    break;
  case 0x4a:
    if (ABCIsConst)
      Res = Nor(Xnor(A, C), Nor(B, C));
    break;
  case 0x4b:
    if (ABCIsConst)
      Res = Xor(A, Or(C, Not(B)));
    break;
  case 0x4c:
    if (ABCIsConst)
      Res = And(Nand(A, C), B);
    break;
  case 0x4d:
    if (ABCIsConst)
      Res = Xor(Or(Xor(A, B), Xnor(A, C)), A);
    break;
  case 0x4e:
    if (ABCIsConst)
      Res = Xor(A, Or(Xor(A, B), C));
    break;
  case 0x4f:
    if (ABCIsConst)
      Res = Nand(A, Nand(B, Not(C)));
    break;
  case 0x50:
    if (ACIsConst)
      Res = Nor(C, Not(A));
    break;
  case 0x51:
    if (ABCIsConst)
      Res = Nor(Nor(A, Not(B)), C);
    break;
  case 0x52:
    if (ABCIsConst)
      Res = And(Xor(A, C), Nand(B, C));
    break;
  case 0x53:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(B, C), A), C);
    break;
  case 0x54:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), C);
    break;
  case 0x55:
    Res = Not(C);
    break;
  case 0x56:
    if (ABCIsConst)
      Res = Xor(Or(A, B), C);
    break;
  case 0x57:
    if (ABCIsConst)
      Res = Nand(Or(A, B), C);
    break;
  case 0x58:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), Xnor(A, C));
    break;
  case 0x59:
    if (ABCIsConst)
      Res = Xor(Or(A, Not(B)), C);
    break;
  case 0x5a:
    Res = Xor(A, C);
    break;
  case 0x5b:
    if (ABCIsConst)
      Res = Xor(A, Or(Nor(A, B), C));
    break;
  case 0x5c:
    if (ABCIsConst)
      Res = Xor(Or(Xor(B, C), A), C);
    break;
  case 0x5d:
    if (ABCIsConst)
      Res = Nand(Or(A, Not(B)), C);
    break;
  case 0x5e:
    if (ABCIsConst)
      Res = Xor(A, Or(Nor(A, Not(B)), C));
    break;
  case 0x5f:
    if (ACIsConst)
      Res = Nand(A, C);
    break;
  case 0x60:
    if (ABCIsConst)
      Res = And(A, Xor(B, C));
    break;
  case 0x61:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), And(B, C)), C);
    break;
  case 0x62:
    if (ABCIsConst)
      Res = Nor(Nor(A, C), Xnor(B, C));
    break;
  case 0x63:
    if (ABCIsConst)
      Res = Xor(B, Or(C, Not(A)));
    break;
  case 0x64:
    if (ABCIsConst)
      Res = Nor(Nor(A, B), Xnor(B, C));
    break;
  case 0x65:
    if (ABCIsConst)
      Res = Xor(Or(B, Not(A)), C);
    break;
  case 0x66:
    Res = Xor(B, C);
    break;
  case 0x67:
    if (ABCIsConst)
      Res = Or(Nor(A, B), Xor(B, C));
    break;
  case 0x68:
    if (ABCIsConst)
      Res = Xor(Xor(A, B), Nor(Nor(A, B), C));
    break;
  case 0x69:
    if (ABCIsConst)
      Res = Xor(Xnor(A, B), C);
    break;
  case 0x6a:
    if (ABCIsConst)
      Res = Xor(And(A, B), C);
    break;
  case 0x6b:
    if (ABCIsConst)
      Res = Or(Nor(A, B), Xor(Xnor(A, B), C));
    break;
  case 0x6c:
    if (ABCIsConst)
      Res = Xor(And(A, C), B);
    break;
  case 0x6d:
    if (ABCIsConst)
      Res = Xor(Or(Xnor(A, B), Nor(A, C)), C);
    break;
  case 0x6e:
    if (ABCIsConst)
      Res = Or(Nor(A, Not(B)), Xor(B, C));
    break;
  case 0x6f:
    if (ABCIsConst)
      Res = Nand(A, Xnor(B, C));
    break;
  case 0x70:
    if (ABCIsConst)
      Res = And(A, Nand(B, C));
    break;
  case 0x71:
    if (ABCIsConst)
      Res = Xor(Nor(Xor(A, B), Xor(A, C)), A);
    break;
  case 0x72:
    if (ABCIsConst)
      Res = Xor(Or(Xor(A, B), C), B);
    break;
  case 0x73:
    if (ABCIsConst)
      Res = Nand(Nand(A, Not(C)), B);
    break;
  case 0x74:
    if (ABCIsConst)
      Res = Xor(Or(Xor(A, C), B), C);
    break;
  case 0x75:
    if (ABCIsConst)
      Res = Nand(Nand(A, Not(B)), C);
1078*06c3fb27SDimitry Andric break; 1079*06c3fb27SDimitry Andric case 0x76: 1080*06c3fb27SDimitry Andric if (ABCIsConst) 1081*06c3fb27SDimitry Andric Res = Xor(B, Or(Nor(B, Not(A)), C)); 1082*06c3fb27SDimitry Andric break; 1083*06c3fb27SDimitry Andric case 0x77: 1084*06c3fb27SDimitry Andric if (BCIsConst) 1085*06c3fb27SDimitry Andric Res = Nand(B, C); 1086*06c3fb27SDimitry Andric break; 1087*06c3fb27SDimitry Andric case 0x78: 1088*06c3fb27SDimitry Andric if (ABCIsConst) 1089*06c3fb27SDimitry Andric Res = Xor(A, And(B, C)); 1090*06c3fb27SDimitry Andric break; 1091*06c3fb27SDimitry Andric case 0x79: 1092*06c3fb27SDimitry Andric if (ABCIsConst) 1093*06c3fb27SDimitry Andric Res = Xor(Or(Xnor(A, B), Nor(B, C)), C); 1094*06c3fb27SDimitry Andric break; 1095*06c3fb27SDimitry Andric case 0x7a: 1096*06c3fb27SDimitry Andric if (ABCIsConst) 1097*06c3fb27SDimitry Andric Res = Or(Xor(A, C), Nor(B, Not(A))); 1098*06c3fb27SDimitry Andric break; 1099*06c3fb27SDimitry Andric case 0x7b: 1100*06c3fb27SDimitry Andric if (ABCIsConst) 1101*06c3fb27SDimitry Andric Res = Nand(Xnor(A, C), B); 1102*06c3fb27SDimitry Andric break; 1103*06c3fb27SDimitry Andric case 0x7c: 1104*06c3fb27SDimitry Andric if (ABCIsConst) 1105*06c3fb27SDimitry Andric Res = Or(Xor(A, B), Nor(C, Not(A))); 1106*06c3fb27SDimitry Andric break; 1107*06c3fb27SDimitry Andric case 0x7d: 1108*06c3fb27SDimitry Andric if (ABCIsConst) 1109*06c3fb27SDimitry Andric Res = Nand(Xnor(A, B), C); 1110*06c3fb27SDimitry Andric break; 1111*06c3fb27SDimitry Andric case 0x7e: 1112*06c3fb27SDimitry Andric if (ABCIsConst) 1113*06c3fb27SDimitry Andric Res = Or(Xor(A, B), Xor(A, C)); 1114*06c3fb27SDimitry Andric break; 1115*06c3fb27SDimitry Andric case 0x7f: 1116*06c3fb27SDimitry Andric if (ABCIsConst) 1117*06c3fb27SDimitry Andric Res = Nand(And(A, B), C); 1118*06c3fb27SDimitry Andric break; 1119*06c3fb27SDimitry Andric case 0x80: 1120*06c3fb27SDimitry Andric if (ABCIsConst) 1121*06c3fb27SDimitry Andric Res = And(And(A, B), C); 1122*06c3fb27SDimitry Andric break; 1123*06c3fb27SDimitry Andric case 0x81: 1124*06c3fb27SDimitry Andric if (ABCIsConst) 1125*06c3fb27SDimitry Andric Res = Nor(Xor(A, B), Xor(A, C)); 1126*06c3fb27SDimitry Andric break; 1127*06c3fb27SDimitry Andric case 0x82: 1128*06c3fb27SDimitry Andric if (ABCIsConst) 1129*06c3fb27SDimitry Andric Res = And(Xnor(A, B), C); 1130*06c3fb27SDimitry Andric break; 1131*06c3fb27SDimitry Andric case 0x83: 1132*06c3fb27SDimitry Andric if (ABCIsConst) 1133*06c3fb27SDimitry Andric Res = Nor(Xor(A, B), Nor(C, Not(A))); 1134*06c3fb27SDimitry Andric break; 1135*06c3fb27SDimitry Andric case 0x84: 1136*06c3fb27SDimitry Andric if (ABCIsConst) 1137*06c3fb27SDimitry Andric Res = And(Xnor(A, C), B); 1138*06c3fb27SDimitry Andric break; 1139*06c3fb27SDimitry Andric case 0x85: 1140*06c3fb27SDimitry Andric if (ABCIsConst) 1141*06c3fb27SDimitry Andric Res = Nor(Xor(A, C), Nor(B, Not(A))); 1142*06c3fb27SDimitry Andric break; 1143*06c3fb27SDimitry Andric case 0x86: 1144*06c3fb27SDimitry Andric if (ABCIsConst) 1145*06c3fb27SDimitry Andric Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C); 1146*06c3fb27SDimitry Andric break; 1147*06c3fb27SDimitry Andric case 0x87: 1148*06c3fb27SDimitry Andric if (ABCIsConst) 1149*06c3fb27SDimitry Andric Res = Xor(A, Nand(B, C)); 1150*06c3fb27SDimitry Andric break; 1151*06c3fb27SDimitry Andric case 0x88: 1152*06c3fb27SDimitry Andric Res = And(B, C); 1153*06c3fb27SDimitry Andric break; 1154*06c3fb27SDimitry Andric case 0x89: 1155*06c3fb27SDimitry Andric if (ABCIsConst) 1156*06c3fb27SDimitry Andric Res = Xor(B, 
Nor(Nor(B, Not(A)), C)); 1157*06c3fb27SDimitry Andric break; 1158*06c3fb27SDimitry Andric case 0x8a: 1159*06c3fb27SDimitry Andric if (ABCIsConst) 1160*06c3fb27SDimitry Andric Res = And(Nand(A, Not(B)), C); 1161*06c3fb27SDimitry Andric break; 1162*06c3fb27SDimitry Andric case 0x8b: 1163*06c3fb27SDimitry Andric if (ABCIsConst) 1164*06c3fb27SDimitry Andric Res = Xor(Nor(Xor(A, C), B), C); 1165*06c3fb27SDimitry Andric break; 1166*06c3fb27SDimitry Andric case 0x8c: 1167*06c3fb27SDimitry Andric if (ABCIsConst) 1168*06c3fb27SDimitry Andric Res = And(Nand(A, Not(C)), B); 1169*06c3fb27SDimitry Andric break; 1170*06c3fb27SDimitry Andric case 0x8d: 1171*06c3fb27SDimitry Andric if (ABCIsConst) 1172*06c3fb27SDimitry Andric Res = Xor(Nor(Xor(A, B), C), B); 1173*06c3fb27SDimitry Andric break; 1174*06c3fb27SDimitry Andric case 0x8e: 1175*06c3fb27SDimitry Andric if (ABCIsConst) 1176*06c3fb27SDimitry Andric Res = Xor(Or(Xor(A, B), Xor(A, C)), A); 1177*06c3fb27SDimitry Andric break; 1178*06c3fb27SDimitry Andric case 0x8f: 1179*06c3fb27SDimitry Andric if (ABCIsConst) 1180*06c3fb27SDimitry Andric Res = Nand(A, Nand(B, C)); 1181*06c3fb27SDimitry Andric break; 1182*06c3fb27SDimitry Andric case 0x90: 1183*06c3fb27SDimitry Andric if (ABCIsConst) 1184*06c3fb27SDimitry Andric Res = And(A, Xnor(B, C)); 1185*06c3fb27SDimitry Andric break; 1186*06c3fb27SDimitry Andric case 0x91: 1187*06c3fb27SDimitry Andric if (ABCIsConst) 1188*06c3fb27SDimitry Andric Res = Nor(Nor(A, Not(B)), Xor(B, C)); 1189*06c3fb27SDimitry Andric break; 1190*06c3fb27SDimitry Andric case 0x92: 1191*06c3fb27SDimitry Andric if (ABCIsConst) 1192*06c3fb27SDimitry Andric Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C); 1193*06c3fb27SDimitry Andric break; 1194*06c3fb27SDimitry Andric case 0x93: 1195*06c3fb27SDimitry Andric if (ABCIsConst) 1196*06c3fb27SDimitry Andric Res = Xor(Nand(A, C), B); 1197*06c3fb27SDimitry Andric break; 1198*06c3fb27SDimitry Andric case 0x94: 1199*06c3fb27SDimitry Andric if (ABCIsConst) 1200*06c3fb27SDimitry Andric Res = Nor(Nor(A, B), Xor(Xnor(A, B), C)); 1201*06c3fb27SDimitry Andric break; 1202*06c3fb27SDimitry Andric case 0x95: 1203*06c3fb27SDimitry Andric if (ABCIsConst) 1204*06c3fb27SDimitry Andric Res = Xor(Nand(A, B), C); 1205*06c3fb27SDimitry Andric break; 1206*06c3fb27SDimitry Andric case 0x96: 1207*06c3fb27SDimitry Andric if (ABCIsConst) 1208*06c3fb27SDimitry Andric Res = Xor(Xor(A, B), C); 1209*06c3fb27SDimitry Andric break; 1210*06c3fb27SDimitry Andric case 0x97: 1211*06c3fb27SDimitry Andric if (ABCIsConst) 1212*06c3fb27SDimitry Andric Res = Xor(Xor(A, B), Or(Nor(A, B), C)); 1213*06c3fb27SDimitry Andric break; 1214*06c3fb27SDimitry Andric case 0x98: 1215*06c3fb27SDimitry Andric if (ABCIsConst) 1216*06c3fb27SDimitry Andric Res = Nor(Nor(A, B), Xor(B, C)); 1217*06c3fb27SDimitry Andric break; 1218*06c3fb27SDimitry Andric case 0x99: 1219*06c3fb27SDimitry Andric if (BCIsConst) 1220*06c3fb27SDimitry Andric Res = Xnor(B, C); 1221*06c3fb27SDimitry Andric break; 1222*06c3fb27SDimitry Andric case 0x9a: 1223*06c3fb27SDimitry Andric if (ABCIsConst) 1224*06c3fb27SDimitry Andric Res = Xor(Nor(B, Not(A)), C); 1225*06c3fb27SDimitry Andric break; 1226*06c3fb27SDimitry Andric case 0x9b: 1227*06c3fb27SDimitry Andric if (ABCIsConst) 1228*06c3fb27SDimitry Andric Res = Or(Nor(A, B), Xnor(B, C)); 1229*06c3fb27SDimitry Andric break; 1230*06c3fb27SDimitry Andric case 0x9c: 1231*06c3fb27SDimitry Andric if (ABCIsConst) 1232*06c3fb27SDimitry Andric Res = Xor(B, Nor(C, Not(A))); 1233*06c3fb27SDimitry Andric break; 1234*06c3fb27SDimitry Andric case 0x9d: 
1235*06c3fb27SDimitry Andric if (ABCIsConst) 1236*06c3fb27SDimitry Andric Res = Or(Nor(A, C), Xnor(B, C)); 1237*06c3fb27SDimitry Andric break; 1238*06c3fb27SDimitry Andric case 0x9e: 1239*06c3fb27SDimitry Andric if (ABCIsConst) 1240*06c3fb27SDimitry Andric Res = Xor(And(Xor(A, B), Nand(B, C)), C); 1241*06c3fb27SDimitry Andric break; 1242*06c3fb27SDimitry Andric case 0x9f: 1243*06c3fb27SDimitry Andric if (ABCIsConst) 1244*06c3fb27SDimitry Andric Res = Nand(A, Xor(B, C)); 1245*06c3fb27SDimitry Andric break; 1246*06c3fb27SDimitry Andric case 0xa0: 1247*06c3fb27SDimitry Andric Res = And(A, C); 1248*06c3fb27SDimitry Andric break; 1249*06c3fb27SDimitry Andric case 0xa1: 1250*06c3fb27SDimitry Andric if (ABCIsConst) 1251*06c3fb27SDimitry Andric Res = Xor(A, Nor(Nor(A, Not(B)), C)); 1252*06c3fb27SDimitry Andric break; 1253*06c3fb27SDimitry Andric case 0xa2: 1254*06c3fb27SDimitry Andric if (ABCIsConst) 1255*06c3fb27SDimitry Andric Res = And(Or(A, Not(B)), C); 1256*06c3fb27SDimitry Andric break; 1257*06c3fb27SDimitry Andric case 0xa3: 1258*06c3fb27SDimitry Andric if (ABCIsConst) 1259*06c3fb27SDimitry Andric Res = Xor(Nor(Xor(B, C), A), C); 1260*06c3fb27SDimitry Andric break; 1261*06c3fb27SDimitry Andric case 0xa4: 1262*06c3fb27SDimitry Andric if (ABCIsConst) 1263*06c3fb27SDimitry Andric Res = Xor(A, Nor(Nor(A, B), C)); 1264*06c3fb27SDimitry Andric break; 1265*06c3fb27SDimitry Andric case 0xa5: 1266*06c3fb27SDimitry Andric if (ACIsConst) 1267*06c3fb27SDimitry Andric Res = Xnor(A, C); 1268*06c3fb27SDimitry Andric break; 1269*06c3fb27SDimitry Andric case 0xa6: 1270*06c3fb27SDimitry Andric if (ABCIsConst) 1271*06c3fb27SDimitry Andric Res = Xor(Nor(A, Not(B)), C); 1272*06c3fb27SDimitry Andric break; 1273*06c3fb27SDimitry Andric case 0xa7: 1274*06c3fb27SDimitry Andric if (ABCIsConst) 1275*06c3fb27SDimitry Andric Res = Or(Nor(A, B), Xnor(A, C)); 1276*06c3fb27SDimitry Andric break; 1277*06c3fb27SDimitry Andric case 0xa8: 1278*06c3fb27SDimitry Andric if (ABCIsConst) 1279*06c3fb27SDimitry Andric Res = And(Or(A, B), C); 1280*06c3fb27SDimitry Andric break; 1281*06c3fb27SDimitry Andric case 0xa9: 1282*06c3fb27SDimitry Andric if (ABCIsConst) 1283*06c3fb27SDimitry Andric Res = Xor(Nor(A, B), C); 1284*06c3fb27SDimitry Andric break; 1285*06c3fb27SDimitry Andric case 0xaa: 1286*06c3fb27SDimitry Andric Res = C; 1287*06c3fb27SDimitry Andric break; 1288*06c3fb27SDimitry Andric case 0xab: 1289*06c3fb27SDimitry Andric if (ABCIsConst) 1290*06c3fb27SDimitry Andric Res = Or(Nor(A, B), C); 1291*06c3fb27SDimitry Andric break; 1292*06c3fb27SDimitry Andric case 0xac: 1293*06c3fb27SDimitry Andric if (ABCIsConst) 1294*06c3fb27SDimitry Andric Res = Xor(Nor(Xnor(B, C), A), C); 1295*06c3fb27SDimitry Andric break; 1296*06c3fb27SDimitry Andric case 0xad: 1297*06c3fb27SDimitry Andric if (ABCIsConst) 1298*06c3fb27SDimitry Andric Res = Or(Xnor(A, C), And(B, C)); 1299*06c3fb27SDimitry Andric break; 1300*06c3fb27SDimitry Andric case 0xae: 1301*06c3fb27SDimitry Andric if (ABCIsConst) 1302*06c3fb27SDimitry Andric Res = Or(Nor(A, Not(B)), C); 1303*06c3fb27SDimitry Andric break; 1304*06c3fb27SDimitry Andric case 0xaf: 1305*06c3fb27SDimitry Andric if (ACIsConst) 1306*06c3fb27SDimitry Andric Res = Or(C, Not(A)); 1307*06c3fb27SDimitry Andric break; 1308*06c3fb27SDimitry Andric case 0xb0: 1309*06c3fb27SDimitry Andric if (ABCIsConst) 1310*06c3fb27SDimitry Andric Res = And(A, Nand(B, Not(C))); 1311*06c3fb27SDimitry Andric break; 1312*06c3fb27SDimitry Andric case 0xb1: 1313*06c3fb27SDimitry Andric if (ABCIsConst) 1314*06c3fb27SDimitry Andric Res = 
Xor(A, Nor(Xor(A, B), C)); 1315*06c3fb27SDimitry Andric break; 1316*06c3fb27SDimitry Andric case 0xb2: 1317*06c3fb27SDimitry Andric if (ABCIsConst) 1318*06c3fb27SDimitry Andric Res = Xor(Nor(Xor(A, B), Xnor(A, C)), A); 1319*06c3fb27SDimitry Andric break; 1320*06c3fb27SDimitry Andric case 0xb3: 1321*06c3fb27SDimitry Andric if (ABCIsConst) 1322*06c3fb27SDimitry Andric Res = Nand(Nand(A, C), B); 1323*06c3fb27SDimitry Andric break; 1324*06c3fb27SDimitry Andric case 0xb4: 1325*06c3fb27SDimitry Andric if (ABCIsConst) 1326*06c3fb27SDimitry Andric Res = Xor(A, Nor(C, Not(B))); 1327*06c3fb27SDimitry Andric break; 1328*06c3fb27SDimitry Andric case 0xb5: 1329*06c3fb27SDimitry Andric if (ABCIsConst) 1330*06c3fb27SDimitry Andric Res = Or(Xnor(A, C), Nor(B, C)); 1331*06c3fb27SDimitry Andric break; 1332*06c3fb27SDimitry Andric case 0xb6: 1333*06c3fb27SDimitry Andric if (ABCIsConst) 1334*06c3fb27SDimitry Andric Res = Xor(And(Xor(A, B), Nand(A, C)), C); 1335*06c3fb27SDimitry Andric break; 1336*06c3fb27SDimitry Andric case 0xb7: 1337*06c3fb27SDimitry Andric if (ABCIsConst) 1338*06c3fb27SDimitry Andric Res = Nand(Xor(A, C), B); 1339*06c3fb27SDimitry Andric break; 1340*06c3fb27SDimitry Andric case 0xb8: 1341*06c3fb27SDimitry Andric if (ABCIsConst) 1342*06c3fb27SDimitry Andric Res = Xor(Nor(Xnor(A, C), B), C); 1343*06c3fb27SDimitry Andric break; 1344*06c3fb27SDimitry Andric case 0xb9: 1345*06c3fb27SDimitry Andric if (ABCIsConst) 1346*06c3fb27SDimitry Andric Res = Xor(Nor(And(A, C), B), C); 1347*06c3fb27SDimitry Andric break; 1348*06c3fb27SDimitry Andric case 0xba: 1349*06c3fb27SDimitry Andric if (ABCIsConst) 1350*06c3fb27SDimitry Andric Res = Or(Nor(B, Not(A)), C); 1351*06c3fb27SDimitry Andric break; 1352*06c3fb27SDimitry Andric case 0xbb: 1353*06c3fb27SDimitry Andric if (BCIsConst) 1354*06c3fb27SDimitry Andric Res = Or(C, Not(B)); 1355*06c3fb27SDimitry Andric break; 1356*06c3fb27SDimitry Andric case 0xbc: 1357*06c3fb27SDimitry Andric if (ABCIsConst) 1358*06c3fb27SDimitry Andric Res = Xor(A, And(Nand(A, C), B)); 1359*06c3fb27SDimitry Andric break; 1360*06c3fb27SDimitry Andric case 0xbd: 1361*06c3fb27SDimitry Andric if (ABCIsConst) 1362*06c3fb27SDimitry Andric Res = Or(Xor(A, B), Xnor(A, C)); 1363*06c3fb27SDimitry Andric break; 1364*06c3fb27SDimitry Andric case 0xbe: 1365*06c3fb27SDimitry Andric if (ABCIsConst) 1366*06c3fb27SDimitry Andric Res = Or(Xor(A, B), C); 1367*06c3fb27SDimitry Andric break; 1368*06c3fb27SDimitry Andric case 0xbf: 1369*06c3fb27SDimitry Andric if (ABCIsConst) 1370*06c3fb27SDimitry Andric Res = Or(Nand(A, B), C); 1371*06c3fb27SDimitry Andric break; 1372*06c3fb27SDimitry Andric case 0xc0: 1373*06c3fb27SDimitry Andric Res = And(A, B); 1374*06c3fb27SDimitry Andric break; 1375*06c3fb27SDimitry Andric case 0xc1: 1376*06c3fb27SDimitry Andric if (ABCIsConst) 1377*06c3fb27SDimitry Andric Res = Xor(A, Nor(Nor(A, Not(C)), B)); 1378*06c3fb27SDimitry Andric break; 1379*06c3fb27SDimitry Andric case 0xc2: 1380*06c3fb27SDimitry Andric if (ABCIsConst) 1381*06c3fb27SDimitry Andric Res = Xor(A, Nor(Nor(A, C), B)); 1382*06c3fb27SDimitry Andric break; 1383*06c3fb27SDimitry Andric case 0xc3: 1384*06c3fb27SDimitry Andric if (ABIsConst) 1385*06c3fb27SDimitry Andric Res = Xnor(A, B); 1386*06c3fb27SDimitry Andric break; 1387*06c3fb27SDimitry Andric case 0xc4: 1388*06c3fb27SDimitry Andric if (ABCIsConst) 1389*06c3fb27SDimitry Andric Res = And(Or(A, Not(C)), B); 1390*06c3fb27SDimitry Andric break; 1391*06c3fb27SDimitry Andric case 0xc5: 1392*06c3fb27SDimitry Andric if (ABCIsConst) 1393*06c3fb27SDimitry Andric Res = 
Xor(B, Nor(A, Xor(B, C))); 1394*06c3fb27SDimitry Andric break; 1395*06c3fb27SDimitry Andric case 0xc6: 1396*06c3fb27SDimitry Andric if (ABCIsConst) 1397*06c3fb27SDimitry Andric Res = Xor(Nor(A, Not(C)), B); 1398*06c3fb27SDimitry Andric break; 1399*06c3fb27SDimitry Andric case 0xc7: 1400*06c3fb27SDimitry Andric if (ABCIsConst) 1401*06c3fb27SDimitry Andric Res = Or(Xnor(A, B), Nor(A, C)); 1402*06c3fb27SDimitry Andric break; 1403*06c3fb27SDimitry Andric case 0xc8: 1404*06c3fb27SDimitry Andric if (ABCIsConst) 1405*06c3fb27SDimitry Andric Res = And(Or(A, C), B); 1406*06c3fb27SDimitry Andric break; 1407*06c3fb27SDimitry Andric case 0xc9: 1408*06c3fb27SDimitry Andric if (ABCIsConst) 1409*06c3fb27SDimitry Andric Res = Xor(Nor(A, C), B); 1410*06c3fb27SDimitry Andric break; 1411*06c3fb27SDimitry Andric case 0xca: 1412*06c3fb27SDimitry Andric if (ABCIsConst) 1413*06c3fb27SDimitry Andric Res = Xor(B, Nor(A, Xnor(B, C))); 1414*06c3fb27SDimitry Andric break; 1415*06c3fb27SDimitry Andric case 0xcb: 1416*06c3fb27SDimitry Andric if (ABCIsConst) 1417*06c3fb27SDimitry Andric Res = Or(Xnor(A, B), And(B, C)); 1418*06c3fb27SDimitry Andric break; 1419*06c3fb27SDimitry Andric case 0xcc: 1420*06c3fb27SDimitry Andric Res = B; 1421*06c3fb27SDimitry Andric break; 1422*06c3fb27SDimitry Andric case 0xcd: 1423*06c3fb27SDimitry Andric if (ABCIsConst) 1424*06c3fb27SDimitry Andric Res = Or(Nor(A, C), B); 1425*06c3fb27SDimitry Andric break; 1426*06c3fb27SDimitry Andric case 0xce: 1427*06c3fb27SDimitry Andric if (ABCIsConst) 1428*06c3fb27SDimitry Andric Res = Or(Nor(A, Not(C)), B); 1429*06c3fb27SDimitry Andric break; 1430*06c3fb27SDimitry Andric case 0xcf: 1431*06c3fb27SDimitry Andric if (ABIsConst) 1432*06c3fb27SDimitry Andric Res = Or(B, Not(A)); 1433*06c3fb27SDimitry Andric break; 1434*06c3fb27SDimitry Andric case 0xd0: 1435*06c3fb27SDimitry Andric if (ABCIsConst) 1436*06c3fb27SDimitry Andric Res = And(A, Or(B, Not(C))); 1437*06c3fb27SDimitry Andric break; 1438*06c3fb27SDimitry Andric case 0xd1: 1439*06c3fb27SDimitry Andric if (ABCIsConst) 1440*06c3fb27SDimitry Andric Res = Xor(A, Nor(Xor(A, C), B)); 1441*06c3fb27SDimitry Andric break; 1442*06c3fb27SDimitry Andric case 0xd2: 1443*06c3fb27SDimitry Andric if (ABCIsConst) 1444*06c3fb27SDimitry Andric Res = Xor(A, Nor(B, Not(C))); 1445*06c3fb27SDimitry Andric break; 1446*06c3fb27SDimitry Andric case 0xd3: 1447*06c3fb27SDimitry Andric if (ABCIsConst) 1448*06c3fb27SDimitry Andric Res = Or(Xnor(A, B), Nor(B, C)); 1449*06c3fb27SDimitry Andric break; 1450*06c3fb27SDimitry Andric case 0xd4: 1451*06c3fb27SDimitry Andric if (ABCIsConst) 1452*06c3fb27SDimitry Andric Res = Xor(Nor(Xnor(A, B), Xor(A, C)), A); 1453*06c3fb27SDimitry Andric break; 1454*06c3fb27SDimitry Andric case 0xd5: 1455*06c3fb27SDimitry Andric if (ABCIsConst) 1456*06c3fb27SDimitry Andric Res = Nand(Nand(A, B), C); 1457*06c3fb27SDimitry Andric break; 1458*06c3fb27SDimitry Andric case 0xd6: 1459*06c3fb27SDimitry Andric if (ABCIsConst) 1460*06c3fb27SDimitry Andric Res = Xor(Xor(A, B), Or(And(A, B), C)); 1461*06c3fb27SDimitry Andric break; 1462*06c3fb27SDimitry Andric case 0xd7: 1463*06c3fb27SDimitry Andric if (ABCIsConst) 1464*06c3fb27SDimitry Andric Res = Nand(Xor(A, B), C); 1465*06c3fb27SDimitry Andric break; 1466*06c3fb27SDimitry Andric case 0xd8: 1467*06c3fb27SDimitry Andric if (ABCIsConst) 1468*06c3fb27SDimitry Andric Res = Xor(Nor(Xnor(A, B), C), B); 1469*06c3fb27SDimitry Andric break; 1470*06c3fb27SDimitry Andric case 0xd9: 1471*06c3fb27SDimitry Andric if (ABCIsConst) 1472*06c3fb27SDimitry Andric Res = Or(And(A, 
B), Xnor(B, C)); 1473*06c3fb27SDimitry Andric break; 1474*06c3fb27SDimitry Andric case 0xda: 1475*06c3fb27SDimitry Andric if (ABCIsConst) 1476*06c3fb27SDimitry Andric Res = Xor(A, And(Nand(A, B), C)); 1477*06c3fb27SDimitry Andric break; 1478*06c3fb27SDimitry Andric case 0xdb: 1479*06c3fb27SDimitry Andric if (ABCIsConst) 1480*06c3fb27SDimitry Andric Res = Or(Xnor(A, B), Xor(A, C)); 1481*06c3fb27SDimitry Andric break; 1482*06c3fb27SDimitry Andric case 0xdc: 1483*06c3fb27SDimitry Andric if (ABCIsConst) 1484*06c3fb27SDimitry Andric Res = Or(B, Nor(C, Not(A))); 1485*06c3fb27SDimitry Andric break; 1486*06c3fb27SDimitry Andric case 0xdd: 1487*06c3fb27SDimitry Andric if (BCIsConst) 1488*06c3fb27SDimitry Andric Res = Or(B, Not(C)); 1489*06c3fb27SDimitry Andric break; 1490*06c3fb27SDimitry Andric case 0xde: 1491*06c3fb27SDimitry Andric if (ABCIsConst) 1492*06c3fb27SDimitry Andric Res = Or(Xor(A, C), B); 1493*06c3fb27SDimitry Andric break; 1494*06c3fb27SDimitry Andric case 0xdf: 1495*06c3fb27SDimitry Andric if (ABCIsConst) 1496*06c3fb27SDimitry Andric Res = Or(Nand(A, C), B); 1497*06c3fb27SDimitry Andric break; 1498*06c3fb27SDimitry Andric case 0xe0: 1499*06c3fb27SDimitry Andric if (ABCIsConst) 1500*06c3fb27SDimitry Andric Res = And(A, Or(B, C)); 1501*06c3fb27SDimitry Andric break; 1502*06c3fb27SDimitry Andric case 0xe1: 1503*06c3fb27SDimitry Andric if (ABCIsConst) 1504*06c3fb27SDimitry Andric Res = Xor(A, Nor(B, C)); 1505*06c3fb27SDimitry Andric break; 1506*06c3fb27SDimitry Andric case 0xe2: 1507*06c3fb27SDimitry Andric if (ABCIsConst) 1508*06c3fb27SDimitry Andric Res = Xor(A, Nor(Xnor(A, C), B)); 1509*06c3fb27SDimitry Andric break; 1510*06c3fb27SDimitry Andric case 0xe3: 1511*06c3fb27SDimitry Andric if (ABCIsConst) 1512*06c3fb27SDimitry Andric Res = Xor(A, Nor(And(A, C), B)); 1513*06c3fb27SDimitry Andric break; 1514*06c3fb27SDimitry Andric case 0xe4: 1515*06c3fb27SDimitry Andric if (ABCIsConst) 1516*06c3fb27SDimitry Andric Res = Xor(A, Nor(Xnor(A, B), C)); 1517*06c3fb27SDimitry Andric break; 1518*06c3fb27SDimitry Andric case 0xe5: 1519*06c3fb27SDimitry Andric if (ABCIsConst) 1520*06c3fb27SDimitry Andric Res = Xor(A, Nor(And(A, B), C)); 1521*06c3fb27SDimitry Andric break; 1522*06c3fb27SDimitry Andric case 0xe6: 1523*06c3fb27SDimitry Andric if (ABCIsConst) 1524*06c3fb27SDimitry Andric Res = Or(And(A, B), Xor(B, C)); 1525*06c3fb27SDimitry Andric break; 1526*06c3fb27SDimitry Andric case 0xe7: 1527*06c3fb27SDimitry Andric if (ABCIsConst) 1528*06c3fb27SDimitry Andric Res = Or(Xnor(A, B), Xnor(A, C)); 1529*06c3fb27SDimitry Andric break; 1530*06c3fb27SDimitry Andric case 0xe8: 1531*06c3fb27SDimitry Andric if (ABCIsConst) 1532*06c3fb27SDimitry Andric Res = Xor(Or(A, B), Nor(Xnor(A, B), C)); 1533*06c3fb27SDimitry Andric break; 1534*06c3fb27SDimitry Andric case 0xe9: 1535*06c3fb27SDimitry Andric if (ABCIsConst) 1536*06c3fb27SDimitry Andric Res = Xor(Xor(A, B), Nand(Nand(A, B), C)); 1537*06c3fb27SDimitry Andric break; 1538*06c3fb27SDimitry Andric case 0xea: 1539*06c3fb27SDimitry Andric if (ABCIsConst) 1540*06c3fb27SDimitry Andric Res = Or(And(A, B), C); 1541*06c3fb27SDimitry Andric break; 1542*06c3fb27SDimitry Andric case 0xeb: 1543*06c3fb27SDimitry Andric if (ABCIsConst) 1544*06c3fb27SDimitry Andric Res = Or(Xnor(A, B), C); 1545*06c3fb27SDimitry Andric break; 1546*06c3fb27SDimitry Andric case 0xec: 1547*06c3fb27SDimitry Andric if (ABCIsConst) 1548*06c3fb27SDimitry Andric Res = Or(And(A, C), B); 1549*06c3fb27SDimitry Andric break; 1550*06c3fb27SDimitry Andric case 0xed: 1551*06c3fb27SDimitry Andric if 
(ABCIsConst) 1552*06c3fb27SDimitry Andric Res = Or(Xnor(A, C), B); 1553*06c3fb27SDimitry Andric break; 1554*06c3fb27SDimitry Andric case 0xee: 1555*06c3fb27SDimitry Andric Res = Or(B, C); 1556*06c3fb27SDimitry Andric break; 1557*06c3fb27SDimitry Andric case 0xef: 1558*06c3fb27SDimitry Andric if (ABCIsConst) 1559*06c3fb27SDimitry Andric Res = Nand(A, Nor(B, C)); 1560*06c3fb27SDimitry Andric break; 1561*06c3fb27SDimitry Andric case 0xf0: 1562*06c3fb27SDimitry Andric Res = A; 1563*06c3fb27SDimitry Andric break; 1564*06c3fb27SDimitry Andric case 0xf1: 1565*06c3fb27SDimitry Andric if (ABCIsConst) 1566*06c3fb27SDimitry Andric Res = Or(A, Nor(B, C)); 1567*06c3fb27SDimitry Andric break; 1568*06c3fb27SDimitry Andric case 0xf2: 1569*06c3fb27SDimitry Andric if (ABCIsConst) 1570*06c3fb27SDimitry Andric Res = Or(A, Nor(B, Not(C))); 1571*06c3fb27SDimitry Andric break; 1572*06c3fb27SDimitry Andric case 0xf3: 1573*06c3fb27SDimitry Andric if (ABIsConst) 1574*06c3fb27SDimitry Andric Res = Or(A, Not(B)); 1575*06c3fb27SDimitry Andric break; 1576*06c3fb27SDimitry Andric case 0xf4: 1577*06c3fb27SDimitry Andric if (ABCIsConst) 1578*06c3fb27SDimitry Andric Res = Or(A, Nor(C, Not(B))); 1579*06c3fb27SDimitry Andric break; 1580*06c3fb27SDimitry Andric case 0xf5: 1581*06c3fb27SDimitry Andric if (ACIsConst) 1582*06c3fb27SDimitry Andric Res = Or(A, Not(C)); 1583*06c3fb27SDimitry Andric break; 1584*06c3fb27SDimitry Andric case 0xf6: 1585*06c3fb27SDimitry Andric if (ABCIsConst) 1586*06c3fb27SDimitry Andric Res = Or(A, Xor(B, C)); 1587*06c3fb27SDimitry Andric break; 1588*06c3fb27SDimitry Andric case 0xf7: 1589*06c3fb27SDimitry Andric if (ABCIsConst) 1590*06c3fb27SDimitry Andric Res = Or(A, Nand(B, C)); 1591*06c3fb27SDimitry Andric break; 1592*06c3fb27SDimitry Andric case 0xf8: 1593*06c3fb27SDimitry Andric if (ABCIsConst) 1594*06c3fb27SDimitry Andric Res = Or(A, And(B, C)); 1595*06c3fb27SDimitry Andric break; 1596*06c3fb27SDimitry Andric case 0xf9: 1597*06c3fb27SDimitry Andric if (ABCIsConst) 1598*06c3fb27SDimitry Andric Res = Or(A, Xnor(B, C)); 1599*06c3fb27SDimitry Andric break; 1600*06c3fb27SDimitry Andric case 0xfa: 1601*06c3fb27SDimitry Andric Res = Or(A, C); 1602*06c3fb27SDimitry Andric break; 1603*06c3fb27SDimitry Andric case 0xfb: 1604*06c3fb27SDimitry Andric if (ABCIsConst) 1605*06c3fb27SDimitry Andric Res = Nand(Nor(A, C), B); 1606*06c3fb27SDimitry Andric break; 1607*06c3fb27SDimitry Andric case 0xfc: 1608*06c3fb27SDimitry Andric Res = Or(A, B); 1609*06c3fb27SDimitry Andric break; 1610*06c3fb27SDimitry Andric case 0xfd: 1611*06c3fb27SDimitry Andric if (ABCIsConst) 1612*06c3fb27SDimitry Andric Res = Nand(Nor(A, B), C); 1613*06c3fb27SDimitry Andric break; 1614*06c3fb27SDimitry Andric case 0xfe: 1615*06c3fb27SDimitry Andric if (ABCIsConst) 1616*06c3fb27SDimitry Andric Res = Or(Or(A, B), C); 1617*06c3fb27SDimitry Andric break; 1618*06c3fb27SDimitry Andric case 0xff: 1619*06c3fb27SDimitry Andric Res = {Constant::getAllOnesValue(Ty), 0xff}; 1620*06c3fb27SDimitry Andric break; 1621*06c3fb27SDimitry Andric } 1622*06c3fb27SDimitry Andric 1623*06c3fb27SDimitry Andric assert((Res.first == nullptr || Res.second == Imm) && 1624*06c3fb27SDimitry Andric "Simplification of ternary logic does not verify!"); 1625*06c3fb27SDimitry Andric return Res.first; 1626*06c3fb27SDimitry Andric } 1627*06c3fb27SDimitry Andric 1628e8d8bef9SDimitry Andric static Value *simplifyX86insertps(const IntrinsicInst &II, 1629e8d8bef9SDimitry Andric InstCombiner::BuilderTy &Builder) { 1630e8d8bef9SDimitry Andric auto *CInt = 
dyn_cast<ConstantInt>(II.getArgOperand(2)); 1631e8d8bef9SDimitry Andric if (!CInt) 1632e8d8bef9SDimitry Andric return nullptr; 1633e8d8bef9SDimitry Andric 1634e8d8bef9SDimitry Andric auto *VecTy = cast<FixedVectorType>(II.getType()); 1635e8d8bef9SDimitry Andric assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type"); 1636e8d8bef9SDimitry Andric 1637e8d8bef9SDimitry Andric // The immediate permute control byte looks like this: 1638e8d8bef9SDimitry Andric // [3:0] - zero mask for each 32-bit lane 1639e8d8bef9SDimitry Andric // [5:4] - select one 32-bit destination lane 1640e8d8bef9SDimitry Andric // [7:6] - select one 32-bit source lane 1641e8d8bef9SDimitry Andric 1642e8d8bef9SDimitry Andric uint8_t Imm = CInt->getZExtValue(); 1643e8d8bef9SDimitry Andric uint8_t ZMask = Imm & 0xf; 1644e8d8bef9SDimitry Andric uint8_t DestLane = (Imm >> 4) & 0x3; 1645e8d8bef9SDimitry Andric uint8_t SourceLane = (Imm >> 6) & 0x3; 1646e8d8bef9SDimitry Andric 1647e8d8bef9SDimitry Andric ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); 1648e8d8bef9SDimitry Andric 1649e8d8bef9SDimitry Andric // If all zero mask bits are set, this was just a weird way to 1650e8d8bef9SDimitry Andric // generate a zero vector. 1651e8d8bef9SDimitry Andric if (ZMask == 0xf) 1652e8d8bef9SDimitry Andric return ZeroVector; 1653e8d8bef9SDimitry Andric 1654e8d8bef9SDimitry Andric // Initialize by passing all of the first source bits through. 1655e8d8bef9SDimitry Andric int ShuffleMask[4] = {0, 1, 2, 3}; 1656e8d8bef9SDimitry Andric 1657e8d8bef9SDimitry Andric // We may replace the second operand with the zero vector. 1658e8d8bef9SDimitry Andric Value *V1 = II.getArgOperand(1); 1659e8d8bef9SDimitry Andric 1660e8d8bef9SDimitry Andric if (ZMask) { 1661e8d8bef9SDimitry Andric // If the zero mask is being used with a single input or the zero mask 1662e8d8bef9SDimitry Andric // overrides the destination lane, this is a shuffle with the zero vector. 1663e8d8bef9SDimitry Andric if ((II.getArgOperand(0) == II.getArgOperand(1)) || 1664e8d8bef9SDimitry Andric (ZMask & (1 << DestLane))) { 1665e8d8bef9SDimitry Andric V1 = ZeroVector; 1666e8d8bef9SDimitry Andric // We may still move 32-bits of the first source vector from one lane 1667e8d8bef9SDimitry Andric // to another. 1668e8d8bef9SDimitry Andric ShuffleMask[DestLane] = SourceLane; 1669e8d8bef9SDimitry Andric // The zero mask may override the previous insert operation. 1670e8d8bef9SDimitry Andric for (unsigned i = 0; i < 4; ++i) 1671e8d8bef9SDimitry Andric if ((ZMask >> i) & 0x1) 1672e8d8bef9SDimitry Andric ShuffleMask[i] = i + 4; 1673e8d8bef9SDimitry Andric } else { 1674e8d8bef9SDimitry Andric // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle? 1675e8d8bef9SDimitry Andric return nullptr; 1676e8d8bef9SDimitry Andric } 1677e8d8bef9SDimitry Andric } else { 1678e8d8bef9SDimitry Andric // Replace the selected destination lane with the selected source lane. 1679e8d8bef9SDimitry Andric ShuffleMask[DestLane] = SourceLane + 4; 1680e8d8bef9SDimitry Andric } 1681e8d8bef9SDimitry Andric 1682e8d8bef9SDimitry Andric return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask); 1683e8d8bef9SDimitry Andric } 1684e8d8bef9SDimitry Andric 1685e8d8bef9SDimitry Andric /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding 1686e8d8bef9SDimitry Andric /// or conversion to a shuffle vector. 
1687e8d8bef9SDimitry Andric static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0, 1688e8d8bef9SDimitry Andric ConstantInt *CILength, ConstantInt *CIIndex, 1689e8d8bef9SDimitry Andric InstCombiner::BuilderTy &Builder) { 1690e8d8bef9SDimitry Andric auto LowConstantHighUndef = [&](uint64_t Val) { 1691e8d8bef9SDimitry Andric Type *IntTy64 = Type::getInt64Ty(II.getContext()); 1692e8d8bef9SDimitry Andric Constant *Args[] = {ConstantInt::get(IntTy64, Val), 1693e8d8bef9SDimitry Andric UndefValue::get(IntTy64)}; 1694e8d8bef9SDimitry Andric return ConstantVector::get(Args); 1695e8d8bef9SDimitry Andric }; 1696e8d8bef9SDimitry Andric 1697e8d8bef9SDimitry Andric // See if we're dealing with constant values. 1698fe6060f1SDimitry Andric auto *C0 = dyn_cast<Constant>(Op0); 1699fe6060f1SDimitry Andric auto *CI0 = 1700e8d8bef9SDimitry Andric C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0)) 1701e8d8bef9SDimitry Andric : nullptr; 1702e8d8bef9SDimitry Andric 1703e8d8bef9SDimitry Andric // Attempt to constant fold. 1704e8d8bef9SDimitry Andric if (CILength && CIIndex) { 1705e8d8bef9SDimitry Andric // From AMD documentation: "The bit index and field length are each six 1706e8d8bef9SDimitry Andric // bits in length; other bits of the field are ignored." 1707e8d8bef9SDimitry Andric APInt APIndex = CIIndex->getValue().zextOrTrunc(6); 1708e8d8bef9SDimitry Andric APInt APLength = CILength->getValue().zextOrTrunc(6); 1709e8d8bef9SDimitry Andric 1710e8d8bef9SDimitry Andric unsigned Index = APIndex.getZExtValue(); 1711e8d8bef9SDimitry Andric 1712e8d8bef9SDimitry Andric // From AMD documentation: "a value of zero in the field length is 1713e8d8bef9SDimitry Andric // defined as length of 64". 1714e8d8bef9SDimitry Andric unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue(); 1715e8d8bef9SDimitry Andric 1716e8d8bef9SDimitry Andric // From AMD documentation: "If the sum of the bit index + length field 1717e8d8bef9SDimitry Andric // is greater than 64, the results are undefined". 1718e8d8bef9SDimitry Andric unsigned End = Index + Length; 1719e8d8bef9SDimitry Andric 1720e8d8bef9SDimitry Andric // Note that both field index and field length are 8-bit quantities. 1721e8d8bef9SDimitry Andric // Since variables 'Index' and 'Length' are unsigned values 1722e8d8bef9SDimitry Andric // obtained from zero-extending field index and field length 1723e8d8bef9SDimitry Andric // respectively, their sum should never wrap around. 1724e8d8bef9SDimitry Andric if (End > 64) 1725e8d8bef9SDimitry Andric return UndefValue::get(II.getType()); 1726e8d8bef9SDimitry Andric 1727e8d8bef9SDimitry Andric // If we are extracting whole bytes, we can convert this to a shuffle. 1728e8d8bef9SDimitry Andric // Lowering can recognize EXTRQI shuffle masks. 1729e8d8bef9SDimitry Andric if ((Length % 8) == 0 && (Index % 8) == 0) { 1730e8d8bef9SDimitry Andric // Convert bit indices to byte indices.
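// Illustrative example: EXTRQI with Index = 16 and Length = 8 becomes
// byte Index = 2, Length = 1 after the divisions below, and the mask
// built next is {2, zero x7, undef x8}: source byte 2 lands in byte 0,
// the rest of the low quadword is zeroed, and the high quadword is
// left undefined.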
1731e8d8bef9SDimitry Andric Length /= 8; 1732e8d8bef9SDimitry Andric Index /= 8; 1733e8d8bef9SDimitry Andric 1734e8d8bef9SDimitry Andric Type *IntTy8 = Type::getInt8Ty(II.getContext()); 1735e8d8bef9SDimitry Andric auto *ShufTy = FixedVectorType::get(IntTy8, 16); 1736e8d8bef9SDimitry Andric 1737e8d8bef9SDimitry Andric SmallVector<int, 16> ShuffleMask; 1738e8d8bef9SDimitry Andric for (int i = 0; i != (int)Length; ++i) 1739e8d8bef9SDimitry Andric ShuffleMask.push_back(i + Index); 1740e8d8bef9SDimitry Andric for (int i = Length; i != 8; ++i) 1741e8d8bef9SDimitry Andric ShuffleMask.push_back(i + 16); 1742e8d8bef9SDimitry Andric for (int i = 8; i != 16; ++i) 1743e8d8bef9SDimitry Andric ShuffleMask.push_back(-1); 1744e8d8bef9SDimitry Andric 1745e8d8bef9SDimitry Andric Value *SV = Builder.CreateShuffleVector( 1746e8d8bef9SDimitry Andric Builder.CreateBitCast(Op0, ShufTy), 1747e8d8bef9SDimitry Andric ConstantAggregateZero::get(ShufTy), ShuffleMask); 1748e8d8bef9SDimitry Andric return Builder.CreateBitCast(SV, II.getType()); 1749e8d8bef9SDimitry Andric } 1750e8d8bef9SDimitry Andric 1751e8d8bef9SDimitry Andric // Constant Fold - shift Index'th bit to lowest position and mask off 1752e8d8bef9SDimitry Andric // Length bits. 1753e8d8bef9SDimitry Andric if (CI0) { 1754e8d8bef9SDimitry Andric APInt Elt = CI0->getValue(); 1755e8d8bef9SDimitry Andric Elt.lshrInPlace(Index); 1756e8d8bef9SDimitry Andric Elt = Elt.zextOrTrunc(Length); 1757e8d8bef9SDimitry Andric return LowConstantHighUndef(Elt.getZExtValue()); 1758e8d8bef9SDimitry Andric } 1759e8d8bef9SDimitry Andric 1760e8d8bef9SDimitry Andric // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI. 1761e8d8bef9SDimitry Andric if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) { 1762e8d8bef9SDimitry Andric Value *Args[] = {Op0, CILength, CIIndex}; 1763e8d8bef9SDimitry Andric Module *M = II.getModule(); 1764e8d8bef9SDimitry Andric Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi); 1765e8d8bef9SDimitry Andric return Builder.CreateCall(F, Args); 1766e8d8bef9SDimitry Andric } 1767e8d8bef9SDimitry Andric } 1768e8d8bef9SDimitry Andric 1769e8d8bef9SDimitry Andric // Constant Fold - extraction from zero is always {zero, undef}. 1770e8d8bef9SDimitry Andric if (CI0 && CI0->isZero()) 1771e8d8bef9SDimitry Andric return LowConstantHighUndef(0); 1772e8d8bef9SDimitry Andric 1773e8d8bef9SDimitry Andric return nullptr; 1774e8d8bef9SDimitry Andric } 1775e8d8bef9SDimitry Andric 1776e8d8bef9SDimitry Andric /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant 1777e8d8bef9SDimitry Andric /// folding or conversion to a shuffle vector. 1778e8d8bef9SDimitry Andric static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, 1779e8d8bef9SDimitry Andric APInt APLength, APInt APIndex, 1780e8d8bef9SDimitry Andric InstCombiner::BuilderTy &Builder) { 1781e8d8bef9SDimitry Andric // From AMD documentation: "The bit index and field length are each six bits 1782e8d8bef9SDimitry Andric // in length; other bits of the field are ignored." 1783e8d8bef9SDimitry Andric APIndex = APIndex.zextOrTrunc(6); 1784e8d8bef9SDimitry Andric APLength = APLength.zextOrTrunc(6); 1785e8d8bef9SDimitry Andric 1786e8d8bef9SDimitry Andric // Attempt to constant fold. 1787e8d8bef9SDimitry Andric unsigned Index = APIndex.getZExtValue(); 1788e8d8bef9SDimitry Andric 1789e8d8bef9SDimitry Andric // From AMD documentation: "a value of zero in the field length is 1790e8d8bef9SDimitry Andric // defined as length of 64".
1791e8d8bef9SDimitry Andric unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue(); 1792e8d8bef9SDimitry Andric 1793e8d8bef9SDimitry Andric // From AMD documentation: "If the sum of the bit index + length field 1794e8d8bef9SDimitry Andric // is greater than 64, the results are undefined". 1795e8d8bef9SDimitry Andric unsigned End = Index + Length; 1796e8d8bef9SDimitry Andric 1797e8d8bef9SDimitry Andric // Note that both field index and field length are 8-bit quantities. 1798e8d8bef9SDimitry Andric // Since variables 'Index' and 'Length' are unsigned values 1799e8d8bef9SDimitry Andric // obtained from zero-extending field index and field length 1800e8d8bef9SDimitry Andric // respectively, their sum should never wrap around. 1801e8d8bef9SDimitry Andric if (End > 64) 1802e8d8bef9SDimitry Andric return UndefValue::get(II.getType()); 1803e8d8bef9SDimitry Andric 1804e8d8bef9SDimitry Andric // If we are inserting whole bytes, we can convert this to a shuffle. 1805e8d8bef9SDimitry Andric // Lowering can recognize INSERTQI shuffle masks. 1806e8d8bef9SDimitry Andric if ((Length % 8) == 0 && (Index % 8) == 0) { 1807e8d8bef9SDimitry Andric // Convert bit indices to byte indices. 1808e8d8bef9SDimitry Andric Length /= 8; 1809e8d8bef9SDimitry Andric Index /= 8; 1810e8d8bef9SDimitry Andric 1811e8d8bef9SDimitry Andric Type *IntTy8 = Type::getInt8Ty(II.getContext()); 1812e8d8bef9SDimitry Andric auto *ShufTy = FixedVectorType::get(IntTy8, 16); 1813e8d8bef9SDimitry Andric 1814e8d8bef9SDimitry Andric SmallVector<int, 16> ShuffleMask; 1815e8d8bef9SDimitry Andric for (int i = 0; i != (int)Index; ++i) 1816e8d8bef9SDimitry Andric ShuffleMask.push_back(i); 1817e8d8bef9SDimitry Andric for (int i = 0; i != (int)Length; ++i) 1818e8d8bef9SDimitry Andric ShuffleMask.push_back(i + 16); 1819e8d8bef9SDimitry Andric for (int i = Index + Length; i != 8; ++i) 1820e8d8bef9SDimitry Andric ShuffleMask.push_back(i); 1821e8d8bef9SDimitry Andric for (int i = 8; i != 16; ++i) 1822e8d8bef9SDimitry Andric ShuffleMask.push_back(-1); 1823e8d8bef9SDimitry Andric 1824e8d8bef9SDimitry Andric Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy), 1825e8d8bef9SDimitry Andric Builder.CreateBitCast(Op1, ShufTy), 1826e8d8bef9SDimitry Andric ShuffleMask); 1827e8d8bef9SDimitry Andric return Builder.CreateBitCast(SV, II.getType()); 1828e8d8bef9SDimitry Andric } 1829e8d8bef9SDimitry Andric 1830e8d8bef9SDimitry Andric // See if we're dealing with constant values. 1831fe6060f1SDimitry Andric auto *C0 = dyn_cast<Constant>(Op0); 1832fe6060f1SDimitry Andric auto *C1 = dyn_cast<Constant>(Op1); 1833fe6060f1SDimitry Andric auto *CI00 = 1834e8d8bef9SDimitry Andric C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0)) 1835e8d8bef9SDimitry Andric : nullptr; 1836fe6060f1SDimitry Andric auto *CI10 = 1837e8d8bef9SDimitry Andric C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0)) 1838e8d8bef9SDimitry Andric : nullptr; 1839e8d8bef9SDimitry Andric 1840e8d8bef9SDimitry Andric // Constant Fold - insert bottom Length bits starting at the Index'th bit. 
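// Illustrative example: with Index = 8 and Length = 16, the mask below
// is 0x0000000000ffff00, so V00 = 0x1122334455667788 combined with
// V10 = 0xaaaa folds to 0x1122334455aaaa88: bits [23:8] of the first
// operand are replaced by the low 16 bits of the second.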
1841e8d8bef9SDimitry Andric if (CI00 && CI10) { 1842e8d8bef9SDimitry Andric APInt V00 = CI00->getValue(); 1843e8d8bef9SDimitry Andric APInt V10 = CI10->getValue(); 1844e8d8bef9SDimitry Andric APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index); 1845e8d8bef9SDimitry Andric V00 = V00 & ~Mask; 1846e8d8bef9SDimitry Andric V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index); 1847e8d8bef9SDimitry Andric APInt Val = V00 | V10; 1848e8d8bef9SDimitry Andric Type *IntTy64 = Type::getInt64Ty(II.getContext()); 1849e8d8bef9SDimitry Andric Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()), 1850e8d8bef9SDimitry Andric UndefValue::get(IntTy64)}; 1851e8d8bef9SDimitry Andric return ConstantVector::get(Args); 1852e8d8bef9SDimitry Andric } 1853e8d8bef9SDimitry Andric 1854e8d8bef9SDimitry Andric // If we were an INSERTQ call, we'll save demanded elements if we convert to 1855e8d8bef9SDimitry Andric // INSERTQI. 1856e8d8bef9SDimitry Andric if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) { 1857e8d8bef9SDimitry Andric Type *IntTy8 = Type::getInt8Ty(II.getContext()); 1858e8d8bef9SDimitry Andric Constant *CILength = ConstantInt::get(IntTy8, Length, false); 1859e8d8bef9SDimitry Andric Constant *CIIndex = ConstantInt::get(IntTy8, Index, false); 1860e8d8bef9SDimitry Andric 1861e8d8bef9SDimitry Andric Value *Args[] = {Op0, Op1, CILength, CIIndex}; 1862e8d8bef9SDimitry Andric Module *M = II.getModule(); 1863e8d8bef9SDimitry Andric Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi); 1864e8d8bef9SDimitry Andric return Builder.CreateCall(F, Args); 1865e8d8bef9SDimitry Andric } 1866e8d8bef9SDimitry Andric 1867e8d8bef9SDimitry Andric return nullptr; 1868e8d8bef9SDimitry Andric } 1869e8d8bef9SDimitry Andric 1870e8d8bef9SDimitry Andric /// Attempt to convert pshufb* to shufflevector if the mask is constant. 1871e8d8bef9SDimitry Andric static Value *simplifyX86pshufb(const IntrinsicInst &II, 1872e8d8bef9SDimitry Andric InstCombiner::BuilderTy &Builder) { 1873fe6060f1SDimitry Andric auto *V = dyn_cast<Constant>(II.getArgOperand(1)); 1874e8d8bef9SDimitry Andric if (!V) 1875e8d8bef9SDimitry Andric return nullptr; 1876e8d8bef9SDimitry Andric 1877e8d8bef9SDimitry Andric auto *VecTy = cast<FixedVectorType>(II.getType()); 1878e8d8bef9SDimitry Andric unsigned NumElts = VecTy->getNumElements(); 1879e8d8bef9SDimitry Andric assert((NumElts == 16 || NumElts == 32 || NumElts == 64) && 1880e8d8bef9SDimitry Andric "Unexpected number of elements in shuffle mask!"); 1881e8d8bef9SDimitry Andric 1882e8d8bef9SDimitry Andric // Construct a shuffle mask from constant integers or UNDEFs. 1883e8d8bef9SDimitry Andric int Indexes[64]; 1884e8d8bef9SDimitry Andric 1885e8d8bef9SDimitry Andric // Each byte in the shuffle control mask forms an index to permute the 1886e8d8bef9SDimitry Andric // corresponding byte in the destination operand. 
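// Worked example: a control byte of 0x05 becomes index 5 and selects
// byte 5 of the first operand (within its own 16-byte lane for the
// wider types), while a byte with its top bit set, e.g. 0x80, maps to
// NumElts and selects a zero from the null-vector second operand.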
1887e8d8bef9SDimitry Andric for (unsigned I = 0; I < NumElts; ++I) { 1888e8d8bef9SDimitry Andric Constant *COp = V->getAggregateElement(I); 1889e8d8bef9SDimitry Andric if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) 1890e8d8bef9SDimitry Andric return nullptr; 1891e8d8bef9SDimitry Andric 1892e8d8bef9SDimitry Andric if (isa<UndefValue>(COp)) { 1893e8d8bef9SDimitry Andric Indexes[I] = -1; 1894e8d8bef9SDimitry Andric continue; 1895e8d8bef9SDimitry Andric } 1896e8d8bef9SDimitry Andric 1897e8d8bef9SDimitry Andric int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue(); 1898e8d8bef9SDimitry Andric 1899e8d8bef9SDimitry Andric // If the most significant bit (bit[7]) of each byte of the shuffle 1900e8d8bef9SDimitry Andric // control mask is set, then zero is written in the result byte. 1901e8d8bef9SDimitry Andric // The zero vector is in the right-hand side of the resulting 1902e8d8bef9SDimitry Andric // shufflevector. 1903e8d8bef9SDimitry Andric 1904e8d8bef9SDimitry Andric // The value of each index for the high 128-bit lane is the least 1905e8d8bef9SDimitry Andric // significant 4 bits of the respective shuffle control byte. 1906e8d8bef9SDimitry Andric Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0); 1907e8d8bef9SDimitry Andric Indexes[I] = Index; 1908e8d8bef9SDimitry Andric } 1909e8d8bef9SDimitry Andric 1910e8d8bef9SDimitry Andric auto V1 = II.getArgOperand(0); 1911e8d8bef9SDimitry Andric auto V2 = Constant::getNullValue(VecTy); 1912bdd1243dSDimitry Andric return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts)); 1913e8d8bef9SDimitry Andric } 1914e8d8bef9SDimitry Andric 1915e8d8bef9SDimitry Andric /// Attempt to convert vpermilvar* to shufflevector if the mask is constant. 1916e8d8bef9SDimitry Andric static Value *simplifyX86vpermilvar(const IntrinsicInst &II, 1917e8d8bef9SDimitry Andric InstCombiner::BuilderTy &Builder) { 1918fe6060f1SDimitry Andric auto *V = dyn_cast<Constant>(II.getArgOperand(1)); 1919e8d8bef9SDimitry Andric if (!V) 1920e8d8bef9SDimitry Andric return nullptr; 1921e8d8bef9SDimitry Andric 1922e8d8bef9SDimitry Andric auto *VecTy = cast<FixedVectorType>(II.getType()); 1923e8d8bef9SDimitry Andric unsigned NumElts = VecTy->getNumElements(); 1924e8d8bef9SDimitry Andric bool IsPD = VecTy->getScalarType()->isDoubleTy(); 1925e8d8bef9SDimitry Andric unsigned NumLaneElts = IsPD ? 2 : 4; 1926e8d8bef9SDimitry Andric assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2); 1927e8d8bef9SDimitry Andric 1928e8d8bef9SDimitry Andric // Construct a shuffle mask from constant integers or UNDEFs. 1929e8d8bef9SDimitry Andric int Indexes[16]; 1930e8d8bef9SDimitry Andric 1931e8d8bef9SDimitry Andric // The intrinsics only read one or two bits, clear the rest. 
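// Worked example: for the PS variants a mask element of 6 is reduced to
// 6 & 3 == 2 by getLoBits(2) below and selects element 2 of its 128-bit
// lane; for the PD variants a mask element of 2 has bit 1 set and
// becomes generic shuffle index 1 after the lshr that follows.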
1932e8d8bef9SDimitry Andric for (unsigned I = 0; I < NumElts; ++I) { 1933e8d8bef9SDimitry Andric Constant *COp = V->getAggregateElement(I); 1934e8d8bef9SDimitry Andric if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) 1935e8d8bef9SDimitry Andric return nullptr; 1936e8d8bef9SDimitry Andric 1937e8d8bef9SDimitry Andric if (isa<UndefValue>(COp)) { 1938e8d8bef9SDimitry Andric Indexes[I] = -1; 1939e8d8bef9SDimitry Andric continue; 1940e8d8bef9SDimitry Andric } 1941e8d8bef9SDimitry Andric 1942e8d8bef9SDimitry Andric APInt Index = cast<ConstantInt>(COp)->getValue(); 1943e8d8bef9SDimitry Andric Index = Index.zextOrTrunc(32).getLoBits(2); 1944e8d8bef9SDimitry Andric 1945e8d8bef9SDimitry Andric // The PD variants use bit 1 to select the per-lane element index, so 1946e8d8bef9SDimitry Andric // shift down to convert to a generic shuffle mask index. 1947e8d8bef9SDimitry Andric if (IsPD) 1948e8d8bef9SDimitry Andric Index.lshrInPlace(1); 1949e8d8bef9SDimitry Andric 1950e8d8bef9SDimitry Andric // The _256 variants are a bit trickier since the mask bits always index 1951e8d8bef9SDimitry Andric // into the corresponding 128-bit half. In order to convert to a generic 1952e8d8bef9SDimitry Andric // shuffle, we have to make that explicit. 1953e8d8bef9SDimitry Andric Index += APInt(32, (I / NumLaneElts) * NumLaneElts); 1954e8d8bef9SDimitry Andric 1955e8d8bef9SDimitry Andric Indexes[I] = Index.getZExtValue(); 1956e8d8bef9SDimitry Andric } 1957e8d8bef9SDimitry Andric 1958e8d8bef9SDimitry Andric auto V1 = II.getArgOperand(0); 1959bdd1243dSDimitry Andric return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, NumElts)); 1960e8d8bef9SDimitry Andric } 1961e8d8bef9SDimitry Andric 1962e8d8bef9SDimitry Andric /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant. 1963e8d8bef9SDimitry Andric static Value *simplifyX86vpermv(const IntrinsicInst &II, 1964e8d8bef9SDimitry Andric InstCombiner::BuilderTy &Builder) { 1965e8d8bef9SDimitry Andric auto *V = dyn_cast<Constant>(II.getArgOperand(1)); 1966e8d8bef9SDimitry Andric if (!V) 1967e8d8bef9SDimitry Andric return nullptr; 1968e8d8bef9SDimitry Andric 1969e8d8bef9SDimitry Andric auto *VecTy = cast<FixedVectorType>(II.getType()); 1970e8d8bef9SDimitry Andric unsigned Size = VecTy->getNumElements(); 1971e8d8bef9SDimitry Andric assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) && 1972e8d8bef9SDimitry Andric "Unexpected shuffle mask size"); 1973e8d8bef9SDimitry Andric 1974e8d8bef9SDimitry Andric // Construct a shuffle mask from constant integers or UNDEFs.
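// Worked example: vpermd/vpermps only use the low bits of each index,
// so in an 8-element vpermps a constant index of 9 wraps to 9 & 7 == 1;
// unlike pshufb there is no zeroing case to model here.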
1975e8d8bef9SDimitry Andric int Indexes[64]; 1976e8d8bef9SDimitry Andric 1977e8d8bef9SDimitry Andric for (unsigned I = 0; I < Size; ++I) { 1978e8d8bef9SDimitry Andric Constant *COp = V->getAggregateElement(I); 1979e8d8bef9SDimitry Andric if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) 1980e8d8bef9SDimitry Andric return nullptr; 1981e8d8bef9SDimitry Andric 1982e8d8bef9SDimitry Andric if (isa<UndefValue>(COp)) { 1983e8d8bef9SDimitry Andric Indexes[I] = -1; 1984e8d8bef9SDimitry Andric continue; 1985e8d8bef9SDimitry Andric } 1986e8d8bef9SDimitry Andric 1987e8d8bef9SDimitry Andric uint32_t Index = cast<ConstantInt>(COp)->getZExtValue(); 1988e8d8bef9SDimitry Andric Index &= Size - 1; 1989e8d8bef9SDimitry Andric Indexes[I] = Index; 1990e8d8bef9SDimitry Andric } 1991e8d8bef9SDimitry Andric 1992e8d8bef9SDimitry Andric auto V1 = II.getArgOperand(0); 1993bdd1243dSDimitry Andric return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, Size)); 1994e8d8bef9SDimitry Andric } 1995e8d8bef9SDimitry Andric 1996bdd1243dSDimitry Andric std::optional<Instruction *> 1997e8d8bef9SDimitry Andric X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { 1998e8d8bef9SDimitry Andric auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width, 1999e8d8bef9SDimitry Andric unsigned DemandedWidth) { 2000e8d8bef9SDimitry Andric APInt UndefElts(Width, 0); 2001e8d8bef9SDimitry Andric APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth); 2002e8d8bef9SDimitry Andric return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts); 2003e8d8bef9SDimitry Andric }; 2004e8d8bef9SDimitry Andric 2005e8d8bef9SDimitry Andric Intrinsic::ID IID = II.getIntrinsicID(); 2006e8d8bef9SDimitry Andric switch (IID) { 2007e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_bextr_32: 2008e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_bextr_64: 2009e8d8bef9SDimitry Andric case Intrinsic::x86_tbm_bextri_u32: 2010e8d8bef9SDimitry Andric case Intrinsic::x86_tbm_bextri_u64: 2011e8d8bef9SDimitry Andric // If the RHS is a constant we can try some simplifications. 2012e8d8bef9SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) { 2013e8d8bef9SDimitry Andric uint64_t Shift = C->getZExtValue(); 2014e8d8bef9SDimitry Andric uint64_t Length = (Shift >> 8) & 0xff; 2015e8d8bef9SDimitry Andric Shift &= 0xff; 2016e8d8bef9SDimitry Andric unsigned BitWidth = II.getType()->getIntegerBitWidth(); 2017e8d8bef9SDimitry Andric // If the length is 0 or the shift is out of range, replace with zero. 2018e8d8bef9SDimitry Andric if (Length == 0 || Shift >= BitWidth) { 2019e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); 2020e8d8bef9SDimitry Andric } 2021e8d8bef9SDimitry Andric // If the LHS is also a constant, we can completely constant fold this. 2022e8d8bef9SDimitry Andric if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { 2023e8d8bef9SDimitry Andric uint64_t Result = InC->getZExtValue() >> Shift; 2024e8d8bef9SDimitry Andric if (Length > BitWidth) 2025e8d8bef9SDimitry Andric Length = BitWidth; 2026e8d8bef9SDimitry Andric Result &= maskTrailingOnes<uint64_t>(Length); 2027e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 2028e8d8bef9SDimitry Andric ConstantInt::get(II.getType(), Result)); 2029e8d8bef9SDimitry Andric } 2030e8d8bef9SDimitry Andric // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we 2031e8d8bef9SDimitry Andric // are only masking bits that a shift already cleared? 
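// Worked example: bextr(0x12345678, (8 /*length*/ << 8) | 8 /*start*/)
// extracts 8 bits starting at bit 8 and folds to
// (0x12345678 >> 8) & 0xff == 0x56 via the constant path above.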
2032e8d8bef9SDimitry Andric } 2033e8d8bef9SDimitry Andric break; 2034e8d8bef9SDimitry Andric 2035e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_bzhi_32: 2036e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_bzhi_64: 2037e8d8bef9SDimitry Andric // If the RHS is a constant we can try some simplifications. 2038e8d8bef9SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) { 2039e8d8bef9SDimitry Andric uint64_t Index = C->getZExtValue() & 0xff; 2040e8d8bef9SDimitry Andric unsigned BitWidth = II.getType()->getIntegerBitWidth(); 2041e8d8bef9SDimitry Andric if (Index >= BitWidth) { 2042e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, II.getArgOperand(0)); 2043e8d8bef9SDimitry Andric } 2044e8d8bef9SDimitry Andric if (Index == 0) { 2045e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); 2046e8d8bef9SDimitry Andric } 2047e8d8bef9SDimitry Andric // If the LHS is also a constant, we can completely constant fold this. 2048e8d8bef9SDimitry Andric if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { 2049e8d8bef9SDimitry Andric uint64_t Result = InC->getZExtValue(); 2050e8d8bef9SDimitry Andric Result &= maskTrailingOnes<uint64_t>(Index); 2051e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 2052e8d8bef9SDimitry Andric ConstantInt::get(II.getType(), Result)); 2053e8d8bef9SDimitry Andric } 2054e8d8bef9SDimitry Andric // TODO should we convert this to an AND if the RHS is constant? 2055e8d8bef9SDimitry Andric } 2056e8d8bef9SDimitry Andric break; 2057e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_pext_32: 2058e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_pext_64: 2059e8d8bef9SDimitry Andric if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) { 2060e8d8bef9SDimitry Andric if (MaskC->isNullValue()) { 2061e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); 2062e8d8bef9SDimitry Andric } 2063e8d8bef9SDimitry Andric if (MaskC->isAllOnesValue()) { 2064e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, II.getArgOperand(0)); 2065e8d8bef9SDimitry Andric } 2066e8d8bef9SDimitry Andric 206781ad6265SDimitry Andric unsigned MaskIdx, MaskLen; 206881ad6265SDimitry Andric if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { 2069e8d8bef9SDimitry Andric // Any single contiguous sequence of 1s anywhere in the mask simply 2070e8d8bef9SDimitry Andric // describes a subset of the input bits shifted to the appropriate 2071e8d8bef9SDimitry Andric // position. Replace with the straightforward IR. 2072e8d8bef9SDimitry Andric Value *Input = II.getArgOperand(0); 2073e8d8bef9SDimitry Andric Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1)); 207481ad6265SDimitry Andric Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx); 207581ad6265SDimitry Andric Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt); 2076e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Shifted); 2077e8d8bef9SDimitry Andric } 2078e8d8bef9SDimitry Andric 2079e8d8bef9SDimitry Andric if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { 2080e8d8bef9SDimitry Andric uint64_t Src = SrcC->getZExtValue(); 2081e8d8bef9SDimitry Andric uint64_t Mask = MaskC->getZExtValue(); 2082e8d8bef9SDimitry Andric uint64_t Result = 0; 2083e8d8bef9SDimitry Andric uint64_t BitToSet = 1; 2084e8d8bef9SDimitry Andric 2085e8d8bef9SDimitry Andric while (Mask) { 2086e8d8bef9SDimitry Andric // Isolate lowest set bit.
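// (Mask & -Mask is the usual two's-complement idiom for this: for
// Mask == 0b01101000 it yields 0b00001000.)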
2087e8d8bef9SDimitry Andric uint64_t BitToTest = Mask & -Mask; 2088e8d8bef9SDimitry Andric if (BitToTest & Src) 2089e8d8bef9SDimitry Andric Result |= BitToSet; 2090e8d8bef9SDimitry Andric 2091e8d8bef9SDimitry Andric BitToSet <<= 1; 2092e8d8bef9SDimitry Andric // Clear lowest set bit. 2093e8d8bef9SDimitry Andric Mask &= Mask - 1; 2094e8d8bef9SDimitry Andric } 2095e8d8bef9SDimitry Andric 2096e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 2097e8d8bef9SDimitry Andric ConstantInt::get(II.getType(), Result)); 2098e8d8bef9SDimitry Andric } 2099e8d8bef9SDimitry Andric } 2100e8d8bef9SDimitry Andric break; 2101e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_pdep_32: 2102e8d8bef9SDimitry Andric case Intrinsic::x86_bmi_pdep_64: 2103e8d8bef9SDimitry Andric if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) { 2104e8d8bef9SDimitry Andric if (MaskC->isNullValue()) { 2105e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0)); 2106e8d8bef9SDimitry Andric } 2107e8d8bef9SDimitry Andric if (MaskC->isAllOnesValue()) { 2108e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, II.getArgOperand(0)); 2109e8d8bef9SDimitry Andric } 211081ad6265SDimitry Andric 211181ad6265SDimitry Andric unsigned MaskIdx, MaskLen; 211281ad6265SDimitry Andric if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) { 2113e8d8bef9SDimitry Andric // Any single contiguous sequence of 1s anywhere in the mask simply 2114e8d8bef9SDimitry Andric // describes a subset of the input bits shifted to the appropriate 2115e8d8bef9SDimitry Andric // position. Replace with the straightforward IR. 2116e8d8bef9SDimitry Andric Value *Input = II.getArgOperand(0); 211781ad6265SDimitry Andric Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx); 211881ad6265SDimitry Andric Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt); 2119e8d8bef9SDimitry Andric Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1)); 2120e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Masked); 2121e8d8bef9SDimitry Andric } 2122e8d8bef9SDimitry Andric 2123e8d8bef9SDimitry Andric if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) { 2124e8d8bef9SDimitry Andric uint64_t Src = SrcC->getZExtValue(); 2125e8d8bef9SDimitry Andric uint64_t Mask = MaskC->getZExtValue(); 2126e8d8bef9SDimitry Andric uint64_t Result = 0; 2127e8d8bef9SDimitry Andric uint64_t BitToTest = 1; 2128e8d8bef9SDimitry Andric 2129e8d8bef9SDimitry Andric while (Mask) { 2130e8d8bef9SDimitry Andric // Isolate lowest set bit.
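// Worked example of this loop: pdep with Src = 0b11 and Mask = 0b0101
// deposits Src bit 0 into mask bit 0 and Src bit 1 into mask bit 2,
// giving Result = 0b0101.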
2131e8d8bef9SDimitry Andric uint64_t BitToSet = Mask & -Mask; 2132e8d8bef9SDimitry Andric if (BitToTest & Src) 2133e8d8bef9SDimitry Andric Result |= BitToSet; 2134e8d8bef9SDimitry Andric 2135e8d8bef9SDimitry Andric BitToTest <<= 1; 2136e8d8bef9SDimitry Andric // Clear lowest set bit. 2137e8d8bef9SDimitry Andric Mask &= Mask - 1; 2138e8d8bef9SDimitry Andric } 2139e8d8bef9SDimitry Andric 2140e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 2141e8d8bef9SDimitry Andric ConstantInt::get(II.getType(), Result)); 2142e8d8bef9SDimitry Andric } 2143e8d8bef9SDimitry Andric } 2144e8d8bef9SDimitry Andric break; 2145e8d8bef9SDimitry Andric 2146e8d8bef9SDimitry Andric case Intrinsic::x86_sse_cvtss2si: 2147e8d8bef9SDimitry Andric case Intrinsic::x86_sse_cvtss2si64: 2148e8d8bef9SDimitry Andric case Intrinsic::x86_sse_cvttss2si: 2149e8d8bef9SDimitry Andric case Intrinsic::x86_sse_cvttss2si64: 2150e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_cvtsd2si: 2151e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_cvtsd2si64: 2152e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_cvttsd2si: 2153e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_cvttsd2si64: 2154e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtss2si32: 2155e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtss2si64: 2156e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtss2usi32: 2157e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtss2usi64: 2158e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtsd2si32: 2159e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtsd2si64: 2160e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtsd2usi32: 2161e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcvtsd2usi64: 2162e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttss2si: 2163e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttss2si64: 2164e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttss2usi: 2165e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttss2usi64: 2166e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttsd2si: 2167e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttsd2si64: 2168e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttsd2usi: 2169e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_cvttsd2usi64: { 2170e8d8bef9SDimitry Andric // These intrinsics only demand the 0th element of their input vectors. If 2171e8d8bef9SDimitry Andric // we can simplify the input based on that, do so now.
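// For example, if Arg is built by an insertelement into lane 1, only
// lane 0 is demanded here, so the simplification below can replace Arg
// with the insertelement's source vector.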
2172e8d8bef9SDimitry Andric Value *Arg = II.getArgOperand(0); 2173e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements(); 2174e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) { 2175e8d8bef9SDimitry Andric return IC.replaceOperand(II, 0, V); 2176e8d8bef9SDimitry Andric } 2177e8d8bef9SDimitry Andric break; 2178e8d8bef9SDimitry Andric } 2179e8d8bef9SDimitry Andric 2180e8d8bef9SDimitry Andric case Intrinsic::x86_mmx_pmovmskb: 2181e8d8bef9SDimitry Andric case Intrinsic::x86_sse_movmsk_ps: 2182e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_movmsk_pd: 2183e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_pmovmskb_128: 2184e8d8bef9SDimitry Andric case Intrinsic::x86_avx_movmsk_pd_256: 2185e8d8bef9SDimitry Andric case Intrinsic::x86_avx_movmsk_ps_256: 2186e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pmovmskb: 2187e8d8bef9SDimitry Andric if (Value *V = simplifyX86movmsk(II, IC.Builder)) { 2188e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2189e8d8bef9SDimitry Andric } 2190e8d8bef9SDimitry Andric break; 2191e8d8bef9SDimitry Andric 2192e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comieq_ss: 2193e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comige_ss: 2194e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comigt_ss: 2195e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comile_ss: 2196e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comilt_ss: 2197e8d8bef9SDimitry Andric case Intrinsic::x86_sse_comineq_ss: 2198e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomieq_ss: 2199e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomige_ss: 2200e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomigt_ss: 2201e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomile_ss: 2202e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomilt_ss: 2203e8d8bef9SDimitry Andric case Intrinsic::x86_sse_ucomineq_ss: 2204e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comieq_sd: 2205e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comige_sd: 2206e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comigt_sd: 2207e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comile_sd: 2208e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comilt_sd: 2209e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_comineq_sd: 2210e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomieq_sd: 2211e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomige_sd: 2212e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomigt_sd: 2213e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomile_sd: 2214e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomilt_sd: 2215e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_ucomineq_sd: 2216e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcomi_ss: 2217e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vcomi_sd: 2218e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_cmp_ss: 2219e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_cmp_sd: { 2220e8d8bef9SDimitry Andric // These intrinsics only demand the 0th element of their input vectors. If 2221e8d8bef9SDimitry Andric // we can simplify the input based on that, do so now. 
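    // Illustrative sketch (assumed example): comieq.ss reads only lane 0 of
    // each operand, so a shuffle on either argument that preserves lane 0
    // while permuting lanes 1-3 can be bypassed before the compare. Both
    // operands get the same lane-0 treatment below.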
2222e8d8bef9SDimitry Andric bool MadeChange = false; 2223e8d8bef9SDimitry Andric Value *Arg0 = II.getArgOperand(0); 2224e8d8bef9SDimitry Andric Value *Arg1 = II.getArgOperand(1); 2225e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements(); 2226e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) { 2227e8d8bef9SDimitry Andric IC.replaceOperand(II, 0, V); 2228e8d8bef9SDimitry Andric MadeChange = true; 2229e8d8bef9SDimitry Andric } 2230e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) { 2231e8d8bef9SDimitry Andric IC.replaceOperand(II, 1, V); 2232e8d8bef9SDimitry Andric MadeChange = true; 2233e8d8bef9SDimitry Andric } 2234e8d8bef9SDimitry Andric if (MadeChange) { 2235e8d8bef9SDimitry Andric return &II; 2236e8d8bef9SDimitry Andric } 2237e8d8bef9SDimitry Andric break; 2238e8d8bef9SDimitry Andric } 2239e8d8bef9SDimitry Andric 2240e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_add_ps_512: 2241e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_div_ps_512: 2242e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mul_ps_512: 2243e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_sub_ps_512: 2244e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_add_pd_512: 2245e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_div_pd_512: 2246e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mul_pd_512: 2247e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_sub_pd_512: 2248e8d8bef9SDimitry Andric // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular 2249e8d8bef9SDimitry Andric // IR operations. 2250e8d8bef9SDimitry Andric if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) { 2251e8d8bef9SDimitry Andric if (R->getValue() == 4) { 2252e8d8bef9SDimitry Andric Value *Arg0 = II.getArgOperand(0); 2253e8d8bef9SDimitry Andric Value *Arg1 = II.getArgOperand(1); 2254e8d8bef9SDimitry Andric 2255e8d8bef9SDimitry Andric Value *V; 2256e8d8bef9SDimitry Andric switch (IID) { 2257e8d8bef9SDimitry Andric default: 2258e8d8bef9SDimitry Andric llvm_unreachable("Case stmts out of sync!"); 2259e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_add_ps_512: 2260e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_add_pd_512: 2261e8d8bef9SDimitry Andric V = IC.Builder.CreateFAdd(Arg0, Arg1); 2262e8d8bef9SDimitry Andric break; 2263e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_sub_ps_512: 2264e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_sub_pd_512: 2265e8d8bef9SDimitry Andric V = IC.Builder.CreateFSub(Arg0, Arg1); 2266e8d8bef9SDimitry Andric break; 2267e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mul_ps_512: 2268e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mul_pd_512: 2269e8d8bef9SDimitry Andric V = IC.Builder.CreateFMul(Arg0, Arg1); 2270e8d8bef9SDimitry Andric break; 2271e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_div_ps_512: 2272e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_div_pd_512: 2273e8d8bef9SDimitry Andric V = IC.Builder.CreateFDiv(Arg0, Arg1); 2274e8d8bef9SDimitry Andric break; 2275e8d8bef9SDimitry Andric } 2276e8d8bef9SDimitry Andric 2277e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2278e8d8bef9SDimitry Andric } 2279e8d8bef9SDimitry Andric } 2280e8d8bef9SDimitry Andric break; 2281e8d8bef9SDimitry Andric 2282e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_ss_round: 2283e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_ss_round: 2284e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_ss_round: 2285e8d8bef9SDimitry 
Andric case Intrinsic::x86_avx512_mask_sub_ss_round: 2286e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_sd_round: 2287e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_sd_round: 2288e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_sd_round: 2289e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_sub_sd_round: 2290e8d8bef9SDimitry Andric // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular 2291e8d8bef9SDimitry Andric // IR operations. 2292e8d8bef9SDimitry Andric if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) { 2293e8d8bef9SDimitry Andric if (R->getValue() == 4) { 2294e8d8bef9SDimitry Andric // Extract the element as scalars. 2295e8d8bef9SDimitry Andric Value *Arg0 = II.getArgOperand(0); 2296e8d8bef9SDimitry Andric Value *Arg1 = II.getArgOperand(1); 2297e8d8bef9SDimitry Andric Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0); 2298e8d8bef9SDimitry Andric Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0); 2299e8d8bef9SDimitry Andric 2300e8d8bef9SDimitry Andric Value *V; 2301e8d8bef9SDimitry Andric switch (IID) { 2302e8d8bef9SDimitry Andric default: 2303e8d8bef9SDimitry Andric llvm_unreachable("Case stmts out of sync!"); 2304e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_ss_round: 2305e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_sd_round: 2306e8d8bef9SDimitry Andric V = IC.Builder.CreateFAdd(LHS, RHS); 2307e8d8bef9SDimitry Andric break; 2308e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_sub_ss_round: 2309e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_sub_sd_round: 2310e8d8bef9SDimitry Andric V = IC.Builder.CreateFSub(LHS, RHS); 2311e8d8bef9SDimitry Andric break; 2312e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_ss_round: 2313e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_sd_round: 2314e8d8bef9SDimitry Andric V = IC.Builder.CreateFMul(LHS, RHS); 2315e8d8bef9SDimitry Andric break; 2316e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_ss_round: 2317e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_sd_round: 2318e8d8bef9SDimitry Andric V = IC.Builder.CreateFDiv(LHS, RHS); 2319e8d8bef9SDimitry Andric break; 2320e8d8bef9SDimitry Andric } 2321e8d8bef9SDimitry Andric 2322e8d8bef9SDimitry Andric // Handle the masking aspect of the intrinsic. 2323e8d8bef9SDimitry Andric Value *Mask = II.getArgOperand(3); 2324e8d8bef9SDimitry Andric auto *C = dyn_cast<ConstantInt>(Mask); 2325e8d8bef9SDimitry Andric // We don't need a select if we know the mask bit is a 1. 2326e8d8bef9SDimitry Andric if (!C || !C->getValue()[0]) { 2327e8d8bef9SDimitry Andric // Cast the mask to an i1 vector and then extract the lowest element. 2328e8d8bef9SDimitry Andric auto *MaskTy = FixedVectorType::get( 2329e8d8bef9SDimitry Andric IC.Builder.getInt1Ty(), 2330e8d8bef9SDimitry Andric cast<IntegerType>(Mask->getType())->getBitWidth()); 2331e8d8bef9SDimitry Andric Mask = IC.Builder.CreateBitCast(Mask, MaskTy); 2332e8d8bef9SDimitry Andric Mask = IC.Builder.CreateExtractElement(Mask, (uint64_t)0); 2333e8d8bef9SDimitry Andric // Extract the lowest element from the passthru operand. 2334e8d8bef9SDimitry Andric Value *Passthru = 2335e8d8bef9SDimitry Andric IC.Builder.CreateExtractElement(II.getArgOperand(2), (uint64_t)0); 2336e8d8bef9SDimitry Andric V = IC.Builder.CreateSelect(Mask, V, Passthru); 2337e8d8bef9SDimitry Andric } 2338e8d8bef9SDimitry Andric 2339e8d8bef9SDimitry Andric // Insert the result back into the original argument 0. 
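    // Rough shape of the IR built by this case (illustrative, assuming a
    // <4 x float> add with an i8 mask whose low bit is not known set):
    //   %l  = extractelement <4 x float> %a, i64 0
    //   %r  = extractelement <4 x float> %b, i64 0
    //   %op = fadd float %l, %r
    //   %mv = bitcast i8 %mask to <8 x i1>
    //   %m  = extractelement <8 x i1> %mv, i64 0
    //   %p  = extractelement <4 x float> %passthru, i64 0
    //   %f  = select i1 %m, float %op, float %p
    //   %v  = insertelement <4 x float> %a, float %f, i64 0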
2340e8d8bef9SDimitry Andric V = IC.Builder.CreateInsertElement(Arg0, V, (uint64_t)0); 2341e8d8bef9SDimitry Andric 2342e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2343e8d8bef9SDimitry Andric } 2344e8d8bef9SDimitry Andric } 2345e8d8bef9SDimitry Andric break; 2346e8d8bef9SDimitry Andric 2347e8d8bef9SDimitry Andric // Constant fold ashr( <A x Bi>, Ci ). 2348e8d8bef9SDimitry Andric // Constant fold lshr( <A x Bi>, Ci ). 2349e8d8bef9SDimitry Andric // Constant fold shl( <A x Bi>, Ci ). 2350e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrai_d: 2351e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrai_w: 2352e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrai_d: 2353e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrai_w: 2354e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrai_q_128: 2355e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrai_q_256: 2356e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrai_d_512: 2357e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrai_q_512: 2358e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrai_w_512: 2359e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrli_d: 2360e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrli_q: 2361e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrli_w: 2362e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrli_d: 2363e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrli_q: 2364e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrli_w: 2365e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrli_d_512: 2366e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrli_q_512: 2367e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrli_w_512: 2368e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_pslli_d: 2369e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_pslli_q: 2370e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_pslli_w: 2371e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pslli_d: 2372e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pslli_q: 2373e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pslli_w: 2374e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_pslli_d_512: 2375e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_pslli_q_512: 2376e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_pslli_w_512: 2377e8d8bef9SDimitry Andric if (Value *V = simplifyX86immShift(II, IC.Builder)) { 2378e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2379e8d8bef9SDimitry Andric } 2380e8d8bef9SDimitry Andric break; 2381e8d8bef9SDimitry Andric 2382e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psra_d: 2383e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psra_w: 2384e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psra_d: 2385e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psra_w: 2386e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psra_q_128: 2387e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psra_q_256: 2388e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psra_d_512: 2389e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psra_q_512: 2390e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psra_w_512: 2391e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrl_d: 2392e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrl_q: 2393e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psrl_w: 2394e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrl_d: 2395e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrl_q: 2396e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrl_w: 2397e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrl_d_512: 2398e8d8bef9SDimitry Andric case 
Intrinsic::x86_avx512_psrl_q_512: 2399e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrl_w_512: 2400e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psll_d: 2401e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psll_q: 2402e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_psll_w: 2403e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psll_d: 2404e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psll_q: 2405e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psll_w: 2406e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psll_d_512: 2407e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psll_q_512: 2408e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psll_w_512: { 2409e8d8bef9SDimitry Andric if (Value *V = simplifyX86immShift(II, IC.Builder)) { 2410e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2411e8d8bef9SDimitry Andric } 2412e8d8bef9SDimitry Andric 2413e8d8bef9SDimitry Andric // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector 2414e8d8bef9SDimitry Andric // operand to compute the shift amount. 2415e8d8bef9SDimitry Andric Value *Arg1 = II.getArgOperand(1); 2416e8d8bef9SDimitry Andric assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 && 2417e8d8bef9SDimitry Andric "Unexpected packed shift size"); 2418e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements(); 2419e8d8bef9SDimitry Andric 2420e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) { 2421e8d8bef9SDimitry Andric return IC.replaceOperand(II, 1, V); 2422e8d8bef9SDimitry Andric } 2423e8d8bef9SDimitry Andric break; 2424e8d8bef9SDimitry Andric } 2425e8d8bef9SDimitry Andric 2426e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psllv_d: 2427e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psllv_d_256: 2428e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psllv_q: 2429e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psllv_q_256: 2430e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psllv_d_512: 2431e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psllv_q_512: 2432e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psllv_w_128: 2433e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psllv_w_256: 2434e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psllv_w_512: 2435e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrav_d: 2436e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrav_d_256: 2437e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_q_128: 2438e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_q_256: 2439e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_d_512: 2440e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_q_512: 2441e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_w_128: 2442e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_w_256: 2443e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrav_w_512: 2444e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrlv_d: 2445e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrlv_d_256: 2446e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrlv_q: 2447e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_psrlv_q_256: 2448e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrlv_d_512: 2449e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrlv_q_512: 2450e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrlv_w_128: 2451e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrlv_w_256: 2452e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_psrlv_w_512: 2453e8d8bef9SDimitry Andric if (Value *V = simplifyX86varShift(II, IC.Builder)) { 
2454e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2455e8d8bef9SDimitry Andric } 2456e8d8bef9SDimitry Andric break; 2457e8d8bef9SDimitry Andric 2458e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packssdw_128: 2459e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packsswb_128: 2460e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packssdw: 2461e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packsswb: 2462e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packssdw_512: 2463e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packsswb_512: 2464e8d8bef9SDimitry Andric if (Value *V = simplifyX86pack(II, IC.Builder, true)) { 2465e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2466e8d8bef9SDimitry Andric } 2467e8d8bef9SDimitry Andric break; 2468e8d8bef9SDimitry Andric 2469e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packuswb_128: 2470e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_packusdw: 2471e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packusdw: 2472e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packuswb: 2473e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packusdw_512: 2474e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packuswb_512: 2475e8d8bef9SDimitry Andric if (Value *V = simplifyX86pack(II, IC.Builder, false)) { 2476e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2477e8d8bef9SDimitry Andric } 2478e8d8bef9SDimitry Andric break; 2479e8d8bef9SDimitry Andric 2480e8d8bef9SDimitry Andric case Intrinsic::x86_pclmulqdq: 2481e8d8bef9SDimitry Andric case Intrinsic::x86_pclmulqdq_256: 2482e8d8bef9SDimitry Andric case Intrinsic::x86_pclmulqdq_512: { 2483e8d8bef9SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) { 2484e8d8bef9SDimitry Andric unsigned Imm = C->getZExtValue(); 2485e8d8bef9SDimitry Andric 2486e8d8bef9SDimitry Andric bool MadeChange = false; 2487e8d8bef9SDimitry Andric Value *Arg0 = II.getArgOperand(0); 2488e8d8bef9SDimitry Andric Value *Arg1 = II.getArgOperand(1); 2489e8d8bef9SDimitry Andric unsigned VWidth = 2490e8d8bef9SDimitry Andric cast<FixedVectorType>(Arg0->getType())->getNumElements(); 2491e8d8bef9SDimitry Andric 2492e8d8bef9SDimitry Andric APInt UndefElts1(VWidth, 0); 2493e8d8bef9SDimitry Andric APInt DemandedElts1 = 2494e8d8bef9SDimitry Andric APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1)); 2495e8d8bef9SDimitry Andric if (Value *V = 2496e8d8bef9SDimitry Andric IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) { 2497e8d8bef9SDimitry Andric IC.replaceOperand(II, 0, V); 2498e8d8bef9SDimitry Andric MadeChange = true; 2499e8d8bef9SDimitry Andric } 2500e8d8bef9SDimitry Andric 2501e8d8bef9SDimitry Andric APInt UndefElts2(VWidth, 0); 2502e8d8bef9SDimitry Andric APInt DemandedElts2 = 2503e8d8bef9SDimitry Andric APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1)); 2504e8d8bef9SDimitry Andric if (Value *V = 2505e8d8bef9SDimitry Andric IC.SimplifyDemandedVectorElts(Arg1, DemandedElts2, UndefElts2)) { 2506e8d8bef9SDimitry Andric IC.replaceOperand(II, 1, V); 2507e8d8bef9SDimitry Andric MadeChange = true; 2508e8d8bef9SDimitry Andric } 2509e8d8bef9SDimitry Andric 2510e8d8bef9SDimitry Andric // If all demanded elements of either input are undef, the result is zero.
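    // Illustrative reading of the demanded-elements math above (example
    // immediate assumed): bit 0 of the immediate picks the 64-bit half of
    // Arg0 that is multiplied and bit 4 picks the half of Arg1, so for a
    // <2 x i64> pclmulqdq with Imm = 0x11 only element 1 of each operand is
    // demanded. If such a demanded half is undef, the carry-less product is
    // undefined and is folded to zero just below.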
2511e8d8bef9SDimitry Andric if (DemandedElts1.isSubsetOf(UndefElts1) || 2512e8d8bef9SDimitry Andric DemandedElts2.isSubsetOf(UndefElts2)) { 2513e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, 2514e8d8bef9SDimitry Andric ConstantAggregateZero::get(II.getType())); 2515e8d8bef9SDimitry Andric } 2516e8d8bef9SDimitry Andric 2517e8d8bef9SDimitry Andric if (MadeChange) { 2518e8d8bef9SDimitry Andric return &II; 2519e8d8bef9SDimitry Andric } 2520e8d8bef9SDimitry Andric } 2521e8d8bef9SDimitry Andric break; 2522e8d8bef9SDimitry Andric } 2523e8d8bef9SDimitry Andric 2524e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_insertps: 2525e8d8bef9SDimitry Andric if (Value *V = simplifyX86insertps(II, IC.Builder)) { 2526e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2527e8d8bef9SDimitry Andric } 2528e8d8bef9SDimitry Andric break; 2529e8d8bef9SDimitry Andric 2530e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_extrq: { 2531e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 2532e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 2533e8d8bef9SDimitry Andric unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements(); 2534e8d8bef9SDimitry Andric unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements(); 2535e8d8bef9SDimitry Andric assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && 2536e8d8bef9SDimitry Andric Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 && 2537e8d8bef9SDimitry Andric VWidth1 == 16 && "Unexpected operand sizes"); 2538e8d8bef9SDimitry Andric 2539e8d8bef9SDimitry Andric // See if we're dealing with constant values. 2540fe6060f1SDimitry Andric auto *C1 = dyn_cast<Constant>(Op1); 2541fe6060f1SDimitry Andric auto *CILength = 2542e8d8bef9SDimitry Andric C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0)) 2543e8d8bef9SDimitry Andric : nullptr; 2544fe6060f1SDimitry Andric auto *CIIndex = 2545e8d8bef9SDimitry Andric C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1)) 2546e8d8bef9SDimitry Andric : nullptr; 2547e8d8bef9SDimitry Andric 2548e8d8bef9SDimitry Andric // Attempt to simplify to a constant, shuffle vector or EXTRQI call. 2549e8d8bef9SDimitry Andric if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) { 2550e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2551e8d8bef9SDimitry Andric } 2552e8d8bef9SDimitry Andric 2553e8d8bef9SDimitry Andric // EXTRQ only uses the lowest 64-bits of the first 128-bit vector 2554e8d8bef9SDimitry Andric // operand and the lowest 16-bits of the second. 2555e8d8bef9SDimitry Andric bool MadeChange = false; 2556e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) { 2557e8d8bef9SDimitry Andric IC.replaceOperand(II, 0, V); 2558e8d8bef9SDimitry Andric MadeChange = true; 2559e8d8bef9SDimitry Andric } 2560e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) { 2561e8d8bef9SDimitry Andric IC.replaceOperand(II, 1, V); 2562e8d8bef9SDimitry Andric MadeChange = true; 2563e8d8bef9SDimitry Andric } 2564e8d8bef9SDimitry Andric if (MadeChange) { 2565e8d8bef9SDimitry Andric return &II; 2566e8d8bef9SDimitry Andric } 2567e8d8bef9SDimitry Andric break; 2568e8d8bef9SDimitry Andric } 2569e8d8bef9SDimitry Andric 2570e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_extrqi: { 2571e8d8bef9SDimitry Andric // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining 2572e8d8bef9SDimitry Andric // bits of the lower 64-bits.
The upper 64-bits are undefined. 2573e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 2574e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements(); 2575e8d8bef9SDimitry Andric assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 && 2576e8d8bef9SDimitry Andric "Unexpected operand size"); 2577e8d8bef9SDimitry Andric 2578e8d8bef9SDimitry Andric // See if we're dealing with constant values. 2579fe6060f1SDimitry Andric auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1)); 2580fe6060f1SDimitry Andric auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2)); 2581e8d8bef9SDimitry Andric 2582e8d8bef9SDimitry Andric // Attempt to simplify to a constant or shuffle vector. 2583e8d8bef9SDimitry Andric if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) { 2584e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2585e8d8bef9SDimitry Andric } 2586e8d8bef9SDimitry Andric 2587e8d8bef9SDimitry Andric // EXTRQI only uses the lowest 64-bits of the first 128-bit vector 2588e8d8bef9SDimitry Andric // operand. 2589e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) { 2590e8d8bef9SDimitry Andric return IC.replaceOperand(II, 0, V); 2591e8d8bef9SDimitry Andric } 2592e8d8bef9SDimitry Andric break; 2593e8d8bef9SDimitry Andric } 2594e8d8bef9SDimitry Andric 2595e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_insertq: { 2596e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 2597e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 2598e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements(); 2599e8d8bef9SDimitry Andric assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && 2600e8d8bef9SDimitry Andric Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 && 2601e8d8bef9SDimitry Andric cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 && 2602e8d8bef9SDimitry Andric "Unexpected operand size"); 2603e8d8bef9SDimitry Andric 2604e8d8bef9SDimitry Andric // See if we're dealing with constant values. 2605fe6060f1SDimitry Andric auto *C1 = dyn_cast<Constant>(Op1); 2606fe6060f1SDimitry Andric auto *CI11 = 2607e8d8bef9SDimitry Andric C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1)) 2608e8d8bef9SDimitry Andric : nullptr; 2609e8d8bef9SDimitry Andric 2610e8d8bef9SDimitry Andric // Attempt to simplify to a constant, shuffle vector or INSERTQI call. 2611e8d8bef9SDimitry Andric if (CI11) { 2612e8d8bef9SDimitry Andric const APInt &V11 = CI11->getValue(); 2613e8d8bef9SDimitry Andric APInt Len = V11.zextOrTrunc(6); 2614e8d8bef9SDimitry Andric APInt Idx = V11.lshr(8).zextOrTrunc(6); 2615e8d8bef9SDimitry Andric if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) { 2616e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2617e8d8bef9SDimitry Andric } 2618e8d8bef9SDimitry Andric } 2619e8d8bef9SDimitry Andric 2620e8d8bef9SDimitry Andric // INSERTQ only uses the lowest 64-bits of the first 128-bit vector 2621e8d8bef9SDimitry Andric // operand. 
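    // Illustrative decoding of the descriptor extracted above (example value
    // assumed): if element 1 of Op1 is the constant 0x0404, then
    // Len = 0x0404 & 0x3f = 4 and Idx = (0x0404 >> 8) & 0x3f = 4, i.e.
    // insert a 4-bit field taken from Op1's low qword into Op0 starting at
    // bit 4.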
2622e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) { 2623e8d8bef9SDimitry Andric return IC.replaceOperand(II, 0, V); 2624e8d8bef9SDimitry Andric } 2625e8d8bef9SDimitry Andric break; 2626e8d8bef9SDimitry Andric } 2627e8d8bef9SDimitry Andric 2628e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_insertqi: { 2629e8d8bef9SDimitry Andric // INSERTQI: Extract lowest Length bits from lower half of second source and 2630e8d8bef9SDimitry Andric // insert over first source starting at Index bit. The upper 64-bits are 2631e8d8bef9SDimitry Andric // undefined. 2632e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 2633e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 2634e8d8bef9SDimitry Andric unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements(); 2635e8d8bef9SDimitry Andric unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements(); 2636e8d8bef9SDimitry Andric assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && 2637e8d8bef9SDimitry Andric Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 && 2638e8d8bef9SDimitry Andric VWidth1 == 2 && "Unexpected operand sizes"); 2639e8d8bef9SDimitry Andric 2640e8d8bef9SDimitry Andric // See if we're dealing with constant values. 2641fe6060f1SDimitry Andric auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2)); 2642fe6060f1SDimitry Andric auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3)); 2643e8d8bef9SDimitry Andric 2644e8d8bef9SDimitry Andric // Attempt to simplify to a constant or shuffle vector. 2645e8d8bef9SDimitry Andric if (CILength && CIIndex) { 2646e8d8bef9SDimitry Andric APInt Len = CILength->getValue().zextOrTrunc(6); 2647e8d8bef9SDimitry Andric APInt Idx = CIIndex->getValue().zextOrTrunc(6); 2648e8d8bef9SDimitry Andric if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) { 2649e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2650e8d8bef9SDimitry Andric } 2651e8d8bef9SDimitry Andric } 2652e8d8bef9SDimitry Andric 2653e8d8bef9SDimitry Andric // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector 2654e8d8bef9SDimitry Andric // operands. 
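    // Consequently (illustrative sketch, names assumed): if either source
    // arrives as
    //   %s = insertelement <2 x i64> %v, i64 %x, i64 1
    // the upper-lane insert is dead and the operand can become %v, because
    // only element 0 of each <2 x i64> operand is demanded below.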
2655e8d8bef9SDimitry Andric bool MadeChange = false; 2656e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) { 2657e8d8bef9SDimitry Andric IC.replaceOperand(II, 0, V); 2658e8d8bef9SDimitry Andric MadeChange = true; 2659e8d8bef9SDimitry Andric } 2660e8d8bef9SDimitry Andric if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) { 2661e8d8bef9SDimitry Andric IC.replaceOperand(II, 1, V); 2662e8d8bef9SDimitry Andric MadeChange = true; 2663e8d8bef9SDimitry Andric } 2664e8d8bef9SDimitry Andric if (MadeChange) { 2665e8d8bef9SDimitry Andric return &II; 2666e8d8bef9SDimitry Andric } 2667e8d8bef9SDimitry Andric break; 2668e8d8bef9SDimitry Andric } 2669e8d8bef9SDimitry Andric 2670e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_pblendvb: 2671e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_blendvps: 2672e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_blendvpd: 2673e8d8bef9SDimitry Andric case Intrinsic::x86_avx_blendv_ps_256: 2674e8d8bef9SDimitry Andric case Intrinsic::x86_avx_blendv_pd_256: 2675e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pblendvb: { 2676e8d8bef9SDimitry Andric // fold (blend A, A, Mask) -> A 2677e8d8bef9SDimitry Andric Value *Op0 = II.getArgOperand(0); 2678e8d8bef9SDimitry Andric Value *Op1 = II.getArgOperand(1); 2679e8d8bef9SDimitry Andric Value *Mask = II.getArgOperand(2); 2680e8d8bef9SDimitry Andric if (Op0 == Op1) { 2681e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Op0); 2682e8d8bef9SDimitry Andric } 2683e8d8bef9SDimitry Andric 2684e8d8bef9SDimitry Andric // Zero Mask - select 1st argument. 2685e8d8bef9SDimitry Andric if (isa<ConstantAggregateZero>(Mask)) { 2686e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, Op0); 2687e8d8bef9SDimitry Andric } 2688e8d8bef9SDimitry Andric 2689e8d8bef9SDimitry Andric // Constant Mask - select 1st/2nd argument lane based on top bit of mask. 2690e8d8bef9SDimitry Andric if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) { 2691e8d8bef9SDimitry Andric Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask); 2692e8d8bef9SDimitry Andric return SelectInst::Create(NewSelector, Op1, Op0, "blendv"); 2693e8d8bef9SDimitry Andric } 2694e8d8bef9SDimitry Andric 2695e8d8bef9SDimitry Andric // Convert to a vector select if we can bypass casts and find a boolean 2696e8d8bef9SDimitry Andric // vector condition value. 
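    // Illustrative before/after for this transform (assumed example IR,
    // call shown with abbreviated operands):
    //   %m  = sext <4 x i1> %c to <4 x i32>
    //   %mf = bitcast <4 x i32> %m to <4 x float>
    //   %r  = call <4 x float> @llvm.x86.sse41.blendvps(%a, %b, %mf)
    // becomes
    //   %r  = select <4 x i1> %c, <4 x float> %b, <4 x float> %a
    // Note the operand order: a set mask bit selects the second argument.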
2697e8d8bef9SDimitry Andric Value *BoolVec; 2698e8d8bef9SDimitry Andric Mask = InstCombiner::peekThroughBitcast(Mask); 2699e8d8bef9SDimitry Andric if (match(Mask, PatternMatch::m_SExt(PatternMatch::m_Value(BoolVec))) && 2700e8d8bef9SDimitry Andric BoolVec->getType()->isVectorTy() && 2701e8d8bef9SDimitry Andric BoolVec->getType()->getScalarSizeInBits() == 1) { 2702e8d8bef9SDimitry Andric assert(Mask->getType()->getPrimitiveSizeInBits() == 2703e8d8bef9SDimitry Andric II.getType()->getPrimitiveSizeInBits() && 2704e8d8bef9SDimitry Andric "Not expecting mask and operands with different sizes"); 2705e8d8bef9SDimitry Andric 2706e8d8bef9SDimitry Andric unsigned NumMaskElts = 2707e8d8bef9SDimitry Andric cast<FixedVectorType>(Mask->getType())->getNumElements(); 2708e8d8bef9SDimitry Andric unsigned NumOperandElts = 2709e8d8bef9SDimitry Andric cast<FixedVectorType>(II.getType())->getNumElements(); 2710e8d8bef9SDimitry Andric if (NumMaskElts == NumOperandElts) { 2711e8d8bef9SDimitry Andric return SelectInst::Create(BoolVec, Op1, Op0); 2712e8d8bef9SDimitry Andric } 2713e8d8bef9SDimitry Andric 2714e8d8bef9SDimitry Andric // If the mask has fewer elements than the operands, each mask bit maps to 2715e8d8bef9SDimitry Andric // multiple elements of the operands. Bitcast back and forth. 2716e8d8bef9SDimitry Andric if (NumMaskElts < NumOperandElts) { 2717e8d8bef9SDimitry Andric Value *CastOp0 = IC.Builder.CreateBitCast(Op0, Mask->getType()); 2718e8d8bef9SDimitry Andric Value *CastOp1 = IC.Builder.CreateBitCast(Op1, Mask->getType()); 2719e8d8bef9SDimitry Andric Value *Sel = IC.Builder.CreateSelect(BoolVec, CastOp1, CastOp0); 2720e8d8bef9SDimitry Andric return new BitCastInst(Sel, II.getType()); 2721e8d8bef9SDimitry Andric } 2722e8d8bef9SDimitry Andric } 2723e8d8bef9SDimitry Andric 2724e8d8bef9SDimitry Andric break; 2725e8d8bef9SDimitry Andric } 2726e8d8bef9SDimitry Andric 2727e8d8bef9SDimitry Andric case Intrinsic::x86_ssse3_pshuf_b_128: 2728e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pshuf_b: 2729e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_pshuf_b_512: 2730e8d8bef9SDimitry Andric if (Value *V = simplifyX86pshufb(II, IC.Builder)) { 2731e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2732e8d8bef9SDimitry Andric } 2733e8d8bef9SDimitry Andric break; 2734e8d8bef9SDimitry Andric 2735e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_ps: 2736e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_ps_256: 2737e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vpermilvar_ps_512: 2738e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_pd: 2739e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_pd_256: 2740e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vpermilvar_pd_512: 2741e8d8bef9SDimitry Andric if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) { 2742e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2743e8d8bef9SDimitry Andric } 2744e8d8bef9SDimitry Andric break; 2745e8d8bef9SDimitry Andric 2746e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_permd: 2747e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_permps: 2748e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_df_256: 2749e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_df_512: 2750e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_di_256: 2751e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_di_512: 2752e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_hi_128: 2753e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_hi_256: 2754e8d8bef9SDimitry
Andric case Intrinsic::x86_avx512_permvar_hi_512: 2755e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_qi_128: 2756e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_qi_256: 2757e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_qi_512: 2758e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_sf_512: 2759e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_permvar_si_512: 2760e8d8bef9SDimitry Andric if (Value *V = simplifyX86vpermv(II, IC.Builder)) { 2761e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2762e8d8bef9SDimitry Andric } 2763e8d8bef9SDimitry Andric break; 2764e8d8bef9SDimitry Andric 2765e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskload_ps: 2766e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskload_pd: 2767e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskload_ps_256: 2768e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskload_pd_256: 2769e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskload_d: 2770e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskload_q: 2771e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskload_d_256: 2772e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskload_q_256: 2773e8d8bef9SDimitry Andric if (Instruction *I = simplifyX86MaskedLoad(II, IC)) { 2774e8d8bef9SDimitry Andric return I; 2775e8d8bef9SDimitry Andric } 2776e8d8bef9SDimitry Andric break; 2777e8d8bef9SDimitry Andric 2778e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_maskmov_dqu: 2779e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskstore_ps: 2780e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskstore_pd: 2781e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskstore_ps_256: 2782e8d8bef9SDimitry Andric case Intrinsic::x86_avx_maskstore_pd_256: 2783e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskstore_d: 2784e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskstore_q: 2785e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskstore_d_256: 2786e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_maskstore_q_256: 2787e8d8bef9SDimitry Andric if (simplifyX86MaskedStore(II, IC)) { 2788e8d8bef9SDimitry Andric return nullptr; 2789e8d8bef9SDimitry Andric } 2790e8d8bef9SDimitry Andric break; 2791e8d8bef9SDimitry Andric 2792e8d8bef9SDimitry Andric case Intrinsic::x86_addcarry_32: 2793e8d8bef9SDimitry Andric case Intrinsic::x86_addcarry_64: 2794e8d8bef9SDimitry Andric if (Value *V = simplifyX86addcarry(II, IC.Builder)) { 2795e8d8bef9SDimitry Andric return IC.replaceInstUsesWith(II, V); 2796e8d8bef9SDimitry Andric } 2797e8d8bef9SDimitry Andric break; 2798e8d8bef9SDimitry Andric 2799*06c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_d_128: 2800*06c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_d_256: 2801*06c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_d_512: 2802*06c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_q_128: 2803*06c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_q_256: 2804*06c3fb27SDimitry Andric case Intrinsic::x86_avx512_pternlog_q_512: 2805*06c3fb27SDimitry Andric if (Value *V = simplifyTernarylogic(II, IC.Builder)) { 2806*06c3fb27SDimitry Andric return IC.replaceInstUsesWith(II, V); 2807*06c3fb27SDimitry Andric } 2808*06c3fb27SDimitry Andric break; 2809e8d8bef9SDimitry Andric default: 2810e8d8bef9SDimitry Andric break; 2811e8d8bef9SDimitry Andric } 2812bdd1243dSDimitry Andric return std::nullopt; 2813e8d8bef9SDimitry Andric } 2814e8d8bef9SDimitry Andric 2815bdd1243dSDimitry Andric std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic( 2816e8d8bef9SDimitry Andric 
InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, 2817e8d8bef9SDimitry Andric bool &KnownBitsComputed) const { 2818e8d8bef9SDimitry Andric switch (II.getIntrinsicID()) { 2819e8d8bef9SDimitry Andric default: 2820e8d8bef9SDimitry Andric break; 2821e8d8bef9SDimitry Andric case Intrinsic::x86_mmx_pmovmskb: 2822e8d8bef9SDimitry Andric case Intrinsic::x86_sse_movmsk_ps: 2823e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_movmsk_pd: 2824e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_pmovmskb_128: 2825e8d8bef9SDimitry Andric case Intrinsic::x86_avx_movmsk_ps_256: 2826e8d8bef9SDimitry Andric case Intrinsic::x86_avx_movmsk_pd_256: 2827e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pmovmskb: { 2828e8d8bef9SDimitry Andric // MOVMSK copies the vector elements' sign bits to the low bits 2829e8d8bef9SDimitry Andric // and zeros the high bits. 2830e8d8bef9SDimitry Andric unsigned ArgWidth; 2831e8d8bef9SDimitry Andric if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) { 2832e8d8bef9SDimitry Andric ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>. 2833e8d8bef9SDimitry Andric } else { 2834fe6060f1SDimitry Andric auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType()); 2835e8d8bef9SDimitry Andric ArgWidth = ArgType->getNumElements(); 2836e8d8bef9SDimitry Andric } 2837e8d8bef9SDimitry Andric 2838e8d8bef9SDimitry Andric // If we don't need any of the low bits then return zero; 2839e8d8bef9SDimitry Andric // we know that DemandedMask is non-zero already. 2840e8d8bef9SDimitry Andric APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth); 2841e8d8bef9SDimitry Andric Type *VTy = II.getType(); 2842349cc55cSDimitry Andric if (DemandedElts.isZero()) { 2843e8d8bef9SDimitry Andric return ConstantInt::getNullValue(VTy); 2844e8d8bef9SDimitry Andric } 2845e8d8bef9SDimitry Andric 2846e8d8bef9SDimitry Andric // We know that the upper bits are set to zero. 2847e8d8bef9SDimitry Andric Known.Zero.setBitsFrom(ArgWidth); 2848e8d8bef9SDimitry Andric KnownBitsComputed = true; 2849e8d8bef9SDimitry Andric break; 2850e8d8bef9SDimitry Andric } 2851e8d8bef9SDimitry Andric } 2852bdd1243dSDimitry Andric return std::nullopt; 2853e8d8bef9SDimitry Andric } 2854e8d8bef9SDimitry Andric 2855bdd1243dSDimitry Andric std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic( 2856e8d8bef9SDimitry Andric InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 2857e8d8bef9SDimitry Andric APInt &UndefElts2, APInt &UndefElts3, 2858e8d8bef9SDimitry Andric std::function<void(Instruction *, unsigned, APInt, APInt &)> 2859e8d8bef9SDimitry Andric simplifyAndSetOp) const { 2860e8d8bef9SDimitry Andric unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements(); 2861e8d8bef9SDimitry Andric switch (II.getIntrinsicID()) { 2862e8d8bef9SDimitry Andric default: 2863e8d8bef9SDimitry Andric break; 2864e8d8bef9SDimitry Andric case Intrinsic::x86_xop_vfrcz_ss: 2865e8d8bef9SDimitry Andric case Intrinsic::x86_xop_vfrcz_sd: 2866e8d8bef9SDimitry Andric // The instructions for these intrinsics are specified to zero the upper 2867e8d8bef9SDimitry Andric // bits rather than pass them through like other scalar intrinsics. So we 2868e8d8bef9SDimitry Andric // shouldn't just use Arg0 if DemandedElts[0] is clear like we do for other 2869e8d8bef9SDimitry Andric // intrinsics. Instead we should return a zero vector.
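    // Sketch of why returning Arg0 would be wrong here (illustrative):
    // vfrcz.ss produces { frcz(a[0]), 0, 0, 0 }, so even when lane 0 is not
    // demanded the upper lanes must read as zero rather than as Arg0's
    // upper lanes.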
2870e8d8bef9SDimitry Andric if (!DemandedElts[0]) { 2871e8d8bef9SDimitry Andric IC.addToWorklist(&II); 2872e8d8bef9SDimitry Andric return ConstantAggregateZero::get(II.getType()); 2873e8d8bef9SDimitry Andric } 2874e8d8bef9SDimitry Andric 2875e8d8bef9SDimitry Andric // Only the lower element is used. 2876e8d8bef9SDimitry Andric DemandedElts = 1; 2877e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); 2878e8d8bef9SDimitry Andric 2879e8d8bef9SDimitry Andric // Only the lower element is undefined. The high elements are zero. 2880e8d8bef9SDimitry Andric UndefElts = UndefElts[0]; 2881e8d8bef9SDimitry Andric break; 2882e8d8bef9SDimitry Andric 2883e8d8bef9SDimitry Andric // Unary scalar-as-vector operations that work column-wise. 2884e8d8bef9SDimitry Andric case Intrinsic::x86_sse_rcp_ss: 2885e8d8bef9SDimitry Andric case Intrinsic::x86_sse_rsqrt_ss: 2886e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); 2887e8d8bef9SDimitry Andric 2888e8d8bef9SDimitry Andric // If lowest element of a scalar op isn't used then use Arg0. 2889e8d8bef9SDimitry Andric if (!DemandedElts[0]) { 2890e8d8bef9SDimitry Andric IC.addToWorklist(&II); 2891e8d8bef9SDimitry Andric return II.getArgOperand(0); 2892e8d8bef9SDimitry Andric } 2893e8d8bef9SDimitry Andric // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions 2894e8d8bef9SDimitry Andric // checks). 2895e8d8bef9SDimitry Andric break; 2896e8d8bef9SDimitry Andric 2897e8d8bef9SDimitry Andric // Binary scalar-as-vector operations that work column-wise. The high 2898e8d8bef9SDimitry Andric // elements come from operand 0. The low element is a function of both 2899e8d8bef9SDimitry Andric // operands. 2900e8d8bef9SDimitry Andric case Intrinsic::x86_sse_min_ss: 2901e8d8bef9SDimitry Andric case Intrinsic::x86_sse_max_ss: 2902e8d8bef9SDimitry Andric case Intrinsic::x86_sse_cmp_ss: 2903e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_min_sd: 2904e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_max_sd: 2905e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_cmp_sd: { 2906e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); 2907e8d8bef9SDimitry Andric 2908e8d8bef9SDimitry Andric // If lowest element of a scalar op isn't used then use Arg0. 2909e8d8bef9SDimitry Andric if (!DemandedElts[0]) { 2910e8d8bef9SDimitry Andric IC.addToWorklist(&II); 2911e8d8bef9SDimitry Andric return II.getArgOperand(0); 2912e8d8bef9SDimitry Andric } 2913e8d8bef9SDimitry Andric 2914e8d8bef9SDimitry Andric // Only lower element is used for operand 1. 2915e8d8bef9SDimitry Andric DemandedElts = 1; 2916e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2); 2917e8d8bef9SDimitry Andric 2918e8d8bef9SDimitry Andric // Lower element is undefined if both lower elements are undefined. 2919e8d8bef9SDimitry Andric // Consider things like undef&0. The result is known zero, not undef. 2920e8d8bef9SDimitry Andric if (!UndefElts2[0]) 2921e8d8bef9SDimitry Andric UndefElts.clearBit(0); 2922e8d8bef9SDimitry Andric 2923e8d8bef9SDimitry Andric break; 2924e8d8bef9SDimitry Andric } 2925e8d8bef9SDimitry Andric 2926e8d8bef9SDimitry Andric // Binary scalar-as-vector operations that work column-wise. The high 2927e8d8bef9SDimitry Andric // elements come from operand 0 and the low element comes from operand 1. 2928e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_round_ss: 2929e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_round_sd: { 2930e8d8bef9SDimitry Andric // Don't use the low element of operand 0. 
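    // For example (illustrative): round.sd yields { round(b[0]), a[1] }, so
    // lane 0 of operand 0 is never read; the demanded-elements query below
    // therefore clears bit 0 before simplifying operand 0.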
2931e8d8bef9SDimitry Andric APInt DemandedElts2 = DemandedElts; 2932e8d8bef9SDimitry Andric DemandedElts2.clearBit(0); 2933e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts); 2934e8d8bef9SDimitry Andric 2935e8d8bef9SDimitry Andric // If lowest element of a scalar op isn't used then use Arg0. 2936e8d8bef9SDimitry Andric if (!DemandedElts[0]) { 2937e8d8bef9SDimitry Andric IC.addToWorklist(&II); 2938e8d8bef9SDimitry Andric return II.getArgOperand(0); 2939e8d8bef9SDimitry Andric } 2940e8d8bef9SDimitry Andric 2941e8d8bef9SDimitry Andric // Only lower element is used for operand 1. 2942e8d8bef9SDimitry Andric DemandedElts = 1; 2943e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2); 2944e8d8bef9SDimitry Andric 2945e8d8bef9SDimitry Andric // Take the high undef elements from operand 0 and take the lower element 2946e8d8bef9SDimitry Andric // from operand 1. 2947e8d8bef9SDimitry Andric UndefElts.clearBit(0); 2948e8d8bef9SDimitry Andric UndefElts |= UndefElts2[0]; 2949e8d8bef9SDimitry Andric break; 2950e8d8bef9SDimitry Andric } 2951e8d8bef9SDimitry Andric 2952e8d8bef9SDimitry Andric // Three input scalar-as-vector operations that work column-wise. The high 2953e8d8bef9SDimitry Andric // elements come from operand 0 and the low element is a function of all 2954e8d8bef9SDimitry Andric // three inputs. 2955e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_ss_round: 2956e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_ss_round: 2957e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_ss_round: 2958e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_sub_ss_round: 2959e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_max_ss_round: 2960e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_min_ss_round: 2961e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_add_sd_round: 2962e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_div_sd_round: 2963e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_mul_sd_round: 2964e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_sub_sd_round: 2965e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_max_sd_round: 2966e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_mask_min_sd_round: 2967e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); 2968e8d8bef9SDimitry Andric 2969e8d8bef9SDimitry Andric // If lowest element of a scalar op isn't used then use Arg0. 2970e8d8bef9SDimitry Andric if (!DemandedElts[0]) { 2971e8d8bef9SDimitry Andric IC.addToWorklist(&II); 2972e8d8bef9SDimitry Andric return II.getArgOperand(0); 2973e8d8bef9SDimitry Andric } 2974e8d8bef9SDimitry Andric 2975e8d8bef9SDimitry Andric // Only lower element is used for operands 1 and 2. 2976e8d8bef9SDimitry Andric DemandedElts = 1; 2977e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2); 2978e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3); 2979e8d8bef9SDimitry Andric 2980e8d8bef9SDimitry Andric // Lower element is undefined if all three lower elements are undefined. 2981e8d8bef9SDimitry Andric // Consider things like undef&0. The result is known zero, not undef. 2982e8d8bef9SDimitry Andric if (!UndefElts2[0] || !UndefElts3[0]) 2983e8d8bef9SDimitry Andric UndefElts.clearBit(0); 2984e8d8bef9SDimitry Andric break; 2985e8d8bef9SDimitry Andric 2986e8d8bef9SDimitry Andric // TODO: Add fmaddsub support?
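    // ADDSUB subtracts in the even lanes and adds in the odd lanes; for
    // <4 x float> the result is { a0-b0, a1+b1, a2-b2, a3+b3 }, matching the
    // SubMask/AddMask splats built below. So if only the even result lanes
    // are demanded, the whole operation reduces to a plain fsub, and
    // symmetrically to an fadd when only the odd lanes are demanded.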
2987e8d8bef9SDimitry Andric case Intrinsic::x86_sse3_addsub_pd: 2988e8d8bef9SDimitry Andric case Intrinsic::x86_sse3_addsub_ps: 2989e8d8bef9SDimitry Andric case Intrinsic::x86_avx_addsub_pd_256: 2990e8d8bef9SDimitry Andric case Intrinsic::x86_avx_addsub_ps_256: { 2991e8d8bef9SDimitry Andric // If none of the even or none of the odd lanes are required, turn this 2992e8d8bef9SDimitry Andric // into a generic FP math instruction. 2993e8d8bef9SDimitry Andric APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1)); 2994e8d8bef9SDimitry Andric APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2)); 2995e8d8bef9SDimitry Andric bool IsSubOnly = DemandedElts.isSubsetOf(SubMask); 2996e8d8bef9SDimitry Andric bool IsAddOnly = DemandedElts.isSubsetOf(AddMask); 2997e8d8bef9SDimitry Andric if (IsSubOnly || IsAddOnly) { 2998e8d8bef9SDimitry Andric assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only"); 2999e8d8bef9SDimitry Andric IRBuilderBase::InsertPointGuard Guard(IC.Builder); 3000e8d8bef9SDimitry Andric IC.Builder.SetInsertPoint(&II); 3001e8d8bef9SDimitry Andric Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1); 3002e8d8bef9SDimitry Andric return IC.Builder.CreateBinOp( 3003e8d8bef9SDimitry Andric IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1); 3004e8d8bef9SDimitry Andric } 3005e8d8bef9SDimitry Andric 3006e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); 3007e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2); 3008e8d8bef9SDimitry Andric UndefElts &= UndefElts2; 3009e8d8bef9SDimitry Andric break; 3010e8d8bef9SDimitry Andric } 3011e8d8bef9SDimitry Andric 301281ad6265SDimitry Andric // General per-element vector operations. 301381ad6265SDimitry Andric case Intrinsic::x86_avx2_psllv_d: 301481ad6265SDimitry Andric case Intrinsic::x86_avx2_psllv_d_256: 301581ad6265SDimitry Andric case Intrinsic::x86_avx2_psllv_q: 301681ad6265SDimitry Andric case Intrinsic::x86_avx2_psllv_q_256: 301781ad6265SDimitry Andric case Intrinsic::x86_avx2_psrlv_d: 301881ad6265SDimitry Andric case Intrinsic::x86_avx2_psrlv_d_256: 301981ad6265SDimitry Andric case Intrinsic::x86_avx2_psrlv_q: 302081ad6265SDimitry Andric case Intrinsic::x86_avx2_psrlv_q_256: 302181ad6265SDimitry Andric case Intrinsic::x86_avx2_psrav_d: 302281ad6265SDimitry Andric case Intrinsic::x86_avx2_psrav_d_256: { 302381ad6265SDimitry Andric simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); 302481ad6265SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2); 302581ad6265SDimitry Andric UndefElts &= UndefElts2; 302681ad6265SDimitry Andric break; 302781ad6265SDimitry Andric } 302881ad6265SDimitry Andric 3029e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packssdw_128: 3030e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packsswb_128: 3031e8d8bef9SDimitry Andric case Intrinsic::x86_sse2_packuswb_128: 3032e8d8bef9SDimitry Andric case Intrinsic::x86_sse41_packusdw: 3033e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packssdw: 3034e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packsswb: 3035e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packusdw: 3036e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_packuswb: 3037e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packssdw_512: 3038e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packsswb_512: 3039e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packusdw_512: 3040e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_packuswb_512: { 3041e8d8bef9SDimitry Andric auto *Ty0 = II.getArgOperand(0)->getType(); 
3042e8d8bef9SDimitry Andric unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements(); 3043e8d8bef9SDimitry Andric assert(VWidth == (InnerVWidth * 2) && "Unexpected input size"); 3044e8d8bef9SDimitry Andric 3045e8d8bef9SDimitry Andric unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128; 3046e8d8bef9SDimitry Andric unsigned VWidthPerLane = VWidth / NumLanes; 3047e8d8bef9SDimitry Andric unsigned InnerVWidthPerLane = InnerVWidth / NumLanes; 3048e8d8bef9SDimitry Andric 3049e8d8bef9SDimitry Andric // Per lane, pack the elements of the first input and then the second. 3050e8d8bef9SDimitry Andric // e.g. 3051e8d8bef9SDimitry Andric // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3]) 3052e8d8bef9SDimitry Andric // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15]) 3053e8d8bef9SDimitry Andric for (int OpNum = 0; OpNum != 2; ++OpNum) { 3054e8d8bef9SDimitry Andric APInt OpDemandedElts(InnerVWidth, 0); 3055e8d8bef9SDimitry Andric for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { 3056e8d8bef9SDimitry Andric unsigned LaneIdx = Lane * VWidthPerLane; 3057e8d8bef9SDimitry Andric for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) { 3058e8d8bef9SDimitry Andric unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum; 3059e8d8bef9SDimitry Andric if (DemandedElts[Idx]) 3060e8d8bef9SDimitry Andric OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt); 3061e8d8bef9SDimitry Andric } 3062e8d8bef9SDimitry Andric } 3063e8d8bef9SDimitry Andric 3064e8d8bef9SDimitry Andric // Demand elements from the operand. 3065e8d8bef9SDimitry Andric APInt OpUndefElts(InnerVWidth, 0); 3066e8d8bef9SDimitry Andric simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts); 3067e8d8bef9SDimitry Andric 3068e8d8bef9SDimitry Andric // Pack the operand's UNDEF elements, one lane at a time. 
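    // Worked example (illustrative): for v8i16 PACK(v4i32 X, v4i32 Y) there
    // is a single 128-bit lane, so an undef X[2] lands in result element 2
    // and an undef Y[2] in result element 6; the shift below by
    // InnerVWidthPerLane * (2 * Lane + OpNum) performs exactly that mapping.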
3069e8d8bef9SDimitry Andric OpUndefElts = OpUndefElts.zext(VWidth); 3070e8d8bef9SDimitry Andric for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { 3071e8d8bef9SDimitry Andric APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane); 3072e8d8bef9SDimitry Andric LaneElts = LaneElts.getLoBits(InnerVWidthPerLane); 3073e8d8bef9SDimitry Andric LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum); 3074e8d8bef9SDimitry Andric UndefElts |= LaneElts; 3075e8d8bef9SDimitry Andric } 3076e8d8bef9SDimitry Andric } 3077e8d8bef9SDimitry Andric break; 3078e8d8bef9SDimitry Andric } 3079e8d8bef9SDimitry Andric 3080e8d8bef9SDimitry Andric // PSHUFB 3081e8d8bef9SDimitry Andric case Intrinsic::x86_ssse3_pshuf_b_128: 3082e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_pshuf_b: 3083e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_pshuf_b_512: 3084e8d8bef9SDimitry Andric // PERMILVAR 3085e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_ps: 3086e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_ps_256: 3087e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vpermilvar_ps_512: 3088e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_pd: 3089e8d8bef9SDimitry Andric case Intrinsic::x86_avx_vpermilvar_pd_256: 3090e8d8bef9SDimitry Andric case Intrinsic::x86_avx512_vpermilvar_pd_512: 3091e8d8bef9SDimitry Andric // PERMV 3092e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_permd: 3093e8d8bef9SDimitry Andric case Intrinsic::x86_avx2_permps: { 3094e8d8bef9SDimitry Andric simplifyAndSetOp(&II, 1, DemandedElts, UndefElts); 3095e8d8bef9SDimitry Andric break; 3096e8d8bef9SDimitry Andric } 3097e8d8bef9SDimitry Andric 3098e8d8bef9SDimitry Andric // SSE4A instructions leave the upper 64-bits of the 128-bit result 3099e8d8bef9SDimitry Andric // in an undefined state. 3100e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_extrq: 3101e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_extrqi: 3102e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_insertq: 3103e8d8bef9SDimitry Andric case Intrinsic::x86_sse4a_insertqi: 3104e8d8bef9SDimitry Andric UndefElts.setHighBits(VWidth / 2); 3105e8d8bef9SDimitry Andric break; 3106e8d8bef9SDimitry Andric } 3107bdd1243dSDimitry Andric return std::nullopt; 3108e8d8bef9SDimitry Andric } 3109