Lines Matching full:blend
1613 // when we have a 256-bit wide blend with immediate. in X86TargetLowering()
3612 /// Return true if every element in Mask is an in-place blend/select mask or is
5885 // loops converting between OR and BLEND shuffles due to in getFauxShuffleMask()
6693 // See if this build_vector can be lowered as a blend with zero. in LowerBuildVectorv4x32()
6713 // Let the shuffle legalizer deal with blend operations. in LowerBuildVectorv4x32()
8091 // Convert to blend(fsub,fadd). in lowerToAddSubOrFMAddSub()
8839 // and blend the FREEZE-UNDEF operands back in. in LowerBUILD_VECTOR()
9459 /// that it is also not lane-crossing. It may however involve a blend from the
9958 // Don't bother if we can blend instead. in matchShuffleWithUNPCK()
10004 // X86 has dedicated unpack instructions that can handle specific blend
10480 /// This handles cases where we can model a blend exactly as a bitmask due to
10515 return SDValue(); // Not a blend. in lowerShuffleAsBitMask()
10533 /// Try to emit a blend instruction for a shuffle using bit math.
10535 /// This is used as a fallback approach when first class blend instructions are
10572 assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask"); in matchShuffleAsBlend()
10580 // then ensure the blend mask part for that lane just references that input. in matchShuffleAsBlend()
10584 // Attempt to generate the binary blend mask. If an input is zero then in matchShuffleAsBlend()
10628 // If we only used V2 then splat the lane blend mask to avoid any demanded in matchShuffleAsBlend()
10630 // blend mask bit). in matchShuffleAsBlend()
10639 /// Try to emit a blend instruction for a shuffle.
10644 /// that the shuffle mask is a blend, or convertible into a blend with zero.
10695 // Use PBLENDW for lower/upper lanes and then blend lanes. in lowerShuffleAsBlend()
10728 // If we have VPTERNLOG, we can use that as a bit blend. in lowerShuffleAsBlend()
10734 // Scale the blend by the number of bytes per element. in lowerShuffleAsBlend()
10737 // This form of blend is always done on bytes. Compute the byte vector in lowerShuffleAsBlend()
10801 /// Try to lower as a blend of elements from two inputs followed by
10804 /// This matches the pattern where we can blend elements from two inputs and
10811 // We build up the blend mask while checking whether a blend is a viable way in lowerShuffleAsBlendAndPermute()
10825 return SDValue(); // Can't blend in the needed input! in lowerShuffleAsBlendAndPermute()
10830 // If only immediate blends, then bail if the blend mask can't be widened to in lowerShuffleAsBlendAndPermute()
11149 /// Generic routine to decompose a shuffle and blend into independent
11153 /// shuffle+blend operations on newer X86 ISAs where we have very fast blend
11155 /// blends. For vXi8/vXi16 shuffles we may use unpack instead of blend.
11164 // unpack/blend them together. in lowerShuffleAsDecomposedShuffleMerge()
11204 // Currently, we may need to produce one shuffle per input, and blend results. in lowerShuffleAsDecomposedShuffleMerge()
11214 // Try to lower with the simpler initial blend/unpack/rotate strategies unless in lowerShuffleAsDecomposedShuffleMerge()
11248 // If the final mask is an alternating blend of vXi8/vXi16, convert to an in lowerShuffleAsDecomposedShuffleMerge()
12844 // blend patterns if a zero-blend above didn't work. in lowerV2F64Shuffle()
12855 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, in lowerV2F64Shuffle() local
12857 return Blend; in lowerV2F64Shuffle()
12928 // We have different paths for blend lowering, but they all must use the in lowerV2I64Shuffle()
12932 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, in lowerV2I64Shuffle() local
12934 return Blend; in lowerV2I64Shuffle()
12997 // To make this work, blend them together as the first step. in lowerShuffleWithSHUFPS()
13003 // Now proceed to reconstruct the final blend as we have the necessary in lowerShuffleWithSHUFPS()
13030 // trying to place elements directly, just blend them and set up the final in lowerShuffleWithSHUFPS()
13033 // The first two blend mask elements are for V1, the second two are for in lowerShuffleWithSHUFPS()
13043 // a blend. in lowerShuffleWithSHUFPS()
13075 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask, in lowerV4F32Shuffle() local
13077 return Blend; in lowerV4F32Shuffle()
13169 /// blends we use the floating point domain blend instructions.
13239 // We have different paths for blend lowering, but they all must use the in lowerV4I32Shuffle()
13243 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask, in lowerV4I32Shuffle() local
13245 return Blend; in lowerV4I32Shuffle()
13284 // We implement this with SHUFPS because it can blend from two vectors. in lowerV4I32Shuffle()
13803 /// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
13804 /// blend if only one input is used.
13845 // If we need shuffled inputs from both, blend the two. in lowerShuffleAsBlendOfPSHUFBs()
13865 /// the two inputs, try to interleave them. Otherwise, blend the low and high
13948 // We have different paths for blend lowering, but they all must use the in lowerV8I16Shuffle()
13952 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask, in lowerV8I16Shuffle() local
13954 return Blend; in lowerV8I16Shuffle()
14057 // If we can't directly blend but can use PSHUFB, that will be better as it in lowerV8I16Shuffle()
14058 // can both shuffle and set up the inefficient blend. in lowerV8I16Shuffle()
14065 // We can always bit-blend if we have to so the fallback strategy is to in lowerV8I16Shuffle()
14344 // If both V1 and V2 are in use and we can use a direct blend or an unpack, in lowerV16I8Shuffle()
14349 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask, in lowerV16I8Shuffle() local
14351 return Blend; in lowerV16I8Shuffle()
14356 // the complexity of the shuffles goes away when we do the final blend as in lowerV16I8Shuffle()
14392 if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG)) in lowerV16I8Shuffle() local
14393 return Blend; in lowerV16I8Shuffle()
14470 // This will be a single vector shuffle instead of a blend so nuke VHiHalf. in lowerV16I8Shuffle()
14482 // VHiHalf so that we can blend them as i16s. in lowerV16I8Shuffle()
14618 // manually combine these blend masks as much as possible so that we create in splitAndLowerShuffle()
14634 // We only use half of V1 so map the usage down into the final blend mask. in splitAndLowerShuffle()
14643 // We only use half of V2 so map the usage down into the final blend mask. in splitAndLowerShuffle()
14661 /// blend/unpack.
14676 // If this can be modeled as a broadcast of two elements followed by a blend, in lowerShuffleAsSplitOrBlend()
14714 // Otherwise, just fall back to decomposed shuffles and a blend/unpack. This in lowerShuffleAsSplitOrBlend()
14988 if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, in lowerV2X128Shuffle() local
14990 return Blend; in lowerV2X128Shuffle()
15883 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, in lowerV4F64Shuffle() local
15885 return Blend; in lowerV4F64Shuffle()
15887 // Check if the blend happens to exactly fit that of SHUFPD. in lowerV4F64Shuffle()
15898 // canonicalize to a blend of splat which isn't necessary for this combine. in lowerV4F64Shuffle()
15906 // blend the result. in lowerV4F64Shuffle()
15932 // If we have AVX2 then we always want to lower with a blend because at v4 we in lowerV4F64Shuffle()
15960 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask, in lowerV4I64Shuffle() local
15962 return Blend; in lowerV4I64Shuffle()
16026 // blend the result. in lowerV4I64Shuffle()
16051 // Otherwise fall back on generic blend lowering. in lowerV4I64Shuffle()
16068 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, in lowerV8F32Shuffle() local
16070 return Blend; in lowerV8F32Shuffle()
16167 // If we have AVX2 then we always want to lower with a blend because at v8 we in lowerV8F32Shuffle()
16215 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask, in lowerV8I32Shuffle() local
16217 return Blend; in lowerV8I32Shuffle()
16315 // Otherwise fall back on generic blend lowering. in lowerV8I32Shuffle()
16345 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask, in lowerV16I16Shuffle() local
16347 return Blend; in lowerV16I16Shuffle()
16468 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask, in lowerV32I8Shuffle() local
16470 return Blend; in lowerV32I8Shuffle()
16784 // Check if the blend happens to exactly fit that of SHUFPD. in lowerV8F64Shuffle()
16793 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask, in lowerV8F64Shuffle() local
16795 return Blend; in lowerV8F64Shuffle()
16829 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask, in lowerV16F32Shuffle() local
16831 return Blend; in lowerV16F32Shuffle()
16837 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask, in lowerV16F32Shuffle() local
16839 return Blend; in lowerV16F32Shuffle()
16933 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask, in lowerV8I64Shuffle() local
16935 return Blend; in lowerV8I64Shuffle()
17030 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask, in lowerV16I32Shuffle() local
17032 return Blend; in lowerV16I32Shuffle()
17090 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask, in lowerV32I16Shuffle() local
17092 return Blend; in lowerV32I16Shuffle()
17163 if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask, in lowerV64I8Shuffle() local
17165 return Blend; in lowerV64I8Shuffle()
17174 // If we can't directly blend but can use PSHUFB, that will be better as it in lowerV64I8Shuffle()
17175 // can both shuffle and set up the inefficient blend. in lowerV64I8Shuffle()
17699 // Choose indices that are blend-friendly. in lowerVECTOR_SHUFFLE()
17798 // Try to lower this to a blend-style vector shuffle. This can handle all in LowerVSELECT()
17822 // into an i1 condition so that we can use the mask-based 512-bit blend in LowerVSELECT()
17863 // VSELECT-matching blend, return Op, but if we need to expand, return in LowerVSELECT()
18262 // Lower insertion of v16i8/v32i8/v64i8 -1 elts as an 'OR' blend. in LowerINSERT_VECTOR_ELT()
18274 // See if we can do this more efficiently with a blend shuffle with a in LowerINSERT_VECTOR_ELT()
18291 // using a blend if we have AVX or AVX2 and the right data type. in LowerINSERT_VECTOR_ELT()
18309 // then prefer the broadcast+blend sequence. in LowerINSERT_VECTOR_ELT()
18389 // a vector, we prefer to generate a blend with immediate rather in LowerINSERT_VECTOR_ELT()
24086 // case, so that sequence would be faster than a variable blend. in LowerSELECT()
29559 // Only perform this blend if we can perform it without loading a mask. in LowerShift()
29755 // to a masked blend which selects bytes based just on the sign bit in LowerShift()
32042 // Emit a blend. in LowerMLOAD()
38230 // Attempt to match against an OR if we're performing a blend shuffle and the in matchBinaryShuffle()
40374 // Attempt to fold BLEND(PERMUTE(X),PERMUTE(Y)) -> PERMUTE(BLEND(X,Y))
40380 assert(isBlendOrUndef(BlendMask) && "Blend shuffle expected"); in combineBlendOfPermutes()
40389 // to the same width as the blend mask. in combineBlendOfPermutes()
40417 // Use the permute demanded elts masks as the new blend mask. in combineBlendOfPermutes()
40418 // Create the new permute mask as a blend of the 2 original permute masks. in combineBlendOfPermutes()
40440 assert(isBlendOrUndef(NewBlendMask) && "Bad blend"); in combineBlendOfPermutes()
40444 // the blend mask is the same in the 128-bit subvectors (or can widen to in combineBlendOfPermutes()
41042 // blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) to narrower types. in combineTargetShuffle()
41043 // TODO: Handle MVT::v16i16 repeated blend mask. in combineTargetShuffle()
41060 // blend(pshufb(x,m1),pshufb(y,m2)) in combineTargetShuffle()
41061 // --> m3 = blend(m1,m2) in combineTargetShuffle()
41062 // blend(pshufb(x,m3),pshufb(y,m3)) in combineTargetShuffle()
45664 /// this node with one of the variable blend instructions, restructure the
45686 // cases where a *dynamic* blend will fail even though a constant-condition in combineVSelectToBLENDV()
45687 // blend could be custom lowered. in combineVSelectToBLENDV()
45696 // rather than just the high bit and using an i8-element blend. in combineVSelectToBLENDV()
45705 // There are no 512-bit blend instructions that use sign bits. in combineVSelectToBLENDV()
51445 SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, in combineMaskedLoadConstantMask() local
51447 return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true); in combineMaskedLoadConstantMask()
51468 SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, in combineMaskedLoadConstantMask() local
51471 return DCI.CombineTo(ML, Blend, NewML.getValue(1), true); in combineMaskedLoadConstantMask()
51490 if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI)) in combineMaskedLoad() local
51491 return Blend; in combineMaskedLoad()
56616 // MVT::v16i16 has repeated blend mask. in combineConcatVectorOps()