X86ISelLowering.cpp - OpenGrok cross reference for /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp

Lines Matching +full:high +full:- +full:vt
1 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
71 #define DEBUG_TYPE "x86-isel"
74     "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
78         "alignment set by x86-experimental-pref-loop-alignment."),
82     "x86-br-merging-base-cost", cl::init(2),
88         "will be merged, and above which conditionals will be split. Set to -1 "
93     "x86-br-merging-ccmp-bias", cl::init(6),
94     cl::desc("Increases 'x86-br-merging-base-cost' in cases that the target "
99     "x86-br-merging-likely-bias", cl::init(0),
100     cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
105              "the instruction cost threshold. Set to -1 to never merge likely "
110     "x86-br-merging-unlikely-bias", cl::init(-1),
112         "Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
117         "the instruction cost threshold. Set to -1 to never merge unlikely "
122     "mul-constant-optimization", cl::init(true),
137   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.  in X86TargetLowering()
141   // default expansion to a no-op.  in X86TargetLowering()
144   // For 64-bit, since we have so many registers, use the ILP scheduler.  in X86TargetLowering()
145   // For 32-bit, use the register pressure specific scheduling.  in X86TargetLowering()
154   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());  in X86TargetLowering()
202   for (MVT VT : MVT::integer_valuetypes())  in X86TargetLowering()  local
203     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);  in X86TargetLowering()
216   for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {  in X86TargetLowering()
217     setCondCodeAction(ISD::SETOEQ, VT, Expand);  in X86TargetLowering()
218     setCondCodeAction(ISD::SETUNE, VT, Expand);  in X86TargetLowering()
264     // We have an algorithm for SSE2, and we turn this into a 64-bit  in X86TargetLowering()
268     // We have an algorithm for SSE2->double, and we turn this into a  in X86TargetLowering()
269     // 64-bit FILD followed by conditional FADD for other targets.  in X86TargetLowering()
284     // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64  in X86TargetLowering()
298     // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64  in X86TargetLowering()
330     for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {  in X86TargetLowering()
331       setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);  in X86TargetLowering()
332       setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);  in X86TargetLowering()
350       // Without SSE, i64->f64 goes through memory.  in X86TargetLowering()
358   // the two-result form to trivial CSE, which is able to combine x/y and x%y  in X86TargetLowering()
361   // Scalar integer multiply-high is also lowered to use two-result  in X86TargetLowering()
363   // (low) operations are left as Legal, as there are single-result  in X86TargetLowering()
364   // instructions for this in x86. Using the two-result multiply instructions  in X86TargetLowering()
365   // when both high and low results are needed must be arranged by dagcombine.  in X86TargetLowering()
366   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {  in X86TargetLowering()
367     setOperationAction(ISD::MULHS, VT, Expand);  in X86TargetLowering()
368     setOperationAction(ISD::MULHU, VT, Expand);  in X86TargetLowering()
369     setOperationAction(ISD::SDIV, VT, Expand);  in X86TargetLowering()
370     setOperationAction(ISD::UDIV, VT, Expand);  in X86TargetLowering()
371     setOperationAction(ISD::SREM, VT, Expand);  in X86TargetLowering()
372     setOperationAction(ISD::UREM, VT, Expand);  in X86TargetLowering()
377   for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,  in X86TargetLowering()
379     setOperationAction(ISD::BR_CC,     VT, Expand);  in X86TargetLowering()
380     setOperationAction(ISD::SELECT_CC, VT, Expand);  in X86TargetLowering()
426     for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {  in X86TargetLowering()
427       if (VT == MVT::i64 && !Subtarget.is64Bit())  in X86TargetLowering()
429       setOperationAction(ISD::CTLZ           , VT, Custom);  in X86TargetLowering()
430       setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);  in X86TargetLowering()
436     // Special handling for half-precision floating point conversions.  in X86TargetLowering()
448   for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {  in X86TargetLowering()
449     setOperationAction(ISD::STRICT_FP_TO_BF16, VT, Expand);  in X86TargetLowering()
450     setOperationAction(ISD::STRICT_BF16_TO_FP, VT, Expand);  in X86TargetLowering()
453   for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {  in X86TargetLowering()
454     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);  in X86TargetLowering()
455     setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);  in X86TargetLowering()
456     setTruncStoreAction(VT, MVT::f16, Expand);  in X86TargetLowering()
457     setTruncStoreAction(VT, MVT::bf16, Expand);  in X86TargetLowering()
459     setOperationAction(ISD::BF16_TO_FP, VT, Expand);  in X86TargetLowering()
460     setOperationAction(ISD::FP_TO_BF16, VT, Custom);  in X86TargetLowering()
486   for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {  in X86TargetLowering()
487     setOperationAction(ISD::SELECT, VT, Custom);  in X86TargetLowering()
488     setOperationAction(ISD::SETCC, VT, Custom);  in X86TargetLowering()
489     setOperationAction(ISD::STRICT_FSETCC, VT, Custom);  in X86TargetLowering()
490     setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);  in X86TargetLowering()
492   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {  in X86TargetLowering()
493     if (VT == MVT::i64 && !Subtarget.is64Bit())  in X86TargetLowering()
495     setOperationAction(ISD::SELECT, VT, Custom);  in X86TargetLowering()
496     setOperationAction(ISD::SETCC,  VT, Custom);  in X86TargetLowering()
505   // LLVM/Clang supports zero-cost DWARF and SEH exception handling.  in X86TargetLowering()
513   for (auto VT : { MVT::i32, MVT::i64 }) {  in X86TargetLowering()
514     if (VT == MVT::i64 && !Subtarget.is64Bit())  in X86TargetLowering()
516     setOperationAction(ISD::ConstantPool    , VT, Custom);  in X86TargetLowering()
517     setOperationAction(ISD::JumpTable       , VT, Custom);  in X86TargetLowering()
518     setOperationAction(ISD::GlobalAddress   , VT, Custom);  in X86TargetLowering()
519     setOperationAction(ISD::GlobalTLSAddress, VT, Custom);  in X86TargetLowering()
520     setOperationAction(ISD::ExternalSymbol  , VT, Custom);  in X86TargetLowering()
521     setOperationAction(ISD::BlockAddress    , VT, Custom);  in X86TargetLowering()
524   // 64-bit shl, sra, srl (iff 32-bit x86)  in X86TargetLowering()
525   for (auto VT : { MVT::i32, MVT::i64 }) {  in X86TargetLowering()
526     if (VT == MVT::i64 && !Subtarget.is64Bit())  in X86TargetLowering()
528     setOperationAction(ISD::SHL_PARTS, VT, Custom);  in X86TargetLowering()
529     setOperationAction(ISD::SRA_PARTS, VT, Custom);  in X86TargetLowering()
530     setOperationAction(ISD::SRL_PARTS, VT, Custom);  in X86TargetLowering()
539   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {  in X86TargetLowering()
540     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);  in X86TargetLowering()
541     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);  in X86TargetLowering()
542     setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);  in X86TargetLowering()
543     setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);  in X86TargetLowering()
544     setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);  in X86TargetLowering()
545     setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);  in X86TargetLowering()
546     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);  in X86TargetLowering()
553     // All CPUs supporting AVX will atomically load/store aligned 128-bit  in X86TargetLowering()
562   // FIXME - use subtarget debug flags  in X86TargetLowering()
600   auto setF16Action = [&] (MVT VT, LegalizeAction Action) {  in X86TargetLowering()  argument
601     setOperationAction(ISD::FABS, VT, Action);  in X86TargetLowering()
602     setOperationAction(ISD::FNEG, VT, Action);  in X86TargetLowering()
603     setOperationAction(ISD::FCOPYSIGN, VT, Expand);  in X86TargetLowering()
604     setOperationAction(ISD::FREM, VT, Action);  in X86TargetLowering()
605     setOperationAction(ISD::FMA, VT, Action);  in X86TargetLowering()
606     setOperationAction(ISD::FMINNUM, VT, Action);  in X86TargetLowering()
607     setOperationAction(ISD::FMAXNUM, VT, Action);  in X86TargetLowering()
608     setOperationAction(ISD::FMINIMUM, VT, Action);  in X86TargetLowering()
609     setOperationAction(ISD::FMAXIMUM, VT, Action);  in X86TargetLowering()
610     setOperationAction(ISD::FSIN, VT, Action);  in X86TargetLowering()
611     setOperationAction(ISD::FCOS, VT, Action);  in X86TargetLowering()
612     setOperationAction(ISD::FSINCOS, VT, Action);  in X86TargetLowering()
613     setOperationAction(ISD::FTAN, VT, Action);  in X86TargetLowering()
614     setOperationAction(ISD::FSQRT, VT, Action);  in X86TargetLowering()
615     setOperationAction(ISD::FPOW, VT, Action);  in X86TargetLowering()
616     setOperationAction(ISD::FLOG, VT, Action);  in X86TargetLowering()
617     setOperationAction(ISD::FLOG2, VT, Action);  in X86TargetLowering()
618     setOperationAction(ISD::FLOG10, VT, Action);  in X86TargetLowering()
619     setOperationAction(ISD::FEXP, VT, Action);  in X86TargetLowering()
620     setOperationAction(ISD::FEXP2, VT, Action);  in X86TargetLowering()
621     setOperationAction(ISD::FEXP10, VT, Action);  in X86TargetLowering()
622     setOperationAction(ISD::FCEIL, VT, Action);  in X86TargetLowering()
623     setOperationAction(ISD::FFLOOR, VT, Action);  in X86TargetLowering()
624     setOperationAction(ISD::FNEARBYINT, VT, Action);  in X86TargetLowering()
625     setOperationAction(ISD::FRINT, VT, Action);  in X86TargetLowering()
626     setOperationAction(ISD::BR_CC, VT, Action);  in X86TargetLowering()
627     setOperationAction(ISD::SETCC, VT, Action);  in X86TargetLowering()
628     setOperationAction(ISD::SELECT, VT, Custom);  in X86TargetLowering()
629     setOperationAction(ISD::SELECT_CC, VT, Action);  in X86TargetLowering()
630     setOperationAction(ISD::FROUND, VT, Action);  in X86TargetLowering()
631     setOperationAction(ISD::FROUNDEVEN, VT, Action);  in X86TargetLowering()
632     setOperationAction(ISD::FTRUNC, VT, Action);  in X86TargetLowering()
633     setOperationAction(ISD::FLDEXP, VT, Action);  in X86TargetLowering()
646     // Disable f32->f64 extload as we can only generate this in one instruction  in X86TargetLowering()
649     // non-optsize case.  in X86TargetLowering()
652     for (auto VT : { MVT::f32, MVT::f64 }) {  in X86TargetLowering()
654       setOperationAction(ISD::FABS, VT, Custom);  in X86TargetLowering()
657       setOperationAction(ISD::FNEG, VT, Custom);  in X86TargetLowering()
660       setOperationAction(ISD::FCOPYSIGN, VT, Custom);  in X86TargetLowering()
663       setOperationAction(ISD::FADD, VT, Custom);  in X86TargetLowering()
664       setOperationAction(ISD::FSUB, VT, Custom);  in X86TargetLowering()
667       setOperationAction(ISD::FSIN   , VT, Expand);  in X86TargetLowering()
668       setOperationAction(ISD::FCOS   , VT, Expand);  in X86TargetLowering()
669       setOperationAction(ISD::FSINCOS, VT, Expand);  in X86TargetLowering()
758     for (auto VT : { MVT::f32, MVT::f64 }) {  in X86TargetLowering()
759       setOperationAction(ISD::UNDEF,     VT, Expand);  in X86TargetLowering()
760       setOperationAction(ISD::FCOPYSIGN, VT, Expand);  in X86TargetLowering()
763       setOperationAction(ISD::FSIN   , VT, Expand);  in X86TargetLowering()
764       setOperationAction(ISD::FCOS   , VT, Expand);  in X86TargetLowering()
765       setOperationAction(ISD::FSINCOS, VT, Expand);  in X86TargetLowering()
774       addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS  in X86TargetLowering()
775       addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS  in X86TargetLowering()
784       addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS  in X86TargetLowering()
785       addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS  in X86TargetLowering()
793   // Handle scalar constrained floating-point operations.  in X86TargetLowering()
832     // clang-format off  in X86TargetLowering()
843     // clang-format on  in X86TargetLowering()
857     // Handle scalar constrained floating-point operations.  in X86TargetLowering()
869     // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten  in X86TargetLowering()
896     // clang-format off  in X86TargetLowering()
904     // clang-format on  in X86TargetLowering()
953   for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,  in X86TargetLowering()
956     // clang-format off  in X86TargetLowering()
957     setOperationAction(ISD::FSIN,      VT, Expand);  in X86TargetLowering()
958     setOperationAction(ISD::FSINCOS,   VT, Expand);  in X86TargetLowering()
959     setOperationAction(ISD::FCOS,      VT, Expand);  in X86TargetLowering()
960     setOperationAction(ISD::FTAN,      VT, Expand);  in X86TargetLowering()
961     setOperationAction(ISD::FREM,      VT, Expand);  in X86TargetLowering()
962     setOperationAction(ISD::FCOPYSIGN, VT, Expand);  in X86TargetLowering()
963     setOperationAction(ISD::FPOW,      VT, Expand);  in X86TargetLowering()
964     setOperationAction(ISD::FLOG,      VT, Expand);  in X86TargetLowering()
965     setOperationAction(ISD::FLOG2,     VT, Expand);  in X86TargetLowering()
966     setOperationAction(ISD::FLOG10,    VT, Expand);  in X86TargetLowering()
967     setOperationAction(ISD::FEXP,      VT, Expand);  in X86TargetLowering()
968     setOperationAction(ISD::FEXP2,     VT, Expand);  in X86TargetLowering()
969     setOperationAction(ISD::FEXP10,    VT, Expand);  in X86TargetLowering()
970     // clang-format on  in X86TargetLowering()
976   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {  in X86TargetLowering()  local
977     setOperationAction(ISD::SDIV, VT, Expand);  in X86TargetLowering()
978     setOperationAction(ISD::UDIV, VT, Expand);  in X86TargetLowering()
979     setOperationAction(ISD::SREM, VT, Expand);  in X86TargetLowering()
980     setOperationAction(ISD::UREM, VT, Expand);  in X86TargetLowering()
981     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);  in X86TargetLowering()
982     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);  in X86TargetLowering()
983     setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);  in X86TargetLowering()
984     setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);  in X86TargetLowering()
985     setOperationAction(ISD::FMA,  VT, Expand);  in X86TargetLowering()
986     setOperationAction(ISD::FFLOOR, VT, Expand);  in X86TargetLowering()
987     setOperationAction(ISD::FCEIL, VT, Expand);  in X86TargetLowering()
988     setOperationAction(ISD::FTRUNC, VT, Expand);  in X86TargetLowering()
989     setOperationAction(ISD::FRINT, VT, Expand);  in X86TargetLowering()
990     setOperationAction(ISD::FNEARBYINT, VT, Expand);  in X86TargetLowering()
991     setOperationAction(ISD::FROUNDEVEN, VT, Expand);  in X86TargetLowering()
992     setOperationAction(ISD::SMUL_LOHI, VT, Expand);  in X86TargetLowering()
993     setOperationAction(ISD::MULHS, VT, Expand);  in X86TargetLowering()
994     setOperationAction(ISD::UMUL_LOHI, VT, Expand);  in X86TargetLowering()
995     setOperationAction(ISD::MULHU, VT, Expand);  in X86TargetLowering()
996     setOperationAction(ISD::SDIVREM, VT, Expand);  in X86TargetLowering()
997     setOperationAction(ISD::UDIVREM, VT, Expand);  in X86TargetLowering()
998     setOperationAction(ISD::CTPOP, VT, Expand);  in X86TargetLowering()
999     setOperationAction(ISD::CTTZ, VT, Expand);  in X86TargetLowering()
1000     setOperationAction(ISD::CTLZ, VT, Expand);  in X86TargetLowering()
1001     setOperationAction(ISD::ROTL, VT, Expand);  in X86TargetLowering()
1002     setOperationAction(ISD::ROTR, VT, Expand);  in X86TargetLowering()
1003     setOperationAction(ISD::BSWAP, VT, Expand);  in X86TargetLowering()
1004     setOperationAction(ISD::SETCC, VT, Expand);  in X86TargetLowering()
1005     setOperationAction(ISD::FP_TO_UINT, VT, Expand);  in X86TargetLowering()
1006     setOperationAction(ISD::FP_TO_SINT, VT, Expand);  in X86TargetLowering()
1007     setOperationAction(ISD::UINT_TO_FP, VT, Expand);  in X86TargetLowering()
1008     setOperationAction(ISD::SINT_TO_FP, VT, Expand);  in X86TargetLowering()
1009     setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);  in X86TargetLowering()
1010     setOperationAction(ISD::TRUNCATE, VT, Expand);  in X86TargetLowering()
1011     setOperationAction(ISD::SIGN_EXTEND, VT, Expand);  in X86TargetLowering()
1012     setOperationAction(ISD::ZERO_EXTEND, VT, Expand);  in X86TargetLowering()
1013     setOperationAction(ISD::ANY_EXTEND, VT, Expand);  in X86TargetLowering()
1014     setOperationAction(ISD::SELECT_CC, VT, Expand);  in X86TargetLowering()
1016       setTruncStoreAction(InnerVT, VT, Expand);  in X86TargetLowering()
1018       setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);  in X86TargetLowering()
1019       setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);  in X86TargetLowering()
1021       // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like  in X86TargetLowering()
1025       if (VT.getVectorElementType() == MVT::i1)  in X86TargetLowering()
1026         setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);  in X86TargetLowering()
1030       if (VT.getVectorElementType() == MVT::f16 ||  in X86TargetLowering()
1031           VT.getVectorElementType() == MVT::bf16)  in X86TargetLowering()
1032         setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);  in X86TargetLowering()
1037   // with -msoft-float, disable use of MMX as well.  in X86TargetLowering()
1073     // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM  in X86TargetLowering()
1086     for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {  in X86TargetLowering()
1087       setOperationAction(ISD::FMAXIMUM, VT, Custom);  in X86TargetLowering()
1088       setOperationAction(ISD::FMINIMUM, VT, Custom);  in X86TargetLowering()
1091     for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,  in X86TargetLowering()
1093       setOperationAction(ISD::SDIV, VT, Custom);  in X86TargetLowering()
1094       setOperationAction(ISD::SREM, VT, Custom);  in X86TargetLowering()
1095       setOperationAction(ISD::UDIV, VT, Custom);  in X86TargetLowering()
1096       setOperationAction(ISD::UREM, VT, Custom);  in X86TargetLowering()
1126     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {  in X86TargetLowering()
1127       setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);  in X86TargetLowering()
1128       setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);  in X86TargetLowering()
1129       setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);  in X86TargetLowering()
1130       setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);  in X86TargetLowering()
1149     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {  in X86TargetLowering()
1150       setOperationAction(ISD::SETCC, VT, Custom);  in X86TargetLowering()
1151       setOperationAction(ISD::CTPOP, VT, Custom);  in X86TargetLowering()
1152       setOperationAction(ISD::ABS, VT, Custom);  in X86TargetLowering()
1153       setOperationAction(ISD::ABDS, VT, Custom);  in X86TargetLowering()
1154       setOperationAction(ISD::ABDU, VT, Custom);  in X86TargetLowering()
1158       setCondCodeAction(ISD::SETLT, VT, Custom);  in X86TargetLowering()
1159       setCondCodeAction(ISD::SETLE, VT, Custom);  in X86TargetLowering()
1169     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {  in X86TargetLowering()
1170       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);  in X86TargetLowering()
1171       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);  in X86TargetLowering()
1172       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);  in X86TargetLowering()
1173       setOperationAction(ISD::VSELECT,            VT, Custom);  in X86TargetLowering()
1174       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);  in X86TargetLowering()
1177     for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) {  in X86TargetLowering()
1178       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);  in X86TargetLowering()
1179       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);  in X86TargetLowering()
1180       setOperationAction(ISD::VSELECT,            VT, Custom);  in X86TargetLowering()
1182       if (VT == MVT::v2i64 && !Subtarget.is64Bit())  in X86TargetLowering()
1185       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);  in X86TargetLowering()
1186       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);  in X86TargetLowering()
1213     for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {  in X86TargetLowering()
1214       setOperationAction(ISD::FP_TO_SINT,        VT, Custom);  in X86TargetLowering()
1215       setOperationAction(ISD::FP_TO_UINT,        VT, Custom);  in X86TargetLowering()
1216       setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);  in X86TargetLowering()
1217       setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);  in X86TargetLowering()
1243     // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for  in X86TargetLowering()
1252     // Add 32-bit vector stores to help vectorization opportunities.  in X86TargetLowering()
1287     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {  in X86TargetLowering()
1288       setOperationAction(ISD::SRL,              VT, Custom);  in X86TargetLowering()
1289       setOperationAction(ISD::SHL,              VT, Custom);  in X86TargetLowering()
1290       setOperationAction(ISD::SRA,              VT, Custom);  in X86TargetLowering()
1291       if (VT == MVT::v2i64) continue;  in X86TargetLowering()
1292       setOperationAction(ISD::ROTL,             VT, Custom);  in X86TargetLowering()
1293       setOperationAction(ISD::ROTR,             VT, Custom);  in X86TargetLowering()
1294       setOperationAction(ISD::FSHL,             VT, Custom);  in X86TargetLowering()
1295       setOperationAction(ISD::FSHR,             VT, Custom);  in X86TargetLowering()
1317     for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {  in X86TargetLowering()
1318       setOperationAction(ISD::BITREVERSE,       VT, Custom);  in X86TargetLowering()
1319       setOperationAction(ISD::CTLZ,             VT, Custom);  in X86TargetLowering()
1360     // FIXME: Do we need to handle scalar-to-vector here?  in X86TargetLowering()
1370     for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {  in X86TargetLowering()
1371       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);  in X86TargetLowering()
1372       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);  in X86TargetLowering()
1386       // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can  in X86TargetLowering()
1402     for (auto VT : { MVT::v16i8, MVT::v8i16,  MVT::v4i32, MVT::v2i64,  in X86TargetLowering()
1404       setOperationAction(ISD::ROTL, VT, Custom);  in X86TargetLowering()
1405       setOperationAction(ISD::ROTR, VT, Custom);  in X86TargetLowering()
1409     for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })  in X86TargetLowering()
1410       setOperationAction(ISD::BITREVERSE, VT, Custom);  in X86TargetLowering()
1431     for (auto VT : { MVT::v8f32, MVT::v4f64 }) {  in X86TargetLowering()
1432       setOperationAction(ISD::FFLOOR,            VT, Legal);  in X86TargetLowering()
1433       setOperationAction(ISD::STRICT_FFLOOR,     VT, Legal);  in X86TargetLowering()
1434       setOperationAction(ISD::FCEIL,             VT, Legal);  in X86TargetLowering()
1435       setOperationAction(ISD::STRICT_FCEIL,      VT, Legal);  in X86TargetLowering()
1436       setOperationAction(ISD::FTRUNC,            VT, Legal);  in X86TargetLowering()
1437       setOperationAction(ISD::STRICT_FTRUNC,     VT, Legal);  in X86TargetLowering()
1438       setOperationAction(ISD::FRINT,             VT, Legal);  in X86TargetLowering()
1439       setOperationAction(ISD::STRICT_FRINT,      VT, Legal);  in X86TargetLowering()
1440       setOperationAction(ISD::FNEARBYINT,        VT, Legal);  in X86TargetLowering()
1441       setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);  in X86TargetLowering()
1442       setOperationAction(ISD::FROUNDEVEN,        VT, Legal);  in X86TargetLowering()
1443       setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);  in X86TargetLowering()
1445       setOperationAction(ISD::FROUND,            VT, Custom);  in X86TargetLowering()
1447       setOperationAction(ISD::FNEG,              VT, Custom);  in X86TargetLowering()
1448       setOperationAction(ISD::FABS,              VT, Custom);  in X86TargetLowering()
1449       setOperationAction(ISD::FCOPYSIGN,         VT, Custom);  in X86TargetLowering()
1451       setOperationAction(ISD::FMAXIMUM,          VT, Custom);  in X86TargetLowering()
1452       setOperationAction(ISD::FMINIMUM,          VT, Custom);  in X86TargetLowering()
1492     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {  in X86TargetLowering()
1493       setOperationAction(ISD::SRL,             VT, Custom);  in X86TargetLowering()
1494       setOperationAction(ISD::SHL,             VT, Custom);  in X86TargetLowering()
1495       setOperationAction(ISD::SRA,             VT, Custom);  in X86TargetLowering()
1496       setOperationAction(ISD::ABDS,            VT, Custom);  in X86TargetLowering()
1497       setOperationAction(ISD::ABDU,            VT, Custom);  in X86TargetLowering()
1498       if (VT == MVT::v4i64) continue;  in X86TargetLowering()
1499       setOperationAction(ISD::ROTL,            VT, Custom);  in X86TargetLowering()
1500       setOperationAction(ISD::ROTR,            VT, Custom);  in X86TargetLowering()
1501       setOperationAction(ISD::FSHL,            VT, Custom);  in X86TargetLowering()
1502       setOperationAction(ISD::FSHR,            VT, Custom);  in X86TargetLowering()
1505     // These types need custom splitting if their input is a 128-bit vector.  in X86TargetLowering()
1519     for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {  in X86TargetLowering()
1520       setOperationAction(ISD::SIGN_EXTEND,     VT, Custom);  in X86TargetLowering()
1521       setOperationAction(ISD::ZERO_EXTEND,     VT, Custom);  in X86TargetLowering()
1522       setOperationAction(ISD::ANY_EXTEND,      VT, Custom);  in X86TargetLowering()
1530     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {  in X86TargetLowering()
1531       setOperationAction(ISD::SETCC,           VT, Custom);  in X86TargetLowering()
1532       setOperationAction(ISD::CTPOP,           VT, Custom);  in X86TargetLowering()
1533       setOperationAction(ISD::CTLZ,            VT, Custom);  in X86TargetLowering()
1534       setOperationAction(ISD::BITREVERSE,      VT, Custom);  in X86TargetLowering()
1538       setCondCodeAction(ISD::SETLT, VT, Custom);  in X86TargetLowering()
1539       setCondCodeAction(ISD::SETLE, VT, Custom);  in X86TargetLowering()
1550       for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,  in X86TargetLowering()
1552         setOperationAction(ISD::FMA, VT, Legal);  in X86TargetLowering()
1553         setOperationAction(ISD::STRICT_FMA, VT, Legal);  in X86TargetLowering()
1557     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {  in X86TargetLowering()
1558       setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);  in X86TargetLowering()
1559       setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);  in X86TargetLowering()
1598     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {  in X86TargetLowering()
1599       setOperationAction(ISD::ABS,  VT, HasInt256 ? Legal : Custom);  in X86TargetLowering()
1600       setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);  in X86TargetLowering()
1601       setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);  in X86TargetLowering()
1602       setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);  in X86TargetLowering()
1603       setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);  in X86TargetLowering()
1606     for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {  in X86TargetLowering()
1607       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);  in X86TargetLowering()
1608       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);  in X86TargetLowering()
1613       // when we have a 256bit-wide blend with immediate.  in X86TargetLowering()
1628     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,  in X86TargetLowering()
1630       setOperationAction(ISD::MLOAD,  VT, Subtarget.hasVLX() ? Legal : Custom);  in X86TargetLowering()
1631       setOperationAction(ISD::MSTORE, VT, Legal);  in X86TargetLowering()
1635     // (result) is 128-bit but the source is 256-bit wide.  in X86TargetLowering()
1636     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,  in X86TargetLowering()
1638       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);  in X86TargetLowering()
1641     // Custom lower several nodes for 256-bit types.  in X86TargetLowering()
1642     for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,  in X86TargetLowering()
1644       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);  in X86TargetLowering()
1645       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);  in X86TargetLowering()
1646       setOperationAction(ISD::VSELECT,            VT, Custom);  in X86TargetLowering()
1647       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);  in X86TargetLowering()
1648       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);  in X86TargetLowering()
1649       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);  in X86TargetLowering()
1650       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Legal);  in X86TargetLowering()
1651       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);  in X86TargetLowering()
1652       setOperationAction(ISD::STORE,              VT, Custom);  in X86TargetLowering()
1670       for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,  in X86TargetLowering()
1672         setOperationAction(ISD::MGATHER,  VT, Custom);  in X86TargetLowering()
1678     for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) {  in X86TargetLowering()
1679       setOperationAction(ISD::FP_ROUND,           VT, Custom);  in X86TargetLowering()
1680       setOperationAction(ISD::STRICT_FP_ROUND,    VT, Custom);  in X86TargetLowering()
1682     for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) {  in X86TargetLowering()
1683       setOperationAction(ISD::FP_EXTEND,          VT, Custom);  in X86TargetLowering()
1684       setOperationAction(ISD::STRICT_FP_EXTEND,   VT, Custom);  in X86TargetLowering()
1693   // available with AVX512. 512-bit vectors are in a separate block controlled  in X86TargetLowering()
1719     // There is no byte sized k-register load or store without AVX512DQ.  in X86TargetLowering()
1732     // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.  in X86TargetLowering()
1733     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {  in X86TargetLowering()
1734       setOperationAction(ISD::SIGN_EXTEND, VT, Custom);  in X86TargetLowering()
1735       setOperationAction(ISD::ZERO_EXTEND, VT, Custom);  in X86TargetLowering()
1736       setOperationAction(ISD::ANY_EXTEND,  VT, Custom);  in X86TargetLowering()
1739     for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })  in X86TargetLowering()
1740       setOperationAction(ISD::VSELECT,          VT, Expand);  in X86TargetLowering()
1742     for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {  in X86TargetLowering()
1743       setOperationAction(ISD::SETCC,            VT, Custom);  in X86TargetLowering()
1744       setOperationAction(ISD::SELECT,           VT, Custom);  in X86TargetLowering()
1745       setOperationAction(ISD::TRUNCATE,         VT, Custom);  in X86TargetLowering()
1747       setOperationAction(ISD::BUILD_VECTOR,     VT, Custom);  in X86TargetLowering()
1748       setOperationAction(ISD::CONCAT_VECTORS,   VT, Custom);  in X86TargetLowering()
1749       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);  in X86TargetLowering()
1750       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);  in X86TargetLowering()
1751       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);  in X86TargetLowering()
1752       setOperationAction(ISD::VECTOR_SHUFFLE,   VT,  Custom);  in X86TargetLowering()
1755     for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })  in X86TargetLowering()
1756       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);  in X86TargetLowering()
1759     for (MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {  in X86TargetLowering()
1760       setOperationAction(ISD::LRINT, VT, Legal);  in X86TargetLowering()
1761       setOperationAction(ISD::LLRINT, VT, Legal);  in X86TargetLowering()
1765   // This block controls legalization for 512-bit operations with 8/16/32/64 bit  in X86TargetLowering()
1766   // elements. 512-bits can be disabled based on prefer-vector-width and  in X86TargetLowering()
1767   // required-vector-width function attributes.  in X86TargetLowering()
1789     for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {  in X86TargetLowering()
1790       setOperationAction(ISD::FMAXIMUM, VT, Custom);  in X86TargetLowering()
1791       setOperationAction(ISD::FMINIMUM, VT, Custom);  in X86TargetLowering()
1792       setOperationAction(ISD::FNEG,  VT, Custom);  in X86TargetLowering()
1793       setOperationAction(ISD::FABS,  VT, Custom);  in X86TargetLowering()
1794       setOperationAction(ISD::FMA,   VT, Legal);  in X86TargetLowering()
1795       setOperationAction(ISD::STRICT_FMA, VT, Legal);  in X86TargetLowering()
1796       setOperationAction(ISD::FCOPYSIGN, VT, Custom);  in X86TargetLowering()
1805     for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {  in X86TargetLowering()
1806       setOperationPromotedToType(ISD::FP_TO_SINT       , VT, MVT::v16i32);  in X86TargetLowering()
1807       setOperationPromotedToType(ISD::FP_TO_UINT       , VT, MVT::v16i32);  in X86TargetLowering()
1808       setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);  in X86TargetLowering()
1809       setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);  in X86TargetLowering()
1812     for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {  in X86TargetLowering()
1813       setOperationAction(ISD::FP_TO_SINT,        VT, Custom);  in X86TargetLowering()
1814       setOperationAction(ISD::FP_TO_UINT,        VT, Custom);  in X86TargetLowering()
1815       setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);  in X86TargetLowering()
1816       setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);  in X86TargetLowering()
1846     // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE  in X86TargetLowering()
1847     // to 512-bit rather than use the AVX2 instructions so that we can use  in X86TargetLowering()
1848     // k-masks.  in X86TargetLowering()
1850       for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,  in X86TargetLowering()
1852         setOperationAction(ISD::MLOAD,  VT, Custom);  in X86TargetLowering()
1853         setOperationAction(ISD::MSTORE, VT, Custom);  in X86TargetLowering()
1871       // Extends from v64i1 masks to 512-bit vectors.  in X86TargetLowering()
1877     for (auto VT : { MVT::v16f32, MVT::v8f64 }) {  in X86TargetLowering()
1878       setOperationAction(ISD::FFLOOR,            VT, Legal);  in X86TargetLowering()
1879       setOperationAction(ISD::STRICT_FFLOOR,     VT, Legal);  in X86TargetLowering()
1880       setOperationAction(ISD::FCEIL,             VT, Legal);  in X86TargetLowering()
1881       setOperationAction(ISD::STRICT_FCEIL,      VT, Legal);  in X86TargetLowering()
1882       setOperationAction(ISD::FTRUNC,            VT, Legal);  in X86TargetLowering()
1883       setOperationAction(ISD::STRICT_FTRUNC,     VT, Legal);  in X86TargetLowering()
1884       setOperationAction(ISD::FRINT,             VT, Legal);  in X86TargetLowering()
1885       setOperationAction(ISD::STRICT_FRINT,      VT, Legal);  in X86TargetLowering()
1886       setOperationAction(ISD::FNEARBYINT,        VT, Legal);  in X86TargetLowering()
1887       setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);  in X86TargetLowering()
1888       setOperationAction(ISD::FROUNDEVEN,        VT, Legal);  in X86TargetLowering()
1889       setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);  in X86TargetLowering()
1891       setOperationAction(ISD::FROUND,            VT, Custom);  in X86TargetLowering()
1894     for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {  in X86TargetLowering()
1895       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);  in X86TargetLowering()
1896       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);  in X86TargetLowering()
1921     for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {  in X86TargetLowering()
1922       setOperationAction(ISD::SRL,              VT, Custom);  in X86TargetLowering()
1923       setOperationAction(ISD::SHL,              VT, Custom);  in X86TargetLowering()
1924       setOperationAction(ISD::SRA,              VT, Custom);  in X86TargetLowering()
1925       setOperationAction(ISD::ROTL,             VT, Custom);  in X86TargetLowering()
1926       setOperationAction(ISD::ROTR,             VT, Custom);  in X86TargetLowering()
1927       setOperationAction(ISD::SETCC,            VT, Custom);  in X86TargetLowering()
1928       setOperationAction(ISD::ABDS,             VT, Custom);  in X86TargetLowering()
1929       setOperationAction(ISD::ABDU,             VT, Custom);  in X86TargetLowering()
1930       setOperationAction(ISD::BITREVERSE,       VT, Custom);  in X86TargetLowering()
1934       setCondCodeAction(ISD::SETLT, VT, Custom);  in X86TargetLowering()
1935       setCondCodeAction(ISD::SETLE, VT, Custom);  in X86TargetLowering()
1945     for (auto VT : { MVT::v16i32, MVT::v8i64 }) {  in X86TargetLowering()
1946       setOperationAction(ISD::SMAX,             VT, Legal);  in X86TargetLowering()
1947       setOperationAction(ISD::UMAX,             VT, Legal);  in X86TargetLowering()
1948       setOperationAction(ISD::SMIN,             VT, Legal);  in X86TargetLowering()
1949       setOperationAction(ISD::UMIN,             VT, Legal);  in X86TargetLowering()
1950       setOperationAction(ISD::ABS,              VT, Legal);  in X86TargetLowering()
1951       setOperationAction(ISD::CTPOP,            VT, Custom);  in X86TargetLowering()
1954     for (auto VT : { MVT::v64i8, MVT::v32i16 }) {  in X86TargetLowering()
1955       setOperationAction(ISD::ABS,     VT, HasBWI ? Legal : Custom);  in X86TargetLowering()
1956       setOperationAction(ISD::CTPOP,   VT, Subtarget.hasBITALG() ? Legal : Custom);  in X86TargetLowering()
1957       setOperationAction(ISD::CTLZ,    VT, Custom);  in X86TargetLowering()
1958       setOperationAction(ISD::SMAX,    VT, HasBWI ? Legal : Custom);  in X86TargetLowering()
1959       setOperationAction(ISD::UMAX,    VT, HasBWI ? Legal : Custom);  in X86TargetLowering()
1960       setOperationAction(ISD::SMIN,    VT, HasBWI ? Legal : Custom);  in X86TargetLowering()
1961       setOperationAction(ISD::UMIN,    VT, HasBWI ? Legal : Custom);  in X86TargetLowering()
1962       setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);  in X86TargetLowering()
1963       setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);  in X86TargetLowering()
1964       setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);  in X86TargetLowering()
1965       setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);  in X86TargetLowering()
1984       // NonVLX sub-targets extend 128/256 vectors to use the 512 version.  in X86TargetLowering()
1985       for (auto VT : { MVT::v16i32, MVT::v8i64} ) {  in X86TargetLowering()
1986         setOperationAction(ISD::CTLZ,            VT, Legal);  in X86TargetLowering()
1991       for (auto VT : { MVT::v16i32, MVT::v8i64 })  in X86TargetLowering()
1992         setOperationAction(ISD::CTPOP, VT, Legal);  in X86TargetLowering()
1996     // (result) is 256-bit but the source is 512-bit wide.  in X86TargetLowering()
1997     // 128-bit was made Legal under AVX1.  in X86TargetLowering()
1998     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,  in X86TargetLowering()
2000       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);  in X86TargetLowering()
2002     for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,  in X86TargetLowering()
2004       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);  in X86TargetLowering()
2005       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Legal);  in X86TargetLowering()
2006       setOperationAction(ISD::SELECT,             VT, Custom);  in X86TargetLowering()
2007       setOperationAction(ISD::VSELECT,            VT, Custom);  in X86TargetLowering()
2008       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);  in X86TargetLowering()
2009       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);  in X86TargetLowering()
2010       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);  in X86TargetLowering()
2011       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);  in X86TargetLowering()
2012       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);  in X86TargetLowering()
2022     for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {  in X86TargetLowering()
2023       setOperationAction(ISD::MLOAD,               VT, Legal);  in X86TargetLowering()
2024       setOperationAction(ISD::MSTORE,              VT, Legal);  in X86TargetLowering()
2025       setOperationAction(ISD::MGATHER,             VT, Custom);  in X86TargetLowering()
2026       setOperationAction(ISD::MSCATTER,            VT, Custom);  in X86TargetLowering()
2029       for (auto VT : { MVT::v64i8, MVT::v32i16 }) {  in X86TargetLowering()
2030         setOperationAction(ISD::MLOAD,        VT, Legal);  in X86TargetLowering()
2031         setOperationAction(ISD::MSTORE,       VT, Legal);  in X86TargetLowering()
2039       for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {  in X86TargetLowering()
2040         setOperationAction(ISD::FSHL, VT, Custom);  in X86TargetLowering()
2041         setOperationAction(ISD::FSHR, VT, Custom);  in X86TargetLowering()
2054     for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32,  in X86TargetLowering()
2056       setOperationAction(ISD::FSHL, VT, Custom);  in X86TargetLowering()
2057       setOperationAction(ISD::FSHR, VT, Custom);  in X86TargetLowering()
2062   // pre-AVX512 equivalents. Without VLX we use 512-bit operations for  in X86TargetLowering()
2065     // These operations are handled on non-VLX by artificially widening in  in X86TargetLowering()
2085     for (auto VT : { MVT::v2i64, MVT::v4i64 }) {  in X86TargetLowering()
2086       setOperationAction(ISD::SMAX, VT, Legal);  in X86TargetLowering()
2087       setOperationAction(ISD::UMAX, VT, Legal);  in X86TargetLowering()
2088       setOperationAction(ISD::SMIN, VT, Legal);  in X86TargetLowering()
2089       setOperationAction(ISD::UMIN, VT, Legal);  in X86TargetLowering()
2090       setOperationAction(ISD::ABS,  VT, Legal);  in X86TargetLowering()
2093     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {  in X86TargetLowering()
2094       setOperationAction(ISD::ROTL,     VT, Custom);  in X86TargetLowering()
2095       setOperationAction(ISD::ROTR,     VT, Custom);  in X86TargetLowering()
2102     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,  in X86TargetLowering()
2104       setOperationAction(ISD::MSCATTER, VT, Custom);  in X86TargetLowering()
2118       for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {  in X86TargetLowering()
2119         setOperationAction(ISD::CTLZ,            VT, Legal);  in X86TargetLowering()
2124       for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })  in X86TargetLowering()
2125         setOperationAction(ISD::CTPOP, VT, Legal);  in X86TargetLowering()
2135     for (auto VT : { MVT::v32i1, MVT::v64i1 }) {  in X86TargetLowering()
2136       setOperationAction(ISD::VSELECT,            VT, Expand);  in X86TargetLowering()
2137       setOperationAction(ISD::TRUNCATE,           VT, Custom);  in X86TargetLowering()
2138       setOperationAction(ISD::SETCC,              VT, Custom);  in X86TargetLowering()
2139       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);  in X86TargetLowering()
2140       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);  in X86TargetLowering()
2141       setOperationAction(ISD::SELECT,             VT, Custom);  in X86TargetLowering()
2142       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);  in X86TargetLowering()
2143       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);  in X86TargetLowering()
2144       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);  in X86TargetLowering()
2145       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Custom);  in X86TargetLowering()
2148     for (auto VT : { MVT::v16i1, MVT::v32i1 })  in X86TargetLowering()
2149       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);  in X86TargetLowering()
2151     // Extends from v32i1 masks to 256-bit vectors.  in X86TargetLowering()
2156     for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {  in X86TargetLowering()
2157       setOperationAction(ISD::MLOAD,  VT, Subtarget.hasVLX() ? Legal : Custom);  in X86TargetLowering()
2158       setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);  in X86TargetLowering()
2161     // These operations are handled on non-VLX by artificially widening in  in X86TargetLowering()
2163     // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?  in X86TargetLowering()
2166       for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })  in X86TargetLowering()
2167         setOperationAction(ISD::CTPOP, VT, Legal);  in X86TargetLowering()
2172     auto setGroup = [&] (MVT VT) {  in X86TargetLowering()  argument
2173       setOperationAction(ISD::FADD,               VT, Legal);  in X86TargetLowering()
2174       setOperationAction(ISD::STRICT_FADD,        VT, Legal);  in X86TargetLowering()
2175       setOperationAction(ISD::FSUB,               VT, Legal);  in X86TargetLowering()
2176       setOperationAction(ISD::STRICT_FSUB,        VT, Legal);  in X86TargetLowering()
2177       setOperationAction(ISD::FMUL,               VT, Legal);  in X86TargetLowering()
2178       setOperationAction(ISD::STRICT_FMUL,        VT, Legal);  in X86TargetLowering()
2179       setOperationAction(ISD::FDIV,               VT, Legal);  in X86TargetLowering()
2180       setOperationAction(ISD::STRICT_FDIV,        VT, Legal);  in X86TargetLowering()
2181       setOperationAction(ISD::FSQRT,              VT, Legal);  in X86TargetLowering()
2182       setOperationAction(ISD::STRICT_FSQRT,       VT, Legal);  in X86TargetLowering()
2184       setOperationAction(ISD::FFLOOR,             VT, Legal);  in X86TargetLowering()
2185       setOperationAction(ISD::STRICT_FFLOOR,      VT, Legal);  in X86TargetLowering()
2186       setOperationAction(ISD::FCEIL,              VT, Legal);  in X86TargetLowering()
2187       setOperationAction(ISD::STRICT_FCEIL,       VT, Legal);  in X86TargetLowering()
2188       setOperationAction(ISD::FTRUNC,             VT, Legal);  in X86TargetLowering()
2189       setOperationAction(ISD::STRICT_FTRUNC,      VT, Legal);  in X86TargetLowering()
2190       setOperationAction(ISD::FRINT,              VT, Legal);  in X86TargetLowering()
2191       setOperationAction(ISD::STRICT_FRINT,       VT, Legal);  in X86TargetLowering()
2192       setOperationAction(ISD::FNEARBYINT,         VT, Legal);  in X86TargetLowering()
2193       setOperationAction(ISD::STRICT_FNEARBYINT,  VT, Legal);  in X86TargetLowering()
2194       setOperationAction(ISD::FROUNDEVEN, VT, Legal);  in X86TargetLowering()
2195       setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);  in X86TargetLowering()
2197       setOperationAction(ISD::FROUND,             VT, Custom);  in X86TargetLowering()
2199       setOperationAction(ISD::LOAD,               VT, Legal);  in X86TargetLowering()
2200       setOperationAction(ISD::STORE,              VT, Legal);  in X86TargetLowering()
2202       setOperationAction(ISD::FMA,                VT, Legal);  in X86TargetLowering()
2203       setOperationAction(ISD::STRICT_FMA,         VT, Legal);  in X86TargetLowering()
2204       setOperationAction(ISD::VSELECT,            VT, Legal);  in X86TargetLowering()
2205       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);  in X86TargetLowering()
2206       setOperationAction(ISD::SELECT,             VT, Custom);  in X86TargetLowering()
2208       setOperationAction(ISD::FNEG,               VT, Custom);  in X86TargetLowering()
2209       setOperationAction(ISD::FABS,               VT, Custom);  in X86TargetLowering()
2210       setOperationAction(ISD::FCOPYSIGN,          VT, Custom);  in X86TargetLowering()
2211       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);  in X86TargetLowering()
2212       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);  in X86TargetLowering()
2214       setOperationAction(ISD::SETCC,              VT, Custom);  in X86TargetLowering()
2215       setOperationAction(ISD::STRICT_FSETCC,      VT, Custom);  in X86TargetLowering()
2216       setOperationAction(ISD::STRICT_FSETCCS,     VT, Custom);  in X86TargetLowering()
2334     for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {  in X86TargetLowering()
2335       setF16Action(VT, Expand);  in X86TargetLowering()
2336       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);  in X86TargetLowering()
2337       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);  in X86TargetLowering()
2338       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);  in X86TargetLowering()
2339       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);  in X86TargetLowering()
2380       // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64  in X86TargetLowering()
2389       // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16  in X86TargetLowering()
2398       // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16  in X86TargetLowering()
2403       // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32  in X86TargetLowering()
2423   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't  in X86TargetLowering()
2427   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better  in X86TargetLowering()
2428   // than generic legalization for 64-bit multiplication-with-overflow, though.  in X86TargetLowering()
2429   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {  in X86TargetLowering()
2430     if (VT == MVT::i64 && !Subtarget.is64Bit())  in X86TargetLowering()
2433     setOperationAction(ISD::SADDO, VT, Custom);  in X86TargetLowering()
2434     setOperationAction(ISD::UADDO, VT, Custom);  in X86TargetLowering()
2435     setOperationAction(ISD::SSUBO, VT, Custom);  in X86TargetLowering()
2436     setOperationAction(ISD::USUBO, VT, Custom);  in X86TargetLowering()
2437     setOperationAction(ISD::SMULO, VT, Custom);  in X86TargetLowering()
2438     setOperationAction(ISD::UMULO, VT, Custom);  in X86TargetLowering()
2441     setOperationAction(ISD::UADDO_CARRY, VT, Custom);  in X86TargetLowering()
2442     setOperationAction(ISD::USUBO_CARRY, VT, Custom);  in X86TargetLowering()
2443     setOperationAction(ISD::SETCCCARRY, VT, Custom);  in X86TargetLowering()
2444     setOperationAction(ISD::SADDO_CARRY, VT, Custom);  in X86TargetLowering()
2445     setOperationAction(ISD::SSUBO_CARRY, VT, Custom);  in X86TargetLowering()
2470   // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`  in X86TargetLowering()
2471   // is. We should promote the value to 64-bits to solve this.  in X86TargetLowering()
2472   // This is what the CRT headers do - `fmodf` is an inline header  in X86TargetLowering()
2476     // clang-format off  in X86TargetLowering()
2496   // clang-format on  in X86TargetLowering()
2498   // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined.  MinGW has  in X86TargetLowering()
2506   // We have target-specific dag combine patterns for the following nodes:  in X86TargetLowering()
2567   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores  in X86TargetLowering()
2569   MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores  in X86TargetLowering()
2571   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores  in X86TargetLowering()
2580   // Default loop alignment, which can be overridden by -align-loops.  in X86TargetLowering()
2583   // An out-of-order CPU can speculatively execute past a predictable branch,  in X86TargetLowering()
2591   // Default to having -disable-strictnode-mutation on  in X86TargetLowering()
2595 // This has so far only been implemented for 64-bit MachO.
2614 X86TargetLowering::getPreferredVectorAction(MVT VT) const {  in getPreferredVectorAction()
2615   if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&  in getPreferredVectorAction()
2619   if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&  in getPreferredVectorAction()
2620       !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16)  in getPreferredVectorAction()
2623   if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&  in getPreferredVectorAction()
2624       VT.getVectorElementType() != MVT::i1)  in getPreferredVectorAction()
2627   return TargetLoweringBase::getPreferredVectorAction(VT);  in getPreferredVectorAction()
2636 //===----------------------------------------------------------------------===//
2638 //===----------------------------------------------------------------------===//
2650       Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16))  in mayFoldLoad()
2653   // TODO: If this is a non-temporal load and the target has an instruction  in mayFoldLoad()
2666   // We can not replace a wide volatile load with a broadcast-from-memory,  in mayFoldLoadIntoBroadcastFromMem()
2669   return !Ld->isVolatile() ||  in mayFoldLoadIntoBroadcastFromMem()
2670          Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();  in mayFoldLoadIntoBroadcastFromMem()
2674   return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());  in mayFoldIntoStore()
2679     unsigned Opcode = Op.getNode()->use_begin()->getOpcode();  in mayFoldIntoZeroExtend()
2754   int ReturnAddrIndex = FuncInfo->getRAIndex();  in getReturnAddressFrameIndex()
2758     unsigned SlotSize = RegInfo->getSlotSize();  in getReturnAddressFrameIndex()
2760                                                           -(int64_t)SlotSize,  in getReturnAddressFrameIndex()
2762     FuncInfo->setRAIndex(ReturnAddrIndex);  in getReturnAddressFrameIndex()
2774   // If we don't have a symbolic displacement - we don't have any extra  in isOffsetSuitableForCodeModel()
2780   // 64-bit offsets.  in isOffsetSuitableForCodeModel()
2790   // For other non-large code models we assume that latest small object is 16MB  in isOffsetSuitableForCodeModel()
2819   // clang-format off  in TranslateIntegerX86CC()
2831   // clang-format on  in TranslateIntegerX86CC()
2835 /// Do a one-to-one translation of a ISD::CondCode to the X86-specific
2843       if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {  in TranslateX86CC()
2844         // X > -1   -> X == 0, jump !sign.  in TranslateX86CC()
2848       if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {  in TranslateX86CC()
2849         // X < 0   -> X == 0, jump on sign.  in TranslateX86CC()
2852       if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {  in TranslateX86CC()
2853         // X >= 0   -> X == 0, jump on !sign.  in TranslateX86CC()
2856       if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {  in TranslateX86CC()
2857         // X < 1   -> X <= 0  in TranslateX86CC()
2892   // clang-format off  in TranslateX86CC()
2893   default: llvm_unreachable("Condcode should be pre-legalized away");  in TranslateX86CC()
2914   // clang-format on  in TranslateX86CC()
2937 static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {  in useVPTERNLOG()  argument
2939          VT.is512BitVector();  in useVPTERNLOG()
2956       Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);  in getTgtMemIntrinsic()
2964       Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);  in getTgtMemIntrinsic()
2972       Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);  in getTgtMemIntrinsic()
2980       Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);  in getTgtMemIntrinsic()
2991       unsigned Size = I.getType()->getScalarSizeInBits();  in getTgtMemIntrinsic()
2992       Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);  in getTgtMemIntrinsic()
3003       unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();  in getTgtMemIntrinsic()
3004       Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);  in getTgtMemIntrinsic()
3025       unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();  in getTgtMemIntrinsic()
3026       Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size);  in getTgtMemIntrinsic()
3036   switch (IntrData->Type) {  in getTgtMemIntrinsic()
3042     MVT VT  = MVT::getVT(I.getArgOperand(1)->getType());  in getTgtMemIntrinsic()  local
3044     if (IntrData->Type == TRUNCATE_TO_MEM_VI8)  in getTgtMemIntrinsic()
3046     else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)  in getTgtMemIntrinsic()
3048     else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)  in getTgtMemIntrinsic()
3051     Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());  in getTgtMemIntrinsic()
3061     MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());  in getTgtMemIntrinsic()
3072     MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());  in getTgtMemIntrinsic()
3073     MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());  in getTgtMemIntrinsic()
3091 bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,  in isFPImmLegal()  argument
3102   assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");  in shouldReduceLoadWidth()
3104   // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF  in shouldReduceLoadWidth()
3106   SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();  in shouldReduceLoadWidth()
3109       return GA->getTargetFlags() != X86II::MO_GOTTPOFF;  in shouldReduceLoadWidth()
3113   // can be store-folded. Therefore, it's probably not worth splitting the load.  in shouldReduceLoadWidth()
3114   EVT VT = Load->getValueType(0);  in shouldReduceLoadWidth()  local
3115   if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {  in shouldReduceLoadWidth()
3116     for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {  in shouldReduceLoadWidth()
3122       if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||  in shouldReduceLoadWidth()
3123           UI->use_begin()->getOpcode() != ISD::STORE)  in shouldReduceLoadWidth()
3126     // All non-chain uses are extract + store.  in shouldReduceLoadWidth()
3137   assert(Ty->isIntegerTy());  in shouldConvertConstantLoadToIntImm()
3139   unsigned BitSize = Ty->getPrimitiveSizeInBits();  in shouldConvertConstantLoadToIntImm()
3147   // a floating-point compare and we have blendv or conditional move, then it is  in reduceSelectOfFPConstantLoads()
3148   // cheaper to select instead of doing a cross-register move and creating a  in reduceSelectOfFPConstantLoads()
3154 bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {  in convertSelectOfConstantsToMath()
3157   if (VT.isVector() && Subtarget.hasAVX512())  in convertSelectOfConstantsToMath()
3163 bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,  in decomposeMulByConstant()  argument
3175   // through type legalization on 32-bit targets so we would need to special  in decomposeMulByConstant()
3177   while (getTypeAction(Context, VT) != TypeLegal)  in decomposeMulByConstant()
3178     VT = getTypeToTransformTo(Context, VT);  in decomposeMulByConstant()
3182   // most implementations, sub-vXi32 vector multiplies are always fast,  in decomposeMulByConstant()
3185   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in decomposeMulByConstant()
3186   if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&  in decomposeMulByConstant()
3191   return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||  in decomposeMulByConstant()
3192          (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();  in decomposeMulByConstant()
3213   // TODO - do we have any exceptions?  in shouldScalarizeBinop()
3228 bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,  in shouldFormOverflowOp()  argument
3231   if (VT.isVector())  in shouldFormOverflowOp()
3233   return VT.isSimple() || !isOperationExpand(Opcode, VT);  in shouldFormOverflowOp()
3239          (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);  in isCheapToSpeculateCttz()
3247 bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {  in ShouldShrinkFPConstant()
3251   return !Subtarget.hasSSE2() || VT == MVT::f80;  in ShouldShrinkFPConstant()
3254 bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const {  in isScalarFPTypeInSSEReg()
3255   return (VT == MVT::f64 && Subtarget.hasSSE2()) ||  in isScalarFPTypeInSSEReg()
3256          (VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16;  in isScalarFPTypeInSSEReg()
3305   EVT VT = Y.getValueType();  in hasAndNotCompare()  local
3307   if (VT.isVector())  in hasAndNotCompare()
3313   // There are only 32-bit and 64-bit forms for 'andn'.  in hasAndNotCompare()
3314   if (VT != MVT::i32 && VT != MVT::i64)  in hasAndNotCompare()
3317   return !isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque();  in hasAndNotCompare()
3321   EVT VT = Y.getValueType();  in hasAndNot()  local
3323   if (!VT.isVector())  in hasAndNot()
3328   if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)  in hasAndNot()
3331   if (VT == MVT::v4i32)  in hasAndNot()
3360   // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.  in shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd()
3365     EVT VT, unsigned ShiftOpc, bool MayTransformRotate,  in preferedOpcodeForCmpEqPiecesOfOperand()  argument
3367   if (!VT.isInteger())  in preferedOpcodeForCmpEqPiecesOfOperand()
3371   if (VT.isVector()) {  in preferedOpcodeForCmpEqPiecesOfOperand()
3374     PreferRotate = Subtarget.hasAVX512() && (VT.getScalarType() == MVT::i32 ||  in preferedOpcodeForCmpEqPiecesOfOperand()
3375                                              VT.getScalarType() == MVT::i64);  in preferedOpcodeForCmpEqPiecesOfOperand()
3382           VT.getScalarSizeInBits() - ShiftOrRotateAmt.getZExtValue();  in preferedOpcodeForCmpEqPiecesOfOperand()
3396     if (VT.isVector())  in preferedOpcodeForCmpEqPiecesOfOperand()
3402       // at least imm32 mask (or be zext i32 -> i64).  in preferedOpcodeForCmpEqPiecesOfOperand()
3403       if (VT == MVT::i64)  in preferedOpcodeForCmpEqPiecesOfOperand()
3404         return AndMask->getSignificantBits() > 32 ? (unsigned)ISD::SRL  in preferedOpcodeForCmpEqPiecesOfOperand()
3407       // We can only benefit if req at least 7-bit for the mask. We  in preferedOpcodeForCmpEqPiecesOfOperand()
3413     if (VT == MVT::i64)  in preferedOpcodeForCmpEqPiecesOfOperand()
3414       // Keep exactly 32-bit imm64, this is zext i32 -> i64 which is  in preferedOpcodeForCmpEqPiecesOfOperand()
3416       return AndMask->getSignificantBits() > 33 ? (unsigned)ISD::SHL : ShiftOpc;  in preferedOpcodeForCmpEqPiecesOfOperand()
3424   if (PreferRotate || !MayTransformRotate || VT.isVector())  in preferedOpcodeForCmpEqPiecesOfOperand()
3427   // Non-vector type and we have a zext mask with SRL.  in preferedOpcodeForCmpEqPiecesOfOperand()
3453   return N->getOpcode() != ISD::FP_EXTEND;  in preferScalarizeSplat()
3458   assert(((N->getOpcode() == ISD::SHL &&  in shouldFoldConstantShiftPairToMask()
3459            N->getOperand(0).getOpcode() == ISD::SRL) ||  in shouldFoldConstantShiftPairToMask()
3460           (N->getOpcode() == ISD::SRL &&  in shouldFoldConstantShiftPairToMask()
3461            N->getOperand(0).getOpcode() == ISD::SHL)) &&  in shouldFoldConstantShiftPairToMask()
3462          "Expected shift-shift mask");  in shouldFoldConstantShiftPairToMask()
3464   EVT VT = N->getValueType(0);  in shouldFoldConstantShiftPairToMask()  local
3465   if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||  in shouldFoldConstantShiftPairToMask()
3466       (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {  in shouldFoldConstantShiftPairToMask()
3467     // Only fold if the shift values are equal - so it folds to AND.  in shouldFoldConstantShiftPairToMask()
3468     // TODO - we should fold if either is a non-uniform vector but we don't do  in shouldFoldConstantShiftPairToMask()
3469     // the fold for non-splats yet.  in shouldFoldConstantShiftPairToMask()
3470     return N->getOperand(1) == N->getOperand(0).getOperand(1);  in shouldFoldConstantShiftPairToMask()
3476   EVT VT = Y.getValueType();  in shouldFoldMaskToVariableShiftPair()  local
3479   if (VT.isVector())  in shouldFoldMaskToVariableShiftPair()
3482   // 64-bit shifts on 32-bit targets produce really bad bloated code.  in shouldFoldMaskToVariableShiftPair()
3483   if (VT == MVT::i64 && !Subtarget.is64Bit())  in shouldFoldMaskToVariableShiftPair()
3499 bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {  in shouldSplatInsEltVarIndex()
3502   return isTypeLegal(VT);  in shouldSplatInsEltVarIndex()
3506   MVT VT = MVT::getIntegerVT(NumBits);  in hasFastEqualityCompare()  local
3507   if (isTypeLegal(VT))  in hasFastEqualityCompare()
3508     return VT;  in hasFastEqualityCompare()
3518   // TODO: Allow 64-bit type for 32-bit target.  in hasFastEqualityCompare()
3519   // TODO: 512-bit types should be allowed, but make sure that those  in hasFastEqualityCompare()
3612 /// Return true if every element in Mask, is an in-place blend/select mask or is
3624 /// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
3670 /// shuffle masks. The latter have the special property of a '-2' representing
3671 /// a zero-ed lane of a vector.
3686     // a pair of values. If we find such a case, use the non-undef mask's value.  in canWidenShuffleElements()
3730     assert(!Zeroable.isZero() && "V2's non-undef elements are used?!");  in canWidenShuffleElements()
3784 // Use an UNDEF node if MaskElt == -1.
3785 // Split 64-bit constants in the 32-bit mode.
3786 static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,  in getConstVector()  argument
3792   MVT ConstVecVT = VT;  in getConstVector()
3793   unsigned NumElts = VT.getVectorNumElements();  in getConstVector()
3795   if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {  in getConstVector()
3812     ConstsNode = DAG.getBitcast(VT, ConstsNode);  in getConstVector()
3817                               MVT VT, SelectionDAG &DAG, const SDLoc &dl) {  in getConstVector()  argument
3823   MVT ConstVecVT = VT;  in getConstVector()
3824   unsigned NumElts = VT.getVectorNumElements();  in getConstVector()
3826   if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {  in getConstVector()
3838     assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");  in getConstVector()
3854   return DAG.getBitcast(VT, ConstsNode);  in getConstVector()
3857 static SDValue getConstVector(ArrayRef<APInt> Bits, MVT VT,  in getConstVector()  argument
3860   return getConstVector(Bits, Undefs, VT, DAG, dl);  in getConstVector()
3864 static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,  in getZeroVector()  argument
3866   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||  in getZeroVector()
3867           VT.getVectorElementType() == MVT::i1) &&  in getZeroVector()
3872   // available, use a floating-point +0.0 instead.  in getZeroVector()
3875   if (!Subtarget.hasSSE2() && VT.is128BitVector()) {  in getZeroVector()
3877   } else if (VT.isFloatingPoint() &&  in getZeroVector()
3878              TLI.isTypeLegal(VT.getVectorElementType())) {  in getZeroVector()
3879     Vec = DAG.getConstantFP(+0.0, dl, VT);  in getZeroVector()
3880   } else if (VT.getVectorElementType() == MVT::i1) {  in getZeroVector()
3881     assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&  in getZeroVector()
3883     Vec = DAG.getConstant(0, dl, VT);  in getZeroVector()
3885     unsigned Num32BitElts = VT.getSizeInBits() / 32;  in getZeroVector()
3888   return DAG.getBitcast(VT, Vec);  in getZeroVector()
3916   EVT VT = Vec.getValueType();  in extractSubVector()  local
3917   EVT ElVT = VT.getVectorElementType();  in extractSubVector()
3918   unsigned Factor = VT.getSizeInBits() / vectorWidth;  in extractSubVector()
3920                                   VT.getVectorNumElements() / Factor);  in extractSubVector()
3926   // This is the index of the first element of the vectorWidth-bit chunk  in extractSubVector()
3928   IdxVal &= ~(ElemsPerChunk - 1);  in extractSubVector()
3933                               Vec->ops().slice(IdxVal, ElemsPerChunk));  in extractSubVector()
3945 /// Generate a DAG to grab 128-bits from a vector > 128 bits.  This
3947 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
3949 /// 128 bits we want.  It need not be aligned to a 128-bit boundary.  That makes
3958 /// Generate a DAG to grab 256-bits from a 512-bit vector.
3973   EVT VT = Vec.getValueType();  in insertSubVector()  local
3974   EVT ElVT = VT.getVectorElementType();  in insertSubVector()
3981   // This is the index of the first element of the vectorWidth-bit chunk  in insertSubVector()
3983   IdxVal &= ~(ElemsPerChunk - 1);  in insertSubVector()
3989 /// Generate a DAG to put 128-bits into a vector > 128 bits.  This
3991 /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
3993 /// we want.  It need not be aligned to a 128-bit boundary.  That makes
4003 static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,  in widenSubVector()  argument
4006   assert(Vec.getValueSizeInBits().getFixedValue() <= VT.getFixedSizeInBits() &&  in widenSubVector()
4007          Vec.getValueType().getScalarType() == VT.getScalarType() &&  in widenSubVector()
4009   SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)  in widenSubVector()
4010                                 : DAG.getUNDEF(VT);  in widenSubVector()
4011   return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,  in widenSubVector()
4025   MVT VT = MVT::getVectorVT(SVT, WideNumElts);  in widenSubVector()  local
4026   return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);  in widenSubVector()
4031 static MVT widenMaskVectorType(MVT VT, const X86Subtarget &Subtarget) {  in widenMaskVectorType()  argument
4032   assert(VT.getVectorElementType() == MVT::i1 && "Expected bool vector");  in widenMaskVectorType()
4033   unsigned NumElts = VT.getVectorNumElements();  in widenMaskVectorType()
4036   return VT;  in widenMaskVectorType()
4044   MVT VT = widenMaskVectorType(Vec.getSimpleValueType(), Subtarget);  in widenMaskVector()  local
4045   return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);  in widenMaskVector()
4055   if (N->getOpcode() == ISD::CONCAT_VECTORS) {  in collectConcatOps()
4056     Ops.append(N->op_begin(), N->op_end());  in collectConcatOps()
4060   if (N->getOpcode() == ISD::INSERT_SUBVECTOR) {  in collectConcatOps()
4061     SDValue Src = N->getOperand(0);  in collectConcatOps()
4062     SDValue Sub = N->getOperand(1);  in collectConcatOps()
4063     const APInt &Idx = N->getConstantOperandAPInt(2);  in collectConcatOps()
4064     EVT VT = Src.getValueType();  in collectConcatOps()  local
4067     if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2)) {  in collectConcatOps()
4074       if (Idx == (VT.getVectorNumElements() / 2)) {  in collectConcatOps()
4143   EVT VT = Op.getValueType();  in splitVector()  local
4144   unsigned NumElems = VT.getVectorNumElements();  in splitVector()
4145   unsigned SizeInBits = VT.getSizeInBits();  in splitVector()
4149   // If this is a splat value (with no-undefs) then use the lower subvector,  in splitVector()
4162   EVT VT = Op.getValueType();  in splitVectorOp()  local
4177   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);  in splitVectorOp()
4178   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,  in splitVectorOp()
4187   // Make sure we only try to split 256/512-bit types to avoid creating  in splitVectorIntUnary()
4189   [[maybe_unused]] EVT VT = Op.getValueType();  in splitVectorIntUnary()  local
4192          (VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");  in splitVectorIntUnary()
4194              VT.getVectorNumElements() &&  in splitVectorIntUnary()
4204   [[maybe_unused]] EVT VT = Op.getValueType();  in splitVectorIntBinary()  local
4205   assert(Op.getOperand(0).getValueType() == VT &&  in splitVectorIntBinary()
4206          Op.getOperand(1).getValueType() == VT && "Unexpected VTs!");  in splitVectorIntBinary()
4207   assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");  in splitVectorIntBinary()
4213 // Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
4214 // 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
4215 // deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
4220                          const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,  in SplitOpsAndApply()  argument
4226     if (VT.getSizeInBits() > 512) {  in SplitOpsAndApply()
4227       NumSubs = VT.getSizeInBits() / 512;  in SplitOpsAndApply()
4228       assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");  in SplitOpsAndApply()
4231     if (VT.getSizeInBits() > 256) {  in SplitOpsAndApply()
4232       NumSubs = VT.getSizeInBits() / 256;  in SplitOpsAndApply()
4233       assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size");  in SplitOpsAndApply()
4236     if (VT.getSizeInBits() > 128) {  in SplitOpsAndApply()
4237       NumSubs = VT.getSizeInBits() / 128;  in SplitOpsAndApply()
4238       assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size");  in SplitOpsAndApply()
4256   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);  in SplitOpsAndApply()
4259 // Helper function that extends a non-512-bit vector op to 512-bits on non-VLX
4261 static SDValue getAVX512Node(unsigned Opcode, const SDLoc &DL, MVT VT,  in getAVX512Node()  argument
4265   MVT SVT = VT.getScalarType();  in getAVX512Node()
4271     // AVX512 broadcasts 32/64-bit operands.  in getAVX512Node()
4272     // TODO: Support float once getAVX512Node is used by fp-ops.  in getAVX512Node()
4283       if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,  in getAVX512Node()
4291   bool Widen = !(Subtarget.hasVLX() || VT.is512BitVector());  in getAVX512Node()
4293   MVT DstVT = VT;  in getAVX512Node()
4304     assert(OpVT == VT && "Vector type mismatch");  in getAVX512Node()
4318   // Perform the 512-bit op then extract the bottom subvector.  in getAVX512Node()
4320     Res = extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());  in getAVX512Node()
4324 /// Insert i1-subvector to i1-vector.
4394     if (llvm::all_of(Vec->ops().slice(IdxVal + SubVecNumElems),  in insert1BitVector()
4400       unsigned ShiftLeft = NumElems - SubVecNumElems;  in insert1BitVector()
4401       unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;  in insert1BitVector()
4427       SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);  in insert1BitVector()
4442   unsigned ShiftLeft = NumElems - SubVecNumElems;  in insert1BitVector()
4443   unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;  in insert1BitVector()
4469   unsigned LowShift = NumElems - IdxVal;  in insert1BitVector()
4477   SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,  in insert1BitVector()  local
4479   High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High,  in insert1BitVector()
4483   Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High);  in insert1BitVector()
4497   EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts);  in concatSubVectors()  local
4498   SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth);  in concatSubVectors()
4505 static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {  in getOnesVector()  argument
4506   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&  in getOnesVector()
4507          "Expected a 128/256/512-bit vector type");  in getOnesVector()
4508   unsigned NumElts = VT.getSizeInBits() / 32;  in getOnesVector()
4510   return DAG.getBitcast(VT, Vec);  in getOnesVector()
4513 static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT,  in getEXTEND_VECTOR_INREG()  argument
4516   assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");  in getEXTEND_VECTOR_INREG()
4521   // For 256-bit vectors, we only need the lower (128-bit) input half.  in getEXTEND_VECTOR_INREG()
4522   // For 512-bit vectors, we only need the lower input half or quarter.  in getEXTEND_VECTOR_INREG()
4524     assert(VT.getSizeInBits() == InVT.getSizeInBits() &&  in getEXTEND_VECTOR_INREG()
4526     unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();  in getEXTEND_VECTOR_INREG()
4528                           std::max(128U, (unsigned)VT.getSizeInBits() / Scale));  in getEXTEND_VECTOR_INREG()
4532   if (VT.getVectorNumElements() != InVT.getVectorNumElements())  in getEXTEND_VECTOR_INREG()
4535   return DAG.getNode(Opcode, DL, VT, In);  in getEXTEND_VECTOR_INREG()
4539 static SDValue getBitSelect(const SDLoc &DL, MVT VT, SDValue LHS, SDValue RHS,  in getBitSelect()  argument
4541   LHS = DAG.getNode(ISD::AND, DL, VT, LHS, Mask);  in getBitSelect()
4542   RHS = DAG.getNode(X86ISD::ANDNP, DL, VT, Mask, RHS);  in getBitSelect()
4543   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);  in getBitSelect()
4546 void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask,  in createUnpackShuffleMask()  argument
4548   assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 &&  in createUnpackShuffleMask()
4551   int NumElts = VT.getVectorNumElements();  in createUnpackShuffleMask()
4552   int NumEltsInLane = 128 / VT.getScalarSizeInBits();  in createUnpackShuffleMask()
4562 /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
4564 /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
4565 /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
4566 void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,  in createSplat2ShuffleMask()  argument
4569   int NumElts = VT.getVectorNumElements();  in createSplat2ShuffleMask()
4578 static SDValue getVectorShuffle(SelectionDAG &DAG, EVT VT, const SDLoc &dl,  in getVectorShuffle()  argument
4582     SmallVector<SDValue> Ops(Mask.size(), DAG.getUNDEF(VT.getScalarType()));  in getVectorShuffle()
4592     return DAG.getBuildVector(VT, dl, Ops);  in getVectorShuffle()
4595   return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);  in getVectorShuffle()
4599 static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT,  in getUnpackl()  argument
4602   createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);  in getUnpackl()
4603   return getVectorShuffle(DAG, VT, dl, V1, V2, Mask);  in getUnpackl()
4607 static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT,  in getUnpackh()  argument
4610   createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);  in getUnpackh()
4611   return getVectorShuffle(DAG, VT, dl, V1, V2, Mask);  in getUnpackh()
4618                        const SDLoc &dl, MVT VT, SDValue LHS, SDValue RHS,  in getPack()  argument
4621   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in getPack()
4624          VT.getSizeInBits() == OpVT.getSizeInBits() &&  in getPack()
4630   // Rely on vector shuffles for vXi64 -> vXi32 packing.  in getPack()
4634     int NumElts = VT.getVectorNumElements();  in getPack()
4641     return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, LHS),  in getPack()
4642                                 DAG.getBitcast(VT, RHS), PackMask);  in getPack()
4650       return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS);  in getPack()
4654       return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS);  in getPack()
4664       SDValue Mask = DAG.getConstant((1ULL << EltSizeInBits) - 1, dl, OpVT);  in getPack()
4668     return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS);  in getPack()
4677   return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS);  in getPack()
4688   MVT VT = V2.getSimpleValueType();  in getShuffleVectorZeroOrUndef()  local
4690     ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);  in getShuffleVectorZeroOrUndef()
4691   int NumElems = VT.getVectorNumElements();  in getShuffleVectorZeroOrUndef()
4696   return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);  in getShuffleVectorZeroOrUndef()
4706 // TODO: Add support for non-zero offsets.
4709   if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0)  in getTargetConstantFromBasePtr()
4711   return CNode->getConstVal();  in getTargetConstantFromBasePtr()
4717   return getTargetConstantFromBasePtr(Load->getBasePtr());  in getTargetConstantFromNode()
4741   EVT VT = Op.getValueType();  in getTargetConstantBitsFromNode()  local
4742   unsigned SizeInBits = VT.getSizeInBits();  in getTargetConstantBitsFromNode()
4812       Mask = CInt->getValue();  in getTargetConstantBitsFromNode()
4816       Mask = CFP->getValueAPF().bitcastToAPInt();  in getTargetConstantBitsFromNode()
4820       Type *Ty = CDS->getType();  in getTargetConstantBitsFromNode()
4821       Mask = APInt::getZero(Ty->getPrimitiveSizeInBits());  in getTargetConstantBitsFromNode()
4822       Type *EltTy = CDS->getElementType();  in getTargetConstantBitsFromNode()
4823       bool IsInteger = EltTy->isIntegerTy();  in getTargetConstantBitsFromNode()
4825           EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();  in getTargetConstantBitsFromNode()
4828       unsigned EltBits = EltTy->getPrimitiveSizeInBits();  in getTargetConstantBitsFromNode()
4829       for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)  in getTargetConstantBitsFromNode()
4831           Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits);  in getTargetConstantBitsFromNode()
4833           Mask.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),  in getTargetConstantBitsFromNode()
4850     SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());  in getTargetConstantBitsFromNode()
4855     APInt RawBits = Cst->getValueAPF().bitcastToAPInt();  in getTargetConstantBitsFromNode()
4864     unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();  in getTargetConstantBitsFromNode()
4865     if (BV->getConstantRawBits(true, SrcEltSizeInBits, SrcEltBits, Undefs)) {  in getTargetConstantBitsFromNode()
4876     Type *CstTy = Cst->getType();  in getTargetConstantBitsFromNode()
4877     unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();  in getTargetConstantBitsFromNode()
4878     if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)  in getTargetConstantBitsFromNode()
4881     unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();  in getTargetConstantBitsFromNode()
4889       if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],  in getTargetConstantBitsFromNode()
4898       EltSizeInBits <= VT.getScalarSizeInBits()) {  in getTargetConstantBitsFromNode()
4900     if (MemIntr->getMemoryVT().getStoreSizeInBits() != VT.getScalarSizeInBits())  in getTargetConstantBitsFromNode()
4903     SDValue Ptr = MemIntr->getBasePtr();  in getTargetConstantBitsFromNode()
4905       unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();  in getTargetConstantBitsFromNode()
4915         SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);  in getTargetConstantBitsFromNode()
4924     SDValue Ptr = MemIntr->getBasePtr();  in getTargetConstantBitsFromNode()
4928       Type *CstTy = Cst->getType();  in getTargetConstantBitsFromNode()
4929       unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();  in getTargetConstantBitsFromNode()
4930       unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits();  in getTargetConstantBitsFromNode()
4931       if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 ||  in getTargetConstantBitsFromNode()
4934       unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();  in getTargetConstantBitsFromNode()
4941         if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i],  in getTargetConstantBitsFromNode()
4957     unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();  in getTargetConstantBitsFromNode()
4964     SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));  in getTargetConstantBitsFromNode()
4970     // If bitcasts to larger elements we might lose track of undefs - don't  in getTargetConstantBitsFromNode()
4972     unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();  in getTargetConstantBitsFromNode()
4995     // TODO - support extract_subvector through bitcasts.  in getTargetConstantBitsFromNode()
4996     if (EltSizeInBits != VT.getScalarSizeInBits())  in getTargetConstantBitsFromNode()
5004       unsigned NumSubElts = VT.getVectorNumElements();  in getTargetConstantBitsFromNode()
5017     // TODO - support shuffle through bitcasts.  in getTargetConstantBitsFromNode()
5018     if (EltSizeInBits != VT.getScalarSizeInBits())  in getTargetConstantBitsFromNode()
5021     ArrayRef<int> Mask = SVN->getMask();  in getTargetConstantBitsFromNode()
5050         if (UndefElts1[M - NumElts])  in getTargetConstantBitsFromNode()
5052         EltBits.push_back(EltBits1[M - NumElts]);  in getTargetConstantBitsFromNode()
5069     int SplatIndex = -1;  in isConstantSplat()
5074         SplatIndex = -1;  in isConstantSplat()
5108 // Match not(xor X, -1) -> X.
5109 // Match not(pcmpgt(C, X)) -> pcmpgt(X, C - 1).
5110 // Match not(extract_subvector(xor X, -1)) -> extract_subvector(X).
5111 // Match not(concat_vectors(xor X, -1, xor Y, -1)) -> concat_vectors(X, Y).
5135       // Don't fold min_signed_value -> (min_signed_value - 1)  in IsNOT()
5139         Elt -= 1;  in IsNOT()
5143         MVT VT = V.getSimpleValueType();  in IsNOT()  local
5144         return DAG.getNode(X86ISD::PCMPGT, DL, VT, V.getOperand(1),  in IsNOT()
5145                            getConstVector(EltBits, UndefElts, VT, DAG, DL));  in IsNOT()
5162 /// A multi-stage pack shuffle mask is created by specifying NumStages > 1.
5164 static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,  in createPackShuffleMask()  argument
5167   unsigned NumElts = VT.getVectorNumElements();  in createPackShuffleMask()
5168   unsigned NumLanes = VT.getSizeInBits() / 128;  in createPackShuffleMask()
5169   unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits();  in createPackShuffleMask()
5171   unsigned Repetitions = 1u << (NumStages - 1);  in createPackShuffleMask()
5186 static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,  in getPackDemandedElts()  argument
5188   int NumLanes = VT.getSizeInBits() / 128;  in getPackDemandedElts()
5211 static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts,  in getHorizDemandedElts()  argument
5213   getHorizDemandedEltsForFirstOperand(VT.getSizeInBits(), DemandedElts,  in getHorizDemandedElts()
5219 /// Calculates the shuffle mask corresponding to the target-specific opcode.
5225 /// It is an error to call this with non-empty Mask/Ops vectors.
5232   MVT VT = N.getSimpleValueType();  in getTargetShuffleMask()  local
5233   unsigned NumElems = VT.getVectorNumElements();  in getTargetShuffleMask()
5234   unsigned MaskEltSize = VT.getScalarSizeInBits();  in getTargetShuffleMask()
5246     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5247     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5248     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5253     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5254     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5255     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5260     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5261     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5262     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5267     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5277     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5278     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5288     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5289     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5294     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5295     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5300     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5301     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5306     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5307     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5312     assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&  in getTargetShuffleMask()
5313            "Only 32-bit and 64-bit elements are supported!");  in getTargetShuffleMask()
5314     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5315     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5316     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5323     assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");  in getTargetShuffleMask()
5324     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5325     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5326     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5333     assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");  in getTargetShuffleMask()
5334     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5335     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5340     assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");  in getTargetShuffleMask()
5341     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5342     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5348     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5349     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5354     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5355     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5360     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5361     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5366     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5371     // We only decode broadcasts of same-sized vectors, peeking through to  in getTargetShuffleMask()
5374     if (N.getOperand(0).getValueType() == VT) {  in getTargetShuffleMask()
5381     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5392     assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");  in getTargetShuffleMask()
5393     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5394     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5404     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5405     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5412     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5413     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5417     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5418     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5419     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5424     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5425     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5426     ImmN = N.getConstantOperandVal(N.getNumOperands() - 1);  in getTargetShuffleMask()
5431     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5436     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5441     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5446     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5447     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5452       unsigned CtrlImm = CtrlOp->getZExtValue();  in getTargetShuffleMask()
5463     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5464     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5474     assert(N.getOperand(1).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5487     assert(N.getOperand(0).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5488     assert(N.getOperand(2).getValueType() == VT && "Unexpected value type");  in getTargetShuffleMask()
5514   // inputs that are actually the same node. Re-map the mask to always point  in getTargetShuffleMask()
5519         M -= Mask.size();  in getTargetShuffleMask()
5521   // If we didn't already add operands in the opcode-specific code, default to  in getTargetShuffleMask()
5587       int Scale = Size / V->getNumOperands();  in computeZeroableShuffleElements()
5594         APInt Val = Cst->getAPIntValue();  in computeZeroableShuffleElements()
5599         APInt Val = Cst->getValueAPF().bitcastToAPInt();  in computeZeroableShuffleElements()
5610       int Scale = V->getNumOperands() / Size;  in computeZeroableShuffleElements()
5638   MVT VT = N.getSimpleValueType();  in getTargetShuffleAndZeroables()  local
5650   assert((VT.getSizeInBits() % Size) == 0 &&  in getTargetShuffleAndZeroables()
5652   unsigned EltSizeInBits = VT.getSizeInBits() / Size;  in getTargetShuffleAndZeroables()
5689     // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF.  in getTargetShuffleAndZeroables()
5690     // TODO: We currently only set UNDEF for integer types - floats use the same  in getTargetShuffleAndZeroables()
5697       if (Idx != 0 && !VT.isFloatingPoint())  in getTargetShuffleAndZeroables()
5704     // INSERT_SUBVECTOR - to widen vectors we often insert them into UNDEF  in getTargetShuffleAndZeroables()
5727   assert(VT.getVectorNumElements() == (unsigned)Size &&  in getTargetShuffleAndZeroables()
5813   MVT VT = N.getSimpleValueType();  in getFauxShuffleMask()  local
5814   unsigned NumElts = VT.getVectorNumElements();  in getFauxShuffleMask()
5815   unsigned NumSizeInBits = VT.getSizeInBits();  in getFauxShuffleMask()
5816   unsigned NumBitsPerElt = VT.getScalarSizeInBits();  in getFauxShuffleMask()
5827     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(N)->getMask();  in getFauxShuffleMask()
5838     // Attempt to decode as a per-byte mask.  in getFauxShuffleMask()
5849     // We can't assume an undef src element gives an undef dst - the other src  in getFauxShuffleMask()
5906     if (!N->isOnlyUserOf(Sub.getNode()))  in getFauxShuffleMask()
5933     // Limit this to vXi64 512-bit vector cases to make the most of AVX512  in getFauxShuffleMask()
6011       // Check we have an in-range constant insertion index.  in getFauxShuffleMask()
6092     getPackDemandedElts(VT, DemandedElts, EltsLHS, EltsRHS);  in getFauxShuffleMask()
6104       // PACKSS then it was likely being used for sign-extension for a  in getFauxShuffleMask()
6106       if (N0.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N0.getNode()) &&  in getFauxShuffleMask()
6111       if (N1.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N1.getNode()) &&  in getFauxShuffleMask()
6131     createPackShuffleMask(VT, Mask, IsUnary);  in getFauxShuffleMask()
6164     Mask.append(NumElts - NumSrcElts, SM_SentinelZero);  in getFauxShuffleMask()
6190           Mask[i + j] = i + j - ByteShift;  in getFauxShuffleMask()
6194           Mask[i + j - ByteShift] = i + j;  in getFauxShuffleMask()
6206     Offset = (X86ISD::VROTLI == Opcode ? NumBytesPerElt - Offset : Offset);  in getFauxShuffleMask()
6221               VT.getScalarType())  in getFauxShuffleMask()
6239     // We can only handle all-signbits extensions.  in getFauxShuffleMask()
6295           M -= MaskWidth;  in resolveTargetShuffleInputsAndMask()
6306           M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth);  in resolveTargetShuffleInputsAndMask()
6330   EVT VT = Op.getValueType();  in getTargetShuffleInputs()  local
6331   if (!VT.isSimple() || !VT.isVector())  in getTargetShuffleInputs()
6361   EVT VT = Op.getValueType();  in getTargetShuffleInputs()  local
6362   if (!VT.isSimple() || !VT.isVector())  in getTargetShuffleInputs()
6372 static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT,  in getBROADCAST_LOAD()  argument
6379   // Ensure this is a simple (non-atomic, non-volatile), temporal read memop.  in getBROADCAST_LOAD()
6380   if (!Mem || !Mem->readMem() || !Mem->isSimple() || Mem->isNonTemporal())  in getBROADCAST_LOAD()
6383   SDValue Ptr = DAG.getMemBasePlusOffset(Mem->getBasePtr(),  in getBROADCAST_LOAD()
6385   SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in getBROADCAST_LOAD()
6386   SDValue Ops[] = {Mem->getChain(), Ptr};  in getBROADCAST_LOAD()
6390           Mem->getMemOperand(), Offset, MemVT.getStoreSize()));  in getBROADCAST_LOAD()
6402   EVT VT = Op.getValueType();  in getShuffleScalarElt()  local
6404   unsigned NumElems = VT.getVectorNumElements();  in getShuffleScalarElt()
6408     int Elt = SV->getMaskElt(Index);  in getShuffleScalarElt()
6411       return DAG.getUNDEF(VT.getVectorElementType());  in getShuffleScalarElt()
6413     SDValue Src = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1);  in getShuffleScalarElt()
6419     MVT ShufVT = VT.getSimpleVT();  in getShuffleScalarElt()
6447       return getShuffleScalarElt(Sub, Index - SubIdx, DAG, Depth + 1);  in getShuffleScalarElt()
6478   // For insert_vector_elt - either return the index matching scalar or recurse  in getShuffleScalarElt()
6489                         : DAG.getUNDEF(VT.getVectorElementType());  in getShuffleScalarElt()
6503   MVT VT = Op.getSimpleValueType();  in LowerBuildVectorAsInsert()  local
6504   unsigned NumElts = VT.getVectorNumElements();  in LowerBuildVectorAsInsert()
6505   assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) ||  in LowerBuildVectorAsInsert()
6506           ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) &&  in LowerBuildVectorAsInsert()
6523         V = getZeroVector(VT, Subtarget, DAG, DL);  in LowerBuildVectorAsInsert()
6525         assert(0 == i && "Expected insertion into zero-index");  in LowerBuildVectorAsInsert()
6528         V = DAG.getBitcast(VT, V);  in LowerBuildVectorAsInsert()
6532     V = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, V, Op.getOperand(i),  in LowerBuildVectorAsInsert()
6548   // SSE4.1 - use PINSRB to insert each byte directly.  in LowerBuildVectorv16i8()
6555   // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.  in LowerBuildVectorv16i8()
6556   // If both the lowest 16-bits are non-zero, then convert to MOVD.  in LowerBuildVectorv16i8()
6602     // insert into zero vector. Otherwise, use SCALAR_TO_VECTOR (leaves high  in LowerBuildVectorv16i8()
6647     MVT VT = Op.getSimpleValueType();  in LowerBuildVectorv4x32()  local
6648     MVT EltVT = VT.getVectorElementType();  in LowerBuildVectorv4x32()
6653     SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops));  in LowerBuildVectorv4x32()
6655     return DAG.getBitcast(VT, Dup);  in LowerBuildVectorv4x32()
6665   assert(Zeroable.size() - Zeroable.count() > 1 &&  in LowerBuildVectorv4x32()
6666          "We expect at least two non-zero elements!");  in LowerBuildVectorv4x32()
6679     // Make sure that this node is extracting from a 128-bit vector.  in LowerBuildVectorv4x32()
6680     MVT VT = Elt.getOperand(0).getSimpleValueType();  in LowerBuildVectorv4x32()  local
6681     if (!VT.is128BitVector())  in LowerBuildVectorv4x32()
6691   MVT VT = V1.getSimpleValueType();  in LowerBuildVectorv4x32()  local
6704     Elt = Op->getOperand(EltIdx);  in LowerBuildVectorv4x32()
6715                                ? DAG.getUNDEF(VT)  in LowerBuildVectorv4x32()
6716                                : getZeroVector(VT, Subtarget, DAG, DL);  in LowerBuildVectorv4x32()
6717     if (V1.getSimpleValueType() != VT)  in LowerBuildVectorv4x32()
6718       V1 = DAG.getBitcast(VT, V1);  in LowerBuildVectorv4x32()
6719     return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZeroOrUndef, Mask);  in LowerBuildVectorv4x32()
6735     SDValue Current = Op->getOperand(i);  in LowerBuildVectorv4x32()
6736     SDValue SrcVector = Current->getOperand(0);  in LowerBuildVectorv4x32()
6745   assert(V1.getNode() && "Expected at least two non-zero elements!");  in LowerBuildVectorv4x32()
6758   return DAG.getBitcast(VT, Result);  in LowerBuildVectorv4x32()
6762 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,  in getVShift()  argument
6765   assert(VT.is128BitVector() && "Unknown type for VShift");  in getVShift()
6771   return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));  in getVShift()
6774 static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,  in LowerAsSplatVectorLoad()  argument
6781     SDValue Ptr = LD->getBasePtr();  in LowerAsSplatVectorLoad()
6782     if (!ISD::isNormalLoad(LD) || !LD->isSimple())  in LowerAsSplatVectorLoad()
6784     EVT PVT = LD->getValueType(0);  in LowerAsSplatVectorLoad()
6788     int FI = -1;  in LowerAsSplatVectorLoad()
6791       FI = FINode->getIndex();  in LowerAsSplatVectorLoad()
6795       FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();  in LowerAsSplatVectorLoad()
6802     // FIXME: 256-bit vector instructions don't require a strict alignment,  in LowerAsSplatVectorLoad()
6804     Align RequiredAlign(VT.getSizeInBits() / 8);  in LowerAsSplatVectorLoad()
6805     SDValue Chain = LD->getChain();  in LowerAsSplatVectorLoad()
6826     int64_t StartOffset = Offset & ~int64_t(RequiredAlign.value() - 1);  in LowerAsSplatVectorLoad()
6833     int EltNo = (Offset - StartOffset) >> 2;  in LowerAsSplatVectorLoad()
6834     unsigned NumElems = VT.getVectorNumElements();  in LowerAsSplatVectorLoad()
6838                              LD->getPointerInfo().getWithOffset(StartOffset));  in LowerAsSplatVectorLoad()
6852     if (!BaseLd->isSimple())  in findEltLoadSrc()
6866       uint64_t Amt = AmtC->getZExtValue();  in findEltLoadSrc()
6880         uint64_t Idx = IdxC->getZExtValue();  in findEltLoadSrc()
6891 /// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
6895 /// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
6896 static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,  in EltsFromConsecutiveLoads()  argument
6900   if ((VT.getScalarSizeInBits() % 8) != 0)  in EltsFromConsecutiveLoads()
6905   int LastLoadedElt = -1;  in EltsFromConsecutiveLoads()
6931     if ((NumElems * EltSizeInBits) != VT.getSizeInBits())  in EltsFromConsecutiveLoads()
6936     unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0);  in EltsFromConsecutiveLoads()
6947   // Handle Special Cases - all undef or undef/zero.  in EltsFromConsecutiveLoads()
6949     return DAG.getUNDEF(VT);  in EltsFromConsecutiveLoads()
6951     return VT.isInteger() ? DAG.getConstant(0, DL, VT)  in EltsFromConsecutiveLoads()
6952                           : DAG.getConstantFP(0.0, DL, VT);  in EltsFromConsecutiveLoads()
6964   int NumLoadedElts = (1 + LastLoadedElt - FirstLoadedElt);  in EltsFromConsecutiveLoads()
6966   assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected");  in EltsFromConsecutiveLoads()
6978       int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);  in EltsFromConsecutiveLoads()
6983                                               EltIdx - FirstLoadedElt);  in EltsFromConsecutiveLoads()
7003   auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {  in EltsFromConsecutiveLoads()  argument
7004     auto MMOFlags = LDBase->getMemOperand()->getFlags();  in EltsFromConsecutiveLoads()
7005     assert(LDBase->isSimple() &&  in EltsFromConsecutiveLoads()
7008         DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),  in EltsFromConsecutiveLoads()
7009                     LDBase->getPointerInfo(), LDBase->getOriginalAlign(),  in EltsFromConsecutiveLoads()
7018   bool IsDereferenceable = LDBase->getPointerInfo().isDereferenceable(  in EltsFromConsecutiveLoads()
7019       VT.getSizeInBits() / 8, *DAG.getContext(), DAG.getDataLayout());  in EltsFromConsecutiveLoads()
7021   // LOAD - all consecutive load/undefs (must start/end with a load or be  in EltsFromConsecutiveLoads()
7029     if (IsAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))  in EltsFromConsecutiveLoads()
7032     // Don't create 256-bit non-temporal aligned loads without AVX2 as these  in EltsFromConsecutiveLoads()
7034     if (LDBase->isNonTemporal() && LDBase->getAlign() >= Align(32) &&  in EltsFromConsecutiveLoads()
7035         VT.is256BitVector() && !Subtarget.hasInt256())  in EltsFromConsecutiveLoads()
7039       return DAG.getBitcast(VT, Elts[FirstLoadedElt]);  in EltsFromConsecutiveLoads()
7042       return CreateLoad(VT, LDBase);  in EltsFromConsecutiveLoads()
7044     // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded  in EltsFromConsecutiveLoads()
7046     if (!IsAfterLegalize && VT.isVector()) {  in EltsFromConsecutiveLoads()
7047       unsigned NumMaskElts = VT.getVectorNumElements();  in EltsFromConsecutiveLoads()
7050         SmallVector<int, 4> ClearMask(NumMaskElts, -1);  in EltsFromConsecutiveLoads()
7058         SDValue V = CreateLoad(VT, LDBase);  in EltsFromConsecutiveLoads()
7059         SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)  in EltsFromConsecutiveLoads()
7060                                    : DAG.getConstantFP(0.0, DL, VT);  in EltsFromConsecutiveLoads()
7061         return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);  in EltsFromConsecutiveLoads()
7067   if (VT.is256BitVector() || VT.is512BitVector()) {  in EltsFromConsecutiveLoads()
7071           EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);  in EltsFromConsecutiveLoads()
7076         return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),  in EltsFromConsecutiveLoads()
7081   // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.  in EltsFromConsecutiveLoads()
7085       ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {  in EltsFromConsecutiveLoads()
7086     MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits)  in EltsFromConsecutiveLoads()
7088     MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSizeInBits);  in EltsFromConsecutiveLoads()
7090     // FIXME: Add more isel patterns so we can just use VT directly.  in EltsFromConsecutiveLoads()
7091     if (!Subtarget.hasSSE2() && VT == MVT::v4f32)  in EltsFromConsecutiveLoads()
7095       SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };  in EltsFromConsecutiveLoads()
7097           X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(),  in EltsFromConsecutiveLoads()
7098           LDBase->getOriginalAlign(), MachineMemOperand::MOLoad);  in EltsFromConsecutiveLoads()
7102       return DAG.getBitcast(VT, ResNode);  in EltsFromConsecutiveLoads()
7106   // BROADCAST - match the smallest possible repetition pattern, load that  in EltsFromConsecutiveLoads()
7109       (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {  in EltsFromConsecutiveLoads()
7116       // Don't attempt a 1:N subvector broadcast - it should be caught by  in EltsFromConsecutiveLoads()
7140           VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64))  in EltsFromConsecutiveLoads()
7148                            VT.getSizeInBits() / ScalarSize);  in EltsFromConsecutiveLoads()
7154             while (Broadcast.getValueSizeInBits() < VT.getSizeInBits())  in EltsFromConsecutiveLoads()
7166           return DAG.getBitcast(VT, Broadcast);  in EltsFromConsecutiveLoads()
7177 // are consecutive, non-overlapping, and in the right order.
7178 static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL,  in combineToConsecutiveLoads()  argument
7183   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {  in combineToConsecutiveLoads()
7190   assert(Elts.size() == VT.getVectorNumElements());  in combineToConsecutiveLoads()
7191   return EltsFromConsecutiveLoads(VT, Elts, DL, DAG, Subtarget,  in combineToConsecutiveLoads()
7195 static Constant *getConstantVector(MVT VT, ArrayRef<APInt> Bits,  in getConstantVector()  argument
7197   unsigned ScalarSize = VT.getScalarSizeInBits();  in getConstantVector()
7198   Type *Ty = EVT(VT.getScalarType()).getTypeForEVT(C);  in getConstantVector()
7200   auto getConstantScalar = [&](const APInt &Val) -> Constant * {  in getConstantVector()
7201     if (VT.isFloatingPoint()) {  in getConstantVector()
7220 static Constant *getConstantVector(MVT VT, const APInt &SplatValue,  in getConstantVector()  argument
7222   unsigned ScalarSize = VT.getScalarSizeInBits();  in getConstantVector()
7224   auto getConstantScalar = [&](const APInt &Val) -> Constant * {  in getConstantVector()
7225     if (VT.isFloatingPoint()) {  in getConstantVector()
7249   for (auto *U : N->uses()) {  in isFoldableUseOfShuffle()
7250     unsigned Opc = U->getOpcode();  in isFoldableUseOfShuffle()
7252     if (Opc == X86ISD::VPERMV && U->getOperand(0).getNode() == N)  in isFoldableUseOfShuffle()
7254     if (Opc == X86ISD::VPERMV3 && U->getOperand(1).getNode() == N)  in isFoldableUseOfShuffle()
7260     if (N->hasOneUse()) {  in isFoldableUseOfShuffle()
7263       if (Opc == X86ISD::VPDPBUSD && U->getOperand(2).getNode() != N)  in isFoldableUseOfShuffle()
7283   // TODO: Splats could be generated for non-AVX CPUs using SSE  in lowerBuildVectorAsBroadcast()
7284   // instructions, but there's less potential gain for only 128-bit vectors.  in lowerBuildVectorAsBroadcast()
7288   MVT VT = BVOp->getSimpleValueType(0);  in lowerBuildVectorAsBroadcast()  local
7289   unsigned NumElts = VT.getVectorNumElements();  in lowerBuildVectorAsBroadcast()
7291   assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&  in lowerBuildVectorAsBroadcast()
7298   if (BVOp->getRepeatedSequence(Sequence, &UndefElements)) {  in lowerBuildVectorAsBroadcast()
7325       MVT EltType = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);  in lowerBuildVectorAsBroadcast()
7329         if (!VT.is512BitVector() && !Subtarget.hasVLX()) {  in lowerBuildVectorAsBroadcast()
7330           unsigned Scale = 512 / VT.getSizeInBits();  in lowerBuildVectorAsBroadcast()
7334         if (BcstVT.getSizeInBits() != VT.getSizeInBits())  in lowerBuildVectorAsBroadcast()
7335           Bcst = extractSubVector(Bcst, 0, DAG, dl, VT.getSizeInBits());  in lowerBuildVectorAsBroadcast()
7336         return DAG.getBitcast(VT, Bcst);  in lowerBuildVectorAsBroadcast()
7342   if (!Ld || (NumElts - NumUndefElts) <= 1) {  in lowerBuildVectorAsBroadcast()
7347     if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&  in lowerBuildVectorAsBroadcast()
7348         SplatBitSize > VT.getScalarSizeInBits() &&  in lowerBuildVectorAsBroadcast()
7349         SplatBitSize < VT.getSizeInBits()) {  in lowerBuildVectorAsBroadcast()
7361         Constant *C = getConstantVector(VT, SplatValue, SplatBitSize, *Ctx);  in lowerBuildVectorAsBroadcast()
7363         unsigned Repeat = VT.getSizeInBits() / SplatBitSize;  in lowerBuildVectorAsBroadcast()
7365         Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();  in lowerBuildVectorAsBroadcast()
7373         return DAG.getBitcast(VT, Brdcst);  in lowerBuildVectorAsBroadcast()
7377         Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize, *Ctx);  in lowerBuildVectorAsBroadcast()
7379         unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();  in lowerBuildVectorAsBroadcast()
7380         MVT VVT = MVT::getVectorVT(VT.getScalarType(), NumElm);  in lowerBuildVectorAsBroadcast()
7381         Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign();  in lowerBuildVectorAsBroadcast()
7382         SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in lowerBuildVectorAsBroadcast()
7397     if (!Ld || NumElts - NumUndefElts != 1)  in lowerBuildVectorAsBroadcast()
7408   // TODO: Handle broadcasts of non-constant sequences.  in lowerBuildVectorAsBroadcast()
7410   // Make sure that all of the users of a non-constant load are from the  in lowerBuildVectorAsBroadcast()
7412   // FIXME: Is the use count needed for non-constant, non-load case?  in lowerBuildVectorAsBroadcast()
7413   if (!ConstSplatVal && !IsLoad && !BVOp->isOnlyUserOf(Ld.getNode()))  in lowerBuildVectorAsBroadcast()
7417   bool IsGE256 = (VT.getSizeInBits() >= 256);  in lowerBuildVectorAsBroadcast()
7431   // TODO: Check if splatting is recommended for other AVX-capable CPUs.  in lowerBuildVectorAsBroadcast()
7446         C = CI->getConstantIntValue();  in lowerBuildVectorAsBroadcast()
7448         C = CF->getConstantFPValue();  in lowerBuildVectorAsBroadcast()
7454       Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();  in lowerBuildVectorAsBroadcast()
7456       SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in lowerBuildVectorAsBroadcast()
7465   // Handle AVX2 in-register broadcasts.  in lowerBuildVectorAsBroadcast()
7468     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);  in lowerBuildVectorAsBroadcast()
7474   // Make sure the non-chain result is only used by this build vector.  in lowerBuildVectorAsBroadcast()
7475   if (!Ld->hasNUsesOfValue(NumElts - NumUndefElts, 0))  in lowerBuildVectorAsBroadcast()
7481     SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in lowerBuildVectorAsBroadcast()
7482     SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};  in lowerBuildVectorAsBroadcast()
7485                                 LN->getMemoryVT(), LN->getMemOperand());  in lowerBuildVectorAsBroadcast()
7490   // The integer check is needed for the 64-bit into 128-bit so it doesn't match  in lowerBuildVectorAsBroadcast()
7495     SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in lowerBuildVectorAsBroadcast()
7496     SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};  in lowerBuildVectorAsBroadcast()
7499                                 LN->getMemoryVT(), LN->getMemOperand());  in lowerBuildVectorAsBroadcast()
7505     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);  in lowerBuildVectorAsBroadcast()
7518   int Idx = ExtIdx->getAsZExtVal();  in getUnderlyingExtractedFromVec()
7522   // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already  in getUnderlyingExtractedFromVec()
7533   SDValue ShuffleVec = SVOp->getOperand(0);  in getUnderlyingExtractedFromVec()
7538   int ShuffleIdx = SVOp->getMaskElt(Idx);  in getUnderlyingExtractedFromVec()
7548   MVT VT = Op.getSimpleValueType();  in buildFromShuffleMostly()  local
7552   if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))  in buildFromShuffleMostly()
7559   SmallVector<int, 8> Mask(NumElems, -1);  in buildFromShuffleMostly()
7579     // Quit if non-constant index.  in buildFromShuffleMostly()
7585     if (ExtractedFromVec.getValueType() != VT)  in buildFromShuffleMostly()
7607   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);  in buildFromShuffleMostly()
7608   SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);  in buildFromShuffleMostly()
7611     NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),  in buildFromShuffleMostly()
7620   MVT VT = Op.getSimpleValueType();  in LowerBUILD_VECTORvXbf16()  local
7622       VT.changeVectorElementType(Subtarget.hasFP16() ? MVT::f16 : MVT::i16);  in LowerBUILD_VECTORvXbf16()
7628   return DAG.getBitcast(VT, Res);  in LowerBUILD_VECTORvXbf16()
7636   MVT VT = Op.getSimpleValueType();  in LowerBUILD_VECTORvXi1()  local
7637   assert((VT.getVectorElementType() == MVT::i1) &&  in LowerBUILD_VECTORvXi1()
7647   int SplatIdx = -1;  in LowerBUILD_VECTORvXi1()
7653       Immediate |= (InC->getZExtValue() & 0x1) << idx;  in LowerBUILD_VECTORvXi1()
7664   // for splat use " (select i1 splat_elt, all-ones, all-zeroes)"  in LowerBUILD_VECTORvXi1()
7671     assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!");  in LowerBUILD_VECTORvXi1()
7677     if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {  in LowerBUILD_VECTORvXi1()
7684       MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));  in LowerBUILD_VECTORvXi1()
7688       MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;  in LowerBUILD_VECTORvXi1()
7690       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Select,  in LowerBUILD_VECTORvXi1()
7698     if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {  in LowerBUILD_VECTORvXi1()
7705       MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));  in LowerBUILD_VECTORvXi1()
7707       MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;  in LowerBUILD_VECTORvXi1()
7709       DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec,  in LowerBUILD_VECTORvXi1()
7713     DstVec = DAG.getUNDEF(VT);  in LowerBUILD_VECTORvXi1()
7716     DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,  in LowerBUILD_VECTORvXi1()
7738 /// 128-bit partial horizontal operation on a 256-bit vector, but that operation
7739 /// may not match the layout of an x86 256-bit horizontal instruction.
7753 /// horizontal operations, but the index-matching logic is incorrect for that.
7755 /// code because it is only used for partial h-op matching now?
7760   EVT VT = N->getValueType(0);  in isHorizontalBinOpPart()  local
7761   assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops");  in isHorizontalBinOpPart()
7763   assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&  in isHorizontalBinOpPart()
7769   unsigned NumElts = LastIdx - BaseIdx;  in isHorizontalBinOpPart()
7770   V0 = DAG.getUNDEF(VT);  in isHorizontalBinOpPart()
7771   V1 = DAG.getUNDEF(VT);  in isHorizontalBinOpPart()
7775     SDValue Op = N->getOperand(i + BaseIdx);  in isHorizontalBinOpPart()
7778     if (Op->isUndef()) {  in isHorizontalBinOpPart()
7786     CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();  in isHorizontalBinOpPart()
7810         if (V0.getValueType() != VT)  in isHorizontalBinOpPart()
7816         if (V1.getValueType() != VT)  in isHorizontalBinOpPart()
7839 /// Emit a sequence of two 128-bit horizontal add/sub followed by
7843 /// This function expects two 256-bit vectors called V0 and V1.
7844 /// At first, each vector is split into two separate 128-bit vectors.
7845 /// Then, the resulting 128-bit vectors are used to implement two
7850 /// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to
7853 /// the lower 128-bit of V0 and the upper 128-bit of V0. The second
7854 /// horizontal binop dag node would take as input the lower 128-bit of V1
7855 /// and the upper 128-bit of V1.
7861 /// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
7862 /// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
7868 /// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
7869 /// the upper 128-bits of the result.
7874   MVT VT = V0.getSimpleValueType();  in ExpandHorizontalBinOp()  local
7875   assert(VT.is256BitVector() && VT == V1.getSimpleValueType() &&  in ExpandHorizontalBinOp()
7878   unsigned NumElts = VT.getVectorNumElements();  in ExpandHorizontalBinOp()
7890     if (!isUndefLO && !V0->isUndef())  in ExpandHorizontalBinOp()
7892     if (!isUndefHI && !V1->isUndef())  in ExpandHorizontalBinOp()
7896     if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef()))  in ExpandHorizontalBinOp()
7899     if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef()))  in ExpandHorizontalBinOp()
7903   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);  in ExpandHorizontalBinOp()
7908 /// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1
7909 /// (SUBADD = Opnd0 -+ Opnd1) operation are written to the parameters
7917   MVT VT = BV->getSimpleValueType(0);  in isAddSubOrSubAdd()  local
7918   if (!Subtarget.hasSSE3() || !VT.isFloatingPoint())  in isAddSubOrSubAdd()
7921   unsigned NumElts = VT.getVectorNumElements();  in isAddSubOrSubAdd()
7922   SDValue InVec0 = DAG.getUNDEF(VT);  in isAddSubOrSubAdd()
7923   SDValue InVec1 = DAG.getUNDEF(VT);  in isAddSubOrSubAdd()
7927   // Odd-numbered elements in the input build vector are obtained from  in isAddSubOrSubAdd()
7929   // Even-numbered elements in the input build vector are obtained from  in isAddSubOrSubAdd()
7933     SDValue Op = BV->getOperand(i);  in isAddSubOrSubAdd()
7969       if (InVec0.getSimpleValueType() != VT)  in isAddSubOrSubAdd()
7974       if (InVec1.getSimpleValueType() != VT)  in isAddSubOrSubAdd()
8036 /// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
8043       !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA())  in isFMAddSubOrFMSubAdd()
8077   MVT VT = BV->getSimpleValueType(0);  in lowerToAddSubOrFMAddSub()  local
8083     return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);  in lowerToAddSubOrFMAddSub()
8090   // There are no known X86 targets with 512-bit ADDSUB instructions!  in lowerToAddSubOrFMAddSub()
8092   if (VT.is512BitVector()) {  in lowerToAddSubOrFMAddSub()
8094     for (int I = 0, E = VT.getVectorNumElements(); I != E; I += 2) {  in lowerToAddSubOrFMAddSub()
8098     SDValue Sub = DAG.getNode(ISD::FSUB, DL, VT, Opnd0, Opnd1);  in lowerToAddSubOrFMAddSub()
8099     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, Opnd0, Opnd1);  in lowerToAddSubOrFMAddSub()
8100     return DAG.getVectorShuffle(VT, DL, Sub, Add, Mask);  in lowerToAddSubOrFMAddSub()
8103   return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);  in lowerToAddSubOrFMAddSub()
8109   MVT VT = BV->getSimpleValueType(0);  in isHopBuildVector()  local
8111   V0 = DAG.getUNDEF(VT);  in isHopBuildVector()
8112   V1 = DAG.getUNDEF(VT);  in isHopBuildVector()
8114   // x86 256-bit horizontal ops are defined in a non-obvious way. Each 128-bit  in isHopBuildVector()
8115   // half of the result is calculated independently from the 128-bit halves of  in isHopBuildVector()
8116   // the inputs, so that makes the index-checking logic below more complicated.  in isHopBuildVector()
8117   unsigned NumElts = VT.getVectorNumElements();  in isHopBuildVector()
8119   unsigned Num128BitChunks = VT.is256BitVector() ? 2 : 1;  in isHopBuildVector()
8125       SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j);  in isHopBuildVector()
8137         // clang-format off  in isHopBuildVector()
8143         // clang-format on  in isHopBuildVector()
8156       // The source vector is chosen based on which 64-bit half of the  in isHopBuildVector()
8200   // This is free (examples: zmm --> xmm, xmm --> ymm).  in getHopForBuildVector()
8201   MVT VT = BV->getSimpleValueType(0);  in getHopForBuildVector()  local
8202   unsigned Width = VT.getSizeInBits();  in getHopForBuildVector()
8206     V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, DL, Width);  in getHopForBuildVector()
8211     V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, DL, Width);  in getHopForBuildVector()
8213   unsigned NumElts = VT.getVectorNumElements();  in getHopForBuildVector()
8216     if (BV->getOperand(i).isUndef())  in getHopForBuildVector()
8221   if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) {  in getHopForBuildVector()
8222     MVT HalfVT = VT.getHalfNumVectorElementsVT();  in getHopForBuildVector()
8226     return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, DL, 256);  in getHopForBuildVector()
8229   return DAG.getNode(HOpcode, DL, VT, V0, V1);  in getHopForBuildVector()
8236   // We need at least 2 non-undef elements to make this worthwhile by default.  in LowerToHorizontalOp()
8238       count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });  in LowerToHorizontalOp()
8243   // int/FP at 128-bit/256-bit. Each type was introduced with a different  in LowerToHorizontalOp()
8245   MVT VT = BV->getSimpleValueType(0);  in LowerToHorizontalOp()  local
8246   if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||  in LowerToHorizontalOp()
8247       ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||  in LowerToHorizontalOp()
8248       ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||  in LowerToHorizontalOp()
8249       ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {  in LowerToHorizontalOp()
8256   // Try harder to match 256-bit ops by using extract/concat.  in LowerToHorizontalOp()
8257   if (!Subtarget.hasAVX() || !VT.is256BitVector())  in LowerToHorizontalOp()
8261   unsigned NumElts = VT.getVectorNumElements();  in LowerToHorizontalOp()
8266     if (BV->getOperand(i)->isUndef())  in LowerToHorizontalOp()
8270     if (BV->getOperand(i)->isUndef())  in LowerToHorizontalOp()
8274   if (VT == MVT::v8i32 || VT == MVT::v16i16) {  in LowerToHorizontalOp()
8306       assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");  in LowerToHorizontalOp()
8314   if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||  in LowerToHorizontalOp()
8315       VT == MVT::v16i16) {  in LowerToHorizontalOp()
8360   MVT VT = Op->getSimpleValueType(0);  in lowerBuildVectorToBitOp()  local
8361   unsigned NumElems = VT.getVectorNumElements();  in lowerBuildVectorToBitOp()
8366   unsigned Opcode = Op->getOperand(0).getOpcode();  in lowerBuildVectorToBitOp()
8368     if (Opcode != Op->getOperand(i).getOpcode())  in lowerBuildVectorToBitOp()
8384     // Don't do this if the buildvector is a splat - we'd replace one  in lowerBuildVectorToBitOp()
8386     if (Op->getSplatValue())  in lowerBuildVectorToBitOp()
8388     if (!TLI.isOperationLegalOrPromote(Opcode, VT))  in lowerBuildVectorToBitOp()
8394   for (SDValue Elt : Op->ops()) {  in lowerBuildVectorToBitOp()
8403     if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {  in lowerBuildVectorToBitOp()
8406       RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());  in lowerBuildVectorToBitOp()
8415   // TODO: Permit non-uniform XOP/AVX2/MULLO cases?  in lowerBuildVectorToBitOp()
8419   SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);  in lowerBuildVectorToBitOp()
8420   SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);  in lowerBuildVectorToBitOp()
8421   SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);  in lowerBuildVectorToBitOp()
8437   MVT VT = Op.getSimpleValueType();  in materializeVectorConstant()  local
8443   // Vectors containing all ones can be matched by pcmpeqd on 128-bit width  in materializeVectorConstant()
8444   // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use  in materializeVectorConstant()
8445   // vpcmpeqd on 256-bit vectors.  in materializeVectorConstant()
8447     if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)  in materializeVectorConstant()
8450     return getOnesVector(VT, DAG, DL);  in materializeVectorConstant()
8459 static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,  in createVariablePermute()  argument
8462   MVT ShuffleVT = VT;  in createVariablePermute()
8463   EVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger();  in createVariablePermute()
8464   unsigned NumElts = VT.getVectorNumElements();  in createVariablePermute()
8465   unsigned SizeInBits = VT.getSizeInBits();  in createVariablePermute()
8467   // Adjust IndicesVec to match VT size.  in createVariablePermute()
8474                                     NumElts * VT.getScalarSizeInBits());  in createVariablePermute()
8478     // Zero-extend the index elements within the vector.  in createVariablePermute()
8485   // Handle SrcVec that don't match VT type.  in createVariablePermute()
8490       VT = MVT::getVectorVT(VT.getScalarType(), Scale * NumElts);  in createVariablePermute()
8491       IndicesVT = EVT(VT).changeVectorElementTypeToInteger();  in createVariablePermute()
8495           createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);  in createVariablePermute()
8500       // Widen smaller SrcVec to match VT.  in createVariablePermute()
8501       SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));  in createVariablePermute()
8515     // e.g. v4i32 -> v16i8 (Scale = 4)  in createVariablePermute()
8531   switch (VT.SimpleTy) {  in createVariablePermute()
8564       // SSE41 can compare v2i64 - select between indices 0 and 1.  in createVariablePermute()
8568           DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {0, 0}),  in createVariablePermute()
8569           DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {1, 1}),  in createVariablePermute()
8582           ISD::CONCAT_VECTORS, DL, VT,  in createVariablePermute()
8588       SDValue LoLo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Lo);  in createVariablePermute()
8589       SDValue HiHi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Hi, Hi);  in createVariablePermute()
8596         EVT VT = Idx.getValueType();  in createVariablePermute()  local
8597         return DAG.getSelectCC(DL, Idx, DAG.getConstant(15, DL, VT),  in createVariablePermute()
8598                                DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[1], Idx),  in createVariablePermute()
8599                                DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[0], Idx),  in createVariablePermute()
8614           VT, createVariablePermute(  in createVariablePermute()
8631             VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi,  in createVariablePermute()
8640       return DAG.getBitcast(VT, Res);  in createVariablePermute()
8647         MVT WidenSrcVT = MVT::getVectorVT(VT.getScalarType(), 8);  in createVariablePermute()
8667             VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi,  in createVariablePermute()
8676       return DAG.getBitcast(VT, Res);  in createVariablePermute()
8698   assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) &&  in createVariablePermute()
8699          (VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 &&  in createVariablePermute()
8702   uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits();  in createVariablePermute()
8713   return DAG.getBitcast(VT, Res);  in createVariablePermute()
8716 // Tries to lower a BUILD_VECTOR composed of extract-extract chains that can be
8717 // reasoned to be a permutation of a vector by indices in a non-constant vector.
8721 // ->
8726 // construction of vectors with constant-0 elements.
8733   // This is done by checking that the i-th build_vector operand is of the form:  in LowerBUILD_VECTORAsVariablePermute()
8747     SDValue ExtractedIndex = Op->getOperand(1);  in LowerBUILD_VECTORAsVariablePermute()
8764     if (!PermIdx || PermIdx->getAPIntValue() != Idx)  in LowerBUILD_VECTORAsVariablePermute()
8768   MVT VT = V.getSimpleValueType();  in LowerBUILD_VECTORAsVariablePermute()  local
8769   return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);  in LowerBUILD_VECTORAsVariablePermute()
8776   MVT VT = Op.getSimpleValueType();  in LowerBUILD_VECTOR()  local
8777   MVT EltVT = VT.getVectorElementType();  in LowerBUILD_VECTOR()
8782   if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())  in LowerBUILD_VECTOR()
8785   if (VT.getVectorElementType() == MVT::bf16 &&  in LowerBUILD_VECTOR()
8808       OneUseFrozenUndefs = OneUseFrozenUndefs && Elt->hasOneUse();  in LowerBUILD_VECTOR()
8815       NumConstants--;  in LowerBUILD_VECTOR()
8826     return DAG.getUNDEF(VT);  in LowerBUILD_VECTOR()
8830     return DAG.getFreeze(DAG.getUNDEF(VT));  in LowerBUILD_VECTOR()
8834     return getZeroVector(VT, Subtarget, DAG, dl);  in LowerBUILD_VECTOR()
8836   // If we have multiple FREEZE-UNDEF operands, we are likely going to end up  in LowerBUILD_VECTOR()
8838   // our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR,  in LowerBUILD_VECTOR()
8839   // and blend the FREEZE-UNDEF operands back in.  in LowerBUILD_VECTOR()
8840   // FIXME: is this worthwhile even for a single FREEZE-UNDEF operand?  in LowerBUILD_VECTOR()
8843     SmallVector<int, 16> BlendMask(NumElems, -1);  in LowerBUILD_VECTOR()
8847         BlendMask[i] = -1;  in LowerBUILD_VECTOR()
8856     SDValue EltsBV = DAG.getBuildVector(VT, dl, Elts);  in LowerBUILD_VECTOR()
8858     SDValue FrozenUndefBV = DAG.getSplatBuildVector(VT, dl, FrozenUndefElt);  in LowerBUILD_VECTOR()
8859     return DAG.getVectorShuffle(VT, dl, EltsBV, FrozenUndefBV, BlendMask);  in LowerBUILD_VECTOR()
8867   if ((VT.is256BitVector() || VT.is512BitVector()) &&  in LowerBUILD_VECTOR()
8873       if (VT.is512BitVector() &&  in LowerBUILD_VECTOR()
8874           NumUpperUndefsOrZeros >= (NumElems - (NumElems / 4)))  in LowerBUILD_VECTOR()
8875         UpperElems = NumElems - (NumElems / 4);  in LowerBUILD_VECTOR()
8878       MVT LowerVT = MVT::getVectorVT(EltVT, NumElems - UpperElems);  in LowerBUILD_VECTOR()
8880           DAG.getBuildVector(LowerVT, dl, Op->ops().drop_back(UpperElems));  in LowerBUILD_VECTOR()
8881       return widenSubVector(VT, NewBV, !UndefUpper, Subtarget, DAG, dl);  in LowerBUILD_VECTOR()
8897   // If we are inserting one variable into a vector of non-zero constants, try  in LowerBUILD_VECTOR()
8901   // constants. Insertion into a zero vector is handled as a special-case  in LowerBUILD_VECTOR()
8903   if (NumConstants == NumElems - 1 && NumNonZero != 1 &&  in LowerBUILD_VECTOR()
8905       (isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||  in LowerBUILD_VECTOR()
8906        isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {  in LowerBUILD_VECTOR()
8907     // Create an all-constant vector. The variable element in the old  in LowerBUILD_VECTOR()
8918         ConstVecOps[i] = ConstantInt::get(Context, C->getAPIntValue());  in LowerBUILD_VECTOR()
8920         ConstVecOps[i] = ConstantFP::get(Context, C->getValueAPF());  in LowerBUILD_VECTOR()
8929     SDValue DAGConstVec = DAG.getConstantPool(CV, VT);  in LowerBUILD_VECTOR()
8940     SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);  in LowerBUILD_VECTOR()
8941     unsigned InsertC = InsIndex->getAsZExtVal();  in LowerBUILD_VECTOR()
8942     unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits();  in LowerBUILD_VECTOR()
8944       return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);  in LowerBUILD_VECTOR()
8946     // There's no good way to insert into the high elements of a >128-bit  in LowerBUILD_VECTOR()
8948     assert(VT.getSizeInBits() > 128 && "Invalid insertion index?");  in LowerBUILD_VECTOR()
8949     assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector");  in LowerBUILD_VECTOR()
8951     unsigned NumElts = VT.getVectorNumElements();  in LowerBUILD_VECTOR()
8954     SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt);  in LowerBUILD_VECTOR()
8955     return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask);  in LowerBUILD_VECTOR()
8958   // Special case for single non-zero, non-undef, element.  in LowerBUILD_VECTOR()
8963     // If we have a constant or non-constant insertion into the low element of  in LowerBUILD_VECTOR()
8969         return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);  in LowerBUILD_VECTOR()
8974         assert((VT.is128BitVector() || VT.is256BitVector() ||  in LowerBUILD_VECTOR()
8975                 VT.is512BitVector()) &&  in LowerBUILD_VECTOR()
8977         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);  in LowerBUILD_VECTOR()
8987         MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);  in LowerBUILD_VECTOR()
8990         return DAG.getBitcast(VT, Item);  in LowerBUILD_VECTOR()
8998       unsigned NumBits = VT.getSizeInBits();  in LowerBUILD_VECTOR()
8999       return getVShift(true, VT,  in LowerBUILD_VECTOR()
9001                                    VT, Op.getOperand(1)),  in LowerBUILD_VECTOR()
9009     // is a non-constant being inserted into an element other than the low one,  in LowerBUILD_VECTOR()
9014       Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);  in LowerBUILD_VECTOR()
9028       if (Op.getNode()->isOnlyUserOf(Item.getNode()))  in LowerBUILD_VECTOR()
9029         return LowerAsSplatVectorLoad(Item, VT, dl, DAG);  in LowerBUILD_VECTOR()
9035   // handled, so this is best done with a single constant-pool load.  in LowerBUILD_VECTOR()
9044     SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);  in LowerBUILD_VECTOR()
9046             EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))  in LowerBUILD_VECTOR()
9050   // If this is a splat of pairs of 32-bit elements, we can use a narrower  in LowerBUILD_VECTOR()
9064       MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64;  in LowerBUILD_VECTOR()
9069       // Broadcast from v2i64/v2f64 and cast to final VT.  in LowerBUILD_VECTOR()
9071       return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT,  in LowerBUILD_VECTOR()
9076   // For AVX-length vectors, build the individual 128-bit pieces and use  in LowerBUILD_VECTOR()
9078   if (VT.getSizeInBits() > 128) {  in LowerBUILD_VECTOR()
9083         DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2));  in LowerBUILD_VECTOR()
9085         HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2));  in LowerBUILD_VECTOR()
9091   // Let legalizer expand 2-wide build_vectors.  in LowerBUILD_VECTOR()
9096       SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,  in LowerBUILD_VECTOR()
9103   // If element VT is < 32 bits, convert it to inserts into a zero vector.  in LowerBUILD_VECTOR()
9114   // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS  in LowerBUILD_VECTOR()
9119   // If element VT is == 32 bits, turn it into a number of shuffles.  in LowerBUILD_VECTOR()
9125         Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);  in LowerBUILD_VECTOR()
9127         Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));  in LowerBUILD_VECTOR()
9137           Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]);  in LowerBUILD_VECTOR()
9140           Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);  in LowerBUILD_VECTOR()
9143           Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);  in LowerBUILD_VECTOR()
9156     return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec);  in LowerBUILD_VECTOR()
9159   assert(Values.size() > 1 && "Expected non-undef and non-splat vector");  in LowerBUILD_VECTOR()
9165   // For SSE 4.1, use insertps to put the high elements into the low element.  in LowerBUILD_VECTOR()
9169       Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));  in LowerBUILD_VECTOR()
9171       Result = DAG.getUNDEF(VT);  in LowerBUILD_VECTOR()
9175       Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,  in LowerBUILD_VECTOR()
9182   // our (non-undef) elements to the full vector width with the element in the  in LowerBUILD_VECTOR()
9187       Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));  in LowerBUILD_VECTOR()
9189       Ops[i] = DAG.getUNDEF(VT);  in LowerBUILD_VECTOR()
9203     Mask.append(NumElems - Mask.size(), SM_SentinelUndef);  in LowerBUILD_VECTOR()
9206       Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);  in LowerBUILD_VECTOR()
9211 // 256-bit AVX can use the vinsertf128 instruction
9212 // to create 256-bit vectors from two other 128-bit ones.
9220           ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");  in LowerAVXCONCAT_VECTORS()
9247   // If we have more than 2 non-zeros, build each half separately.  in LowerAVXCONCAT_VECTORS()
9250     ArrayRef<SDUse> Ops = Op->ops();  in LowerAVXCONCAT_VECTORS()
9279 // k-register.
9306   // If we are inserting non-zero vector and there are zeros in LSBs and undef  in LowerCONCAT_VECTORSvXi1()
9310       Log2_64(NonZeros) != NumOperands - 1) {  in LowerCONCAT_VECTORSvXi1()
9322   // If there are zero or one non-zeros we can handle this very simply.  in LowerCONCAT_VECTORSvXi1()
9336     ArrayRef<SDUse> Ops = Op->ops();  in LowerCONCAT_VECTORSvXi1()
9359   MVT VT = Op.getSimpleValueType();  in LowerCONCAT_VECTORS()  local
9360   if (VT.getVectorElementType() == MVT::i1)  in LowerCONCAT_VECTORS()
9363   assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||  in LowerCONCAT_VECTORS()
9364          (VT.is512BitVector() && (Op.getNumOperands() == 2 ||  in LowerCONCAT_VECTORS()
9367   // AVX can use the vinsertf128 instruction to create 256-bit vectors  in LowerCONCAT_VECTORS()
9368   // from two other 128-bit ones.  in LowerCONCAT_VECTORS()
9370   // 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors  in LowerCONCAT_VECTORS()
9374 //===----------------------------------------------------------------------===//
9383 //===----------------------------------------------------------------------===//
9385 /// Tiny helper function to identify a no-op mask.
9388 /// array input, which is assumed to be a single-input shuffle mask of the kind
9391 /// in-place shuffle are 'no-op's.
9394     assert(Mask[i] >= -1 && "Out of bound mask element!");  in isNoopShuffleMask()
9404 /// X86 divides up its shuffles into in-lane and cross-lane shuffle operations
9420 /// Test whether there are elements crossing 128-bit lanes in this
9422 static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {  in is128BitLaneCrossingShuffleMask()  argument
9423   return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask);  in is128BitLaneCrossingShuffleMask()
9427 /// from multiple lanes - this is different to isLaneCrossingShuffleMask to
9440       int SrcLane = -1;  in isMultiLaneShuffleMask()
9455 /// Test whether a shuffle mask is equivalent within each sub-lane.
9458 /// lane-relative shuffle in each sub-lane. This trivially implies
9459 /// that it is also not lane-crossing. It may however involve a blend from the
9463 /// non-trivial to compute in the face of undef lanes. The representation is
9464 /// suitable for use with existing 128-bit shuffles as entries from the second
9466 static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,  in isRepeatedShuffleMask()  argument
9469   auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();  in isRepeatedShuffleMask()
9470   RepeatedMask.assign(LaneSize, -1);  in isRepeatedShuffleMask()
9480     // Ok, handle the in-lane shuffles by detecting if and when they repeat.  in isRepeatedShuffleMask()
9485       // This is the first non-undef entry in this slot of a 128-bit lane.  in isRepeatedShuffleMask()
9494 /// Test whether a shuffle mask is equivalent within each 128-bit lane.
9496 is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,  in is128BitLaneRepeatedShuffleMask()  argument
9498   return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);  in is128BitLaneRepeatedShuffleMask()
9502 is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {  in is128BitLaneRepeatedShuffleMask()  argument
9504   return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);  in is128BitLaneRepeatedShuffleMask()
9507 /// Test whether a shuffle mask is equivalent within each 256-bit lane.
9509 is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,  in is256BitLaneRepeatedShuffleMask()  argument
9511   return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask);  in is256BitLaneRepeatedShuffleMask()
9514 /// Test whether a target shuffle mask is equivalent within each sub-lane.
9537     // Handle the in-lane shuffles by detecting if and when they repeat. Adjust  in isRepeatedTargetShuffleMask()
9542       // This is the first non-undef entry in this slot of a 128-bit lane.  in isRepeatedTargetShuffleMask()
9551 /// Test whether a target shuffle mask is equivalent within each sub-lane.
9553 static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,  in isRepeatedTargetShuffleMask()  argument
9556   return isRepeatedTargetShuffleMask(LaneSizeInBits, VT.getScalarSizeInBits(),  in isRepeatedTargetShuffleMask()
9593       MVT VT = Op.getSimpleValueType();  in IsElementEquivalent()  local
9594       int NumElts = VT.getVectorNumElements();  in IsElementEquivalent()
9596         int NumLanes = VT.getSizeInBits() / 128;  in IsElementEquivalent()
9620 /// each element of the mask is either -1 (signifying undef) or the value given
9630     assert(Mask[i] >= -1 && "Out of bound mask element!");  in isShuffleEquivalent()
9636       MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);  in isShuffleEquivalent()
9637       ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);  in isShuffleEquivalent()
9649 /// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding
9654 static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,  in isTargetShuffleEquivalent()  argument
9666   // Check for out-of-range target shuffle mask indices.  in isTargetShuffleEquivalent()
9671   if (V1 && (V1.getValueSizeInBits() != VT.getSizeInBits() ||  in isTargetShuffleEquivalent()
9674   if (V2 && (V2.getValueSizeInBits() != VT.getSizeInBits() ||  in isTargetShuffleEquivalent()
9693         int BitIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);  in isTargetShuffleEquivalent()
9702       MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);  in isTargetShuffleEquivalent()
9703       ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);  in isTargetShuffleEquivalent()
9715 static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT,  in isUnpackWdShuffleMask()  argument
9717   if (VT != MVT::v8i32 && VT != MVT::v8f32)  in isUnpackWdShuffleMask()
9726   bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd, DAG) ||  in isUnpackWdShuffleMask()
9727                          isTargetShuffleEquivalent(VT, Mask, Unpckhwd, DAG));  in isUnpackWdShuffleMask()
9733   // Create 128-bit vector type based on mask size.  in is128BitUnpackShuffleMask()
9735   MVT VT = MVT::getVectorVT(EltVT, Mask.size());  in is128BitUnpackShuffleMask()  local
9741   // Match any of unary/binary or low/high.  in is128BitUnpackShuffleMask()
9744     createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2);  in is128BitUnpackShuffleMask()
9745     if (isTargetShuffleEquivalent(VT, Mask, UnpackMask, DAG) ||  in is128BitUnpackShuffleMask()
9746         isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask, DAG))  in is128BitUnpackShuffleMask()
9766 /// Get a 4-lane 8-bit shuffle immediate for a mask.
9768 /// This helper function produces an 8-bit shuffle immediate corresponding to
9775   assert(Mask.size() == 4 && "Only 4-lane shuffle masks");  in getV4X86ShuffleImm()
9776   assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");  in getV4X86ShuffleImm()
9777   assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");  in getV4X86ShuffleImm()
9778   assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");  in getV4X86ShuffleImm()
9779   assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");  in getV4X86ShuffleImm()
9781   // If the mask only uses one non-undef element, then fully 'splat' it to  in getV4X86ShuffleImm()
9783   int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin();  in getV4X86ShuffleImm()
9808 // The function looks for a sub-mask that the nonzero elements are in
9809 // increasing order. If such sub-mask exist. The function returns true.
9813   int NextElement = -1;  in isNonZeroElementsInOrder()
9817     assert(Mask[i] >= -1 && "Out of bound mask element!");  in isNonZeroElementsInOrder()
9836 static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,  in lowerShuffleWithPSHUFB()  argument
9842   int LaneSize = 128 / VT.getScalarSizeInBits();  in lowerShuffleWithPSHUFB()
9843   const int NumBytes = VT.getSizeInBits() / 8;  in lowerShuffleWithPSHUFB()
9844   const int NumEltBytes = VT.getScalarSizeInBits() / 8;  in lowerShuffleWithPSHUFB()
9846   assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||  in lowerShuffleWithPSHUFB()
9847          (Subtarget.hasAVX2() && VT.is256BitVector()) ||  in lowerShuffleWithPSHUFB()
9848          (Subtarget.hasBWI() && VT.is512BitVector()));  in lowerShuffleWithPSHUFB()
9885       VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),  in lowerShuffleWithPSHUFB()
9894 static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT,  in lowerShuffleToEXPAND()  argument
9905       MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));  in lowerShuffleToEXPAND()
9907   unsigned NumElts = VT.getVectorNumElements();  in lowerShuffleToEXPAND()
9912   SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);  in lowerShuffleToEXPAND()
9914   return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask);  in lowerShuffleToEXPAND()
9917 static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,  in matchShuffleWithUNPCK()  argument
9922   int NumElts = VT.getVectorNumElements();  in matchShuffleWithUNPCK()
9938   createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);  in matchShuffleWithUNPCK()
9939   if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG, V1,  in matchShuffleWithUNPCK()
9942     V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));  in matchShuffleWithUNPCK()
9943     V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);  in matchShuffleWithUNPCK()
9947   createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);  in matchShuffleWithUNPCK()
9948   if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG, V1,  in matchShuffleWithUNPCK()
9951     V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));  in matchShuffleWithUNPCK()
9952     V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);  in matchShuffleWithUNPCK()
9959     if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&  in matchShuffleWithUNPCK()
9978       V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;  in matchShuffleWithUNPCK()
9979       V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;  in matchShuffleWithUNPCK()
9987     if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG)) {  in matchShuffleWithUNPCK()
9994     if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG)) {  in matchShuffleWithUNPCK()
10006 static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT,  in lowerShuffleWithUNPCK()  argument
10010   createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false);  in lowerShuffleWithUNPCK()
10012     return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);  in lowerShuffleWithUNPCK()
10015   createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, /* Unary = */ false);  in lowerShuffleWithUNPCK()
10017     return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);  in lowerShuffleWithUNPCK()
10022     return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1);  in lowerShuffleWithUNPCK()
10026     return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1);  in lowerShuffleWithUNPCK()
10031 /// Check if the mask can be mapped to a preliminary shuffle (vperm 64-bit)
10032 /// followed by unpack 256-bit.
10033 static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT,  in lowerShuffleWithUNPCK256()  argument
10037   createSplat2ShuffleMask(VT, Unpckl, /* Lo */ true);  in lowerShuffleWithUNPCK256()
10038   createSplat2ShuffleMask(VT, Unpckh, /* Lo */ false);  in lowerShuffleWithUNPCK256()
10048   // This is a "natural" unpack operation (rather than the 128-bit sectored  in lowerShuffleWithUNPCK256()
10049   // operation implemented by AVX). We need to rearrange 64-bit chunks of the  in lowerShuffleWithUNPCK256()
10053   V1 = DAG.getBitcast(VT, V1);  in lowerShuffleWithUNPCK256()
10054   return DAG.getNode(UnpackOpcode, DL, VT, V1, V1);  in lowerShuffleWithUNPCK256()
10059 static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT,  in matchShuffleAsVTRUNC()  argument
10062   if (!VT.is512BitVector() && !Subtarget.hasVLX())  in matchShuffleAsVTRUNC()
10066   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in matchShuffleAsVTRUNC()
10076     unsigned UpperElts = NumElts - NumSrcElts;  in matchShuffleAsVTRUNC()
10126   // Non-VLX targets must truncate from a 512-bit type, so we need to  in getAVX512TruncNode()
10158 static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleWithVPMOV()  argument
10163   assert((VT == MVT::v16i8 || VT == MVT::v8i16) && "Unexpected VTRUNC type");  in lowerShuffleWithVPMOV()
10167   unsigned NumElts = VT.getVectorNumElements();  in lowerShuffleWithVPMOV()
10168   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in lowerShuffleWithVPMOV()
10173     unsigned UpperElts = NumElts - NumSrcElts;  in lowerShuffleWithVPMOV()
10201     return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);  in lowerShuffleWithVPMOV()
10208 static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsVTRUNC()  argument
10213   assert((VT.is128BitVector() || VT.is256BitVector()) &&  in lowerShuffleAsVTRUNC()
10218   unsigned NumElts = VT.getVectorNumElements();  in lowerShuffleAsVTRUNC()
10219   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in lowerShuffleAsVTRUNC()
10222     // TODO: Support non-BWI VPMOVWB truncations?  in lowerShuffleAsVTRUNC()
10237       unsigned UpperElts = NumElts - NumSrcElts;  in lowerShuffleAsVTRUNC()
10264       // and truncate from the double-sized src.  in lowerShuffleAsVTRUNC()
10265       MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2);  in lowerShuffleAsVTRUNC()
10279       return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);  in lowerShuffleAsVTRUNC()
10307 /// FIXME: Evaluate whether either AVX or AVX-512 have any opportunities here
10318          "We should only be called with masks with a power-of-2 size!");  in canLowerByDroppingElements()
10320   uint64_t ModMask = (uint64_t)ShuffleModulus - 1;  in canLowerByDroppingElements()
10323   // We track whether the input is viable for all power-of-2 strides 2^1, 2^2,  in canLowerByDroppingElements()
10340         if ((uint64_t)(Mask[i] - Offset) == (((uint64_t)i << N) & ModMask))  in canLowerByDroppingElements()
10362 static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,  in matchShuffleWithPACK()  argument
10367   unsigned NumElts = VT.getVectorNumElements();  in matchShuffleWithPACK()
10368   unsigned BitSize = VT.getScalarSizeInBits();  in matchShuffleWithPACK()
10374     unsigned NumPackedBits = NumSrcBits - BitSize;  in matchShuffleWithPACK()
10417     createPackShuffleMask(VT, BinaryMask, false, NumStages);  in matchShuffleWithPACK()
10418     if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, DAG, V1, V2))  in matchShuffleWithPACK()
10424     createPackShuffleMask(VT, UnaryMask, true, NumStages);  in matchShuffleWithPACK()
10425     if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, DAG, V1))  in matchShuffleWithPACK()
10433 static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,  in lowerShuffleWithPACK()  argument
10438   unsigned SizeBits = VT.getSizeInBits();  in lowerShuffleWithPACK()
10439   unsigned EltBits = VT.getScalarSizeInBits();  in lowerShuffleWithPACK()
10441   if (!matchShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,  in lowerShuffleWithPACK()
10448   // Don't lower multi-stage packs on AVX512, truncation is better.  in lowerShuffleWithPACK()
10453   // vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB.  in lowerShuffleWithPACK()
10473   assert(Res && Res.getValueType() == VT &&  in lowerShuffleWithPACK()
10482 static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsBitMask()  argument
10487   MVT MaskVT = VT;  in lowerShuffleAsBitMask()
10488   MVT EltVT = VT.getVectorElementType();  in lowerShuffleAsBitMask()
10496   MVT LogicVT = VT;  in lowerShuffleAsBitMask()
10524     return SDValue(); // No non-zeroable elements!  in lowerShuffleAsBitMask()
10530   return DAG.getBitcast(VT, And);  in lowerShuffleAsBitMask()
10538 static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsBitBlend()  argument
10541   assert(VT.isInteger() && "Only supports integer vector types!");  in lowerShuffleAsBitBlend()
10542   MVT EltVT = VT.getVectorElementType();  in lowerShuffleAsBitBlend()
10552   SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps);  in lowerShuffleAsBitBlend()
10553   return getBitSelect(DL, VT, V1, V2, V1Mask, DAG);  in lowerShuffleAsBitBlend()
10561 static bool matchShuffleAsBlend(MVT VT, SDValue V1, SDValue V2,  in matchShuffleAsBlend()  argument
10575   int NumLanes = VT.getSizeInBits() / 128;  in matchShuffleAsBlend()
10579   // For 32/64-bit elements, if we only reference one input (plus any undefs),  in matchShuffleAsBlend()
10582       VT.is256BitVector() && VT.getScalarSizeInBits() >= 32;  in matchShuffleAsBlend()
10604            IsElementEquivalent(NumElts, V2, V2, M - NumElts, Elt))) {  in matchShuffleAsBlend()
10632       LaneBlendMask = (1ull << NumEltsPerLane) - 1;  in matchShuffleAsBlend()
10645 static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsBlend()  argument
10653   if (!matchShuffleAsBlend(VT, V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero,  in lowerShuffleAsBlend()
10657   // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.  in lowerShuffleAsBlend()
10659     V1 = getZeroVector(VT, Subtarget, DAG, DL);  in lowerShuffleAsBlend()
10661     V2 = getZeroVector(VT, Subtarget, DAG, DL);  in lowerShuffleAsBlend()
10663   unsigned NumElts = VT.getVectorNumElements();  in lowerShuffleAsBlend()
10665   switch (VT.SimpleTy) {  in lowerShuffleAsBlend()
10668     assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");  in lowerShuffleAsBlend()
10672     assert(Subtarget.hasAVX() && "256-bit float blends require AVX!");  in lowerShuffleAsBlend()
10679     assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");  in lowerShuffleAsBlend()
10680     return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,  in lowerShuffleAsBlend()
10686       // We can lower these with PBLENDW which is mirrored across 128-bit lanes.  in lowerShuffleAsBlend()
10696     // TODO - we should allow 2 PBLENDW here and leave shuffle combine to  in lowerShuffleAsBlend()
10712     assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!");  in lowerShuffleAsBlend()
10715     assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!");  in lowerShuffleAsBlend()
10718     if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,  in lowerShuffleAsBlend()
10731               lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))  in lowerShuffleAsBlend()
10735     int Scale = VT.getScalarSizeInBits() / 8;  in lowerShuffleAsBlend()
10739     MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);  in lowerShuffleAsBlend()
10743     // If V2 can be load-folded and V1 cannot be load-folded, then commute to  in lowerShuffleAsBlend()
10744     // allow that load-folding possibility.  in lowerShuffleAsBlend()
10753     // are -1 for true and 0 for false. We then use the LLVM semantics of 'true'  in lowerShuffleAsBlend()
10755     // reality in x86 is that vector masks (pre-AVX-512) use only the high bit  in lowerShuffleAsBlend()
10756     // of the element (the remaining are ignored) and 0 in that high bit would  in lowerShuffleAsBlend()
10757     // mean operand #1 while 1 in the high bit would mean operand #2. So while  in lowerShuffleAsBlend()
10766                         : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL,  in lowerShuffleAsBlend()
10772         VT,  in lowerShuffleAsBlend()
10785       if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,  in lowerShuffleAsBlend()
10790     // Otherwise load an immediate into a GPR, cast to k-register, and use a  in lowerShuffleAsBlend()
10802 /// a single-input permutation.
10805 /// then reduce the shuffle to a single-input permutation.
10806 static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,  in lowerShuffleAsBlendAndPermute()  argument
10813   SmallVector<int, 32> BlendMask(Mask.size(), -1);  in lowerShuffleAsBlendAndPermute()
10814   SmallVector<int, 32> PermuteMask(Mask.size(), -1);  in lowerShuffleAsBlendAndPermute()
10832   unsigned EltSize = VT.getScalarSizeInBits();  in lowerShuffleAsBlendAndPermute()
10836   SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);  in lowerShuffleAsBlendAndPermute()
10837   return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);  in lowerShuffleAsBlendAndPermute()
10841 /// a single-input permutation.
10844 /// then reduce the shuffle to a single-input (wider) permutation.
10845 static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,  in lowerShuffleAsUNPCKAndPermute()  argument
10850   int NumLanes = VT.getSizeInBits() / 128;  in lowerShuffleAsUNPCKAndPermute()
10855   SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};  in lowerShuffleAsUNPCKAndPermute()
10870       NormM -= NumElts;  in lowerShuffleAsUNPCKAndPermute()
10895   SmallVector<int, 32> PermuteMask(NumElts, -1);  in lowerShuffleAsUNPCKAndPermute()
10902       NormM -= NumElts;  in lowerShuffleAsUNPCKAndPermute()
10910     assert(PermuteMask[Elt] != -1 &&  in lowerShuffleAsUNPCKAndPermute()
10915   SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops);  in lowerShuffleAsUNPCKAndPermute()
10916   return DAG.getVectorShuffle(VT, DL, Unpck, DAG.getUNDEF(VT), PermuteMask);  in lowerShuffleAsUNPCKAndPermute()
10928 static SDValue lowerShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,  in lowerShuffleAsPermuteAndUnpack()  argument
10936   // This routine only supports 128-bit integer dual input vectors.  in lowerShuffleAsPermuteAndUnpack()
10937   if (VT.isFloatingPoint() || !VT.is128BitVector() || V2.isUndef())  in lowerShuffleAsPermuteAndUnpack()
10948     SmallVector<int, 16> V1Mask((unsigned)Size, -1);  in lowerShuffleAsPermuteAndUnpack()
10949     SmallVector<int, 16> V2Mask((unsigned)Size, -1);  in lowerShuffleAsPermuteAndUnpack()
10977     V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);  in lowerShuffleAsPermuteAndUnpack()
10978     V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);  in lowerShuffleAsPermuteAndUnpack()
10988         VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,  in lowerShuffleAsPermuteAndUnpack()
10994   int OrigScalarSize = VT.getScalarSizeInBits();  in lowerShuffleAsPermuteAndUnpack()
11005   // If none of the unpack-rooted lowerings worked (or were profitable) try an  in lowerShuffleAsPermuteAndUnpack()
11014     // half-crossings are created.  in lowerShuffleAsPermuteAndUnpack()
11017     SmallVector<int, 32> PermMask((unsigned)Size, -1);  in lowerShuffleAsPermuteAndUnpack()
11025           2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1);  in lowerShuffleAsPermuteAndUnpack()
11028         VT, DL,  in lowerShuffleAsPermuteAndUnpack()
11029         DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL, DL, VT,  in lowerShuffleAsPermuteAndUnpack()
11031         DAG.getUNDEF(VT), PermMask);  in lowerShuffleAsPermuteAndUnpack()
11037 /// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then
11040     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,  in lowerShuffleAsByteRotateAndPermute()  argument
11042   if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||  in lowerShuffleAsByteRotateAndPermute()
11043       (VT.is256BitVector() && !Subtarget.hasAVX2()) ||  in lowerShuffleAsByteRotateAndPermute()
11044       (VT.is512BitVector() && !Subtarget.hasBWI()))  in lowerShuffleAsByteRotateAndPermute()
11048   if (is128BitLaneCrossingShuffleMask(VT, Mask))  in lowerShuffleAsByteRotateAndPermute()
11051   int Scale = VT.getScalarSizeInBits() / 8;  in lowerShuffleAsByteRotateAndPermute()
11052   int NumLanes = VT.getSizeInBits() / 128;  in lowerShuffleAsByteRotateAndPermute()
11053   int NumElts = VT.getVectorNumElements();  in lowerShuffleAsByteRotateAndPermute()
11073         M -= NumElts;  in lowerShuffleAsByteRotateAndPermute()
11084   // TODO - it might be worth doing this for unary shuffles if the permute  in lowerShuffleAsByteRotateAndPermute()
11090   if (VT.getSizeInBits() > 128 && (Blend1 || Blend2))  in lowerShuffleAsByteRotateAndPermute()
11095     MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);  in lowerShuffleAsByteRotateAndPermute()
11097         VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),  in lowerShuffleAsByteRotateAndPermute()
11107           PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane);  in lowerShuffleAsByteRotateAndPermute()
11109           PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane);  in lowerShuffleAsByteRotateAndPermute()
11112     return DAG.getVectorShuffle(VT, DL, Rotate, DAG.getUNDEF(VT), PermMask);  in lowerShuffleAsByteRotateAndPermute()
11157     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,  in lowerShuffleAsDecomposedShuffleMerge()  argument
11160   int NumLanes = VT.getSizeInBits() / 128;  in lowerShuffleAsDecomposedShuffleMerge()
11166   SmallVector<int, 32> V1Mask(NumElts, -1);  in lowerShuffleAsDecomposedShuffleMerge()
11167   SmallVector<int, 32> V2Mask(NumElts, -1);  in lowerShuffleAsDecomposedShuffleMerge()
11168   SmallVector<int, 32> FinalMask(NumElts, -1);  in lowerShuffleAsDecomposedShuffleMerge()
11176       V2Mask[i] = M - NumElts;  in lowerShuffleAsDecomposedShuffleMerge()
11184   // and change \p InputMask to be a no-op (identity) mask.  in lowerShuffleAsDecomposedShuffleMerge()
11185   auto canonicalizeBroadcastableInput = [DL, VT, &Subtarget,  in lowerShuffleAsDecomposedShuffleMerge()
11196     Input = DAG.getNode(X86ISD::VBROADCAST, DL, VT, Input);  in lowerShuffleAsDecomposedShuffleMerge()
11205   // It is possible that the shuffle for one of the inputs is already a no-op.  in lowerShuffleAsDecomposedShuffleMerge()
11206   // See if we can simplify non-no-op shuffles into broadcasts,  in lowerShuffleAsDecomposedShuffleMerge()
11215   // one of the input shuffles would be a no-op. We prefer to shuffle inputs as  in lowerShuffleAsDecomposedShuffleMerge()
11217   // we'll have to do 2x as many shuffles in order to achieve this, a 2-input  in lowerShuffleAsDecomposedShuffleMerge()
11218   // pre-shuffle first is a better strategy.  in lowerShuffleAsDecomposedShuffleMerge()
11221     if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,  in lowerShuffleAsDecomposedShuffleMerge()
11233               lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG))  in lowerShuffleAsDecomposedShuffleMerge()
11236             DL, VT, V1, V2, Mask, Subtarget, DAG))  in lowerShuffleAsDecomposedShuffleMerge()
11238     // Unpack/rotate failed - try again with variable blends.  in lowerShuffleAsDecomposedShuffleMerge()
11239     if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,  in lowerShuffleAsDecomposedShuffleMerge()
11242     if (VT.getScalarSizeInBits() >= 32)  in lowerShuffleAsDecomposedShuffleMerge()
11244               DL, VT, V1, V2, Mask, Subtarget, DAG))  in lowerShuffleAsDecomposedShuffleMerge()
11250   // TODO: It doesn't have to be alternating - but each lane mustn't have more  in lowerShuffleAsDecomposedShuffleMerge()
11252   if (IsAlternating && VT.getScalarSizeInBits() < 32) {  in lowerShuffleAsDecomposedShuffleMerge()
11253     V1Mask.assign(NumElts, -1);  in lowerShuffleAsDecomposedShuffleMerge()
11254     V2Mask.assign(NumElts, -1);  in lowerShuffleAsDecomposedShuffleMerge()
11255     FinalMask.assign(NumElts, -1);  in lowerShuffleAsDecomposedShuffleMerge()
11263           V2Mask[i + (j / 2)] = M - NumElts;  in lowerShuffleAsDecomposedShuffleMerge()
11269   V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);  in lowerShuffleAsDecomposedShuffleMerge()
11270   V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);  in lowerShuffleAsDecomposedShuffleMerge()
11271   return DAG.getVectorShuffle(VT, DL, V1, V2, FinalMask);  in lowerShuffleAsDecomposedShuffleMerge()
11277   assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");  in matchShuffleAsBitRotate()
11278   assert(EltSizeInBits < 64 && "Can't rotate 64-bit integers");  in matchShuffleAsBitRotate()
11286     return -1;  in matchShuffleAsBitRotate()
11294 static SDValue lowerShuffleAsBitRotate(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsBitRotate()  argument
11301       (VT.is128BitVector() && Subtarget.hasXOP()) || Subtarget.hasAVX512();  in lowerShuffleAsBitRotate()
11306   int RotateAmt = matchShuffleAsBitRotate(RotateVT, VT.getScalarSizeInBits(),  in lowerShuffleAsBitRotate()
11311   // For pre-SSSE3 targets, if we are shuffling vXi8 elts then ISD::ROTL,  in lowerShuffleAsBitRotate()
11319     unsigned SrlAmt = RotateVT.getScalarSizeInBits() - RotateAmt;  in lowerShuffleAsBitRotate()
11326     return DAG.getBitcast(VT, Rot);  in lowerShuffleAsBitRotate()
11332   return DAG.getBitcast(VT, Rot);  in lowerShuffleAsBitRotate()
11344   //   [-1, 12, 13, 14, -1, -1,  1, -1]  in matchShuffleAsElementRotate()
11345   //   [-1, -1, -1, -1, -1, -1,  1,  2]  in matchShuffleAsElementRotate()
11347   //   [-1,  4,  5,  6, -1, -1,  9, -1]  in matchShuffleAsElementRotate()
11348   //   [-1,  4,  5,  6, -1, -1, -1, -1]  in matchShuffleAsElementRotate()
11359     int StartIdx = i - (M % NumElts);  in matchShuffleAsElementRotate()
11362       return -1;  in matchShuffleAsElementRotate()
11367     int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;  in matchShuffleAsElementRotate()
11373       return -1;  in matchShuffleAsElementRotate()
11379     // to. This reflects whether the high elements are remaining or the low  in matchShuffleAsElementRotate()
11390       return -1;  in matchShuffleAsElementRotate()
11410 /// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
11421 /// specified as a *right shift* because x86 is little-endian, it is a *left
11423 static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,  in matchShuffleAsByteRotate()  argument
11427     return -1;  in matchShuffleAsByteRotate()
11429   // PALIGNR works on 128-bit lanes.  in matchShuffleAsByteRotate()
11431   if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))  in matchShuffleAsByteRotate()
11432     return -1;  in matchShuffleAsByteRotate()
11436     return -1;  in matchShuffleAsByteRotate()
11445 static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsByteRotate()  argument
11449   assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");  in lowerShuffleAsByteRotate()
11452   int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);  in lowerShuffleAsByteRotate()
11458   MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);  in lowerShuffleAsByteRotate()
11464     assert((!VT.is512BitVector() || Subtarget.hasBWI()) &&  in lowerShuffleAsByteRotate()
11465            "512-bit PALIGNR requires BWI instructions");  in lowerShuffleAsByteRotate()
11467         VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,  in lowerShuffleAsByteRotate()
11471   assert(VT.is128BitVector() &&  in lowerShuffleAsByteRotate()
11472          "Rotate-based lowering only supports 128-bit lowering!");  in lowerShuffleAsByteRotate()
11474          "Can shuffle at most 16 bytes in a 128-bit vector!");  in lowerShuffleAsByteRotate()
11479   int LoByteShift = 16 - ByteRotation;  in lowerShuffleAsByteRotate()
11488   return DAG.getBitcast(VT,  in lowerShuffleAsByteRotate()
11500 /// specified as a *right shift* because x86 is little-endian, it is a *left
11502 static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsVALIGN()  argument
11507   assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&  in lowerShuffleAsVALIGN()
11508          "Only 32-bit and 64-bit elements are supported!");  in lowerShuffleAsVALIGN()
11510   // 128/256-bit vectors are only supported with VLX.  in lowerShuffleAsVALIGN()
11511   assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector()))  in lowerShuffleAsVALIGN()
11512          && "VLX required for 128/256-bit vectors");  in lowerShuffleAsVALIGN()
11517     return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,  in lowerShuffleAsVALIGN()
11520   // See if we can use VALIGN as a cross-lane version of VSHLDQ/VSRLDQ.  in lowerShuffleAsVALIGN()
11522   // TODO: We can probably make this more aggressive and use shift-pairs like  in lowerShuffleAsVALIGN()
11534     if (isSequentialOrUndefInRange(Mask, ZeroLo, NumElts - ZeroLo, Low))  in lowerShuffleAsVALIGN()
11535       return DAG.getNode(X86ISD::VALIGN, DL, VT, Src,  in lowerShuffleAsVALIGN()
11536                          getZeroVector(VT, Subtarget, DAG, DL),  in lowerShuffleAsVALIGN()
11537                          DAG.getTargetConstant(NumElts - ZeroLo, DL, MVT::i8));  in lowerShuffleAsVALIGN()
11543     if (isSequentialOrUndefInRange(Mask, 0, NumElts - ZeroHi, Low + ZeroHi))  in lowerShuffleAsVALIGN()
11544       return DAG.getNode(X86ISD::VALIGN, DL, VT,  in lowerShuffleAsVALIGN()
11545                          getZeroVector(VT, Subtarget, DAG, DL), Src,  in lowerShuffleAsVALIGN()
11553 static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsByteShiftMask()  argument
11558   assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");  in lowerShuffleAsByteShiftMask()
11559   assert(VT.is128BitVector() && "Only 128-bit vectors supported");  in lowerShuffleAsByteShiftMask()
11569   unsigned Len = NumElts - (ZeroLo + ZeroHi);  in lowerShuffleAsByteShiftMask()
11573   unsigned Scale = VT.getScalarSizeInBits() / 8;  in lowerShuffleAsByteShiftMask()
11584   // 01234567 --> zzzzzz01 --> 1zzzzzzz  in lowerShuffleAsByteShiftMask()
11585   // 01234567 --> 4567zzzz --> zzzzz456  in lowerShuffleAsByteShiftMask()
11586   // 01234567 --> z0123456 --> 3456zzzz --> zz3456zz  in lowerShuffleAsByteShiftMask()
11588     unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);  in lowerShuffleAsByteShiftMask()
11603     unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);  in lowerShuffleAsByteShiftMask()
11614   return DAG.getBitcast(VT, Res);  in lowerShuffleAsByteShiftMask()
11620 /// PSRL(W/D/Q/DQ) SSE2 and AVX2 logical bit-shift instructions. The function
11623 /// bit-wise element shifts and the byte shift across an entire 128-bit double
11626 /// PSHL : (little-endian) left bit shift.
11628 /// [ -1, 4, zz, -1 ]
11629 /// PSRL : (little-endian) right bit shift.
11631 /// [ -1, -1,  7, zz]
11632 /// PSLLDQ : (little-endian) left byte shift
11634 /// [ zz, zz, -1, -1,  2,  3,  4, -1]
11635 /// [ zz, zz, zz, zz, zz, zz, -1,  1]
11636 /// PSRLDQ : (little-endian) right byte shift
11638 /// [ -1, 5,  6,  7, zz, zz, zz, zz]
11639 /// [  1, 2, -1, -1, -1, -1, zz, zz]
11650         if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])  in matchShuffleAsShift()
11660       unsigned Len = Scale - Shift;  in matchShuffleAsShift()
11662         return -1;  in matchShuffleAsShift()
11682   // SSE/AVX supports logical shifts up to 64-bit integers - so we can just  in matchShuffleAsShift()
11699   return -1;  in matchShuffleAsShift()
11702 static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsShift()  argument
11708   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");  in lowerShuffleAsShift()
11715   int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),  in lowerShuffleAsShift()
11720     ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),  in lowerShuffleAsShift()
11736   return DAG.getBitcast(VT, V);  in lowerShuffleAsShift()
11741 static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,  in matchShuffleAsEXTRQ()  argument
11746   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");  in matchShuffleAsEXTRQ()
11756   for (; Len > 0; --Len)  in matchShuffleAsEXTRQ()
11757     if (!Zeroable[Len - 1])  in matchShuffleAsEXTRQ()
11763   int Idx = -1;  in matchShuffleAsEXTRQ()
11776     if (Idx < 0 || (Src == V && Idx == (M - i))) {  in matchShuffleAsEXTRQ()
11778       Idx = M - i;  in matchShuffleAsEXTRQ()
11788   BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;  in matchShuffleAsEXTRQ()
11789   BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;  in matchShuffleAsEXTRQ()
11796 // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
11797 static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,  in matchShuffleAsINSERTQ()  argument
11802   assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");  in matchShuffleAsINSERTQ()
11826       int Len = Hi - Idx;  in matchShuffleAsINSERTQ()
11838       if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {  in matchShuffleAsINSERTQ()
11841                  isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {  in matchShuffleAsINSERTQ()
11844                  isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,  in matchShuffleAsINSERTQ()
11851       BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;  in matchShuffleAsINSERTQ()
11852       BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;  in matchShuffleAsINSERTQ()
11863 static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleWithSSE4A()  argument
11867   if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))  in lowerShuffleWithSSE4A()
11868     return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,  in lowerShuffleWithSSE4A()
11872   if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))  in lowerShuffleWithSSE4A()
11873     return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),  in lowerShuffleWithSSE4A()
11874                        V2 ? V2 : DAG.getUNDEF(VT),  in lowerShuffleWithSSE4A()
11891     const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,  in lowerShuffleAsSpecificZeroOrAnyExtend()  argument
11894   int EltBits = VT.getScalarSizeInBits();  in lowerShuffleAsSpecificZeroOrAnyExtend()
11895   int NumElements = VT.getVectorNumElements();  in lowerShuffleAsSpecificZeroOrAnyExtend()
11915     SmallVector<int, 8> ShMask((unsigned)NumElements, -1);  in lowerShuffleAsSpecificZeroOrAnyExtend()
11918       ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;  in lowerShuffleAsSpecificZeroOrAnyExtend()
11920     return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);  in lowerShuffleAsSpecificZeroOrAnyExtend()
11926     // Not worth offsetting 128-bit vectors if scale == 2, a pattern using  in lowerShuffleAsSpecificZeroOrAnyExtend()
11928     if (Offset && Scale == 2 && VT.is128BitVector())  in lowerShuffleAsSpecificZeroOrAnyExtend()
11932     InputV = DAG.getBitcast(VT, InputV);  in lowerShuffleAsSpecificZeroOrAnyExtend()
11936     return DAG.getBitcast(VT, InputV);  in lowerShuffleAsSpecificZeroOrAnyExtend()
11939   assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");  in lowerShuffleAsSpecificZeroOrAnyExtend()
11940   InputV = DAG.getBitcast(VT, InputV);  in lowerShuffleAsSpecificZeroOrAnyExtend()
11945     int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1,  in lowerShuffleAsSpecificZeroOrAnyExtend()
11946                          -1};  in lowerShuffleAsSpecificZeroOrAnyExtend()
11948         VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,  in lowerShuffleAsSpecificZeroOrAnyExtend()
11953     int PSHUFDMask[4] = {Offset / 2, -1,  in lowerShuffleAsSpecificZeroOrAnyExtend()
11954                          SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1};  in lowerShuffleAsSpecificZeroOrAnyExtend()
11958     int PSHUFWMask[4] = {1, -1, -1, -1};  in lowerShuffleAsSpecificZeroOrAnyExtend()
11961         VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,  in lowerShuffleAsSpecificZeroOrAnyExtend()
11967   // to 64-bits.  in lowerShuffleAsSpecificZeroOrAnyExtend()
11970     assert(VT.is128BitVector() && "Unexpected vector width!");  in lowerShuffleAsSpecificZeroOrAnyExtend()
11974         MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,  in lowerShuffleAsSpecificZeroOrAnyExtend()
11979       return DAG.getBitcast(VT, Lo);  in lowerShuffleAsSpecificZeroOrAnyExtend()
11983         MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,  in lowerShuffleAsSpecificZeroOrAnyExtend()
11986     return DAG.getBitcast(VT,  in lowerShuffleAsSpecificZeroOrAnyExtend()
12007         VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,  in lowerShuffleAsSpecificZeroOrAnyExtend()
12015     SmallVector<int, 8> ShMask((unsigned)NumElements, -1);  in lowerShuffleAsSpecificZeroOrAnyExtend()
12017       ShMask[i - AlignToUnpack] = i;  in lowerShuffleAsSpecificZeroOrAnyExtend()
12018     InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask);  in lowerShuffleAsSpecificZeroOrAnyExtend()
12019     Offset -= AlignToUnpack;  in lowerShuffleAsSpecificZeroOrAnyExtend()
12027       Offset -= (NumElements / 2);  in lowerShuffleAsSpecificZeroOrAnyExtend()
12039   return DAG.getBitcast(VT, InputV);  in lowerShuffleAsSpecificZeroOrAnyExtend()
12047 /// match this pattern. It will use all of the micro-architectural details it
12048 /// can to emit an efficient lowering. It handles both blends with all-zero
12049 /// inputs to explicitly zero-extend and undef-lanes (sometimes undef due to
12052 /// The reason we have dedicated lowering for zext-style shuffles is that they
12055     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,  in lowerShuffleAsZeroOrAnyExtend()  argument
12058   int Bits = VT.getSizeInBits();  in lowerShuffleAsZeroOrAnyExtend()
12060   int NumElements = VT.getVectorNumElements();  in lowerShuffleAsZeroOrAnyExtend()
12062   assert(VT.getScalarSizeInBits() <= 32 &&  in lowerShuffleAsZeroOrAnyExtend()
12063          "Exceeds 32-bit integer zero extension limit");  in lowerShuffleAsZeroOrAnyExtend()
12066   // Define a helper function to check a particular ext-scale and lower to it if  in lowerShuffleAsZeroOrAnyExtend()
12068   auto Lower = [&](int Scale) -> SDValue {  in lowerShuffleAsZeroOrAnyExtend()
12093         Offset = M - (i / Scale);  in lowerShuffleAsZeroOrAnyExtend()
12095         return SDValue(); // Flip-flopping inputs.  in lowerShuffleAsZeroOrAnyExtend()
12097       // Offset must start in the lowest 128-bit lane or at the start of an  in lowerShuffleAsZeroOrAnyExtend()
12110         return SDValue(); // Non-consecutive strided elements.  in lowerShuffleAsZeroOrAnyExtend()
12114     // If we fail to find an input, we have a zero-shuffle which should always  in lowerShuffleAsZeroOrAnyExtend()
12125     return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt,  in lowerShuffleAsZeroOrAnyExtend()
12129   // The widest scale possible for extending is to a 64-bit integer.  in lowerShuffleAsZeroOrAnyExtend()
12143   // General extends failed, but 128-bit vectors may be able to use MOVQ.  in lowerShuffleAsZeroOrAnyExtend()
12148   // MOVQ, copying the lower 64-bits and zero-extending to the upper 64-bits.  in lowerShuffleAsZeroOrAnyExtend()
12163     return DAG.getBitcast(VT, V);  in lowerShuffleAsZeroOrAnyExtend()
12175   MVT VT = V.getSimpleValueType();  in getScalarValueForVectorElement()  local
12176   MVT EltVT = VT.getVectorElementType();  in getScalarValueForVectorElement()
12182   if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits())  in getScalarValueForVectorElement()
12202   return V->hasOneUse() &&  in isShuffleFoldableLoad()
12207 static bool isSoftF16(T VT, const X86Subtarget &Subtarget) {  in isSoftF16()  argument
12208   T EltVT = VT.getScalarType();  in isSoftF16()
12217     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,  in lowerShuffleAsElementInsertion()  argument
12220   MVT ExtVT = VT;  in lowerShuffleAsElementInsertion()
12221   MVT EltVT = VT.getVectorElementType();  in lowerShuffleAsElementInsertion()
12222   unsigned NumElts = VT.getVectorNumElements();  in lowerShuffleAsElementInsertion()
12223   unsigned EltBits = VT.getScalarSizeInBits();  in lowerShuffleAsElementInsertion()
12229       find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -  in lowerShuffleAsElementInsertion()
12239   // Bail if a non-zero V1 isn't used in place.  in lowerShuffleAsElementInsertion()
12242     V1Mask[V2Index] = -1;  in lowerShuffleAsElementInsertion()
12252   SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),  in lowerShuffleAsElementInsertion()
12258       // Using zext to expand a narrow element won't work for non-zero  in lowerShuffleAsElementInsertion()
12264       // Zero-extend directly to i32.  in lowerShuffleAsElementInsertion()
12269       // and OR with the zero-extended scalar.  in lowerShuffleAsElementInsertion()
12273         SDValue BitMask = getConstVector(Bits, VT, DAG, DL);  in lowerShuffleAsElementInsertion()
12274         V1 = DAG.getNode(ISD::AND, DL, VT, V1, BitMask);  in lowerShuffleAsElementInsertion()
12276         V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2));  in lowerShuffleAsElementInsertion()
12277         return DAG.getNode(ISD::OR, DL, VT, V1, V2);  in lowerShuffleAsElementInsertion()
12284     // element size is too small to use VZEXT_MOVL to clear the high bits.  in lowerShuffleAsElementInsertion()
12290     // this. We can't support integer vectors or non-zero targets cheaply.  in lowerShuffleAsElementInsertion()
12291     assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");  in lowerShuffleAsElementInsertion()
12292     if (!VT.isFloatingPoint() || V2Index != 0)  in lowerShuffleAsElementInsertion()
12294     if (!VT.is128BitVector())  in lowerShuffleAsElementInsertion()
12311   if (VT.isFloatingPoint() && V2Index != 0)  in lowerShuffleAsElementInsertion()
12315   if (ExtVT != VT)  in lowerShuffleAsElementInsertion()
12316     V2 = DAG.getBitcast(VT, V2);  in lowerShuffleAsElementInsertion()
12323     if (VT.isFloatingPoint() || NumElts <= 4) {  in lowerShuffleAsElementInsertion()
12326       V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);  in lowerShuffleAsElementInsertion()
12332       V2 = DAG.getBitcast(VT, V2);  in lowerShuffleAsElementInsertion()
12338 /// Try to lower broadcast of a single - truncated - integer element,
12342 static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0,  in lowerShuffleAsTruncBroadcast()  argument
12349   MVT EltVT = VT.getVectorElementType();  in lowerShuffleAsTruncBroadcast()
12352   assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!");  in lowerShuffleAsTruncBroadcast()
12353   assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!");  in lowerShuffleAsTruncBroadcast()
12379   // If we're extracting non-least-significant bits, shift so we can truncate.  in lowerShuffleAsTruncBroadcast()
12387   return DAG.getNode(X86ISD::VBROADCAST, DL, VT,  in lowerShuffleAsTruncBroadcast()
12396   // This routine only handles 128-bit shufps.  in isSingleSHUFPSMask()
12398   assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!");  in isSingleSHUFPSMask()
12399   assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!");  in isSingleSHUFPSMask()
12400   assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!");  in isSingleSHUFPSMask()
12401   assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!");  in isSingleSHUFPSMask()
12403   // To lower with a single SHUFPS we need to have the low half and high half  in isSingleSHUFPSMask()
12413 /// Test whether the specified input (0 or 1) is in-place blended by the
12428 /// If we are extracting two 128-bit halves of a vector and shuffling the
12429 /// result, match that to a 256-bit AVX2 vperm* instruction to avoid a
12430 /// multi-shuffle lowering.
12434   MVT VT = N0.getSimpleValueType();  in lowerShuffleOfExtractsAsVperm()  local
12435   assert((VT.is128BitVector() &&  in lowerShuffleOfExtractsAsVperm()
12436           (VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64)) &&  in lowerShuffleOfExtractsAsVperm()
12437          "VPERM* family of shuffles requires 32-bit or 64-bit elements");  in lowerShuffleOfExtractsAsVperm()
12453   unsigned NumElts = VT.getVectorNumElements();  in lowerShuffleOfExtractsAsVperm()
12470   NewMask.append(NumElts, -1);  in lowerShuffleOfExtractsAsVperm()
12472   // shuf (extract X, 0), (extract X, 4), M --> extract (shuf X, undef, M'), 0  in lowerShuffleOfExtractsAsVperm()
12475   // This is free: ymm -> xmm.  in lowerShuffleOfExtractsAsVperm()
12476   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf,  in lowerShuffleOfExtractsAsVperm()
12483 /// filtering. While a little annoying to re-dispatch on type here, there isn't
12485 static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsBroadcast()  argument
12489   MVT EltVT = VT.getVectorElementType();  in lowerShuffleAsBroadcast()
12490   if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||  in lowerShuffleAsBroadcast()
12492         (Subtarget.hasAVX2() && (VT.isInteger() || EltVT == MVT::f16))))  in lowerShuffleAsBroadcast()
12497   unsigned NumEltBits = VT.getScalarSizeInBits();  in lowerShuffleAsBroadcast()
12498   unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2())  in lowerShuffleAsBroadcast()
12547         BitOffset -= BeginOffset;  in lowerShuffleAsBroadcast()
12557   assert((BitOffset % NumEltBits) == 0 && "Illegal bit-offset");  in lowerShuffleAsBroadcast()
12568   if (BitCastSrc && VT.isInteger())  in lowerShuffleAsBroadcast()
12570             DL, VT, V, BroadcastIdx, Subtarget, DAG))  in lowerShuffleAsBroadcast()
12583              cast<LoadSDNode>(V)->isSimple()) {  in lowerShuffleAsBroadcast()
12584     // We do not check for one-use of the vector load because a broadcast load  in lowerShuffleAsBroadcast()
12590     SDValue BaseAddr = Ld->getOperand(1);  in lowerShuffleAsBroadcast()
12591     MVT SVT = VT.getScalarType();  in lowerShuffleAsBroadcast()
12593     assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");  in lowerShuffleAsBroadcast()
12599     // FIXME: Should we add VBROADCAST_LOAD isel patterns for pre-AVX?  in lowerShuffleAsBroadcast()
12601       SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in lowerShuffleAsBroadcast()
12602       SDValue Ops[] = {Ld->getChain(), NewAddr};  in lowerShuffleAsBroadcast()
12606               Ld->getMemOperand(), Offset, SVT.getStoreSize()));  in lowerShuffleAsBroadcast()
12608       return DAG.getBitcast(VT, V);  in lowerShuffleAsBroadcast()
12610     assert(SVT == MVT::f64 && "Unexpected VT!");  in lowerShuffleAsBroadcast()
12611     V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,  in lowerShuffleAsBroadcast()
12613                         Ld->getMemOperand(), Offset, SVT.getStoreSize()));  in lowerShuffleAsBroadcast()
12619     // We can only broadcast from the zero-element of a vector register,  in lowerShuffleAsBroadcast()
12620     // but it can be advantageous to broadcast from the zero-element of a  in lowerShuffleAsBroadcast()
12622     if (!VT.is256BitVector() && !VT.is512BitVector())  in lowerShuffleAsBroadcast()
12625     // VPERMQ/VPERMPD can perform the cross-lane shuffle directly.  in lowerShuffleAsBroadcast()
12626     if (VT == MVT::v4f64 || VT == MVT::v4i64)  in lowerShuffleAsBroadcast()
12629     // Only broadcast the zero-element of a 128-bit subvector.  in lowerShuffleAsBroadcast()
12634            "Unexpected bit-offset");  in lowerShuffleAsBroadcast()
12646       return DAG.getBitcast(VT, V);  in lowerShuffleAsBroadcast()
12656                                        VT.getVectorNumElements());  in lowerShuffleAsBroadcast()
12657     return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));  in lowerShuffleAsBroadcast()
12660   // We only support broadcasting from 128-bit vectors to minimize the  in lowerShuffleAsBroadcast()
12662   // 128-bits, removing as many bitcasts as possible.  in lowerShuffleAsBroadcast()
12666   // Otherwise cast V to a vector with the same element type as VT, but  in lowerShuffleAsBroadcast()
12667   // possibly narrower than VT. Then perform the broadcast.  in lowerShuffleAsBroadcast()
12669   MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), NumSrcElts);  in lowerShuffleAsBroadcast()
12670   return DAG.getNode(Opcode, DL, VT, DAG.getBitcast(CastVT, V));  in lowerShuffleAsBroadcast()
12693     int VADstIndex = -1;  in matchShuffleAsInsertPS()
12694     int VBDstIndex = -1;  in matchShuffleAsInsertPS()
12710       // We can only insert a single non-zeroable element.  in matchShuffleAsInsertPS()
12723     // Don't bother if we have no (non-zeroable) element for insertion.  in matchShuffleAsInsertPS()
12737       VBSrcIndex = CandidateMask[VBDstIndex] - 4;  in matchShuffleAsInsertPS()
12741     // the zero mask and the V2 insertion - so remove V1 dependency.  in matchShuffleAsInsertPS()
12783 /// Handle lowering of 2-lane 64-bit floating point shuffles.
12785 /// This is the basis function for the 2-lane 64-bit shuffles as we have full
12821   assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!");  in lowerV2F64Shuffle()
12822   assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!");  in lowerV2F64Shuffle()
12837   int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),  in lowerV2F64Shuffle()
12838                         Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};  in lowerV2F64Shuffle()
12844   // blend patterns if a zero-blend above didn't work.  in lowerV2F64Shuffle()
12863   unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);  in lowerV2F64Shuffle()
12868 /// Handle lowering of 2-lane 64-bit integer shuffles.
12870 /// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by
12892     int WidenedMask[4] = {Mask[0] < 0 ? -1 : (Mask[0] * 2),  in lowerV2I64Shuffle()
12893                           Mask[0] < 0 ? -1 : ((Mask[0] * 2) + 1),  in lowerV2I64Shuffle()
12894                           Mask[1] < 0 ? -1 : (Mask[1] * 2),  in lowerV2I64Shuffle()
12895                           Mask[1] < 0 ? -1 : ((Mask[1] * 2) + 1)};  in lowerV2I64Shuffle()
12901   assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!");  in lowerV2I64Shuffle()
12902   assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!");  in lowerV2I64Shuffle()
12941   // Its more profitable for pre-SSSE3 to use shuffles/unpacks.  in lowerV2I64Shuffle()
12974 static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,  in lowerShuffleWithSHUFPS()  argument
12982     int V2Index = find_if(Mask, [](int M) { return M >= 4; }) - Mask.begin();  in lowerShuffleWithSHUFPS()
12990       // This will only ever happen in the high lanes because we commute the  in lowerShuffleWithSHUFPS()
12994       NewMask[V2Index] -= 4;  in lowerShuffleWithSHUFPS()
12999       int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};  in lowerShuffleWithSHUFPS()
13000       V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,  in lowerShuffleWithSHUFPS()
13004       // high or low half formed.  in lowerShuffleWithSHUFPS()
13017       // high lanes.  in lowerShuffleWithSHUFPS()
13018       NewMask[2] -= 4;  in lowerShuffleWithSHUFPS()
13019       NewMask[3] -= 4;  in lowerShuffleWithSHUFPS()
13024       NewMask[0] -= 4;  in lowerShuffleWithSHUFPS()
13025       NewMask[1] -= 4;  in lowerShuffleWithSHUFPS()
13029       // We have a mixture of V1 and V2 in both low and high lanes. Rather than  in lowerShuffleWithSHUFPS()
13037                           (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,  in lowerShuffleWithSHUFPS()
13038                           (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};  in lowerShuffleWithSHUFPS()
13039       V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,  in lowerShuffleWithSHUFPS()
13055     return lowerShuffleWithSHUFPS(DL, VT, NewMask, V2, V1, DAG);  in lowerShuffleWithSHUFPS()
13057   return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,  in lowerShuffleWithSHUFPS()
13061 /// Lower 4-lane 32-bit floating point shuffles.
13128   // There are special ways we can lower some single-element blends. However, we  in lowerV4F32Shuffle()
13129   // have custom ways we can lower more complex single-element blends below that  in lowerV4F32Shuffle()
13131   // when the V2 input is targeting element 0 of the mask -- that is the fast  in lowerV4F32Shuffle()
13149   // Use low/high mov instructions. These are only valid in SSE1 because  in lowerV4F32Shuffle()
13166 /// Lower 4-lane i32 vector shuffles.
13168 /// We try to handle these with integer-domain shuffles where we can, but for
13200     // Try to use broadcast unless the mask only has one non-undef element.  in lowerV4I32Shuffle()
13233   // There are special ways we can lower some single-element blends.  in lowerV4I32Shuffle()
13256   // Its more profitable for pre-SSSE3 to use shuffles/unpacks.  in lowerV4I32Shuffle()
13295 /// Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
13307 /// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each
13309 /// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
13310 /// vector, form the analogous 128-bit 8-element Mask.
13312     const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,  in lowerV8I16GeneralSingleInputShuffle()  argument
13314   assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");  in lowerV8I16GeneralSingleInputShuffle()
13315   MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);  in lowerV8I16GeneralSingleInputShuffle()
13324     return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,  in lowerV8I16GeneralSingleInputShuffle()
13330       HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4));  in lowerV8I16GeneralSingleInputShuffle()
13331     return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,  in lowerV8I16GeneralSingleInputShuffle()
13343   int NumLToL = llvm::lower_bound(LoInputs, 4) - LoInputs.begin();  in lowerV8I16GeneralSingleInputShuffle()
13344   int NumHToL = LoInputs.size() - NumLToL;  in lowerV8I16GeneralSingleInputShuffle()
13345   int NumLToH = llvm::lower_bound(HiInputs, 4) - HiInputs.begin();  in lowerV8I16GeneralSingleInputShuffle()
13346   int NumHToH = HiInputs.size() - NumLToH;  in lowerV8I16GeneralSingleInputShuffle()
13352   // If we are shuffling values from one half - check how many different DWORD  in lowerV8I16GeneralSingleInputShuffle()
13357     V = DAG.getNode(ShufWOp, DL, VT, V,  in lowerV8I16GeneralSingleInputShuffle()
13362     return DAG.getBitcast(VT, V);  in lowerV8I16GeneralSingleInputShuffle()
13366     int PSHUFDMask[4] = { -1, -1, -1, -1 };  in lowerV8I16GeneralSingleInputShuffle()
13398       DWordPairs.resize(2, std::make_pair(-1, -1));  in lowerV8I16GeneralSingleInputShuffle()
13408   // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all  in lowerV8I16GeneralSingleInputShuffle()
13413   // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]  in lowerV8I16GeneralSingleInputShuffle()
13414   // Mask:  [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]  in lowerV8I16GeneralSingleInputShuffle()
13416   // However in some very rare cases we have a 1-into-3 or 3-into-1 on one half  in lowerV8I16GeneralSingleInputShuffle()
13417   // and an existing 2-into-2 on the other half. In this case we may have to  in lowerV8I16GeneralSingleInputShuffle()
13418   // pre-shuffle the 2-into-2 half to avoid turning it into a 3-into-1 or  in lowerV8I16GeneralSingleInputShuffle()
13419   // 1-into-3 which could cause us to cycle endlessly fixing each side in turn.  in lowerV8I16GeneralSingleInputShuffle()
13420   // Fortunately, we don't have to handle anything but a 2-into-2 pattern  in lowerV8I16GeneralSingleInputShuffle()
13421   // because any other situation (including a 3-into-1 or 1-into-3 in the other  in lowerV8I16GeneralSingleInputShuffle()
13422   // half than the one we target for fixing) will be fixed when we re-enter this  in lowerV8I16GeneralSingleInputShuffle()
13426   // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]  in lowerV8I16GeneralSingleInputShuffle()
13427   // Mask:  [3, 7, 1, 0, 2, 7, 3, 5] -THIS-IS-BAD!!!!-> [5, 7, 1, 0, 4, 7, 5, 3]  in lowerV8I16GeneralSingleInputShuffle()
13429   // This now has a 1-into-3 in the high half! Instead, we do two shuffles:  in lowerV8I16GeneralSingleInputShuffle()
13431   // Input: [a, b, c, d, e, f, g, h] PSHUFHW[0,2,1,3]-> [a, b, c, d, e, g, f, h]  in lowerV8I16GeneralSingleInputShuffle()
13432   // Mask:  [3, 7, 1, 0, 2, 7, 3, 5] -----------------> [3, 7, 1, 0, 2, 7, 3, 6]  in lowerV8I16GeneralSingleInputShuffle()
13434   // Input: [a, b, c, d, e, g, f, h] -PSHUFD[0,2,1,3]-> [a, b, e, g, c, d, f, h]  in lowerV8I16GeneralSingleInputShuffle()
13435   // Mask:  [3, 7, 1, 0, 2, 7, 3, 6] -----------------> [5, 7, 1, 0, 4, 7, 5, 6]  in lowerV8I16GeneralSingleInputShuffle()
13462         TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);  in lowerV8I16GeneralSingleInputShuffle()
13469     // Check for one tricky case: We're fixing a 3<-1 or a 1<-3 shuffle for AToA  in lowerV8I16GeneralSingleInputShuffle()
13471     // inputs, we don't try to fix it necessarily -- we'll recurse and see it in  in lowerV8I16GeneralSingleInputShuffle()
13472     // the next pass. However, if we have a 2<-2 in the BToB and AToB inputs, it  in lowerV8I16GeneralSingleInputShuffle()
13473     // is essential that we don't *create* a 3<-1 as then we might oscillate.  in lowerV8I16GeneralSingleInputShuffle()
13477       // to balance this to ensure we don't form a 3-1 shuffle in the other  in lowerV8I16GeneralSingleInputShuffle()
13490         // will more commonly be the high half, and we have to bias one way.  in lowerV8I16GeneralSingleInputShuffle()
13534         VT,  in lowerV8I16GeneralSingleInputShuffle()
13545     // Recurse back into this routine to re-compute state now that this isn't  in lowerV8I16GeneralSingleInputShuffle()
13547     return lowerV8I16GeneralSingleInputShuffle(DL, VT, V, Mask, Subtarget, DAG);  in lowerV8I16GeneralSingleInputShuffle()
13554   // At this point there are at most two inputs to the low and high halves from  in lowerV8I16GeneralSingleInputShuffle()
13557   // We use at most one low and one high word shuffle to collect these paired  in lowerV8I16GeneralSingleInputShuffle()
13559   int PSHUFLMask[4] = {-1, -1, -1, -1};  in lowerV8I16GeneralSingleInputShuffle()
13560   int PSHUFHMask[4] = {-1, -1, -1, -1};  in lowerV8I16GeneralSingleInputShuffle()
13561   int PSHUFDMask[4] = {-1, -1, -1, -1};  in lowerV8I16GeneralSingleInputShuffle()
13564   // original halves. This will then dictate the targets of the cross-half  in lowerV8I16GeneralSingleInputShuffle()
13573       SourceHalfMask[InPlaceInputs[0] - HalfOffset] =  in lowerV8I16GeneralSingleInputShuffle()
13574           InPlaceInputs[0] - HalfOffset;  in lowerV8I16GeneralSingleInputShuffle()
13581         SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;  in lowerV8I16GeneralSingleInputShuffle()
13588     SourceHalfMask[InPlaceInputs[0] - HalfOffset] =  in lowerV8I16GeneralSingleInputShuffle()
13589         InPlaceInputs[0] - HalfOffset;  in lowerV8I16GeneralSingleInputShuffle()
13593     SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;  in lowerV8I16GeneralSingleInputShuffle()
13600   // Now gather the cross-half inputs and place them into a free dword of  in lowerV8I16GeneralSingleInputShuffle()
13603   // look more like the 3-1 fixing operation.  in lowerV8I16GeneralSingleInputShuffle()
13628         if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {  in lowerV8I16GeneralSingleInputShuffle()
13629           if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) {  in lowerV8I16GeneralSingleInputShuffle()
13630             SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =  in lowerV8I16GeneralSingleInputShuffle()
13631                 Input - SourceOffset;  in lowerV8I16GeneralSingleInputShuffle()
13634               if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)  in lowerV8I16GeneralSingleInputShuffle()
13637                 M = SourceHalfMask[Input - SourceOffset] + SourceOffset;  in lowerV8I16GeneralSingleInputShuffle()
13639             assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==  in lowerV8I16GeneralSingleInputShuffle()
13640                        Input - SourceOffset &&  in lowerV8I16GeneralSingleInputShuffle()
13643           // Note that this correctly re-maps both when we do a swap and when  in lowerV8I16GeneralSingleInputShuffle()
13646           Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;  in lowerV8I16GeneralSingleInputShuffle()
13650         if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0)  in lowerV8I16GeneralSingleInputShuffle()
13651           PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;  in lowerV8I16GeneralSingleInputShuffle()
13653           assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==  in lowerV8I16GeneralSingleInputShuffle()
13658       // And just directly shift any other-half mask elements to be same-half  in lowerV8I16GeneralSingleInputShuffle()
13663           M = M - SourceOffset + DestOffset;  in lowerV8I16GeneralSingleInputShuffle()
13673       if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {  in lowerV8I16GeneralSingleInputShuffle()
13674         int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) +  in lowerV8I16GeneralSingleInputShuffle()
13676         SourceHalfMask[InputFixed - SourceOffset] =  in lowerV8I16GeneralSingleInputShuffle()
13677             IncomingInputs[0] - SourceOffset;  in lowerV8I16GeneralSingleInputShuffle()
13684           isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {  in lowerV8I16GeneralSingleInputShuffle()
13685         // We have two non-adjacent or clobbered inputs we need to extract from  in lowerV8I16GeneralSingleInputShuffle()
13688         int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,  in lowerV8I16GeneralSingleInputShuffle()
13689                               IncomingInputs[1] - SourceOffset};  in lowerV8I16GeneralSingleInputShuffle()
13715           // (because there are no off-half inputs to this half) and there is no  in lowerV8I16GeneralSingleInputShuffle()
13717           // swap an input with a non-input.  in lowerV8I16GeneralSingleInputShuffle()
13769     V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,  in lowerV8I16GeneralSingleInputShuffle()
13772     V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,  in lowerV8I16GeneralSingleInputShuffle()
13776         VT,  in lowerV8I16GeneralSingleInputShuffle()
13783          "Failed to lift all the high half inputs to the low mask!");  in lowerV8I16GeneralSingleInputShuffle()
13785          "Failed to lift all the low half inputs to the high mask!");  in lowerV8I16GeneralSingleInputShuffle()
13789     V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,  in lowerV8I16GeneralSingleInputShuffle()
13792   // Do a half shuffle with the high mask after shifting its values down.  in lowerV8I16GeneralSingleInputShuffle()
13795       M -= 4;  in lowerV8I16GeneralSingleInputShuffle()
13797     V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,  in lowerV8I16GeneralSingleInputShuffle()
13803 /// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
13806     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,  in lowerShuffleAsBlendOfPSHUFBs()  argument
13808   assert(!is128BitLaneCrossingShuffleMask(VT, Mask) &&  in lowerShuffleAsBlendOfPSHUFBs()
13811   int NumBytes = VT.getSizeInBits() / 8;  in lowerShuffleAsBlendOfPSHUFBs()
13827     int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale;  in lowerShuffleAsBlendOfPSHUFBs()
13853   return DAG.getBitcast(VT, V);  in lowerShuffleAsBlendOfPSHUFBs()
13856 /// Generic lowering of 8-lane i16 shuffles.
13858 /// This handles both single-input shuffles and combined shuffle/blends with
13863 /// enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle
13865 /// the two inputs, try to interleave them. Otherwise, blend the low and high
13927          "All single-input shuffles should be canonicalized to be V1-input "  in lowerV8I16Shuffle()
13942   // There are special ways we can lower some single-element blends.  in lowerV8I16Shuffle()
13992     // Check if this is part of a 256-bit vector truncation.  in lowerV8I16Shuffle()
14008       for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))  in lowerV8I16Shuffle()
14038   // When compacting odd (upper) elements, use PACKSS pre-SSE41.  in lowerV8I16Shuffle()
14065   // We can always bit-blend if we have to so the fallback strategy is to  in lowerV8I16Shuffle()
14066   // decompose into single-input permutes and blends/unpacks.  in lowerV8I16Shuffle()
14071 /// Lower 8-lane 16-bit floating point shuffles.
14100 // Lowers unary/binary shuffle as VPERMV/VPERMV3, for non-VLX targets,
14101 // sub-512-bit shuffles are padded to 512-bits for the shuffle and then
14103 static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT,  in lowerShuffleWithPERMV()  argument
14107   MVT MaskVT = VT.changeTypeToInteger();  in lowerShuffleWithPERMV()
14109   MVT ShuffleVT = VT;  in lowerShuffleWithPERMV()
14110   if (!VT.is512BitVector() && !Subtarget.hasVLX()) {  in lowerShuffleWithPERMV()
14116     int NumElts = VT.getVectorNumElements();  in lowerShuffleWithPERMV()
14117     unsigned Scale = 512 / VT.getSizeInBits();  in lowerShuffleWithPERMV()
14121         M += (Scale - 1) * NumElts;  in lowerShuffleWithPERMV()
14134   if (VT != ShuffleVT)  in lowerShuffleWithPERMV()
14135     Result = extractSubVector(Result, 0, DAG, DL, VT.getSizeInBits());  in lowerShuffleWithPERMV()
14144 /// UNPCK to spread the i8 elements across two i16-element vectors, and uses
14145 /// the existing lowering for v8i16 blends on each half, finally PACK-ing them
14193   // For single-input shuffles, there are some nicer lowering tricks we can use.  in lowerV16I8Shuffle()
14209     // Notably, this handles splat and partial-splat shuffles more efficiently.  in lowerV16I8Shuffle()
14210     // However, it only makes sense if the pre-duplication shuffle simplifies  in lowerV16I8Shuffle()
14212     // express the pre-duplication shuffle as an i16 shuffle.  in lowerV16I8Shuffle()
14223     auto tryToWidenViaDuplication = [&]() -> SDValue {  in lowerV16I8Shuffle()
14240       int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};  in lowerV16I8Shuffle()
14284       int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};  in lowerV16I8Shuffle()
14287           int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);  in lowerV16I8Shuffle()
14323   // blends but after all of the single-input lowerings. If the single input  in lowerV16I8Shuffle()
14327   // the complexity of DAG combining bad patterns back into PSHUFB is too high,  in lowerV16I8Shuffle()
14332   // as a PACKUS(AND(),AND()) - which is quicker than UNPACK(PSHUFB(),PSHUFB()).  in lowerV16I8Shuffle()
14345     // do so. This avoids using them to handle blends-with-zero which is  in lowerV16I8Shuffle()
14358       // FIXME: It might be worth trying to detect if the unpack-feeding  in lowerV16I8Shuffle()
14365       // AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8).  in lowerV16I8Shuffle()
14376       // Use PALIGNR+Permute if possible - permute might become PSHUFB but the  in lowerV16I8Shuffle()
14386   // There are special ways we can lower some single-element blends.  in lowerV16I8Shuffle()
14411     for (unsigned i = 0; i != 8; i += 1 << (NumEvenDrops - 1))  in lowerV16I8Shuffle()
14443   // Handle multi-input cases by blending/unpacking single-input shuffles.  in lowerV16I8Shuffle()
14448   // The fallback path for single-input shuffles widens this into two v8i16  in lowerV16I8Shuffle()
14453   std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};  in lowerV16I8Shuffle()
14454   std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};  in lowerV16I8Shuffle()
14465     // Use a mask to drop the high bytes.  in lowerV16I8Shuffle()
14481     // Otherwise just unpack the low half of V into VLoHalf and the high half into  in lowerV16I8Shuffle()
14497 /// Dispatching routine to lower various 128-bit x86 vector shuffles.
14499 /// This routine breaks down the specific type of 128-bit shuffle and
14502                                   MVT VT, SDValue V1, SDValue V2,  in lower128BitShuffle()  argument
14506   if (VT == MVT::v8bf16) {  in lower128BitShuffle()
14509     return DAG.getBitcast(VT,  in lower128BitShuffle()
14513   switch (VT.SimpleTy) {  in lower128BitShuffle()
14534 /// Generic routine to split vector shuffle into half-sized shuffles.
14539 static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,  in splitAndLowerShuffle()  argument
14542   assert(VT.getSizeInBits() >= 256 &&  in splitAndLowerShuffle()
14543          "Only for 256-bit or wider vector shuffles!");  in splitAndLowerShuffle()
14544   assert(V1.getSimpleValueType() == VT && "Bad operand type!");  in splitAndLowerShuffle()
14545   assert(V2.getSimpleValueType() == VT && "Bad operand type!");  in splitAndLowerShuffle()
14550   int NumElements = VT.getVectorNumElements();  in splitAndLowerShuffle()
14552   MVT ScalarVT = VT.getVectorElementType();  in splitAndLowerShuffle()
14555   // Use splitVector/extractSubVector so that split build-vectors just build two  in splitAndLowerShuffle()
14568   // Now create two 4-way blends of these half-width vectors.  in splitAndLowerShuffle()
14589   auto CheckHalfBlendUsable = [&](const ArrayRef<int> &HalfMask) -> bool {  in splitAndLowerShuffle()
14600     SmallVector<int, 32> V1BlendMask((unsigned)SplitNumElements, -1);  in splitAndLowerShuffle()
14601     SmallVector<int, 32> V2BlendMask((unsigned)SplitNumElements, -1);  in splitAndLowerShuffle()
14602     SmallVector<int, 32> BlendMask((unsigned)SplitNumElements, -1);  in splitAndLowerShuffle()
14606         V2BlendMask[i] = M - NumElements;  in splitAndLowerShuffle()
14619     // a minimal number of high-level vector shuffle nodes.  in splitAndLowerShuffle()
14638           BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);  in splitAndLowerShuffle()
14657   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);  in splitAndLowerShuffle()
14663 /// This is provided as a good fallback for many lowerings of non-single-input
14664 /// shuffles with more than one 128-bit lane. In those cases, we want to select
14665 /// between splitting the shuffle into 128-bit components and stitching those
14666 /// back together vs. extracting the single-input shuffles and blending those
14668 static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleAsSplitOrBlend()  argument
14672   assert(!V2.isUndef() && "This routine must not be used to lower single-input "  in lowerShuffleAsSplitOrBlend()
14680     int V1BroadcastIdx = -1, V2BroadcastIdx = -1;  in lowerShuffleAsSplitOrBlend()
14684           V2BroadcastIdx = M - Size;  in lowerShuffleAsSplitOrBlend()
14685         else if (M - Size != V2BroadcastIdx)  in lowerShuffleAsSplitOrBlend()
14696     return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget,  in lowerShuffleAsSplitOrBlend()
14699   // If the inputs all stem from a single 128-bit lane of each input, then we  in lowerShuffleAsSplitOrBlend()
14702   int LaneCount = VT.getSizeInBits() / 128;  in lowerShuffleAsSplitOrBlend()
14711     return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG,  in lowerShuffleAsSplitOrBlend()
14715   // requires that the decomposed single-input shuffles don't end up here.  in lowerShuffleAsSplitOrBlend()
14716   return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget,  in lowerShuffleAsSplitOrBlend()
14721 // TODO: Extend to support v8f32 (+ 512-bit shuffles).
14722 static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT,  in lowerShuffleAsLanePermuteAndSHUFP()  argument
14726   assert(VT == MVT::v4f64 && "Only for v4f64 shuffles");  in lowerShuffleAsLanePermuteAndSHUFP()
14728   int LHSMask[4] = {-1, -1, -1, -1};  in lowerShuffleAsLanePermuteAndSHUFP()
14729   int RHSMask[4] = {-1, -1, -1, -1};  in lowerShuffleAsLanePermuteAndSHUFP()
14744   SDValue LHS = DAG.getVectorShuffle(VT, DL, V1, V2, LHSMask);  in lowerShuffleAsLanePermuteAndSHUFP()
14745   SDValue RHS = DAG.getVectorShuffle(VT, DL, V1, V2, RHSMask);  in lowerShuffleAsLanePermuteAndSHUFP()
14746   return DAG.getNode(X86ISD::SHUFP, DL, VT, LHS, RHS,  in lowerShuffleAsLanePermuteAndSHUFP()
14750 /// Lower a vector shuffle crossing multiple 128-bit lanes as
14751 /// a lane permutation followed by a per-lane permutation.
14753 /// This is mainly for cases where we can have non-repeating permutes
14759     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,  in lowerShuffleAsLanePermuteAndPermute()  argument
14761   int NumElts = VT.getVectorNumElements();  in lowerShuffleAsLanePermuteAndPermute()
14762   int NumLanes = VT.getSizeInBits() / 128;  in lowerShuffleAsLanePermuteAndPermute()
14771   auto getSublanePermute = [&](int NumSublanes) -> SDValue {  in lowerShuffleAsLanePermuteAndPermute()
14813       // TODO - isShuffleMaskInputInPlace could be extended to something like  in lowerShuffleAsLanePermuteAndPermute()
14825       if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))  in lowerShuffleAsLanePermuteAndPermute()
14835     SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask);  in lowerShuffleAsLanePermuteAndPermute()
14836     return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT),  in lowerShuffleAsLanePermuteAndPermute()
14848   // Then attempt a solution with 64-bit sublanes (vpermq).  in lowerShuffleAsLanePermuteAndPermute()
14852   // If that doesn't work and we have fast variable cross-lane shuffle,  in lowerShuffleAsLanePermuteAndPermute()
14853   // attempt 32-bit sublanes (vpermd).  in lowerShuffleAsLanePermuteAndPermute()
14874 /// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one
14878 /// single-input cross lane shuffle which is lower than any other fully general
14879 /// cross-lane shuffle strategy I'm aware of. Special cases for each particular
14882     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,  in lowerShuffleAsLanePermuteAndShuffle()  argument
14884   // FIXME: This should probably be generalized for 512-bit vectors as well.  in lowerShuffleAsLanePermuteAndShuffle()
14885   assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");  in lowerShuffleAsLanePermuteAndShuffle()
14892   if (VT == MVT::v4f64 &&  in lowerShuffleAsLanePermuteAndShuffle()
14894     return lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG);  in lowerShuffleAsLanePermuteAndShuffle()
14896   // If there are only inputs from one 128-bit lane, splitting will in fact be  in lowerShuffleAsLanePermuteAndShuffle()
14914   // TODO - we could support shuffling V2 in the Flipped input.  in lowerShuffleAsLanePermuteAndShuffle()
14921   assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) &&  in lowerShuffleAsLanePermuteAndShuffle()
14922          "In-lane shuffle mask expected");  in lowerShuffleAsLanePermuteAndShuffle()
14926   if (!AllLanes && !is128BitLaneRepeatedShuffleMask(VT, InLaneMask))  in lowerShuffleAsLanePermuteAndShuffle()
14927     return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG,  in lowerShuffleAsLanePermuteAndShuffle()
14930   // Flip the lanes, and shuffle the results which should now be in-lane.  in lowerShuffleAsLanePermuteAndShuffle()
14931   MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;  in lowerShuffleAsLanePermuteAndShuffle()
14935   Flipped = DAG.getBitcast(VT, Flipped);  in lowerShuffleAsLanePermuteAndShuffle()
14936   return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);  in lowerShuffleAsLanePermuteAndShuffle()
14939 /// Handle lowering 2-lane 128-bit shuffles.
14940 static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,  in lowerV2X128Shuffle()  argument
14951       MVT MemVT = VT.getHalfNumVectorElementsVT();  in lowerV2X128Shuffle()
14955                                              VT, MemVT, Ld, Ofs, DAG))  in lowerV2X128Shuffle()
14975     MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);  in lowerV2X128Shuffle()
14978     return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,  in lowerV2X128Shuffle()
14979                        getZeroVector(VT, Subtarget, DAG, DL), LoV,  in lowerV2X128Shuffle()
14987   // Blends are faster and handle all the non-lane-crossing cases.  in lowerV2X128Shuffle()
14988   if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable,  in lowerV2X128Shuffle()
14995     // Check for patterns which can be matched with a single insert of a 128-bit  in lowerV2X128Shuffle()
15001       // this will likely become vinsertf128 which can't fold a 256-bit memop.  in lowerV2X128Shuffle()
15003         MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);  in lowerV2X128Shuffle()
15007         return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec,  in lowerV2X128Shuffle()
15017         return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,  in lowerV2X128Shuffle()
15023   // Otherwise form a 128-bit permutation. After accounting for undefs,  in lowerV2X128Shuffle()
15024   // convert the 64-bit shuffle mask selection values into 128-bit  in lowerV2X128Shuffle()
15029   //    [1:0] - select 128 bits from sources for low half of destination  in lowerV2X128Shuffle()
15030   //    [2]   - ignore  in lowerV2X128Shuffle()
15031   //    [3]   - zero low half of destination  in lowerV2X128Shuffle()
15032   //    [5:4] - select 128 bits from sources for high half of destination  in lowerV2X128Shuffle()
15033   //    [6]   - ignore  in lowerV2X128Shuffle()
15034   //    [7]   - zero high half of destination  in lowerV2X128Shuffle()
15045     V1 = DAG.getUNDEF(VT);  in lowerV2X128Shuffle()
15047     V2 = DAG.getUNDEF(VT);  in lowerV2X128Shuffle()
15049   return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,  in lowerV2X128Shuffle()
15053 /// Lower a vector shuffle by first fixing the 128-bit lanes and then
15061     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,  in lowerShuffleAsLanePermuteAndRepeatedMask()  argument
15065   if (is128BitLaneRepeatedShuffleMask(VT, Mask))  in lowerShuffleAsLanePermuteAndRepeatedMask()
15069   int NumLanes = VT.getSizeInBits() / 128;  in lowerShuffleAsLanePermuteAndRepeatedMask()
15070   int NumLaneElts = 128 / VT.getScalarSizeInBits();  in lowerShuffleAsLanePermuteAndRepeatedMask()
15071   SmallVector<int, 16> RepeatMask(NumLaneElts, -1);  in lowerShuffleAsLanePermuteAndRepeatedMask()
15072   SmallVector<std::array<int, 2>, 2> LaneSrcs(NumLanes, {{-1, -1}});  in lowerShuffleAsLanePermuteAndRepeatedMask()
15077     int Srcs[2] = {-1, -1};  in lowerShuffleAsLanePermuteAndRepeatedMask()
15078     SmallVector<int, 16> InLaneMask(NumLaneElts, -1);  in lowerShuffleAsLanePermuteAndRepeatedMask()
15177   SmallVector<int, 16> NewMask(NumElts, -1);  in lowerShuffleAsLanePermuteAndRepeatedMask()
15181       int M = -1;  in lowerShuffleAsLanePermuteAndRepeatedMask()
15187   SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);  in lowerShuffleAsLanePermuteAndRepeatedMask()
15192       cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask)  in lowerShuffleAsLanePermuteAndRepeatedMask()
15198       int M = -1;  in lowerShuffleAsLanePermuteAndRepeatedMask()
15204   SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);  in lowerShuffleAsLanePermuteAndRepeatedMask()
15209       cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)  in lowerShuffleAsLanePermuteAndRepeatedMask()
15214       NewMask[i] = -1;  in lowerShuffleAsLanePermuteAndRepeatedMask()
15223   return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask);  in lowerShuffleAsLanePermuteAndRepeatedMask()
15246   HalfIdx1 = -1;  in getHalfShuffleMask()
15247   HalfIdx2 = -1;  in getHalfShuffleMask()
15291   MVT VT = V1.getSimpleValueType();  in getShuffleHalfVectors()  local
15292   MVT HalfVT = VT.getHalfNumVectorElementsVT();  in getShuffleHalfVectors()
15313     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1);  in getShuffleHalfVectors()
15317   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,  in getShuffleHalfVectors()
15321 /// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
15324 static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleWithUndefHalf()  argument
15328   assert((VT.is256BitVector() || VT.is512BitVector()) &&  in lowerShuffleWithUndefHalf()
15329          "Expected 256-bit or 512-bit vector");  in lowerShuffleWithUndefHalf()
15340   MVT HalfVT = VT.getHalfNumVectorElementsVT();  in lowerShuffleWithUndefHalf()
15346     return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,  in lowerShuffleWithUndefHalf()
15356     return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,  in lowerShuffleWithUndefHalf()
15376   unsigned EltWidth = VT.getVectorElementType().getSizeInBits();  in lowerShuffleWithUndefHalf()
15379     // Always extract lowers when setting lower - these are all free subreg ops.  in lowerShuffleWithUndefHalf()
15385       // AVX2 has efficient 32/64-bit element cross-lane shuffles.  in lowerShuffleWithUndefHalf()
15400       // AVX512 has efficient cross-lane shuffles for all legal 512-bit types.  in lowerShuffleWithUndefHalf()
15401       if (Subtarget.hasAVX512() && VT.is512BitVector())  in lowerShuffleWithUndefHalf()
15413   // UndefLower - uuuuXXXX: an insert to high half is required if we split this.  in lowerShuffleWithUndefHalf()
15415     // AVX2 has efficient 64-bit element cross-lane shuffles.  in lowerShuffleWithUndefHalf()
15419     // AVX512 has efficient cross-lane shuffles for all legal 512-bit types.  in lowerShuffleWithUndefHalf()
15420     if (Subtarget.hasAVX512() && VT.is512BitVector())  in lowerShuffleWithUndefHalf()
15431 /// Handle case where shuffle sources are coming from the same 128-bit lane and
15432 /// every lane can be represented as the same repeating mask - allowing us to
15436     const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,  in lowerShuffleAsRepeatedMaskAndLanePermute()  argument
15438   int NumElts = VT.getVectorNumElements();  in lowerShuffleAsRepeatedMaskAndLanePermute()
15439   int NumLanes = VT.getSizeInBits() / 128;  in lowerShuffleAsRepeatedMaskAndLanePermute()
15446       if (BroadcastSize <= VT.getScalarSizeInBits())  in lowerShuffleAsRepeatedMaskAndLanePermute()
15448       int NumBroadcastElts = BroadcastSize / VT.getScalarSizeInBits();  in lowerShuffleAsRepeatedMaskAndLanePermute()
15451       // accounting for UNDEFs but only references the lowest 128-bit  in lowerShuffleAsRepeatedMaskAndLanePermute()
15469       SmallVector<int, 8> RepeatMask((unsigned)NumElts, -1);  in lowerShuffleAsRepeatedMaskAndLanePermute()
15474       SDValue RepeatShuf = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatMask);  in lowerShuffleAsRepeatedMaskAndLanePermute()
15477       SmallVector<int, 8> BroadcastMask((unsigned)NumElts, -1);  in lowerShuffleAsRepeatedMaskAndLanePermute()
15487       return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT),  in lowerShuffleAsRepeatedMaskAndLanePermute()
15492   // Bail if the shuffle mask doesn't cross 128-bit lanes.  in lowerShuffleAsRepeatedMaskAndLanePermute()
15493   if (!is128BitLaneCrossingShuffleMask(VT, Mask))  in lowerShuffleAsRepeatedMaskAndLanePermute()
15497   if (is128BitLaneRepeatedShuffleMask(VT, Mask))  in lowerShuffleAsRepeatedMaskAndLanePermute()
15507     // can form a repeating shuffle mask (local to each sub-lane). At the same  in lowerShuffleAsRepeatedMaskAndLanePermute()
15508     // time, determine the source sub-lane for each destination sub-lane.  in lowerShuffleAsRepeatedMaskAndLanePermute()
15509     int TopSrcSubLane = -1;  in lowerShuffleAsRepeatedMaskAndLanePermute()
15510     SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1);  in lowerShuffleAsRepeatedMaskAndLanePermute()
15516       // Extract the sub-lane mask, check that it all comes from the same lane  in lowerShuffleAsRepeatedMaskAndLanePermute()
15518       int SrcLane = -1;  in lowerShuffleAsRepeatedMaskAndLanePermute()
15519       SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1);  in lowerShuffleAsRepeatedMaskAndLanePermute()
15532       // Whole sub-lane is UNDEF.  in lowerShuffleAsRepeatedMaskAndLanePermute()
15536       // Attempt to match against the candidate repeated sub-lane masks.  in lowerShuffleAsRepeatedMaskAndLanePermute()
15552         // Merge the sub-lane mask into the matching repeated sub-lane mask.  in lowerShuffleAsRepeatedMaskAndLanePermute()
15562         // Track the top most source sub-lane - by setting the remaining to  in lowerShuffleAsRepeatedMaskAndLanePermute()
15570       // Bail if we failed to find a matching repeated sub-lane mask.  in lowerShuffleAsRepeatedMaskAndLanePermute()
15578     SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1);  in lowerShuffleAsRepeatedMaskAndLanePermute()
15591     // Shuffle each source sub-lane to its destination.  in lowerShuffleAsRepeatedMaskAndLanePermute()
15592     SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1);  in lowerShuffleAsRepeatedMaskAndLanePermute()
15607         DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask);  in lowerShuffleAsRepeatedMaskAndLanePermute()
15609     return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT),  in lowerShuffleAsRepeatedMaskAndLanePermute()
15613   // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes  in lowerShuffleAsRepeatedMaskAndLanePermute()
15614   // (with PERMQ/PERMPD). On AVX2/AVX512BW targets, permuting 32-bit sub-lanes,  in lowerShuffleAsRepeatedMaskAndLanePermute()
15616   // Otherwise we can only permute whole 128-bit lanes.  in lowerShuffleAsRepeatedMaskAndLanePermute()
15618   if (Subtarget.hasAVX2() && VT.is256BitVector()) {  in lowerShuffleAsRepeatedMaskAndLanePermute()
15622         (!OnlyLowestElts && V2.isUndef() && VT == MVT::v32i8) ? 4 : 2;  in lowerShuffleAsRepeatedMaskAndLanePermute()
15624   if (Subtarget.hasBWI() && VT == MVT::v64i8)  in lowerShuffleAsRepeatedMaskAndLanePermute()
15634 static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,  in matchShuffleWithSHUFPD()  argument
15638   int NumElts = VT.getVectorNumElements();  in matchShuffleWithSHUFPD()
15639   assert(VT.getScalarSizeInBits() == 64 &&  in matchShuffleWithSHUFPD()
15679 static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1,  in lowerShuffleWithSHUFPD()  argument
15684   assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) &&  in lowerShuffleWithSHUFPD()
15689   if (!matchShuffleWithSHUFPD(VT, V1, V2, ForceV1Zero, ForceV2Zero, Immediate,  in lowerShuffleWithSHUFPD()
15693   // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.  in lowerShuffleWithSHUFPD()
15695     V1 = getZeroVector(VT, Subtarget, DAG, DL);  in lowerShuffleWithSHUFPD()
15697     V2 = getZeroVector(VT, Subtarget, DAG, DL);  in lowerShuffleWithSHUFPD()
15699   return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,  in lowerShuffleWithSHUFPD()
15706 static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT,  in lowerShuffleAsVTRUNCAndUnpack()  argument
15711   assert(VT == MVT::v32i8 && "Unexpected type!");  in lowerShuffleAsVTRUNCAndUnpack()
15718   if (Zeroable.countl_one() < (Mask.size() - 8))  in lowerShuffleAsVTRUNCAndUnpack()
15746 // Pattern-match interleave(256b v1, 256b v2) -> 512b v3 and lower it into unpck
15748 // 256-bit vectors in earlier isel stages. Therefore, this function matches a
15749 // pair of 256-bit shuffles and makes sure the masks are consecutive.
15754 static SDValue lowerShufflePairAsUNPCKAndPermute(const SDLoc &DL, MVT VT,  in lowerShufflePairAsUNPCKAndPermute()  argument
15758   if (VT != MVT::v8f32 && VT != MVT::v8i32 && VT != MVT::v16i16 &&  in lowerShufflePairAsUNPCKAndPermute()
15759       VT != MVT::v32i8)  in lowerShufflePairAsUNPCKAndPermute()
15774   int NumElts = VT.getVectorNumElements();  in lowerShufflePairAsUNPCKAndPermute()
15784   for (SDNode *User : V1->uses())  in lowerShufflePairAsUNPCKAndPermute()
15785     if (User->getOpcode() == ISD::VECTOR_SHUFFLE && User->getOperand(0) == V1 &&  in lowerShufflePairAsUNPCKAndPermute()
15786         User->getOperand(1) == V2)  in lowerShufflePairAsUNPCKAndPermute()
15791   // Find out which half of the 512-bit shuffles is each smaller shuffle  in lowerShufflePairAsUNPCKAndPermute()
15796   if (IsInterleavingPattern(SVN1->getMask(), 0, NumElts) &&  in lowerShufflePairAsUNPCKAndPermute()
15797       IsInterleavingPattern(SVN2->getMask(), FirstQtr, ThirdQtr)) {  in lowerShufflePairAsUNPCKAndPermute()
15800   } else if (IsInterleavingPattern(SVN1->getMask(), FirstQtr, ThirdQtr) &&  in lowerShufflePairAsUNPCKAndPermute()
15801              IsInterleavingPattern(SVN2->getMask(), 0, NumElts)) {  in lowerShufflePairAsUNPCKAndPermute()
15809   SDValue Unpckl = DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);  in lowerShufflePairAsUNPCKAndPermute()
15810   SDValue Unpckh = DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);  in lowerShufflePairAsUNPCKAndPermute()
15811   SDValue Perm1 = DAG.getNode(X86ISD::VPERM2X128, DL, VT, Unpckl, Unpckh,  in lowerShufflePairAsUNPCKAndPermute()
15813   SDValue Perm2 = DAG.getNode(X86ISD::VPERM2X128, DL, VT, Unpckl, Unpckh,  in lowerShufflePairAsUNPCKAndPermute()
15823 /// Handle lowering of 4-lane 64-bit floating point shuffles.
15825 /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
15850       // Non-half-crossing single input shuffles can be lowered with an  in lowerV4F64Shuffle()
15863     // Try to create an in-lane repeating shuffle mask and then shuffle the  in lowerV4F64Shuffle()
15869     // Try to permute the lanes and then use a per-lane permute.  in lowerV4F64Shuffle()
15911   // Try to create an in-lane repeating shuffle mask and then shuffle the  in lowerV4F64Shuffle()
15917   // Try to simplify this by merging 128-bit lanes to enable a lane-based  in lowerV4F64Shuffle()
15943 /// Handle lowering of 4-lane 64-bit integer shuffles.
15977     // When the shuffle is mirrored between the 128-bit lanes of the unit, we  in lowerV4I64Shuffle()
16031   // Try to create an in-lane repeating shuffle mask and then shuffle the  in lowerV4I64Shuffle()
16042   // Try to simplify this by merging 128-bit lanes to enable a lane-based  in lowerV4I64Shuffle()
16056 /// Handle lowering of 8-lane 32-bit floating point shuffles.
16058 /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
16090   // If the shuffle mask is repeated in each 128-bit lane, we have many more  in lowerV8F32Shuffle()
16116   // Try to create an in-lane repeating shuffle mask and then shuffle the  in lowerV8F32Shuffle()
16123   // two 128-bit lanes use the variable mask to VPERMILPS.  in lowerV8F32Shuffle()
16138   // Try to simplify this by merging 128-bit lanes to enable a lane-based  in lowerV8F32Shuffle()
16160   // For non-AVX512 if the Mask is of 16bit elements in lane then try to split  in lowerV8F32Shuffle()
16178 /// Handle lowering of 8-lane 32-bit integer shuffles.
16207   // For non-AVX512 if the Mask is of 16bit elements in lane then try to split  in lowerV8I32Shuffle()
16236   // If the shuffle mask is repeated in each 128-bit lane we can use more  in lowerV8I32Shuffle()
16237   // efficient instructions that mirror the shuffles across the two 128-bit  in lowerV8I32Shuffle()
16280   // Try to create an in-lane repeating shuffle mask and then shuffle the  in lowerV8I32Shuffle()
16287     // Try to produce a fixed cross-128-bit lane permute followed by unpack  in lowerV8I32Shuffle()
16293     // generate a cross-lane VPERMD instruction.  in lowerV8I32Shuffle()
16309   // Try to simplify this by merging 128-bit lanes to enable a lane-based  in lowerV8I32Shuffle()
16320 /// Handle lowering of 16-lane 16-bit integer shuffles.
16374   // Try to create an in-lane repeating shuffle mask and then shuffle the  in lowerV16I16Shuffle()
16386     // Try to produce a fixed cross-128-bit lane permute followed by unpack  in lowerV16I16Shuffle()
16391     // There are no generalized cross-lane shuffle operations available on i16  in lowerV16I16Shuffle()
16404       // As this is a single-input shuffle, the repeated mask should be  in lowerV16I16Shuffle()
16416   // AVX512BW can lower to VPERMW (non-VLX will pad to v32i16).  in lowerV16I16Shuffle()
16420   // Try to simplify this by merging 128-bit lanes to enable a lane-based  in lowerV16I16Shuffle()
16426   // Try to permute the lanes and then use a per-lane permute.  in lowerV16I16Shuffle()
16443 /// Handle lowering of 32-lane 8-bit integer shuffles.
16503   // Try to create an in-lane repeating shuffle mask and then shuffle the  in lowerV32I8Shuffle()
16509   // There are no generalized cross-lane shuffle operations available on i8  in lowerV32I8Shuffle()
16512     // Try to produce a fixed cross-128-bit lane permute followed by unpack  in lowerV32I8Shuffle()
16529   // AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8).  in lowerV32I8Shuffle()
16533   // Try to simplify this by merging 128-bit lanes to enable a lane-based  in lowerV32I8Shuffle()
16539   // Try to permute the lanes and then use a per-lane permute.  in lowerV32I8Shuffle()
16564 /// High-level routine to lower various 256-bit x86 vector shuffles.
16566 /// This routine either breaks down the specific type of a 256-bit x86 vector
16567 /// shuffle or splits it into two 128-bit shuffles and fuses the results back
16569 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,  in lower256BitShuffle()  argument
16575   int NumElts = VT.getVectorNumElements();  in lower256BitShuffle()
16580             DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))  in lower256BitShuffle()
16585           lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))  in lower256BitShuffle()
16588   // There is a really nice hard cut-over between AVX1 and AVX2 that means we  in lower256BitShuffle()
16590   // querying in the per-vector-type lowering routines. With AVX1 we have  in lower256BitShuffle()
16591   // essentially *zero* ability to manipulate a 256-bit vector with integer  in lower256BitShuffle()
16594   if (VT.isInteger() && !Subtarget.hasAVX2()) {  in lower256BitShuffle()
16595     int ElementBits = VT.getScalarSizeInBits();  in lower256BitShuffle()
16598       // for masking/blending then decompose into 128-bit vectors.  in lower256BitShuffle()
16599       if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,  in lower256BitShuffle()
16602       if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))  in lower256BitShuffle()
16604       return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false);  in lower256BitShuffle()
16608                                 VT.getVectorNumElements());  in lower256BitShuffle()
16611     return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));  in lower256BitShuffle()
16614   if (VT == MVT::v16f16 || VT == MVT::v16bf16) {  in lower256BitShuffle()
16617     return DAG.getBitcast(VT,  in lower256BitShuffle()
16621   switch (VT.SimpleTy) {  in lower256BitShuffle()
16636     llvm_unreachable("Not a valid 256-bit x86 vector type!");  in lower256BitShuffle()
16640 /// Try to lower a vector shuffle as 128-bit shuffles.
16641 static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,  in lowerV4X128Shuffle()  argument
16645   assert(VT.getScalarSizeInBits() == 64 &&  in lowerV4X128Shuffle()
16650   assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");  in lowerV4X128Shuffle()
16652   // TODO - use Zeroable like we do for lowerV2X128Shuffle?  in lowerV4X128Shuffle()
16662     MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);  in lowerV4X128Shuffle()
16665     return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,  in lowerV4X128Shuffle()
16666                        getZeroVector(VT, Subtarget, DAG, DL), LoV,  in lowerV4X128Shuffle()
16670   // Check for patterns which can be matched with a single insert of a 256-bit  in lowerV4X128Shuffle()
16675     MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);  in lowerV4X128Shuffle()
16679     return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec,  in lowerV4X128Shuffle()
16683   // See if this is an insertion of the lower 128-bits of V2 into V1.  in lowerV4X128Shuffle()
16685   int V2Index = -1;  in lowerV4X128Shuffle()
16687     assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");  in lowerV4X128Shuffle()
16698       // Make sure we only have a single V2 index and it's the lowest 128-bits.  in lowerV4X128Shuffle()
16707     MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);  in lowerV4X128Shuffle()
16713   // See if we can widen to a 256-bit lane shuffle, we're going to lose 128-lane  in lowerV4X128Shuffle()
16724   SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};  in lowerV4X128Shuffle()
16725   int PermMask[4] = {-1, -1, -1, -1};  in lowerV4X128Shuffle()
16728     assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");  in lowerV4X128Shuffle()
16742   return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],  in lowerV4X128Shuffle()
16746 /// Handle lowering of 8-lane 64-bit floating point shuffles.
16761       // Non-half-crossing single input shuffles can be lowered with an  in lowerV8F64Shuffle()
16800 /// Handle lowering of 16-lane 32-bit floating point shuffles.
16809   // If the shuffle mask is repeated in each 128-bit lane, we have many more  in lowerV16F32Shuffle()
16845   // Try to create an in-lane repeating shuffle mask and then shuffle the  in lowerV16F32Shuffle()
16852   // 128-bit lanes and don't lane cross, use variable mask VPERMILPS.  in lowerV16F32Shuffle()
16867 /// Handle lowering of 8-lane 64-bit integer shuffles.
16884     // When the shuffle is mirrored between the 128-bit lanes of the unit, we  in lowerV8I64Shuffle()
16886     // 128-bit lanes.  in lowerV8I64Shuffle()
16940 /// Handle lowering of 16-lane 32-bit integer shuffles.
16970   // If the shuffle mask is repeated in each 128-bit lane we can use more  in lowerV16I32Shuffle()
16971   // efficient instructions that mirror the shuffles across the four 128-bit  in lowerV16I32Shuffle()
17019   // Try to create an in-lane repeating shuffle mask and then shuffle the  in lowerV16I32Shuffle()
17037 /// Handle lowering of 32-lane 16-bit integer shuffles.
17045   assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");  in lowerV32I16Shuffle()
17082       // As this is a single-input shuffle, the repeated mask should be  in lowerV32I16Shuffle()
17101 /// Handle lowering of 64-lane 8-bit integer shuffles.
17109   assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");  in lowerV64I8Shuffle()
17153   // Try to create an in-lane repeating shuffle mask and then shuffle the  in lowerV64I8Shuffle()
17168     // Use PALIGNR+Permute if possible - permute might become PSHUFB but the  in lowerV64I8Shuffle()
17181   // Try to simplify this by merging 128-bit lanes to enable a lane-based  in lowerV64I8Shuffle()
17195 /// High-level routine to lower various 512-bit x86 vector shuffles.
17197 /// This routine either breaks down the specific type of a 512-bit x86 vector
17198 /// shuffle or splits it into two 256-bit shuffles and fuses the results back
17201                                   MVT VT, SDValue V1, SDValue V2,  in lower512BitShuffle()  argument
17206          "Cannot lower 512-bit vectors w/ basic ISA!");  in lower512BitShuffle()
17215             DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))  in lower512BitShuffle()
17220           lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))  in lower512BitShuffle()
17224   if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask,  in lower512BitShuffle()
17228   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) {  in lower512BitShuffle()
17231     if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,  in lower512BitShuffle()
17234     if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))  in lower512BitShuffle()
17237     return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false);  in lower512BitShuffle()
17240   if (VT == MVT::v32f16 || VT == MVT::v32bf16) {  in lower512BitShuffle()
17242       return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG,  in lower512BitShuffle()
17247     return DAG.getBitcast(VT,  in lower512BitShuffle()
17255   switch (VT.SimpleTy) {  in lower512BitShuffle()
17270     llvm_unreachable("Not a valid 512-bit x86 vector type!");  in lower512BitShuffle()
17275                                          MVT VT, SDValue V1, SDValue V2,  in lower1BitShuffleAsKSHIFTR()  argument
17282   int ShiftAmt = -1;  in lower1BitShuffleAsKSHIFTR()
17291     // The first non-undef element determines our shift amount.  in lower1BitShuffleAsKSHIFTR()
17293       ShiftAmt = M - i;  in lower1BitShuffleAsKSHIFTR()
17298     // All non-undef elements must shift by the same amount.  in lower1BitShuffleAsKSHIFTR()
17299     if (ShiftAmt != M - i)  in lower1BitShuffleAsKSHIFTR()
17308   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,  in lower1BitShuffleAsKSHIFTR()
17313 // Returns the shift amount if possible or -1 if not. This is a simplified
17321       if (!Zeroable[j + (Left ? 0 : (Size - Shift))])  in match1BitShuffleAsKSHIFT()
17330     unsigned Len = Size - Shift;  in match1BitShuffleAsKSHIFT()
17341   return -1;  in match1BitShuffleAsKSHIFT()
17346 // There is no dedicated instruction on AVX-512 that shuffles the masks.
17347 // The only way to shuffle bits is to sign-extend the mask vector to SIMD
17350                                 MVT VT, SDValue V1, SDValue V2,  in lower1BitShuffle()  argument
17355          "Cannot lower 512-bit vectors w/o basic ISA!");  in lower1BitShuffle()
17362   int Src = -1;  in lower1BitShuffle()
17382   if ((int)Zeroable.countl_one() >= (NumElts - SubvecElts)) {  in lower1BitShuffle()
17388     return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,  in lower1BitShuffle()
17389                        DAG.getConstant(0, DL, VT),  in lower1BitShuffle()
17394   if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget,  in lower1BitShuffle()
17407       if (Opcode == X86ISD::KSHIFTR && WideVT != VT) {  in lower1BitShuffle()
17411                           DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8));  in lower1BitShuffle()
17413         ShiftAmt += WideElts - NumElts;  in lower1BitShuffle()
17418       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,  in lower1BitShuffle()
17427   if (NumV2Elements == 0 && V1.getOpcode() == ISD::SETCC && V1->hasOneUse()) {  in lower1BitShuffle()
17430     ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get();  in lower1BitShuffle()
17434           DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask),  in lower1BitShuffle()
17439   switch (VT.SimpleTy) {  in lower1BitShuffle()
17449     // Take 512-bit type, more shuffles on KNL. If we have VLX use a 256-bit  in lower1BitShuffle()
17454     // Take 512-bit type, unless we are avoiding 512-bit types and have the  in lower1BitShuffle()
17455     // 256-bit operation available.  in lower1BitShuffle()
17459     // Take 512-bit type, unless we are avoiding 512-bit types and have the  in lower1BitShuffle()
17460     // 256-bit operation available.  in lower1BitShuffle()
17478   int NumElems = VT.getVectorNumElements();  in lower1BitShuffle()
17481     return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT),  in lower1BitShuffle()
17484   return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);  in lower1BitShuffle()
17559   MVT VT = V.getSimpleValueType().getScalarType();  in canCombineAsMaskOperation()  local
17560   if ((VT == MVT::i16 || VT == MVT::i8) && !Subtarget.hasBWI())  in canCombineAsMaskOperation()
17564   // are preferable to blendw/blendvb/masked-mov.  in canCombineAsMaskOperation()
17565   if ((VT == MVT::i16 || VT == MVT::i8) &&  in canCombineAsMaskOperation()
17572     switch (V->getOpcode()) {  in canCombineAsMaskOperation()
17591     if (!V->hasOneUse())  in canCombineAsMaskOperation()
17609     /// Top-level lowering for x86 vector shuffles.
17619   ArrayRef<int> OrigMask = SVOp->getMask();  in lowerVECTOR_SHUFFLE()
17622   MVT VT = Op.getSimpleValueType();  in lowerVECTOR_SHUFFLE()  local
17623   int NumElements = VT.getVectorNumElements();  in lowerVECTOR_SHUFFLE()
17625   bool Is1BitVector = (VT.getVectorElementType() == MVT::i1);  in lowerVECTOR_SHUFFLE()
17627   assert((VT.getSizeInBits() != 64 || Is1BitVector) &&  in lowerVECTOR_SHUFFLE()
17633     return DAG.getUNDEF(VT);  in lowerVECTOR_SHUFFLE()
17641   // Check for non-undef masks pointing at an undef vector and make the masks  in lowerVECTOR_SHUFFLE()
17649         M = -1;  in lowerVECTOR_SHUFFLE()
17650     return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);  in lowerVECTOR_SHUFFLE()
17657                       [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&  in lowerVECTOR_SHUFFLE()
17660   // We actually see shuffles that are entirely re-arrangements of a set of  in lowerVECTOR_SHUFFLE()
17668     return getZeroVector(VT, Subtarget, DAG, DL);  in lowerVECTOR_SHUFFLE()
17675   // integers to handle flipping the low and high halves of AVX 256-bit vectors.  in lowerVECTOR_SHUFFLE()
17677   if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&  in lowerVECTOR_SHUFFLE()
17683     // TODO: Avoid lowering directly from this top-level function: make this  in lowerVECTOR_SHUFFLE()
17684     // a query (canLowerAsBroadcast) and defer lowering to the type-based calls.  in lowerVECTOR_SHUFFLE()
17685     if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask,  in lowerVECTOR_SHUFFLE()
17689     MVT NewEltVT = VT.isFloatingPoint()  in lowerVECTOR_SHUFFLE()
17690                        ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)  in lowerVECTOR_SHUFFLE()
17691                        : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);  in lowerVECTOR_SHUFFLE()
17698         // Modify the new Mask to take all zeros from the all-zero vector.  in lowerVECTOR_SHUFFLE()
17699         // Choose indices that are blend-friendly.  in lowerVECTOR_SHUFFLE()
17702                "V2's non-undef elements are used?!");  in lowerVECTOR_SHUFFLE()
17708         // Ensure all elements of V2 are zero - isBuildVectorAllZeros permits  in lowerVECTOR_SHUFFLE()
17716           VT, DAG.getVectorShuffle(NewVT, DL, V1, V2, WidenedMask));  in lowerVECTOR_SHUFFLE()
17726           Ops, Mask, VT.getSizeInBits(), DL, DAG, Subtarget))  in lowerVECTOR_SHUFFLE()
17727     return DAG.getBitcast(VT, HOp);  in lowerVECTOR_SHUFFLE()
17729   V1 = DAG.getBitcast(VT, Ops[0]);  in lowerVECTOR_SHUFFLE()
17730   V2 = DAG.getBitcast(VT, Ops[1]);  in lowerVECTOR_SHUFFLE()
17742   if (VT.is128BitVector())  in lowerVECTOR_SHUFFLE()
17743     return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);  in lowerVECTOR_SHUFFLE()
17745   if (VT.is256BitVector())  in lowerVECTOR_SHUFFLE()
17746     return lower256BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);  in lowerVECTOR_SHUFFLE()
17748   if (VT.is512BitVector())  in lowerVECTOR_SHUFFLE()
17749     return lower512BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);  in lowerVECTOR_SHUFFLE()
17752     return lower1BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG);  in lowerVECTOR_SHUFFLE()
17764   MVT VT = Op.getSimpleValueType();  in lowerVSELECTtoVectorShuffle()  local
17766   // Only non-legal VSELECTs reach this lowering, convert those into generic  in lowerVSELECTtoVectorShuffle()
17767   // shuffles and re-use the shuffle lowering path for blends.  in lowerVSELECTtoVectorShuffle()
17771       return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask);  in lowerVSELECTtoVectorShuffle()
17783   MVT VT = Op.getSimpleValueType();  in LowerVSELECT()  local
17784   if (isSoftF16(VT, Subtarget)) {  in LowerVSELECT()
17785     MVT NVT = VT.changeVectorElementTypeToInteger();  in LowerVSELECT()
17786     return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, dl, NVT, Cond,  in LowerVSELECT()
17798   // Try to lower this to a blend-style vector shuffle. This can handle all  in LowerVSELECT()
17804   // with patterns on the mask registers on AVX-512.  in LowerVSELECT()
17814   unsigned EltSize = VT.getScalarSizeInBits();  in LowerVSELECT()
17815   unsigned NumElts = VT.getVectorNumElements();  in LowerVSELECT()
17818   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())  in LowerVSELECT()
17821   // If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition  in LowerVSELECT()
17822   // into an i1 condition so that we can use the mask-based 512-bit blend  in LowerVSELECT()
17824   if (VT.getSizeInBits() == 512) {  in LowerVSELECT()
17831     return DAG.getSelect(dl, VT, Mask, LHS, RHS);  in LowerVSELECT()
17843     return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS);  in LowerVSELECT()
17851   if (EltSize < 32 && VT.is256BitVector() && !Subtarget.hasAVX2() &&  in LowerVSELECT()
17863   // VSELECT-matching blend, return Op, and but if we need to expand, return  in LowerVSELECT()
17865   switch (VT.SimpleTy) {  in LowerVSELECT()
17885     return DAG.getBitcast(VT, Select);  in LowerVSELECT()
17891   MVT VT = Op.getSimpleValueType();  in LowerEXTRACT_VECTOR_ELT_SSE4()  local
17900   if (VT.getSizeInBits() == 8) {  in LowerEXTRACT_VECTOR_ELT_SSE4()
17909     unsigned IdxVal = Idx->getAsZExtVal();  in LowerEXTRACT_VECTOR_ELT_SSE4()
17912     return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);  in LowerEXTRACT_VECTOR_ELT_SSE4()
17915   if (VT == MVT::f32) {  in LowerEXTRACT_VECTOR_ELT_SSE4()
17923     SDNode *User = *Op.getNode()->use_begin();  in LowerEXTRACT_VECTOR_ELT_SSE4()
17924     if ((User->getOpcode() != ISD::STORE || isNullConstant(Idx)) &&  in LowerEXTRACT_VECTOR_ELT_SSE4()
17925         (User->getOpcode() != ISD::BITCAST ||  in LowerEXTRACT_VECTOR_ELT_SSE4()
17926          User->getValueType(0) != MVT::i32))  in LowerEXTRACT_VECTOR_ELT_SSE4()
17933   if (VT == MVT::i32 || VT == MVT::i64)  in LowerEXTRACT_VECTOR_ELT_SSE4()
17940 /// AVX-512 feature.
17957     // Extending v8i1/v16i1 to 512-bit get better performance on KNL  in ExtractBitFromMaskVector()
17971   unsigned IdxVal = IdxC->getZExtValue();  in ExtractBitFromMaskVector()
17988   MVT VT = N->getSimpleValueType(0);  in getExtractedDemandedElts()  local
17989   unsigned NumElts = VT.getVectorNumElements();  in getExtractedDemandedElts()
17991   for (SDNode *User : N->uses()) {  in getExtractedDemandedElts()
17992     switch (User->getOpcode()) {  in getExtractedDemandedElts()
17996       if (!isa<ConstantSDNode>(User->getOperand(1))) {  in getExtractedDemandedElts()
18000       DemandedElts.setBit(User->getConstantOperandVal(1));  in getExtractedDemandedElts()
18003       if (!User->getValueType(0).isSimple() ||  in getExtractedDemandedElts()
18004           !User->getValueType(0).isVector()) {  in getExtractedDemandedElts()
18036     // (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer)  in LowerEXTRACT_VECTOR_ELT()
18044     // |  Uops  |  0  - DV  |  5  |  6  |  7  |    |  in LowerEXTRACT_VECTOR_ELT()
18045     // ---------------------------------------------  in LowerEXTRACT_VECTOR_ELT()
18056     // |Uops| 1 | 2 - D  |3 -  D  | 4 | 5 |  |  in LowerEXTRACT_VECTOR_ELT()
18057     // ---------------------------------------------------------  in LowerEXTRACT_VECTOR_ELT()
18058     // |2^  |   | 0.5    | 0.5    |1.0|   |CP| vmovaps xmmword ptr [rsp-0x18], xmm0  in LowerEXTRACT_VECTOR_ELT()
18059     // |1   |0.5|        |        |   |0.5|  | lea rax, ptr [rsp-0x18]  in LowerEXTRACT_VECTOR_ELT()
18066   unsigned IdxVal = IdxC->getZExtValue();  in LowerEXTRACT_VECTOR_ELT()
18068   // If this is a 256-bit vector result, first extract the 128-bit vector and  in LowerEXTRACT_VECTOR_ELT()
18069   // then extract the element from the 128-bit vector.  in LowerEXTRACT_VECTOR_ELT()
18071     // Get the 128-bit vector.  in LowerEXTRACT_VECTOR_ELT()
18080     IdxVal &= ElemsPerChunk - 1;  in LowerEXTRACT_VECTOR_ELT()
18087   MVT VT = Op.getSimpleValueType();  in LowerEXTRACT_VECTOR_ELT()  local
18089   if (VT == MVT::i16) {  in LowerEXTRACT_VECTOR_ELT()
18104     return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);  in LowerEXTRACT_VECTOR_ELT()
18111   // Only extract a single element from a v16i8 source - determine the common  in LowerEXTRACT_VECTOR_ELT()
18112   // DWORD/WORD that all extractions share, and extract the sub-byte.  in LowerEXTRACT_VECTOR_ELT()
18114   if (VT == MVT::i8) {  in LowerEXTRACT_VECTOR_ELT()
18118     // Extract either the lowest i32 or any i16, and extract the sub-byte.  in LowerEXTRACT_VECTOR_ELT()
18128       return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);  in LowerEXTRACT_VECTOR_ELT()
18140       return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);  in LowerEXTRACT_VECTOR_ELT()
18144   if (VT == MVT::f16 || VT.getSizeInBits() == 32) {  in LowerEXTRACT_VECTOR_ELT()
18149     SmallVector<int, 8> Mask(VecVT.getVectorNumElements(), -1);  in LowerEXTRACT_VECTOR_ELT()
18152     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,  in LowerEXTRACT_VECTOR_ELT()
18156   if (VT.getSizeInBits() == 64) {  in LowerEXTRACT_VECTOR_ELT()
18166     int Mask[2] = { 1, -1 };  in LowerEXTRACT_VECTOR_ELT()
18168     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,  in LowerEXTRACT_VECTOR_ELT()
18176 /// AVX-512 feature.
18197   // Copy into a k-register, extract to v1i1 and insert_subvector.  in InsertBitToMaskVector()
18204   MVT VT = Op.getSimpleValueType();  in LowerINSERT_VECTOR_ELT()  local
18205   MVT EltVT = VT.getVectorElementType();  in LowerINSERT_VECTOR_ELT()
18206   unsigned NumElts = VT.getVectorNumElements();  in LowerINSERT_VECTOR_ELT()
18219     MVT IVT = VT.changeVectorElementTypeToInteger();  in LowerINSERT_VECTOR_ELT()
18223     return DAG.getBitcast(VT, Res);  in LowerINSERT_VECTOR_ELT()
18229     // possible vector indices, and FP insertion has less gpr->simd traffic.  in LowerINSERT_VECTOR_ELT()
18242     SDValue EltSplat = DAG.getSplatBuildVector(VT, dl, N1);  in LowerINSERT_VECTOR_ELT()
18249     // inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0.  in LowerINSERT_VECTOR_ELT()
18254   if (N2C->getAPIntValue().uge(NumElts))  in LowerINSERT_VECTOR_ELT()
18256   uint64_t IdxVal = N2C->getZExtValue();  in LowerINSERT_VECTOR_ELT()
18259   bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);  in LowerINSERT_VECTOR_ELT()
18262     // Lower insertion of v16i8/v32i8/v64i16 -1 elts as an 'OR' blend.  in LowerINSERT_VECTOR_ELT()
18265         ((VT == MVT::v16i8 && !Subtarget.hasSSE41()) ||  in LowerINSERT_VECTOR_ELT()
18266          ((VT == MVT::v32i8 || VT == MVT::v16i16) && !Subtarget.hasInt256()))) {  in LowerINSERT_VECTOR_ELT()
18267       SDValue ZeroCst = DAG.getConstant(0, dl, VT.getScalarType());  in LowerINSERT_VECTOR_ELT()
18268       SDValue OnesCst = DAG.getAllOnesConstant(dl, VT.getScalarType());  in LowerINSERT_VECTOR_ELT()
18271       SDValue CstVector = DAG.getBuildVector(VT, dl, CstVectorElts);  in LowerINSERT_VECTOR_ELT()
18272       return DAG.getNode(ISD::OR, dl, VT, N0, CstVector);  in LowerINSERT_VECTOR_ELT()
18277         (EltSizeInBits >= 16 || (IsZeroElt && !VT.is128BitVector()))) {  in LowerINSERT_VECTOR_ELT()
18281       SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)  in LowerINSERT_VECTOR_ELT()
18282                                     : getOnesVector(VT, DAG, dl);  in LowerINSERT_VECTOR_ELT()
18283       return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);  in LowerINSERT_VECTOR_ELT()
18287   // If the vector is wider than 128 bits, extract the 128-bit subvector, insert  in LowerINSERT_VECTOR_ELT()
18289   if (VT.is256BitVector() || VT.is512BitVector()) {  in LowerINSERT_VECTOR_ELT()
18290     // With a 256-bit vector, we can insert into the zero element efficiently  in LowerINSERT_VECTOR_ELT()
18292     if (VT.is256BitVector() && IdxVal == 0) {  in LowerINSERT_VECTOR_ELT()
18295       // doing anyway after extracting to a 128-bit vector.  in LowerINSERT_VECTOR_ELT()
18298         SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);  in LowerINSERT_VECTOR_ELT()
18299         return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,  in LowerINSERT_VECTOR_ELT()
18306            "Vectors will always have power-of-two number of elements.");  in LowerINSERT_VECTOR_ELT()
18308     // If we are not inserting into the low 128-bit vector chunk,  in LowerINSERT_VECTOR_ELT()
18315       SDValue N1SplatVec = DAG.getSplatBuildVector(VT, dl, N1);  in LowerINSERT_VECTOR_ELT()
18319       return DAG.getVectorShuffle(VT, dl, N0, N1SplatVec, BlendMask);  in LowerINSERT_VECTOR_ELT()
18322     // Get the desired 128-bit vector chunk.  in LowerINSERT_VECTOR_ELT()
18327     unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);  in LowerINSERT_VECTOR_ELT()
18335   assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");  in LowerINSERT_VECTOR_ELT()
18341       N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);  in LowerINSERT_VECTOR_ELT()
18349       MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);  in LowerINSERT_VECTOR_ELT()
18352       return DAG.getBitcast(VT, N1);  in LowerINSERT_VECTOR_ELT()
18358   if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {  in LowerINSERT_VECTOR_ELT()
18360     if (VT == MVT::v8i16) {  in LowerINSERT_VECTOR_ELT()
18364       assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector");  in LowerINSERT_VECTOR_ELT()
18369     assert(N1.getValueType() != MVT::i32 && "Unexpected VT");  in LowerINSERT_VECTOR_ELT()
18372     return DAG.getNode(Opc, dl, VT, N0, N1, N2);  in LowerINSERT_VECTOR_ELT()
18388         // If this is an insertion of 32-bits into the low 32-bits of  in LowerINSERT_VECTOR_ELT()
18393         // generate insertps because blendps does not have a 32-bit memory  in LowerINSERT_VECTOR_ELT()
18396         return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1,  in LowerINSERT_VECTOR_ELT()
18401       return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1,  in LowerINSERT_VECTOR_ELT()
18423   // If this is a 256-bit vector result, first insert into a 128-bit  in LowerSCALAR_TO_VECTOR()
18424   // vector and then insert into the 256-bit vector.  in LowerSCALAR_TO_VECTOR()
18426     // Insert into a 128-bit vector.  in LowerSCALAR_TO_VECTOR()
18433     // Insert the 128-bit vector.  in LowerSCALAR_TO_VECTOR()
18485   // References to absolute symbols are never PC-relative.  in getGlobalWrapperKind()
18486   if (GV && GV->isAbsoluteSymbolRef())  in getGlobalWrapperKind()
18489   // The following OpFlags under RIP-rel PIC use RIP.  in getGlobalWrapperKind()
18518       CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag);  in LowerConstantPool()
18540   SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);  in LowerJumpTable()
18564   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();  in LowerBlockAddress()
18565   int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();  in LowerBlockAddress()
18591     GV = G->getGlobal();  in LowerGlobalOrExternal()
18592     Offset = G->getOffset();  in LowerGlobalOrExternal()
18595     ExternalSym = ES->getSymbol();  in LowerGlobalOrExternal()
18615     // Suppress the folding if Offset is negative: movl foo-1, %eax is not  in LowerGlobalOrExternal()
18648   // If there was a non-zero offset that we didn't fold, create an explicit  in LowerGlobalOrExternal()
18673     auto UI = TGA->use_begin();  in GetTLSADDR()
18675     if (UI != TGA->use_end())  in GetTLSADDR()
18676       return SDValue(*UI->use_begin()->use_begin(), 0);  in GetTLSADDR()
18678     TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),  in GetTLSADDR()
18679                                      GA->getOffset(), OperandFlags);  in GetTLSADDR()
18752   MFI->incNumLocalDynamicTLSAccesses();  in LowerToTLSLocalDynamicModel()
18774   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,  in LowerToTLSLocalDynamicModel()
18775                                            GA->getValueType(0),  in LowerToTLSLocalDynamicModel()
18776                                            GA->getOffset(), OperandFlags);  in LowerToTLSLocalDynamicModel()
18789   // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).  in LowerToTLSExecModel()
18798   // Most TLS accesses are not RIP relative, even on x86-64.  One exception is  in LowerToTLSExecModel()
18816   // or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)  in LowerToTLSExecModel()
18818       DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),  in LowerToTLSExecModel()
18819                                  GA->getOffset(), OperandFlags);  in LowerToTLSExecModel()
18846   const GlobalValue *GV = GA->getGlobal();  in LowerGlobalTLSAddress()
18887     SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,  in LowerGlobalTLSAddress()
18888                                                 GA->getValueType(0),  in LowerGlobalTLSAddress()
18889                                                 GA->getOffset(), OpFlag);  in LowerGlobalTLSAddress()
18932     // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or  in LowerGlobalTLSAddress()
18933     // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly  in LowerGlobalTLSAddress()
18949     if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) {  in LowerGlobalTLSAddress()
18971     SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,  in LowerGlobalTLSAddress()
18972                                              GA->getValueType(0),  in LowerGlobalTLSAddress()
18973                                              GA->getOffset(), X86II::MO_SECREL);  in LowerGlobalTLSAddress()
19000       // offset and returning `true` for TLS-desc currently duplicates both  in addressingModeSupportsTLS()
19001       // which is detrimental :-/  in addressingModeSupportsTLS()
19017 // Try to use a packed vector operation to handle i64 on 32-bit targets when
19027   bool IsStrict = Op->isStrictFPOpcode();  in LowerI64IntToFP_AVX512DQ()
19031   MVT VT = Op.getSimpleValueType();  in LowerI64IntToFP_AVX512DQ()  local
19034        (VT != MVT::f32 && VT != MVT::f64))  in LowerI64IntToFP_AVX512DQ()
19039   // Using 256-bit to ensure result is 128-bits for f32 case.  in LowerI64IntToFP_AVX512DQ()
19042   MVT VecVT = MVT::getVectorVT(VT, NumElts);  in LowerI64IntToFP_AVX512DQ()
19049     SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,  in LowerI64IntToFP_AVX512DQ()
19056   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,  in LowerI64IntToFP_AVX512DQ()
19060 // Try to use a packed vector operation to handle i64 on 32-bit targets.
19068   bool IsStrict = Op->isStrictFPOpcode();  in LowerI64IntToFP16()
19071   MVT VT = Op.getSimpleValueType();  in LowerI64IntToFP16()  local
19073   if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16)  in LowerI64IntToFP16()
19085     SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,  in LowerI64IntToFP16()
19092   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,  in LowerI64IntToFP16()
19120 /// round-trip between XMM and GPR.
19132   // See if we have a 128-bit vector cast op for this type of cast.  in vectorizeExtractedCast()
19141   // If we are extracting from a non-zero element, first shuffle the source  in vectorizeExtractedCast()
19144     SmallVector<int, 16> Mask(FromVT.getVectorNumElements(), -1);  in vectorizeExtractedCast()
19148   // If the source vector is wider than 128-bits, extract the low part. Do not  in vectorizeExtractedCast()
19153   // cast (extelt V, 0) --> extelt (cast (extract_subv V)), 0  in vectorizeExtractedCast()
19154   // cast (extelt V, C) --> extelt (cast (extract_subv (shuffle V, [C...]))), 0  in vectorizeExtractedCast()
19161 /// try to vectorize the cast ops. This will avoid an expensive round-trip
19168   MVT VT = CastToFP.getSimpleValueType();  in lowerFPToIntToFP()  local
19169   if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector())  in lowerFPToIntToFP()
19178   // See if we have 128-bit vector cast instructions for this type of cast.  in lowerFPToIntToFP()
19180   if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||  in lowerFPToIntToFP()
19186   unsigned VTSize = VT.getSizeInBits();  in lowerFPToIntToFP()
19189   MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);  in lowerFPToIntToFP()
19191   // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.  in lowerFPToIntToFP()
19197   // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0  in lowerFPToIntToFP()
19199   // We are not defining the high elements (for example, zero them) because  in lowerFPToIntToFP()
19207   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);  in lowerFPToIntToFP()
19213   bool IsStrict = Op->isStrictFPOpcode();  in lowerINT_TO_FP_vXi64()
19214   MVT VT = Op->getSimpleValueType(0);  in lowerINT_TO_FP_vXi64()  local
19215   SDValue Src = Op->getOperand(IsStrict ? 1 : 0);  in lowerINT_TO_FP_vXi64()
19224     // With AVX512DQ, but not VLX we need to widen to get a 512-bit result type.  in lowerINT_TO_FP_vXi64()
19225     assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) &&  in lowerINT_TO_FP_vXi64()
19226            "Unexpected VT!");  in lowerINT_TO_FP_vXi64()
19227     MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;  in lowerINT_TO_FP_vXi64()
19238                         {Op->getOperand(0), Src});  in lowerINT_TO_FP_vXi64()
19244     Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,  in lowerINT_TO_FP_vXi64()
19252   bool IsSigned = Op->getOpcode() == ISD::SINT_TO_FP ||  in lowerINT_TO_FP_vXi64()
19253                   Op->getOpcode() == ISD::STRICT_SINT_TO_FP;  in lowerINT_TO_FP_vXi64()
19254   if (VT != MVT::v4f32 || IsSigned)  in lowerINT_TO_FP_vXi64()
19278   SDValue SignCvt = DAG.getBuildVector(VT, DL, SignCvts);  in lowerINT_TO_FP_vXi64()
19301   bool IsStrict = Op->isStrictFPOpcode();  in promoteXINT_TO_FP()
19303   SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();  in promoteXINT_TO_FP()
19304   MVT VT = Op.getSimpleValueType();  in promoteXINT_TO_FP()  local
19305   MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;  in promoteXINT_TO_FP()
19310         ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},  in promoteXINT_TO_FP()
19314   return DAG.getNode(ISD::FP_ROUND, dl, VT,  in promoteXINT_TO_FP()
19318 static bool isLegalConversion(MVT VT, bool IsSigned,  in isLegalConversion()  argument
19320   if (VT == MVT::v4i32 && Subtarget.hasSSE2() && IsSigned)  in isLegalConversion()
19322   if (VT == MVT::v8i32 && Subtarget.hasAVX() && IsSigned)  in isLegalConversion()
19324   if (Subtarget.hasVLX() && (VT == MVT::v4i32 || VT == MVT::v8i32))  in isLegalConversion()
19327     if (VT == MVT::v16i32)  in isLegalConversion()
19329     if (VT == MVT::v8i64 && Subtarget.hasDQI())  in isLegalConversion()
19333       (VT == MVT::v2i64 || VT == MVT::v4i64))  in isLegalConversion()
19340   bool IsStrict = Op->isStrictFPOpcode();  in LowerSINT_TO_FP()
19343   SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();  in LowerSINT_TO_FP()
19345   MVT VT = Op.getSimpleValueType();  in LowerSINT_TO_FP()  local
19348   if (isSoftF16(VT, Subtarget))  in LowerSINT_TO_FP()
19363     if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {  in LowerSINT_TO_FP()
19368             X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},  in LowerSINT_TO_FP()
19371       return DAG.getNode(X86ISD::CVTSI2P, dl, VT,  in LowerSINT_TO_FP()
19384   bool UseSSEReg = isScalarFPTypeInSSEReg(VT);  in LowerSINT_TO_FP()
19399   if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {  in LowerSINT_TO_FP()
19402       return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},  in LowerSINT_TO_FP()
19405     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);  in LowerSINT_TO_FP()
19408   if (VT == MVT::f128 || !Subtarget.hasX87())  in LowerSINT_TO_FP()
19413     // Bitcasting to f64 here allows us to do a single 64-bit store from  in LowerSINT_TO_FP()
19415     // with two 32-bit stores.  in LowerSINT_TO_FP()
19428       BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Alignment, DAG);  in LowerSINT_TO_FP()
19478 /// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
19487 /// 64-bit unsigned integer to double expansion.
19491   // We can't use this algorithm for strict fp. It produces -0.0 instead of +0.0  in LowerUINT_TO_FP_i64()
19493   // fall back to Expand for when i64 or is legal or use FILD in 32-bit mode.  in LowerUINT_TO_FP_i64()
19494   assert(!Op->isStrictFPOpcode() && "Expected non-strict uint_to_fp!");  in LowerUINT_TO_FP_i64()
19526   // Load the 64-bit value into an XMM register.  in LowerUINT_TO_FP_i64()
19539   // TODO: Are there any fast-math-flags to propagate here?  in LowerUINT_TO_FP_i64()
19547     SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});  in LowerUINT_TO_FP_i64()
19555 /// 32-bit unsigned integer to float expansion.
19559   unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;  in LowerUINT_TO_FP_i32()
19564   // Load the 32-bit value into an XMM register.  in LowerUINT_TO_FP_i32()
19581   if (Op.getNode()->isStrictFPOpcode()) {  in LowerUINT_TO_FP_i32()
19583     // TODO: Are there any fast-math-flags to propagate here?  in LowerUINT_TO_FP_i32()
19599   // TODO: Are there any fast-math-flags to propagate here?  in LowerUINT_TO_FP_i32()
19612   bool IsStrict = Op->isStrictFPOpcode();  in lowerUINT_TO_FP_v2i32()
19625       SDValue Res = DAG.getNode(Op->getOpcode(), DL, {MVT::v4f64, MVT::Other},  in lowerUINT_TO_FP_v2i32()
19644   // since double has 52-bits of mantissa. Then subtract 2^52 in floating  in lowerUINT_TO_FP_v2i32()
19662   bool IsStrict = Op->isStrictFPOpcode();  in lowerUINT_TO_FP_vXi32()
19663   SDValue V = Op->getOperand(IsStrict ? 1 : 0);  in lowerUINT_TO_FP_vXi32()
19669     // With AVX512, but not VLX we need to widen to get a 512-bit result type.  in lowerUINT_TO_FP_vXi32()
19671     MVT VT = Op->getSimpleValueType(0);  in lowerUINT_TO_FP_vXi32()  local
19673     // v8i32->v8f64 is legal with AVX512 so just return it.  in lowerUINT_TO_FP_vXi32()
19674     if (VT == MVT::v8f64)  in lowerUINT_TO_FP_vXi32()
19677     assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) &&  in lowerUINT_TO_FP_vXi32()
19678            "Unexpected VT!");  in lowerUINT_TO_FP_vXi32()
19679     MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;  in lowerUINT_TO_FP_vXi32()
19680     MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;  in lowerUINT_TO_FP_vXi32()
19690                         {Op->getOperand(0), V});  in lowerUINT_TO_FP_vXi32()
19696     Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,  in lowerUINT_TO_FP_vXi32()
19705       Op->getSimpleValueType(0) == MVT::v4f64) {  in lowerUINT_TO_FP_vXi32()
19738   //     float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);  in lowerUINT_TO_FP_vXi32()
19745   if (VecFloatVT != Op->getSimpleValueType(0))  in lowerUINT_TO_FP_vXi32()
19749   // - The vector of constants:  in lowerUINT_TO_FP_vXi32()
19750   // -- 0x4b000000  in lowerUINT_TO_FP_vXi32()
19751   // -- 0x53000000  in lowerUINT_TO_FP_vXi32()
19752   // - A shift:  in lowerUINT_TO_FP_vXi32()
19753   // -- v >> 16  in lowerUINT_TO_FP_vXi32()
19764   SDValue Low, High;  in lowerUINT_TO_FP_vXi32()  local
19778     // High will be bitcasted right away, so do not bother bitcasting back to  in lowerUINT_TO_FP_vXi32()
19780     High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,  in lowerUINT_TO_FP_vXi32()
19789     High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh);  in lowerUINT_TO_FP_vXi32()
19796   //     float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);  in lowerUINT_TO_FP_vXi32()
19798   // constant, we avoid reassociation in MachineCombiner when unsafe-fp-math is  in lowerUINT_TO_FP_vXi32()
19800   SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);  in lowerUINT_TO_FP_vXi32()
19801   // TODO: Are there any fast-math-flags to propagate here?  in lowerUINT_TO_FP_vXi32()
19819   unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;  in lowerUINT_TO_FP_vec()
19839   bool IsStrict = Op->isStrictFPOpcode();  in LowerUINT_TO_FP()
19845   MVT DstVT = Op->getSimpleValueType(0);  in LowerUINT_TO_FP()
19869     // using VCVTUSI2SS/SD.  Same for i64 in 64-bit mode.  in LowerUINT_TO_FP()
19873   // Promote i32 to i64 and use a signed conversion on 64-bit targets.  in LowerUINT_TO_FP()
19887   // The transform for i64->f64 isn't correct for 0 when rounding to negative  in LowerUINT_TO_FP()
19888   // infinity. It produces -0.0, so disable under strictfp.  in LowerUINT_TO_FP()
19892   // The transform for i32->f64/f32 isn't correct for 0 when rounding to  in LowerUINT_TO_FP()
19901   // Make a 64-bit buffer, and use it to build an FILD.  in LowerUINT_TO_FP()
19903   int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();  in LowerUINT_TO_FP()
19924     // Bitcasting to f64 here allows us to do a single 64-bit store from  in LowerUINT_TO_FP()
19926     // with two 32-bit stores.  in LowerUINT_TO_FP()
19950   Align CPAlignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlign();  in LowerUINT_TO_FP()
19965   // TODO: Are there any fast-math-flags to propagate here?  in LowerUINT_TO_FP()
19999   bool IsStrict = Op->isStrictFPOpcode();  in FP_TO_INTHelper()
20015   // used for the 32-bit subtarget, but also for f80 on a 64-bit target.  in FP_TO_INTHelper()
20021     // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.  in FP_TO_INTHelper()
20031   // We lower FP->int64 into FISTP64 followed by a load from a temporary  in FP_TO_INTHelper()
20052     //  FistSrc = (Value - FltOfs);  in FP_TO_INTHelper()
20053     //  Fist-to-mem64 FistSrc  in FP_TO_INTHelper()
20054     //  Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent  in FP_TO_INTHelper()
20055     //  to XOR'ing the high 32 bits with Adjust.  in FP_TO_INTHelper()
20116   // FIXME This causes a redundant load/store if the SSE-class value is already  in FP_TO_INTHelper()
20152   MVT VT = Op.getSimpleValueType();  in LowerAVXExtend()  local
20157   assert(VT.isVector() && InVT.isVector() && "Expected vector type");  in LowerAVXExtend()
20160   assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&  in LowerAVXExtend()
20162   assert((VT.getVectorElementType() == MVT::i16 ||  in LowerAVXExtend()
20163           VT.getVectorElementType() == MVT::i32 ||  in LowerAVXExtend()
20164           VT.getVectorElementType() == MVT::i64) &&  in LowerAVXExtend()
20173   if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {  in LowerAVXExtend()
20174     assert(InVT == MVT::v32i8 && "Unexpected VT!");  in LowerAVXExtend()
20183   //   v8i16 -> v8i32  in LowerAVXExtend()
20184   //   Use vpmovzwd for 4 lower elements  v8i16 -> v4i32.  in LowerAVXExtend()
20185   //   Use vpunpckhwd for 4 upper elements  v8i16 -> v4i32.  in LowerAVXExtend()
20188   //   v4i32 -> v4i64  in LowerAVXExtend()
20189   //   Use vpmovzdq for 4 lower elements  v4i32 -> v2i64.  in LowerAVXExtend()
20190   //   Use vpunpckhdq for 4 upper elements  v4i32 -> v2i64.  in LowerAVXExtend()
20193   MVT HalfVT = VT.getHalfNumVectorElementsVT();  in LowerAVXExtend()
20196   // Short-circuit if we can determine that each 128-bit half is the same value.  in LowerAVXExtend()
20199     if (hasIdenticalHalvesShuffleMask(Shuf->getMask()))  in LowerAVXExtend()
20200       return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpLo);  in LowerAVXExtend()
20208   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);  in LowerAVXExtend()
20212 static SDValue SplitAndExtendv16i1(unsigned ExtOpc, MVT VT, SDValue In,  in SplitAndExtendv16i1()  argument
20214   assert((VT == MVT::v16i8 || VT == MVT::v16i16) && "Unexpected VT.");  in SplitAndExtendv16i1()
20222   return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);  in SplitAndExtendv16i1()
20228   MVT VT = Op->getSimpleValueType(0);  in LowerZERO_EXTEND_Mask()  local
20229   SDValue In = Op->getOperand(0);  in LowerZERO_EXTEND_Mask()
20232   unsigned NumElts = VT.getVectorNumElements();  in LowerZERO_EXTEND_Mask()
20236   if (VT.getVectorElementType() != MVT::i8) {  in LowerZERO_EXTEND_Mask()
20237     SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In);  in LowerZERO_EXTEND_Mask()
20238     return DAG.getNode(ISD::SRL, DL, VT, Extend,  in LowerZERO_EXTEND_Mask()
20239                        DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));  in LowerZERO_EXTEND_Mask()
20242   // Extend VT if BWI is not supported.  in LowerZERO_EXTEND_Mask()
20243   MVT ExtVT = VT;  in LowerZERO_EXTEND_Mask()
20247       return SplitAndExtendv16i1(ISD::ZERO_EXTEND, VT, In, DL, DAG);  in LowerZERO_EXTEND_Mask()
20252   // Widen to 512-bits if VLX is not supported.  in LowerZERO_EXTEND_Mask()
20269   if (VT != ExtVT) {  in LowerZERO_EXTEND_Mask()
20274   // Extract back to 128/256-bit if we widened.  in LowerZERO_EXTEND_Mask()
20275   if (WideVT != VT)  in LowerZERO_EXTEND_Mask()
20276     SelectedVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SelectedVal,  in LowerZERO_EXTEND_Mask()
20298 /// AVX2 (Int256) sub-targets require extra shuffling as the PACK*S operates
20299 /// within each 128-bit lane.
20305   assert(DstVT.isVector() && "VT not a vector?");  in truncateVectorWithPACK()
20331   // vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB.  in truncateVectorWithPACK()
20339   // Sub-128-bit truncation - widen to 128-bit src and pack in the lower half.  in truncateVectorWithPACK()
20340   // On pre-AVX512, pack the src in both halves to help value tracking.  in truncateVectorWithPACK()
20369   // 256bit -> 128bit truncate - PACK lower/upper 128-bit subvectors.  in truncateVectorWithPACK()
20377   // AVX2: 512bit -> 256bit truncate - PACK lower/upper 256-bit subvectors.  in truncateVectorWithPACK()
20378   // AVX2: 512bit -> 128bit truncate - PACK(PACK, PACK).  in truncateVectorWithPACK()
20384     // 256-bit PACK(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)),  in truncateVectorWithPACK()
20395     // If 512bit -> 128bit truncate another stage.  in truncateVectorWithPACK()
20401   assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater");  in truncateVectorWithPACK()
20404     // Avoid CONCAT_VECTORS on sub-128bit nodes as these can fail after  in truncateVectorWithPACK()
20419 /// e.g. trunc <8 x i32> X to <8 x i16> -->
20420 /// MaskX = X & 0xffff (clear high bits to prevent saturation)
20464   // Truncation from 128-bit to vXi32 can be better handled with PSHUFD.  in matchTruncateWithPACK()
20465   // Truncation to sub-64-bit vXi16 can be better handled with PSHUFD/PSHUFLW.  in matchTruncateWithPACK()
20472   // Prefer to lower v4i64 -> v4i32 as a shuffle unless we can cheaply  in matchTruncateWithPACK()
20489   // Pre-SSE41 we can only use PACKUSWB.  in matchTruncateWithPACK()
20491   if ((NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) {  in matchTruncateWithPACK()
20496   // Truncate with PACKSS if we are truncating a vector with sign-bits  in matchTruncateWithPACK()
20501   // Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with  in matchTruncateWithPACK()
20509   unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits;  in matchTruncateWithPACK()
20518   if (In.getOpcode() == ISD::SRL && In->hasOneUse())  in matchTruncateWithPACK()
20522         return DAG.getNode(ISD::SRA, DL, SrcVT, In->ops());  in matchTruncateWithPACK()
20529 /// This function lowers a vector truncation of 'extended sign-bits' or
20530 /// 'extended zero-bits' values.
20598   // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS  in LowerTruncateVecPack()
20599   // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to  in LowerTruncateVecPack()
20607   // Special case vXi64 -> vXi16, shuffle to vXi32 and then use PACKSS.  in LowerTruncateVecPack()
20620   MVT VT = Op.getSimpleValueType();  in LowerTruncateVecI1()  local
20623   assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type.");  in LowerTruncateVecI1()
20626   unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;  in LowerTruncateVecI1()
20639       return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),  in LowerTruncateVecI1()
20650     // trying to avoid 512-bit vectors. If we are avoiding 512-bit vectors  in LowerTruncateVecI1()
20654     // directly, so we need to shuffle high elements to low and use  in LowerTruncateVecI1()
20662             {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});  in LowerTruncateVecI1()
20665         assert(InVT == MVT::v16i16 && "Unexpected VT!");  in LowerTruncateVecI1()
20673       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);  in LowerTruncateVecI1()
20675     // We either have 8 elements or we're allowed to use 512-bit vectors.  in LowerTruncateVecI1()
20682     ShiftInx = InVT.getScalarSizeInBits() - 1;  in LowerTruncateVecI1()
20692     return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);  in LowerTruncateVecI1()
20693   return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE);  in LowerTruncateVecI1()
20698   MVT VT = Op.getSimpleValueType();  in LowerTRUNCATE()  local
20701   assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&  in LowerTRUNCATE()
20706   if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(InVT)) {  in LowerTRUNCATE()
20708         VT.is128BitVector() && Subtarget.hasAVX512()) {  in LowerTRUNCATE()
20712       // truncate the remainder. We'd rather produce two 64-bit results and  in LowerTRUNCATE()
20718       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);  in LowerTRUNCATE()
20722       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);  in LowerTRUNCATE()
20725     // Pre-AVX512 (or prefer-256bit) see if we can make use of PACKSS/PACKUS.  in LowerTRUNCATE()
20727         (InVT.is512BitVector() && VT.is256BitVector()))  in LowerTRUNCATE()
20729               LowerTruncateVecPackWithSignBits(VT, In, DL, Subtarget, DAG))  in LowerTRUNCATE()
20732     // Pre-AVX512 see if we can make use of PACKSS/PACKUS.  in LowerTRUNCATE()
20734       return LowerTruncateVecPack(VT, In, DL, Subtarget, DAG);  in LowerTRUNCATE()
20740   if (VT.getVectorElementType() == MVT::i1)  in LowerTRUNCATE()
20747             LowerTruncateVecPackWithSignBits(VT, In, DL, Subtarget, DAG))  in LowerTRUNCATE()
20753       assert(VT == MVT::v32i8 && "Unexpected VT!");  in LowerTRUNCATE()
20759     // asked to avoid 512-bit vectors. The actual promotion to v16i32 will be  in LowerTRUNCATE()
20767   assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");  in LowerTRUNCATE()
20769   if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {  in LowerTRUNCATE()
20770     // On AVX2, v4i64 -> v4i32 becomes VPERMD.  in LowerTRUNCATE()
20772       static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};  in LowerTRUNCATE()
20775       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,  in LowerTRUNCATE()
20784     return DAG.getVectorShuffle(VT, DL, DAG.getBitcast(MVT::v4i32, OpLo),  in LowerTRUNCATE()
20788   if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {  in LowerTRUNCATE()
20789     // On AVX2, v8i32 -> v8i16 becomes PSHUFB.  in LowerTRUNCATE()
20793                                       -1, -1, -1, -1, -1, -1, -1, -1,  in LowerTRUNCATE()
20795                                       -1, -1, -1, -1, -1, -1, -1, -1 };  in LowerTRUNCATE()
20800       static const int ShufMask2[] = {0, 2, -1, -1};  in LowerTRUNCATE()
20808                ? truncateVectorWithPACKUS(VT, In, DL, Subtarget, DAG)  in LowerTRUNCATE()
20809                : truncateVectorWithPACKSS(VT, In, DL, Subtarget, DAG);  in LowerTRUNCATE()
20812   if (VT == MVT::v16i8 && InVT == MVT::v16i16)  in LowerTRUNCATE()
20813     return truncateVectorWithPACKUS(VT, In, DL, Subtarget, DAG);  in LowerTRUNCATE()
20815   llvm_unreachable("All 256->128 cases should have been handled above!");  in LowerTRUNCATE()
20820 static SDValue expandFP_TO_UINT_SSE(MVT VT, SDValue Src, const SDLoc &dl,  in expandFP_TO_UINT_SSE()  argument
20824   unsigned DstBits = VT.getScalarSizeInBits();  in expandFP_TO_UINT_SSE()
20825   assert(DstBits == 32 && "expandFP_TO_UINT_SSE - only vXi32 supported");  in expandFP_TO_UINT_SSE()
20828   // 2^31-1 ("Small") and from 2^31 to 2^32-1 ("Big").  in expandFP_TO_UINT_SSE()
20829   SDValue Small = DAG.getNode(X86ISD::CVTTP2SI, dl, VT, Src);  in expandFP_TO_UINT_SSE()
20831       DAG.getNode(X86ISD::CVTTP2SI, dl, VT,  in expandFP_TO_UINT_SSE()
20842   // AVX1 can't use the signsplat masking for 256-bit vectors - we have to  in expandFP_TO_UINT_SSE()
20844   if (VT == MVT::v8i32 && !Subtarget.hasAVX2()) {  in expandFP_TO_UINT_SSE()
20845     SDValue Overflow = DAG.getNode(ISD::OR, dl, VT, Small, Big);  in expandFP_TO_UINT_SSE()
20846     return DAG.getNode(X86ISD::BLENDV, dl, VT, Small, Overflow, Small);  in expandFP_TO_UINT_SSE()
20850       DAG.getNode(X86ISD::VSRAI, dl, VT, Small,  in expandFP_TO_UINT_SSE()
20851                   DAG.getTargetConstant(DstBits - 1, dl, MVT::i8));  in expandFP_TO_UINT_SSE()
20852   return DAG.getNode(ISD::OR, dl, VT, Small,  in expandFP_TO_UINT_SSE()
20853                      DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown));  in expandFP_TO_UINT_SSE()
20857   bool IsStrict = Op->isStrictFPOpcode();  in LowerFP_TO_INT()
20860   MVT VT = Op->getSimpleValueType(0);  in LowerFP_TO_INT()  local
20862   SDValue Chain = IsStrict ? Op->getOperand(0) : SDValue();  in LowerFP_TO_INT()
20868     MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;  in LowerFP_TO_INT()
20870       return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},  in LowerFP_TO_INT()
20873     return DAG.getNode(Op.getOpcode(), dl, VT,  in LowerFP_TO_INT()
20875   } else if (isTypeLegal(SrcVT) && isLegalConversion(VT, IsSigned, Subtarget)) {  in LowerFP_TO_INT()
20879   if (VT.isVector()) {  in LowerFP_TO_INT()
20880     if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {  in LowerFP_TO_INT()
20891         // Widen to 512-bits.  in LowerFP_TO_INT()
20897         // TODO: Should we just do this for non-strict as well?  in LowerFP_TO_INT()
20919       if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16)  in LowerFP_TO_INT()
20922       MVT ResVT = VT;  in LowerFP_TO_INT()
20923       MVT EleVT = VT.getVectorElementType();  in LowerFP_TO_INT()
20951       if (ResVT != VT)  in LowerFP_TO_INT()
20952         Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,  in LowerFP_TO_INT()
20960     // v8f32/v16f32/v8f64->v8i16/v16i16 need to widen first.  in LowerFP_TO_INT()
20961     if (VT.getVectorElementType() == MVT::i16) {  in LowerFP_TO_INT()
20965       MVT NVT = VT.changeVectorElementType(MVT::i32);  in LowerFP_TO_INT()
20977       Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);  in LowerFP_TO_INT()
20984     // v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32.  in LowerFP_TO_INT()
20985     if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {  in LowerFP_TO_INT()
20991     // Widen vXi32 fp_to_uint with avx512f to 512-bit source.  in LowerFP_TO_INT()
20992     if ((VT == MVT::v4i32 || VT == MVT::v8i32) &&  in LowerFP_TO_INT()
21001       // TODO: Should we just do this for non-strict as well?  in LowerFP_TO_INT()
21015       Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,  in LowerFP_TO_INT()
21023     // Widen vXi64 fp_to_uint/fp_to_sint with avx512dq to 512-bit source.  in LowerFP_TO_INT()
21024     if ((VT == MVT::v2i64 || VT == MVT::v4i64) &&  in LowerFP_TO_INT()
21031       // TODO: Should we just do this for non-strict as well?  in LowerFP_TO_INT()
21045       Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,  in LowerFP_TO_INT()
21053     if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {  in LowerFP_TO_INT()
21055         // Non-strict nodes without VLX can be widened to v4f32->v4i64 by type  in LowerFP_TO_INT()
21077         return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp});  in LowerFP_TO_INT()
21080       return DAG.getNode(Opc, dl, VT, Tmp);  in LowerFP_TO_INT()
21085     if ((VT == MVT::v4i32 && SrcVT == MVT::v4f32) ||  in LowerFP_TO_INT()
21086         (VT == MVT::v4i32 && SrcVT == MVT::v4f64) ||  in LowerFP_TO_INT()
21087         (VT == MVT::v8i32 && SrcVT == MVT::v8f32)) {  in LowerFP_TO_INT()
21089       return expandFP_TO_UINT_SSE(VT, Src, dl, DAG, Subtarget);  in LowerFP_TO_INT()
21095   assert(!VT.isVector());  in LowerFP_TO_INT()
21106     if (!IsStrict && ((VT == MVT::i32 && !Subtarget.is64Bit()) ||  in LowerFP_TO_INT()
21107                       (VT == MVT::i64 && Subtarget.is64Bit()))) {  in LowerFP_TO_INT()
21108       unsigned DstBits = VT.getScalarSizeInBits();  in LowerFP_TO_INT()
21111                                         DAG.getConstant(UIntLimit, dl, VT));  in LowerFP_TO_INT()
21115       // (i32) 0 to 2^31-1 ("Small") and from 2^31 to 2^32-1 ("Big").  in LowerFP_TO_INT()
21116       // (i64) 0 to 2^63-1 ("Small") and from 2^63 to 2^64-1 ("Big").  in LowerFP_TO_INT()
21118           DAG.getNode(X86ISD::CVTTS2SI, dl, VT,  in LowerFP_TO_INT()
21121           X86ISD::CVTTS2SI, dl, VT,  in LowerFP_TO_INT()
21132           ISD::SRA, dl, VT, Small, DAG.getConstant(DstBits - 1, dl, MVT::i8));  in LowerFP_TO_INT()
21133       return DAG.getNode(ISD::OR, dl, VT, Small,  in LowerFP_TO_INT()
21134                          DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown));  in LowerFP_TO_INT()
21138     if (VT == MVT::i64)  in LowerFP_TO_INT()
21141     assert(VT == MVT::i32 && "Unexpected VT!");  in LowerFP_TO_INT()
21143     // Promote i32 to i64 and use a signed operation on 64-bit targets.  in LowerFP_TO_INT()
21154       Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);  in LowerFP_TO_INT()
21169   if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {  in LowerFP_TO_INT()
21178     Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);  in LowerFP_TO_INT()
21192       LC = RTLIB::getFPTOSINT(SrcVT, VT);  in LowerFP_TO_INT()
21194       LC = RTLIB::getFPTOUINT(SrcVT, VT);  in LowerFP_TO_INT()
21198         makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain);  in LowerFP_TO_INT()
21237   EVT DstVT = N->getValueType(0);  in LRINT_LLRINTHelper()
21238   SDValue Src = N->getOperand(0);  in LRINT_LLRINTHelper()
21256   int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();  in LRINT_LLRINTHelper()
21285   bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;  in LowerFP_TO_INT_SAT()
21288   SDValue Src = Node->getOperand(0);  in LowerFP_TO_INT_SAT()
21295   EVT DstVT = Node->getValueType(0);  in LowerFP_TO_INT_SAT()
21303   EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();  in LowerFP_TO_INT_SAT()
21316   // Promote conversions to unsigned 32-bit to 64-bit, because it will allow  in LowerFP_TO_INT_SAT()
21329   // floating-point values.  in LowerFP_TO_INT_SAT()
21431   bool IsStrict = Op->isStrictFPOpcode();  in LowerFP_EXTEND()
21434   MVT VT = Op.getSimpleValueType();  in LowerFP_EXTEND()  local
21439   // Let f16->f80 get lowered to a libcall, except for darwin, where we should  in LowerFP_EXTEND()
21441   if (VT == MVT::f128 || (SVT == MVT::f16 && VT == MVT::f80 &&  in LowerFP_EXTEND()
21453     if (VT != MVT::f32) {  in LowerFP_EXTEND()
21456             ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other},  in LowerFP_EXTEND()
21460       return DAG.getNode(ISD::FP_EXTEND, DL, VT,  in LowerFP_EXTEND()
21468       assert(VT == MVT::f32 && SVT == MVT::f16 && "unexpected extend libcall");  in LowerFP_EXTEND()
21470       // Need a libcall, but ABI for f16 is soft-float on MacOS.  in LowerFP_EXTEND()
21487           CallingConv::C, EVT(VT).getTypeForEVT(*DAG.getContext()), Callee,  in LowerFP_EXTEND()
21531       return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},  in LowerFP_EXTEND()
21532                          {Op->getOperand(0), Res});  in LowerFP_EXTEND()
21533     return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);  in LowerFP_EXTEND()
21534   } else if (VT == MVT::v4f64 || VT == MVT::v8f64) {  in LowerFP_EXTEND()
21543     return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},  in LowerFP_EXTEND()
21544                        {Op->getOperand(0), Res});  in LowerFP_EXTEND()
21545   return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);  in LowerFP_EXTEND()
21549   bool IsStrict = Op->isStrictFPOpcode();  in LowerFP_ROUND()
21554   MVT VT = Op.getSimpleValueType();  in LowerFP_ROUND()  local
21557   if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80))  in LowerFP_ROUND()
21560   if (VT == MVT::f16 && (SVT == MVT::f64 || SVT == MVT::f32) &&  in LowerFP_ROUND()
21596   if (VT.getScalarType() == MVT::bf16) {  in LowerFP_ROUND()
21604   if (VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) {  in LowerFP_ROUND()
21608     if (VT.isVector())  in LowerFP_ROUND()
21622       // FIXME: Should we use zeros for upper elements for non-strict?  in LowerFP_ROUND()
21641   bool IsStrict = Op->isStrictFPOpcode();  in LowerFP16_TO_FP()
21644          "Unexpected VT!");  in LowerFP16_TO_FP()
21670   bool IsStrict = Op->isStrictFPOpcode();  in LowerFP_TO_FP16()
21673          "Unexpected VT!");  in LowerFP_TO_FP16()
21686     // FIXME: Should we use zeros for upper elements for non-strict?  in LowerFP_TO_FP16()
21754   // clang-format off  in lowerAddSubToHorizontalOp()
21761   // clang-format on  in lowerAddSubToHorizontalOp()
21780   // Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit  in lowerAddSubToHorizontalOp()
21781   // equivalent, so extract the 256/512-bit source op to 128-bit if we can.  in lowerAddSubToHorizontalOp()
21788   // add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0  in lowerAddSubToHorizontalOp()
21789   // add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0  in lowerAddSubToHorizontalOp()
21790   // add (extractelt (X, 2), extractelt (X, 3)) --> extractelt (hadd X, X), 1  in lowerAddSubToHorizontalOp()
21791   // sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0  in lowerAddSubToHorizontalOp()
21812   MVT VT = Op.getSimpleValueType();  in LowerFROUND()  local
21815   const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);  in LowerFROUND()
21821   SDValue Adder = DAG.getNode(ISD::FCOPYSIGN, dl, VT,  in LowerFROUND()
21822                               DAG.getConstantFP(Point5Pred, dl, VT), N0);  in LowerFROUND()
21823   N0 = DAG.getNode(ISD::FADD, dl, VT, N0, Adder);  in LowerFROUND()
21826   return DAG.getNode(ISD::FTRUNC, dl, VT, N0);  in LowerFROUND()
21840     for (SDNode *User : Op->uses())  in LowerFABSorFNEG()
21841       if (User->getOpcode() == ISD::FNEG)  in LowerFABSorFNEG()
21845   MVT VT = Op.getSimpleValueType();  in LowerFABSorFNEG()  local
21847   bool IsF128 = (VT == MVT::f128);  in LowerFABSorFNEG()
21848   assert(VT.isFloatingPoint() && VT != MVT::f80 &&  in LowerFABSorFNEG()
21849          DAG.getTargetLoweringInfo().isTypeLegal(VT) &&  in LowerFABSorFNEG()
21853   // decide if we should generate a 16-byte constant mask when we only need 4 or  in LowerFABSorFNEG()
21857   // generate a 16-byte vector constant and logic op even for the scalar case.  in LowerFABSorFNEG()
21858   // Using a 16-byte mask allows folding the load of the mask with  in LowerFABSorFNEG()
21860   bool IsFakeVector = !VT.isVector() && !IsF128;  in LowerFABSorFNEG()
21861   MVT LogicVT = VT;  in LowerFABSorFNEG()
21863     LogicVT = (VT == MVT::f64)   ? MVT::v2f64  in LowerFABSorFNEG()
21864               : (VT == MVT::f32) ? MVT::v4f32  in LowerFABSorFNEG()
21867   unsigned EltBits = VT.getScalarSizeInBits();  in LowerFABSorFNEG()
21871   const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);  in LowerFABSorFNEG()
21881   if (VT.isVector() || IsF128)  in LowerFABSorFNEG()
21884   // For the scalar case extend to a 128-bit vector, perform the logic op,  in LowerFABSorFNEG()
21888   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode,  in LowerFABSorFNEG()
21898   MVT VT = Op.getSimpleValueType();  in LowerFCOPYSIGN()  local
21899   if (Sign.getSimpleValueType().bitsLT(VT))  in LowerFCOPYSIGN()
21900     Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign);  in LowerFCOPYSIGN()
21903   if (Sign.getSimpleValueType().bitsGT(VT))  in LowerFCOPYSIGN()
21904     Sign = DAG.getNode(ISD::FP_ROUND, dl, VT, Sign,  in LowerFCOPYSIGN()
21909   bool IsF128 = (VT == MVT::f128);  in LowerFCOPYSIGN()
21910   assert(VT.isFloatingPoint() && VT != MVT::f80 &&  in LowerFCOPYSIGN()
21911          DAG.getTargetLoweringInfo().isTypeLegal(VT) &&  in LowerFCOPYSIGN()
21914   const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);  in LowerFCOPYSIGN()
21916   // Perform all scalar logic operations as 16-byte vectors because there are no  in LowerFCOPYSIGN()
21921   bool IsFakeVector = !VT.isVector() && !IsF128;  in LowerFCOPYSIGN()
21922   MVT LogicVT = VT;  in LowerFCOPYSIGN()
21924     LogicVT = (VT == MVT::f64)   ? MVT::v2f64  in LowerFCOPYSIGN()
21925               : (VT == MVT::f32) ? MVT::v4f32  in LowerFCOPYSIGN()
21929   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in LowerFCOPYSIGN()
21945     APFloat APF = Op0CN->getValueAPF();  in LowerFCOPYSIGN()
21957   return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or,  in LowerFCOPYSIGN()
21964   MVT VT = Op.getSimpleValueType();  in LowerFGETSIGN()  local
21974   Res = DAG.getZExtOrTrunc(Res, dl, VT);  in LowerFGETSIGN()
21975   Res = DAG.getNode(ISD::AND, dl, VT, Res, DAG.getConstant(1, dl, VT));  in LowerFGETSIGN()
21982   // instruction.  Since the shift amount is in-range-or-undefined, we know  in getBT()
21993   // See if we can use the 32-bit instruction instead of the 64-bit one for a  in getBT()
22002   // BT ignores high bits (like shifts) we can use anyextend.  in getBT()
22007     if (BitNo.getOpcode() == ISD::AND && BitNo->hasOneUse())  in getBT()
22066 /// Try to map a 128-bit or larger integer comparison to vector instructions
22068 static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,  in combineVectorSizedSetCCEquality()  argument
22083   // logically-combined vector-sized operands compared to zero. This pattern may  in combineVectorSizedSetCCEquality()
22100   // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.  in combineVectorSizedSetCCEquality()
22101   // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.  in combineVectorSizedSetCCEquality()
22142     auto ScalarToVector = [&](SDValue X) -> SDValue {  in combineVectorSizedSetCCEquality()
22170       // This is a bitwise-combined equality comparison of 2 pairs of vectors:  in combineVectorSizedSetCCEquality()
22191       return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp),  in combineVectorSizedSetCCEquality()
22200       return DAG.getNode(ISD::TRUNCATE, DL, VT, X86SetCC.getValue(0));  in combineVectorSizedSetCCEquality()
22203     // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq  in combineVectorSizedSetCCEquality()
22204     // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne  in combineVectorSizedSetCCEquality()
22206            "Non 128-bit vector on pre-SSE41 target");  in combineVectorSizedSetCCEquality()
22209     return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);  in combineVectorSizedSetCCEquality()
22217 /// are supported when the pointer SrcMask is non-null.
22218 /// TODO - move this to SelectionDAG?
22224   EVT VT = MVT::Other;  in matchScalarReduction()  local
22236     if (I->getOpcode() == unsigned(BinOp)) {  in matchScalarReduction()
22237       Opnds.push_back(I->getOperand(0));  in matchScalarReduction()
22238       Opnds.push_back(I->getOperand(1));  in matchScalarReduction()
22239       // Re-evaluate the number of nodes to be traversed.  in matchScalarReduction()
22244     // Quit if a non-EXTRACT_VECTOR_ELT  in matchScalarReduction()
22245     if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT)  in matchScalarReduction()
22249     auto *Idx = dyn_cast<ConstantSDNode>(I->getOperand(1));  in matchScalarReduction()
22253     SDValue Src = I->getOperand(0);  in matchScalarReduction()
22256       VT = Src.getValueType();  in matchScalarReduction()
22258       if (!SrcOpMap.empty() && VT != SrcOpMap.begin()->first.getValueType())  in matchScalarReduction()
22260       unsigned NumElts = VT.getVectorNumElements();  in matchScalarReduction()
22267     unsigned CIdx = Idx->getZExtValue();  in matchScalarReduction()
22268     if (M->second[CIdx])  in matchScalarReduction()
22270     M->second.setBit(CIdx);  in matchScalarReduction()
22276       SrcMask->push_back(SrcOpMap[SrcOp]);  in matchScalarReduction()
22292   EVT VT = LHS.getValueType();  in LowerVectorAllEqual()  local
22293   unsigned ScalarSize = VT.getScalarSizeInBits();  in LowerVectorAllEqual()
22299   // Quit if not convertible to legal scalar or 128/256-bit vector.  in LowerVectorAllEqual()
22300   if (!llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))  in LowerVectorAllEqual()
22303   // FCMP may use ISD::SETNE when nnan - early out if we manage to get here.  in LowerVectorAllEqual()
22304   if (VT.isFloatingPoint())  in LowerVectorAllEqual()
22320   // For sub-128-bit vector, cast to (legal) integer and compare with zero.  in LowerVectorAllEqual()
22321   if (VT.getSizeInBits() < 128) {  in LowerVectorAllEqual()
22322     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());  in LowerVectorAllEqual()
22343   // Without PTEST, a masked v2i64 or-reduction is not faster than  in LowerVectorAllEqual()
22350   // Split down to 128/256/512-bit vector.  in LowerVectorAllEqual()
22358     VT = EVT::getVectorVT(*DAG.getContext(), MVT::i64, VT.getSizeInBits() / 64);  in LowerVectorAllEqual()
22359     LHS = DAG.getBitcast(VT, LHS);  in LowerVectorAllEqual()
22360     RHS = DAG.getBitcast(VT, RHS);  in LowerVectorAllEqual()
22364   if (VT.getSizeInBits() > TestSize) {  in LowerVectorAllEqual()
22367       // If ICMP(AND(LHS,MASK),MASK) - reduce using AND splits.  in LowerVectorAllEqual()
22368       while (VT.getSizeInBits() > TestSize) {  in LowerVectorAllEqual()
22370         VT = Split.first.getValueType();  in LowerVectorAllEqual()
22371         LHS = DAG.getNode(ISD::AND, DL, VT, Split.first, Split.second);  in LowerVectorAllEqual()
22373       RHS = DAG.getAllOnesConstant(DL, VT);  in LowerVectorAllEqual()
22376       // ALLOF(CMPEQ(X,Y)) -> AND(CMPEQ(X[0],Y[0]),CMPEQ(X[1],Y[1]),....)  in LowerVectorAllEqual()
22378       VT = MVT::getVectorVT(SVT, VT.getSizeInBits() / SVT.getSizeInBits());  in LowerVectorAllEqual()
22379       LHS = DAG.getBitcast(VT, MaskBits(LHS));  in LowerVectorAllEqual()
22380       RHS = DAG.getBitcast(VT, MaskBits(RHS));  in LowerVectorAllEqual()
22381       EVT BoolVT = VT.changeVectorElementType(MVT::i1);  in LowerVectorAllEqual()
22383       V = DAG.getSExtOrTrunc(V, DL, VT);  in LowerVectorAllEqual()
22384       while (VT.getSizeInBits() > TestSize) {  in LowerVectorAllEqual()
22386         VT = Split.first.getValueType();  in LowerVectorAllEqual()
22387         V = DAG.getNode(ISD::AND, DL, VT, Split.first, Split.second);  in LowerVectorAllEqual()
22389       V = DAG.getNOT(DL, V, VT);  in LowerVectorAllEqual()
22395       SDValue V = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);  in LowerVectorAllEqual()
22396       while (VT.getSizeInBits() > TestSize) {  in LowerVectorAllEqual()
22398         VT = Split.first.getValueType();  in LowerVectorAllEqual()
22399         V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second);  in LowerVectorAllEqual()
22402       RHS = DAG.getConstant(0, DL, VT);  in LowerVectorAllEqual()
22406   if (UseKORTEST && VT.is512BitVector()) {  in LowerVectorAllEqual()
22407     MVT TestVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);  in LowerVectorAllEqual()
22416     MVT TestVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);  in LowerVectorAllEqual()
22423   assert(VT.getSizeInBits() == 128 && "Failure to split to 128-bits");  in LowerVectorAllEqual()
22434 // Check whether an AND/OR'd reduction tree is PTEST-able, or if we can fallback
22449   if (!Subtarget.hasSSE2() || !Op->hasOneUse())  in MatchVectorAllEqualTest()
22452   // Check whether we're masking/truncating an OR-reduction result, in which  in MatchVectorAllEqualTest()
22467         Mask = Cst->getAPIntValue();  in MatchVectorAllEqualTest()
22478   // Match icmp(and(extract(X,0),extract(X,1)),-1) allof reduction patterns.  in MatchVectorAllEqualTest()
22481     EVT VT = VecIns[0].getValueType();  in MatchVectorAllEqualTest()  local
22483                         [VT](SDValue V) { return VT == V.getValueType(); }) &&  in MatchVectorAllEqualTest()
22486     // Quit if not splittable to scalar/128/256/512-bit vector.  in MatchVectorAllEqualTest()
22487     if (!llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))  in MatchVectorAllEqualTest()
22492     for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1;  in MatchVectorAllEqualTest()
22498       VecIns.push_back(DAG.getNode(LogicOp, DL, VT, LHS, RHS));  in MatchVectorAllEqualTest()
22502                                CmpNull ? DAG.getConstant(0, DL, VT)  in MatchVectorAllEqualTest()
22503                                        : DAG.getAllOnesConstant(DL, VT),  in MatchVectorAllEqualTest()
22508   // Match icmp(reduce_and(X),-1) allof reduction patterns.  in MatchVectorAllEqualTest()
22528       // Match icmp(bitcast(icmp_eq(X,Y)),-1) reduction patterns.  in MatchVectorAllEqualTest()
22533         ISD::CondCode SrcCC = cast<CondCodeSDNode>(Src.getOperand(2))->get();  in MatchVectorAllEqualTest()
22542       // Match icmp(bitcast(vXi1 trunc(Y)),-1) reduction patterns.  in MatchVectorAllEqualTest()
22564   for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE;  in hasNonFlagsUse()
22568     if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {  in hasNonFlagsUse()
22570       UOpNo = User->use_begin().getOperandNo();  in hasNonFlagsUse()
22571       User = *User->use_begin();  in hasNonFlagsUse()
22574     if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC &&  in hasNonFlagsUse()
22575         !(User->getOpcode() == ISD::SELECT && UOpNo == 0))  in hasNonFlagsUse()
22581 // Transform to an x86-specific ALU node with flags if there is a chance of
22585   for (SDNode *U : Op->uses())  in isProfitableToUseFlagOp()
22586     if (U->getOpcode() != ISD::CopyToReg &&  in isProfitableToUseFlagOp()
22587         U->getOpcode() != ISD::SETCC &&  in isProfitableToUseFlagOp()
22588         U->getOpcode() != ISD::STORE)  in isProfitableToUseFlagOp()
22614     switch (Op->getOpcode()) {  in EmitTest()
22619       if (Op.getNode()->getFlags().hasNoSignedWrap())  in EmitTest()
22644   // non-casted variable when we check for possible users.  in EmitTest()
22660     // Otherwise use a regular EFLAGS-setting instruction.  in EmitTest()
22662     // clang-format off  in EmitTest()
22669     // clang-format on  in EmitTest()
22684     return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0),  in EmitTest()
22685                        Op->getOperand(1)).getValue(1);  in EmitTest()
22697   SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands);  in EmitTest()
22715           CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");  in EmitCmp()
22726     // Don't do this if the immediate can fit in 8-bits.  in EmitCmp()
22727     if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||  in EmitCmp()
22728         (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {  in EmitCmp()
22750   // TODO: Add sign-bits equivalent for isX86CCSigned(X86CC)?  in EmitCmp()
22760   // 0-x == y --> x+y == 0  in EmitCmp()
22761   // 0-x != y --> x+y != 0  in EmitCmp()
22769   // x == 0-y --> x+y == 0  in EmitCmp()
22770   // x != 0-y --> x+y != 0  in EmitCmp()
22785                                                           EVT VT) const {  in isXAndYEqZeroPreferableToXAndYEqY()
22786   return !VT.isVector() || Cond != ISD::CondCode::SETEQ;  in isXAndYEqZeroPreferableToXAndYEqY()
22791   if (N->getOpcode() == ISD::FDIV)  in optimizeFMulOrFDivAsShiftAddBitcast()
22794   EVT FPVT = N->getValueType(0);  in optimizeFMulOrFDivAsShiftAddBitcast()
22797   // This indicates a non-free bitcast.  in optimizeFMulOrFDivAsShiftAddBitcast()
22799   // integer vector anyways for the int->fp cast.  in optimizeFMulOrFDivAsShiftAddBitcast()
22809   EVT VT = Op.getValueType();  in isFsqrtCheap()  local
22812   if (VT.getScalarType() == MVT::f16)  in isFsqrtCheap()
22816   if (DAG.doesNodeExist(X86ISD::FRSQRT, DAG.getVTList(VT), Op))  in isFsqrtCheap()
22819   if (VT.isVector())  in isFsqrtCheap()
22824 /// The minimum architected relative accuracy is 2^-12. We need one
22825 /// Newton-Raphson step to have a good float result (24 bits of precision).
22832   EVT VT = Op.getValueType();  in getSqrtEstimate()  local
22834   // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.  in getSqrtEstimate()
22835   // It is likely not profitable to do this for f64 because a double-precision  in getSqrtEstimate()
22842   if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||  in getSqrtEstimate()
22843       (VT == MVT::v4f32 && Subtarget.hasSSE1() && Reciprocal) ||  in getSqrtEstimate()
22844       (VT == MVT::v4f32 && Subtarget.hasSSE2() && !Reciprocal) ||  in getSqrtEstimate()
22845       (VT == MVT::v8f32 && Subtarget.hasAVX()) ||  in getSqrtEstimate()
22846       (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) {  in getSqrtEstimate()
22851     // There is no FRSQRT for 512-bits, but there is RSQRT14.  in getSqrtEstimate()
22852     unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT;  in getSqrtEstimate()
22853     SDValue Estimate = DAG.getNode(Opcode, DL, VT, Op);  in getSqrtEstimate()
22855       Estimate = DAG.getNode(ISD::FMUL, DL, VT, Op, Estimate);  in getSqrtEstimate()
22859   if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&  in getSqrtEstimate()
22865     if (VT == MVT::f16) {  in getSqrtEstimate()
22873     return DAG.getNode(X86ISD::RSQRT14, DL, VT, Op);  in getSqrtEstimate()
22878 /// The minimum architected relative accuracy is 2^-12. We need one
22879 /// Newton-Raphson step to have a good float result (24 bits of precision).
22884   EVT VT = Op.getValueType();  in getRecipEstimate()  local
22886   // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.  in getRecipEstimate()
22887   // It is likely not profitable to do this for f64 because a double-precision  in getRecipEstimate()
22893   if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||  in getRecipEstimate()
22894       (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||  in getRecipEstimate()
22895       (VT == MVT::v8f32 && Subtarget.hasAVX()) ||  in getRecipEstimate()
22896       (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) {  in getRecipEstimate()
22899     // real-world code. These defaults are intended to match GCC behavior.  in getRecipEstimate()
22900     if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)  in getRecipEstimate()
22906     // There is no FRCP for 512-bits, but there is RCP14.  in getRecipEstimate()
22907     unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RCP14 : X86ISD::FRCP;  in getRecipEstimate()
22908     return DAG.getNode(Opcode, DL, VT, Op);  in getRecipEstimate()
22911   if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&  in getRecipEstimate()
22916     if (VT == MVT::f16) {  in getRecipEstimate()
22924     return DAG.getNode(X86ISD::RCP14, DL, VT, Op);  in getRecipEstimate()
22944   if (isIntDivCheap(N->getValueType(0), Attr))  in BuildSDIVPow2()
22956   EVT VT = N->getValueType(0);  in BuildSDIVPow2()  local
22958   if (VT != MVT::i16 && VT != MVT::i32 &&  in BuildSDIVPow2()
22959       !(Subtarget.is64Bit() && VT == MVT::i64))  in BuildSDIVPow2()
22962   // If the divisor is 2 or -2, the default expansion is better.  in BuildSDIVPow2()
22964       Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true))  in BuildSDIVPow2()
22993         if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)  in LowerAndToBT()
23001     uint64_t AndRHSVal = AndRHS->getZExtValue();  in LowerAndToBT()
23039 // Check if pre-AVX condcode can be performed by a single FCMP op.
23044 /// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
23052   //  0 - EQ  in translateX86FSETCC()
23053   //  1 - LT  in translateX86FSETCC()
23054   //  2 - LE  in translateX86FSETCC()
23055   //  3 - UNORD  in translateX86FSETCC()
23056   //  4 - NEQ  in translateX86FSETCC()
23057   //  5 - NLT  in translateX86FSETCC()
23058   //  6 - NLE  in translateX86FSETCC()
23059   //  7 - ORD  in translateX86FSETCC()
23061   // clang-format off  in translateX86FSETCC()
23083   // clang-format on  in translateX86FSETCC()
23107 /// Break a 256-bit integer VSETCC into two new 128-bit ones and then
23109 static SDValue splitIntVSETCC(EVT VT, SDValue LHS, SDValue RHS,  in splitIntVSETCC()  argument
23112   assert(VT.isInteger() && VT == LHS.getValueType() &&  in splitIntVSETCC()
23113          VT == RHS.getValueType() && "Unsupported VTs!");  in splitIntVSETCC()
23127   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);  in splitIntVSETCC()
23128   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,  in splitIntVSETCC()
23138   MVT VT = Op.getSimpleValueType();  in LowerIntVSETCC_AVX512()  local
23139   assert(VT.getVectorElementType() == MVT::i1 &&  in LowerIntVSETCC_AVX512()
23142   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();  in LowerIntVSETCC_AVX512()
23150   return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode);  in LowerIntVSETCC_AVX512()
23163   MVT VT = V.getSimpleValueType();  in incDecVectorConstant()  local
23164   MVT EltVT = VT.getVectorElementType();  in incDecVectorConstant()
23165   unsigned NumElts = VT.getVectorNumElements();  in incDecVectorConstant()
23169     auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));  in incDecVectorConstant()
23170     if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT)  in incDecVectorConstant()
23174     const APInt &EltC = Elt->getAPIntValue();  in incDecVectorConstant()
23181     NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));  in incDecVectorConstant()
23184   return DAG.getBuildVector(VT, DL, NewVecC);  in incDecVectorConstant()
23191 static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,  in LowerVSETCCWithSUBUS()  argument
23198   MVT VET = VT.getVectorElementType();  in LowerVSETCCWithSUBUS()
23210     // Only do this pre-AVX since vpcmp* is no longer destructive.  in LowerVSETCCWithSUBUS()
23224     // cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0  in LowerVSETCCWithSUBUS()
23233   // Psubus is better than flip-sign because it requires no inversion.  in LowerVSETCCWithSUBUS()
23241   SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);  in LowerVSETCCWithSUBUS()
23242   return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,  in LowerVSETCCWithSUBUS()
23243                      DAG.getConstant(0, dl, VT));  in LowerVSETCCWithSUBUS()
23253   MVT VT = Op->getSimpleValueType(0);  in LowerVSETCC()  local
23254   ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();  in LowerVSETCC()
23269     // compare like we do for non-strict, we might trigger spurious exceptions  in LowerVSETCC()
23272     if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 &&  in LowerVSETCC()
23276       unsigned Num = VT.getVectorNumElements();  in LowerVSETCC()
23283       // floating-point vector result that matches the operand type. This allows  in LowerVSETCC()
23285       VT = Op0.getSimpleValueType();  in LowerVSETCC()
23305             Opc, dl, {VT, MVT::Other},  in LowerVSETCC()
23311         SignalCmp->setFlags(Op->getFlags());  in LowerVSETCC()
23335               Opc, dl, {VT, MVT::Other},  in LowerVSETCC()
23338               Opc, dl, {VT, MVT::Other},  in LowerVSETCC()
23344               Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8));  in LowerVSETCC()
23346               Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8));  in LowerVSETCC()
23348         Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);  in LowerVSETCC()
23352               Opc, dl, {VT, MVT::Other},  in LowerVSETCC()
23357               Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));  in LowerVSETCC()
23365             Opc, dl, {VT, MVT::Other},  in LowerVSETCC()
23370             Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));  in LowerVSETCC()
23373     if (VT.getFixedSizeInBits() >  in LowerVSETCC()
23377       EVT CastVT = EVT(VT).changeVectorElementTypeToInteger();  in LowerVSETCC()
23399   assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&  in LowerVSETCC()
23402   // The non-AVX512 code below works under the assumption that source and  in LowerVSETCC()
23404   assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&  in LowerVSETCC()
23408   if (VT.getVectorElementType() == MVT::i1) {  in LowerVSETCC()
23409     // In AVX-512 architecture setcc returns mask with i1 elements,  in LowerVSETCC()
23417   if (VT.is128BitVector() && Subtarget.hasXOP()) {  in LowerVSETCC()
23421     // clang-format off  in LowerVSETCC()
23433     // clang-format on  in LowerVSETCC()
23440     return DAG.getNode(Opc, dl, VT, Op0, Op1,  in LowerVSETCC()
23444   // (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2.  in LowerVSETCC()
23452               BC0.getOperand(1), VT.getScalarSizeInBits(), UndefElts, EltBits,  in LowerVSETCC()
23456           Op1 = DAG.getBitcast(VT, BC0.getOperand(1));  in LowerVSETCC()
23462   // ICMP_EQ(AND(X,C),C) -> SRA(SHL(X,LOG2(C)),BW-1) iff C is power-of-2.  in LowerVSETCC()
23466     if (C1 && C1->getAPIntValue().isPowerOf2()) {  in LowerVSETCC()
23467       unsigned BitWidth = VT.getScalarSizeInBits();  in LowerVSETCC()
23468       unsigned ShiftAmt = BitWidth - C1->getAPIntValue().logBase2() - 1;  in LowerVSETCC()
23471       Result = DAG.getNode(ISD::SHL, dl, VT, Result,  in LowerVSETCC()
23472                            DAG.getConstant(ShiftAmt, dl, VT));  in LowerVSETCC()
23473       Result = DAG.getNode(ISD::SRA, dl, VT, Result,  in LowerVSETCC()
23474                            DAG.getConstant(BitWidth - 1, dl, VT));  in LowerVSETCC()
23479   // Break 256-bit integer vector compare into smaller ones.  in LowerVSETCC()
23480   if (VT.is256BitVector() && !Subtarget.hasInt256())  in LowerVSETCC()
23481     return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);  in LowerVSETCC()
23483   // Break 512-bit integer vector compare into smaller ones.  in LowerVSETCC()
23485   if (VT.is512BitVector())  in LowerVSETCC()
23486     return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);  in LowerVSETCC()
23489   // not-of-PCMPEQ:  in LowerVSETCC()
23490   // X != INT_MIN --> X >s INT_MIN  in LowerVSETCC()
23491   // X != INT_MAX --> X <s INT_MAX --> INT_MAX >s X  in LowerVSETCC()
23492   // +X != 0 --> +X >s 0  in LowerVSETCC()
23504   // If both operands are known non-negative, then an unsigned compare is the  in LowerVSETCC()
23515       TLI.isOperationLegal(ISD::UMIN, VT)) {  in LowerVSETCC()
23519       // X > C --> X >= (C+1) --> X == umax(X, C+1)  in LowerVSETCC()
23527       // X < C --> X <= (C-1) --> X == umin(X, C-1)  in LowerVSETCC()
23537     // clang-format off  in LowerVSETCC()
23543     // clang-format on  in LowerVSETCC()
23546     SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);  in LowerVSETCC()
23547     Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);  in LowerVSETCC()
23549     // If the logical-not of the result is required, perform that now.  in LowerVSETCC()
23551       Result = DAG.getNOT(dl, Result, VT);  in LowerVSETCC()
23559             LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))  in LowerVSETCC()
23577   if (VT == MVT::v2i64) {  in LowerVSETCC()
23591         return DAG.getBitcast(VT, Result);  in LowerVSETCC()
23596         Op1 = DAG.getConstant(-1, dl, MVT::v4i32);  in LowerVSETCC()
23602         return DAG.getBitcast(VT, Result);  in LowerVSETCC()
23605       // If the i64 elements are sign-extended enough to be representable as i32  in LowerVSETCC()
23616         return DAG.getBitcast(VT, Result);  in LowerVSETCC()
23637       // Create masks for only the low parts/high parts of the 64 bit integers.  in LowerVSETCC()
23650       return DAG.getBitcast(VT, Result);  in LowerVSETCC()
23665       // Make sure the lower and upper halves are both all-ones.  in LowerVSETCC()
23673       return DAG.getBitcast(VT, Result);  in LowerVSETCC()
23680     MVT EltVT = VT.getVectorElementType();  in LowerVSETCC()
23682                                  VT);  in LowerVSETCC()
23683     Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);  in LowerVSETCC()
23684     Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);  in LowerVSETCC()
23687   SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);  in LowerVSETCC()
23689   // If the logical-not of the result is required, perform that now.  in LowerVSETCC()
23691     Result = DAG.getNOT(dl, Result, VT);  in LowerVSETCC()
23708   MVT VT = Op0.getSimpleValueType();  in EmitAVX512Test()  local
23709   if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) &&  in EmitAVX512Test()
23710       !(Subtarget.hasDQI() && VT == MVT::v8i1) &&  in EmitAVX512Test()
23711       !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))  in EmitAVX512Test()
23725   if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1))  in EmitAVX512Test()
23727   if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))  in EmitAVX512Test()
23771     // Try to use PTEST/PMOVMSKB for a tree AND/ORs equality compared with -1/0.  in emitFlagsForSetcc()
23804       EVT VT = Op0.getValueType();  in emitFlagsForSetcc()  local
23805       if (VT == MVT::i32 || VT == MVT::i64 || Op0->hasOneUse()) {  in emitFlagsForSetcc()
23806         SDVTList CmpVTs = DAG.getVTList(VT, MVT::i32);  in emitFlagsForSetcc()
23810                                   DAG.getConstant(0, dl, VT), Op0);  in emitFlagsForSetcc()
23816     // (seteq (add X, -1), -1). Similar for setne.  in emitFlagsForSetcc()
23845   MVT VT = Op->getSimpleValueType(0);  in LowerSETCC()  local
23847   if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);  in LowerSETCC()
23849   assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");  in LowerSETCC()
23855       cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();  in LowerSETCC()
23877     // Attempt to canonicalize SGT/UGT -> SGE/UGE compares with constant which  in LowerSETCC()
23880     // equivalent for SLE/ULE -> SLT/ULT isn't likely to happen as we already  in LowerSETCC()
23883     // encoding size - so it must either already be a i8 or i32 immediate, or it  in LowerSETCC()
23888       const APInt &Op1Val = Op1C->getAPIntValue();  in LowerSETCC()
23939   X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get());  in LowerSETCCCARRY()
24012   assert(Op->getValueType(1) == MVT::i8 && "Unexpected VT!");  in LowerXALUO()
24013   return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC);  in LowerXALUO()
24038   return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));  in isTruncWithZeroHighBitsInput()
24047   MVT VT = Op1.getSimpleValueType();  in LowerSELECT()  local
24050   if (isSoftF16(VT, Subtarget)) {  in LowerSELECT()
24051     MVT NVT = VT.changeTypeToInteger();  in LowerSELECT()
24052     return DAG.getBitcast(VT, DAG.getNode(ISD::SELECT, DL, NVT, Cond,  in LowerSELECT()
24060   if (Cond.getOpcode() == ISD::SETCC && isScalarFPTypeInSSEReg(VT) &&  in LowerSELECT()
24061       VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {  in LowerSELECT()
24065         translateX86FSETCC(cast<CondCodeSDNode>(Cond.getOperand(2))->get(),  in LowerSELECT()
24072       assert(!VT.isVector() && "Not a scalar type?");  in LowerSELECT()
24073       return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);  in LowerSELECT()
24077       SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,  in LowerSELECT()
24096         MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;  in LowerSELECT()
24101         MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;  in LowerSELECT()
24106         return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,  in LowerSELECT()
24109       SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);  in LowerSELECT()
24110       SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);  in LowerSELECT()
24111       return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);  in LowerSELECT()
24116   if (isScalarFPTypeInSSEReg(VT) && Subtarget.hasAVX512()) {  in LowerSELECT()
24118     return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);  in LowerSELECT()
24133   // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y  in LowerSELECT()
24134   // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y  in LowerSELECT()
24135   // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y  in LowerSELECT()
24136   // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y  in LowerSELECT()
24137   // (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y  in LowerSELECT()
24138   // (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y  in LowerSELECT()
24139   // (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x  in LowerSELECT()
24140   // (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x  in LowerSELECT()
24148     // Special handling for __builtin_ffs(X) - 1 pattern which looks like  in LowerSELECT()
24149     // (select (seteq X, 0), -1, (cttz_zero_undef X)). Disable the special  in LowerSELECT()
24157     if (Subtarget.canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64) &&  in LowerSELECT()
24166       // 'X - 1' sets the carry flag if X == 0.  in LowerSELECT()
24167       // '0 - X' sets the carry flag if X != 0.  in LowerSELECT()
24168       // Convert the carry flag to a -1/0 mask with sbb:  in LowerSELECT()
24169       // select (X != 0), -1, Y --> 0 - X; or (sbb), Y  in LowerSELECT()
24170       // select (X == 0), Y, -1 --> 0 - X; or (sbb), Y  in LowerSELECT()
24171       // select (X != 0), Y, -1 --> X - 1; or (sbb), Y  in LowerSELECT()
24172       // select (X == 0), -1, Y --> X - 1; or (sbb), Y  in LowerSELECT()
24181       SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,  in LowerSELECT()
24184       return DAG.getNode(ISD::OR, DL, VT, SBB, Y);  in LowerSELECT()
24208         if (CmpSz > VT.getSizeInBits())  in LowerSELECT()
24209           Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0);  in LowerSELECT()
24210         else if (CmpSz < VT.getSizeInBits())  in LowerSELECT()
24211           Neg = DAG.getNode(ISD::AND, DL, VT,  in LowerSELECT()
24212               DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)),  in LowerSELECT()
24213               DAG.getConstant(1, DL, VT));  in LowerSELECT()
24216         SDValue Mask = DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1))  in LowerSELECT()
24217         SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z  in LowerSELECT()
24218         return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2);  // And Op y  in LowerSELECT()
24220     } else if ((VT == MVT::i32 || VT == MVT::i64) && isNullConstant(Op2) &&  in LowerSELECT()
24221                Cmp.getNode()->hasOneUse() && (CmpOp0 == Op1) &&  in LowerSELECT()
24224       // (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x  in LowerSELECT()
24229       // (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x  in LowerSELECT()
24230       unsigned ShCt = VT.getSizeInBits() - 1;  in LowerSELECT()
24231       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, VT);  in LowerSELECT()
24232       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, Op1, ShiftAmt);  in LowerSELECT()
24234         Shift = DAG.getNOT(DL, Shift, VT);  in LowerSELECT()
24235       return DAG.getNode(ISD::AND, DL, VT, Shift, Op1);  in LowerSELECT()
24254     if (VT.isFloatingPoint() && !VT.isVector() &&  in LowerSELECT()
24255         !isScalarFPTypeInSSEReg(VT) && Subtarget.canUseCMOV())  // FPStack?  in LowerSELECT()
24256       IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());  in LowerSELECT()
24275     // Look past the truncate if the high bits are known zero.  in LowerSELECT()
24296   // a <  b ? -1 :  0 -> RES = ~setcc_carry  in LowerSELECT()
24297   // a <  b ?  0 : -1 -> RES = setcc_carry  in LowerSELECT()
24298   // a >= b ? -1 :  0 -> RES = setcc_carry  in LowerSELECT()
24299   // a >= b ?  0 : -1 -> RES = ~setcc_carry  in LowerSELECT()
24301     unsigned CondCode = CC->getAsZExtVal();  in LowerSELECT()
24349   return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops, Op->getFlags());  in LowerSELECT()
24355   MVT VT = Op->getSimpleValueType(0);  in LowerSIGN_EXTEND_Mask()  local
24356   SDValue In = Op->getOperand(0);  in LowerSIGN_EXTEND_Mask()
24359   MVT VTElt = VT.getVectorElementType();  in LowerSIGN_EXTEND_Mask()
24360   unsigned NumElts = VT.getVectorNumElements();  in LowerSIGN_EXTEND_Mask()
24362   // Extend VT if the scalar type is i8/i16 and BWI is not supported.  in LowerSIGN_EXTEND_Mask()
24363   MVT ExtVT = VT;  in LowerSIGN_EXTEND_Mask()
24367       return SplitAndExtendv16i1(Op.getOpcode(), VT, In, dl, DAG);  in LowerSIGN_EXTEND_Mask()
24372   // Widen to 512-bits if VLX is not supported.  in LowerSIGN_EXTEND_Mask()
24388     SDValue NegOne = DAG.getConstant(-1, dl, WideVT);  in LowerSIGN_EXTEND_Mask()
24394   if (VT != ExtVT) {  in LowerSIGN_EXTEND_Mask()
24399   // Extract back to 128/256-bit if we widened.  in LowerSIGN_EXTEND_Mask()
24400   if (WideVT != VT)  in LowerSIGN_EXTEND_Mask()
24401     V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, V,  in LowerSIGN_EXTEND_Mask()
24409   SDValue In = Op->getOperand(0);  in LowerANY_EXTEND()
24422 // non-SSE4.1 targets. For zero extend this should only handle inputs of
24427   SDValue In = Op->getOperand(0);  in LowerEXTEND_VECTOR_INREG()
24428   MVT VT = Op->getSimpleValueType(0);  in LowerEXTEND_VECTOR_INREG()  local
24431   MVT SVT = VT.getVectorElementType();  in LowerEXTEND_VECTOR_INREG()
24439   if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&  in LowerEXTEND_VECTOR_INREG()
24440       !(VT.is256BitVector() && Subtarget.hasAVX()) &&  in LowerEXTEND_VECTOR_INREG()
24441       !(VT.is512BitVector() && Subtarget.hasAVX512()))  in LowerEXTEND_VECTOR_INREG()
24446   unsigned NumElts = VT.getVectorNumElements();  in LowerEXTEND_VECTOR_INREG()
24448   // For 256-bit vectors, we only need the lower (128-bit) half of the input.  in LowerEXTEND_VECTOR_INREG()
24449   // For 512-bit vectors, we need 128-bits or 256-bits.  in LowerEXTEND_VECTOR_INREG()
24452     // at least 128-bits.  in LowerEXTEND_VECTOR_INREG()
24458   // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results,  in LowerEXTEND_VECTOR_INREG()
24460   // need to be handled here for 256/512-bit results.  in LowerEXTEND_VECTOR_INREG()
24462     assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");  in LowerEXTEND_VECTOR_INREG()
24465       return DAG.getNode(Op.getOpcode(), dl, VT, In);  in LowerEXTEND_VECTOR_INREG()
24472     return DAG.getNode(ExtOpc, dl, VT, In);  in LowerEXTEND_VECTOR_INREG()
24475   // pre-AVX2 256-bit extensions need to be split into 128-bit instructions.  in LowerEXTEND_VECTOR_INREG()
24477     assert(VT.is256BitVector() && "256-bit vector expected");  in LowerEXTEND_VECTOR_INREG()
24478     MVT HalfVT = VT.getHalfNumVectorElementsVT();  in LowerEXTEND_VECTOR_INREG()
24489     return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);  in LowerEXTEND_VECTOR_INREG()
24494   assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs");  in LowerEXTEND_VECTOR_INREG()
24497   // If the source elements are already all-signbits, we don't need to extend,  in LowerEXTEND_VECTOR_INREG()
24505     return DAG.getBitcast(VT,  in LowerEXTEND_VECTOR_INREG()
24509   // pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.  in LowerEXTEND_VECTOR_INREG()
24516     MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;  in LowerEXTEND_VECTOR_INREG()
24526       Mask[i * Scale + (Scale - 1)] = i;  in LowerEXTEND_VECTOR_INREG()
24531     unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();  in LowerEXTEND_VECTOR_INREG()
24536   if (VT == MVT::v2i64) {  in LowerEXTEND_VECTOR_INREG()
24537     assert(Curr.getValueType() == MVT::v4i32 && "Unexpected input VT");  in LowerEXTEND_VECTOR_INREG()
24541     SignExt = DAG.getBitcast(VT, SignExt);  in LowerEXTEND_VECTOR_INREG()
24549   MVT VT = Op->getSimpleValueType(0);  in LowerSIGN_EXTEND()  local
24550   SDValue In = Op->getOperand(0);  in LowerSIGN_EXTEND()
24557   assert(VT.isVector() && InVT.isVector() && "Expected vector type");  in LowerSIGN_EXTEND()
24558   assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&  in LowerSIGN_EXTEND()
24560   assert((VT.getVectorElementType() == MVT::i16 ||  in LowerSIGN_EXTEND()
24561           VT.getVectorElementType() == MVT::i32 ||  in LowerSIGN_EXTEND()
24562           VT.getVectorElementType() == MVT::i64) &&  in LowerSIGN_EXTEND()
24569   if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {  in LowerSIGN_EXTEND()
24570     assert(InVT == MVT::v32i8 && "Unexpected VT!");  in LowerSIGN_EXTEND()
24582   // for v4i32 the high shuffle mask will be {2, 3, -1, -1}  in LowerSIGN_EXTEND()
24583   // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32  in LowerSIGN_EXTEND()
24584   // concat the vectors to original VT  in LowerSIGN_EXTEND()
24585   MVT HalfVT = VT.getHalfNumVectorElementsVT();  in LowerSIGN_EXTEND()
24589   SmallVector<int,8> ShufMask(NumElems, -1);  in LowerSIGN_EXTEND()
24596   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);  in LowerSIGN_EXTEND()
24599 /// Change a vector store into a pair of half-size vector stores.
24601   SDValue StoredVal = Store->getValue();  in splitVectorStore()
24604          "Expecting 256/512-bit op");  in splitVectorStore()
24611   if (!Store->isSimple())  in splitVectorStore()
24618   SDValue Ptr0 = Store->getBasePtr();  in splitVectorStore()
24622       DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),  in splitVectorStore()
24623                    Store->getOriginalAlign(),  in splitVectorStore()
24624                    Store->getMemOperand()->getFlags());  in splitVectorStore()
24625   SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1,  in splitVectorStore()
24626                              Store->getPointerInfo().getWithOffset(HalfOffset),  in splitVectorStore()
24627                              Store->getOriginalAlign(),  in splitVectorStore()
24628                              Store->getMemOperand()->getFlags());  in splitVectorStore()
24636   SDValue StoredVal = Store->getValue();  in scalarizeVectorStore()
24638          StoredVal.getValueType().is128BitVector() && "Expecting 128-bit op");  in scalarizeVectorStore()
24644   if (!Store->isSimple())  in scalarizeVectorStore()
24655     SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(),  in scalarizeVectorStore()
24659     SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,  in scalarizeVectorStore()
24660                               Store->getPointerInfo().getWithOffset(Offset),  in scalarizeVectorStore()
24661                               Store->getOriginalAlign(),  in scalarizeVectorStore()
24662                               Store->getMemOperand()->getFlags());  in scalarizeVectorStore()
24672   SDValue StoredVal = St->getValue();  in LowerStore()
24678     assert(NumElts <= 8 && "Unexpected VT");  in LowerStore()
24679     assert(!St->isTruncatingStore() && "Expected non-truncating store");  in LowerStore()
24694     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),  in LowerStore()
24695                         St->getPointerInfo(), St->getOriginalAlign(),  in LowerStore()
24696                         St->getMemOperand()->getFlags());  in LowerStore()
24699   if (St->isTruncatingStore())  in LowerStore()
24702   // If this is a 256-bit store of concatenated ops, we are better off splitting  in LowerStore()
24703   // that store into two 128-bit stores. This avoids spurious use of 256-bit ops  in LowerStore()
24719   assert(StoreVT.is64BitVector() && "Unexpected VT");  in LowerStore()
24729     // Widen the vector, cast to a v2x64 type, extract the single 64-bit element  in LowerStore()
24737     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),  in LowerStore()
24738                         St->getPointerInfo(), St->getOriginalAlign(),  in LowerStore()
24739                         St->getMemOperand()->getFlags());  in LowerStore()
24743   SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};  in LowerStore()
24745                                  St->getMemOperand());  in LowerStore()
24767     assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load");  in LowerLoad()
24768     assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");  in LowerLoad()
24772     SDValue NewLd = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(),  in LowerLoad()
24773                                 Ld->getPointerInfo(), Ld->getOriginalAlign(),  in LowerLoad()
24774                                 Ld->getMemOperand()->getFlags());  in LowerLoad()
24777     assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!");  in LowerLoad()
24813     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();  in LowerBRCOND()
24844       // have a fall-through edge, because this requires an explicit  in LowerBRCOND()
24846       if (Op.getNode()->hasOneUse()) {  in LowerBRCOND()
24847         SDNode *User = *Op.getNode()->use_begin();  in LowerBRCOND()
24851         if (User->getOpcode() == ISD::BR) {  in LowerBRCOND()
24852           SDValue FalseBB = User->getOperand(1);  in LowerBRCOND()
24854             DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);  in LowerBRCOND()
24900   // Look past the truncate if the high bits are known zero.  in LowerBRCOND()
24940   EVT VT = Node->getValueType(0);  in LowerDYNAMIC_STACKALLOC()  local
24967       SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);  in LowerDYNAMIC_STACKALLOC()
24969       Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value  in LowerDYNAMIC_STACKALLOC()
24973           DAG.getNode(ISD::AND, dl, VT, Result,  in LowerDYNAMIC_STACKALLOC()
24974                       DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));  in LowerDYNAMIC_STACKALLOC()
24998     MF.getInfo<X86MachineFunctionInfo>()->setHasDynAlloca(true);  in LowerDYNAMIC_STACKALLOC()
25001     Register SPReg = RegInfo->getStackRegister();  in LowerDYNAMIC_STACKALLOC()
25006       SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),  in LowerDYNAMIC_STACKALLOC()
25007                        DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));  in LowerDYNAMIC_STACKALLOC()
25025   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();  in LowerVASTART()
25032     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);  in LowerVASTART()
25038   //   gp_offset         (0 - 6 * 8)  in LowerVASTART()
25039   //   fp_offset         (48 - 48 + 8 * 16)  in LowerVASTART()
25047       DAG.getConstant(FuncInfo->getVarArgsGPOffset(), DL, MVT::i32), FIN,  in LowerVASTART()
25055       DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN,  in LowerVASTART()
25061   SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);  in LowerVASTART()
25069   SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);  in LowerVASTART()
25079          "LowerVAARG only handles 64-bit va_arg!");  in LowerVAARG()
25089   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();  in LowerVAARG()
25093   EVT ArgVT = Op.getNode()->getValueType(0);  in LowerVAARG()
25137   // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows,  in LowerVACOPY()
25139   assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!");  in LowerVACOPY()
25148   const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();  in LowerVACOPY()
25149   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();  in LowerVACOPY()
25181 static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,  in getTargetVShiftByConstNode()  argument
25184   MVT ElementType = VT.getVectorElementType();  in getTargetVShiftByConstNode()
25188   if (VT != SrcOp.getSimpleValueType())  in getTargetVShiftByConstNode()
25189     SrcOp = DAG.getBitcast(VT, SrcOp);  in getTargetVShiftByConstNode()
25198       ShiftAmt = ElementType.getSizeInBits() - 1;  in getTargetVShiftByConstNode()
25200       return DAG.getConstant(0, dl, VT);  in getTargetVShiftByConstNode()
25204          && "Unknown target vector shift-by-constant node");  in getTargetVShiftByConstNode()
25223     SDValue Amt = DAG.getConstant(ShiftAmt, dl, VT);  in getTargetVShiftByConstNode()
25224     if (SDValue C = DAG.FoldConstantArithmetic(ShiftOpc, dl, VT, {SrcOp, Amt}))  in getTargetVShiftByConstNode()
25228   return DAG.getNode(Opc, dl, VT, SrcOp,  in getTargetVShiftByConstNode()
25233 static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,  in getTargetVShiftNode()  argument
25244     SmallVector<int> Mask(AmtVT.getVectorNumElements(), -1);  in getTargetVShiftNode()
25249   // Peek through any zext node if we can get back to a 128-bit source.  in getTargetVShiftNode()
25260   // The shift uses the entire lower 64-bits of the amount vector, so no need to  in getTargetVShiftNode()
25266       // If the shift amount has come from a scalar, then zero-extend the scalar  in getTargetVShiftNode()
25275       // then we can zero-extend it by setting all the other mask elements to  in getTargetVShiftNode()
25290   // Extract if the shift amount vector is larger than 128-bits.  in getTargetVShiftNode()
25296   // Zero-extend bottom element to v2i64 vector type, either by extension or  in getTargetVShiftNode()
25307           (128 - AmtVT.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);  in getTargetVShiftNode()
25316   // Change opcode to non-immediate version.  in getTargetVShiftNode()
25319   // The return type has to be a 128-bit type with the same element  in getTargetVShiftNode()
25321   MVT EltVT = VT.getVectorElementType();  in getTargetVShiftNode()
25325   return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);  in getTargetVShiftNode()
25368   MVT VT = Op.getSimpleValueType();  in getVectorMaskingNode()  local
25369   MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());  in getVectorMaskingNode()
25379     PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);  in getVectorMaskingNode()
25380   return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);  in getVectorMaskingNode()
25396     if (MaskConst->getZExtValue() & 0x1)  in getScalarMaskingNode()
25399   MVT VT = Op.getSimpleValueType();  in getScalarMaskingNode()  local
25409     return DAG.getNode(ISD::AND, dl, VT, Op, IMask);  in getScalarMaskingNode()
25412     PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);  in getScalarMaskingNode()
25413   return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc);  in getScalarMaskingNode()
25417   if (!Fn->hasPersonalityFn())  in getSEHRegistrationNodeSize()
25420   // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See  in getSEHRegistrationNodeSize()
25422   switch (classifyEHPersonality(Fn->getPersonalityFn())) {  in getSEHRegistrationNodeSize()
25428       "can only recover FP for 32-bit MSVC EH personality functions");  in getSEHRegistrationNodeSize()
25435 ///   RegNodeBase = EntryEBP - RegNodeSize
25436 ///   ParentFP = RegNodeBase - ParentFrameOffset
25450   if (!Fn->hasPersonalityFn())  in recoverFramePointer()
25456       GlobalValue::dropLLVMManglingEscape(Fn->getName()));  in recoverFramePointer()
25468   // RegNodeBase = EntryEBP - RegNodeSize  in recoverFramePointer()
25469   // ParentFP = RegNodeBase - ParentFrameOffset  in recoverFramePointer()
25480       return C->getAPIntValue() == X86::STATIC_ROUNDING::CUR_DIRECTION;  in LowerINTRINSIC_WO_CHAIN()
25486       unsigned RC = C->getZExtValue();  in LowerINTRINSIC_WO_CHAIN()
25500       RC = C->getZExtValue();  in LowerINTRINSIC_WO_CHAIN()
25516   MVT VT = Op.getSimpleValueType();  in LowerINTRINSIC_WO_CHAIN()  local
25520   SelectionDAG::FlagInserter FlagsInserter(DAG, Op->getFlags());  in LowerINTRINSIC_WO_CHAIN()
25523     switch(IntrData->Type) {  in LowerINTRINSIC_WO_CHAIN()
25526       // First, we check if the intrinsic may have non-default rounding mode,  in LowerINTRINSIC_WO_CHAIN()
25527       // (IntrData->Opc1 != 0), then we check the rounding mode operand.  in LowerINTRINSIC_WO_CHAIN()
25528       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25539       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),  in LowerINTRINSIC_WO_CHAIN()
25547         Opc = IntrData->Opc0;  in LowerINTRINSIC_WO_CHAIN()
25549         Opc = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25559       // First, we check if the intrinsic may have non-default rounding mode,  in LowerINTRINSIC_WO_CHAIN()
25560       // (IntrData->Opc1 != 0), then we check the rounding mode operand.  in LowerINTRINSIC_WO_CHAIN()
25561       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25573       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),  in LowerINTRINSIC_WO_CHAIN()
25581         Opc = IntrData->Opc0;  in LowerINTRINSIC_WO_CHAIN()
25583         Opc = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25596       if (IntrData->Type == INTR_TYPE_3OP_IMM8 &&  in LowerINTRINSIC_WO_CHAIN()
25598         Src3 = DAG.getTargetConstant(Src3->getAsZExtVal() & 0xff, dl, MVT::i8);  in LowerINTRINSIC_WO_CHAIN()
25602       // First, we check if the intrinsic may have non-default rounding mode,  in LowerINTRINSIC_WO_CHAIN()
25603       // (IntrData->Opc1 != 0), then we check the rounding mode operand.  in LowerINTRINSIC_WO_CHAIN()
25604       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25616       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),  in LowerINTRINSIC_WO_CHAIN()
25620       assert(Op.getOperand(4)->getOpcode() == ISD::TargetConstant);  in LowerINTRINSIC_WO_CHAIN()
25623         Src4 = DAG.getTargetConstant(Src4->getAsZExtVal() & 0xff, dl, MVT::i8);  in LowerINTRINSIC_WO_CHAIN()
25626       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),  in LowerINTRINSIC_WO_CHAIN()
25635       //   - RC Opcode is specified and  in LowerINTRINSIC_WO_CHAIN()
25636       //   - RC is not "current direction".  in LowerINTRINSIC_WO_CHAIN()
25637       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25650           DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru,  in LowerINTRINSIC_WO_CHAIN()
25661         Opc = IntrData->Opc0;  in LowerINTRINSIC_WO_CHAIN()
25663         Opc = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25667       return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), Mask, PassThru,  in LowerINTRINSIC_WO_CHAIN()
25675       unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25677       // (1) With suppress-all-exceptions (sae) or rounding mode- 6 operands  in LowerINTRINSIC_WO_CHAIN()
25678       // (2) With rounding mode and sae - 7 operands.  in LowerINTRINSIC_WO_CHAIN()
25686                 DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2,  in LowerINTRINSIC_WO_CHAIN()
25692         return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,  in LowerINTRINSIC_WO_CHAIN()
25700       unsigned Opc = IntrData->Opc0;  in LowerINTRINSIC_WO_CHAIN()
25708       return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1,  in LowerINTRINSIC_WO_CHAIN()
25722         NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2);  in LowerINTRINSIC_WO_CHAIN()
25724         NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2,  in LowerINTRINSIC_WO_CHAIN()
25739         Opc = IntrData->Opc0;  in LowerINTRINSIC_WO_CHAIN()
25741         Opc = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25745       return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),  in LowerINTRINSIC_WO_CHAIN()
25754       if (IntrData->Opc1 != 0) {  in LowerINTRINSIC_WO_CHAIN()
25758           NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2,  in LowerINTRINSIC_WO_CHAIN()
25764         NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2);  in LowerINTRINSIC_WO_CHAIN()
25773       unsigned Opc = IntrData->Opc0;  in LowerINTRINSIC_WO_CHAIN()
25774       if (IntrData->Opc1 != 0) {  in LowerINTRINSIC_WO_CHAIN()
25777           Opc = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25782       return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2),  in LowerINTRINSIC_WO_CHAIN()
25794         Opc = IntrData->Opc0;  in LowerINTRINSIC_WO_CHAIN()
25796         Opc = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25800       return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3),  in LowerINTRINSIC_WO_CHAIN()
25810       unsigned Opc = IntrData->Opc0;  in LowerINTRINSIC_WO_CHAIN()
25811       if (IntrData->Opc1 != 0) {  in LowerINTRINSIC_WO_CHAIN()
25814           Opc = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25818       return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3),  in LowerINTRINSIC_WO_CHAIN()
25830       return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1);  in LowerINTRINSIC_WO_CHAIN()
25837       return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);  in LowerINTRINSIC_WO_CHAIN()
25845       MVT VT = Op.getSimpleValueType();  in LowerINTRINSIC_WO_CHAIN()  local
25848       if (IntrData->Type == CFMA_OP_MASKZ)  in LowerINTRINSIC_WO_CHAIN()
25849         PassThru = getZeroVector(VT, Subtarget, DAG, dl);  in LowerINTRINSIC_WO_CHAIN()
25852       //   - RC Opcode is specified and  in LowerINTRINSIC_WO_CHAIN()
25853       //   - RC is not "current direction".  in LowerINTRINSIC_WO_CHAIN()
25855       if (IntrData->Opc1 != 0) {  in LowerINTRINSIC_WO_CHAIN()
25859           NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, Src3,  in LowerINTRINSIC_WO_CHAIN()
25865         NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3);  in LowerINTRINSIC_WO_CHAIN()
25871       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),  in LowerINTRINSIC_WO_CHAIN()
25877       SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm);  in LowerINTRINSIC_WO_CHAIN()
25893       // First, we check if the intrinsic may have non-default rounding mode,  in LowerINTRINSIC_WO_CHAIN()
25894       // (IntrData->Opc1 != 0), then we check the rounding mode operand.  in LowerINTRINSIC_WO_CHAIN()
25895       if (IntrData->Opc1 != 0) {  in LowerINTRINSIC_WO_CHAIN()
25898           return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),  in LowerINTRINSIC_WO_CHAIN()
25904       return DAG.getNode(IntrData->Opc0, dl, MaskVT,  in LowerINTRINSIC_WO_CHAIN()
25914       if (IntrData->Opc1 != 0) {  in LowerINTRINSIC_WO_CHAIN()
25917           Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Sae);  in LowerINTRINSIC_WO_CHAIN()
25923         Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC);  in LowerINTRINSIC_WO_CHAIN()
25935       ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
25942       SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);  in LowerINTRINSIC_WO_CHAIN()
26000       // Catch shift-by-constant.  in LowerINTRINSIC_WO_CHAIN()
26002         return getTargetVShiftByConstNode(IntrData->Opc0, dl,  in LowerINTRINSIC_WO_CHAIN()
26004                                           CShAmt->getZExtValue(), DAG);  in LowerINTRINSIC_WO_CHAIN()
26007       return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),  in LowerINTRINSIC_WO_CHAIN()
26019         PassThru = getZeroVector(VT, Subtarget, DAG, dl);  in LowerINTRINSIC_WO_CHAIN()
26021       return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru,  in LowerINTRINSIC_WO_CHAIN()
26031       SDValue Passthru = (IntrData->Type == FIXUPIMM)  in LowerINTRINSIC_WO_CHAIN()
26033                              : getZeroVector(VT, Subtarget, DAG, dl);  in LowerINTRINSIC_WO_CHAIN()
26035       unsigned Opc = IntrData->Opc0;  in LowerINTRINSIC_WO_CHAIN()
26036       if (IntrData->Opc1 != 0) {  in LowerINTRINSIC_WO_CHAIN()
26039           Opc = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
26044       SDValue FixupImm = DAG.getNode(Opc, dl, VT, Src1, Src2, Src3, Imm);  in LowerINTRINSIC_WO_CHAIN()
26052       assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode");  in LowerINTRINSIC_WO_CHAIN()
26057       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),  in LowerINTRINSIC_WO_CHAIN()
26061       assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode");  in LowerINTRINSIC_WO_CHAIN()
26066       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),  in LowerINTRINSIC_WO_CHAIN()
26070       assert(IntrData->Opc0 == X86ISD::BEXTRI && "Unexpected opcode");  in LowerINTRINSIC_WO_CHAIN()
26075       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),  in LowerINTRINSIC_WO_CHAIN()
26080       SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);  in LowerINTRINSIC_WO_CHAIN()
26087         Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2),  in LowerINTRINSIC_WO_CHAIN()
26091                                     DAG.getConstant(-1, dl, MVT::i8));  in LowerINTRINSIC_WO_CHAIN()
26092         Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2),  in LowerINTRINSIC_WO_CHAIN()
26108         return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);  in LowerINTRINSIC_WO_CHAIN()
26113       return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(),  in LowerINTRINSIC_WO_CHAIN()
26123       unsigned Opc = IntrData->Opc0;  in LowerINTRINSIC_WO_CHAIN()
26137         Opc = IntrData->Opc1;  in LowerINTRINSIC_WO_CHAIN()
26149         return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);  in LowerINTRINSIC_WO_CHAIN()
26155       return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,  in LowerINTRINSIC_WO_CHAIN()
26311     SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));  in LowerINTRINSIC_WO_CHAIN()
26326     SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));  in LowerINTRINSIC_WO_CHAIN()
26339     SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops()));  in LowerINTRINSIC_WO_CHAIN()
26351     return DAG.getNode(getGlobalWrapperKind(nullptr, /*OpFlags=*/0), dl, VT,  in LowerINTRINSIC_WO_CHAIN()
26359     auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());  in LowerINTRINSIC_WO_CHAIN()
26361         GlobalValue::dropLLVMManglingEscape(Fn->getName()));  in LowerINTRINSIC_WO_CHAIN()
26364     // supported on 32-bit Windows, which isn't PIC.  in LowerINTRINSIC_WO_CHAIN()
26365     SDValue Result = DAG.getMCSymbol(LSDASym, VT);  in LowerINTRINSIC_WO_CHAIN()
26366     return DAG.getNode(X86ISD::Wrapper, dl, VT, Result);  in LowerINTRINSIC_WO_CHAIN()
26373     auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);  in LowerINTRINSIC_WO_CHAIN()
26386     if (RegInfo->hasBasePointer(MF))  in LowerINTRINSIC_WO_CHAIN()
26387       Reg = RegInfo->getBaseRegister();  in LowerINTRINSIC_WO_CHAIN()
26389       bool CantUseFP = RegInfo->hasStackRealignment(MF);  in LowerINTRINSIC_WO_CHAIN()
26391         Reg = RegInfo->getPtrSizedStackRegister(MF);  in LowerINTRINSIC_WO_CHAIN()
26393         Reg = RegInfo->getPtrSizedFrameRegister(MF);  in LowerINTRINSIC_WO_CHAIN()
26395     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);  in LowerINTRINSIC_WO_CHAIN()
26410                     Op->getOperand(1), Op->getOperand(2));  in LowerINTRINSIC_WO_CHAIN()
26431       // to 8-bits which may make it no longer out of bounds.  in LowerINTRINSIC_WO_CHAIN()
26432       unsigned ShiftAmount = C->getAPIntValue().getLimitedValue(255);  in LowerINTRINSIC_WO_CHAIN()
26483       // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).  in LowerINTRINSIC_WO_CHAIN()
26505   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,  in getAVX2GatherNode()
26523                               MemIntr->getMemoryVT(), MemIntr->getMemOperand());  in getAVX2GatherNode()
26531   MVT VT = Op.getSimpleValueType();  in getGatherNode()  local
26538   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,  in getGatherNode()
26541                               VT.getVectorNumElements());  in getGatherNode()
26561                               MemIntr->getMemoryVT(), MemIntr->getMemOperand());  in getGatherNode()
26575   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,  in getScatterNode()
26592                               MemIntr->getMemoryVT(), MemIntr->getMemOperand());  in getScatterNode()
26606   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl,  in getPrefetchNode()
26623 /// Returns a Glue value which can be used to add extra copy-from-reg if the
26632   SDValue Chain = N->getOperand(0);  in expandIntrinsicWChainHelper()
26636     assert(N->getNumOperands() == 3 && "Unexpected number of operands!");  in expandIntrinsicWChainHelper()
26637     Chain = DAG.getCopyToReg(Chain, DL, SrcReg, N->getOperand(2), Glue);  in expandIntrinsicWChainHelper()
26662     // Merge the two 32-bit values into a 64-bit one.  in expandIntrinsicWChainHelper()
26670   // Use a buildpair to merge the two 32-bit values into a 64-bit one.  in expandIntrinsicWChainHelper()
26685   // The processor's time-stamp counter (a 64-bit MSR) is stored into the  in getReadTimeStampCounter()
26686   // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR  in getReadTimeStampCounter()
26687   // and the EAX register is loaded with the low-order 32 bits.  in getReadTimeStampCounter()
26723   EHInfo->EHRegNodeFrameIndex = FINode->getIndex();  in MarkEHRegistrationNode()
26741   EHInfo->EHGuardFrameIndex = FINode->getIndex();  in MarkEHGuard()
26774   // 64-bit targets support extended Swift async frame setup,  in isExtendedSwiftAsyncFrameSupported()
26776   return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();  in isExtendedSwiftAsyncFrameSupported()
26792         X86FI->setHasSwiftAsyncContext(true);  in LowerINTRINSIC_W_CHAIN()
26793         SDValue Chain = Op->getOperand(0);  in LowerINTRINSIC_W_CHAIN()
26800         return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,  in LowerINTRINSIC_W_CHAIN()
26805         if (!X86FI->getSwiftAsyncContextFrameIdx())  in LowerINTRINSIC_W_CHAIN()
26806           X86FI->setSwiftAsyncContextFrameIdx(  in LowerINTRINSIC_W_CHAIN()
26810             DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(),  in LowerINTRINSIC_W_CHAIN()
26813         return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,  in LowerINTRINSIC_W_CHAIN()
26814                            Op->getOperand(0));  in LowerINTRINSIC_W_CHAIN()
26861       SDValue Chain = Op->getOperand(0);  in LowerINTRINSIC_W_CHAIN()
26880           DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),  in LowerINTRINSIC_W_CHAIN()
26881                       Op->getOperand(3), Op->getOperand(4));  in LowerINTRINSIC_W_CHAIN()
26883       return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,  in LowerINTRINSIC_W_CHAIN()
26904       return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,  in LowerINTRINSIC_W_CHAIN()
26933       MachineMemOperand *MMO = MemIntr->getMemOperand();  in LowerINTRINSIC_W_CHAIN()
26934       EVT MemVT = MemIntr->getMemoryVT();  in LowerINTRINSIC_W_CHAIN()
26940       return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),  in LowerINTRINSIC_W_CHAIN()
26971       MachineMemOperand *MMO = MemIntr->getMemOperand();  in LowerINTRINSIC_W_CHAIN()
26972       EVT MemVT = MemIntr->getMemoryVT();  in LowerINTRINSIC_W_CHAIN()
26981       return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),  in LowerINTRINSIC_W_CHAIN()
26994       return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,  in LowerINTRINSIC_W_CHAIN()
27001       MVT VT = Op.getSimpleValueType();  in LowerINTRINSIC_W_CHAIN()  local
27008       MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();  in LowerINTRINSIC_W_CHAIN()
27011                                   {Chain, Op1, Op2}, VT, MMO);  in LowerINTRINSIC_W_CHAIN()
27013       Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);  in LowerINTRINSIC_W_CHAIN()
27014       return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain);  in LowerINTRINSIC_W_CHAIN()
27020       MVT VT = Op.getSimpleValueType();  in LowerINTRINSIC_W_CHAIN()  local
27027       SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32);  in LowerINTRINSIC_W_CHAIN()
27028       MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();  in LowerINTRINSIC_W_CHAIN()
27031                                   {Chain, Op1, Op2, Size}, VT, MMO);  in LowerINTRINSIC_W_CHAIN()
27033       Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT);  in LowerINTRINSIC_W_CHAIN()
27034       unsigned Imm = Op2->getAsZExtVal();  in LowerINTRINSIC_W_CHAIN()
27036         Res = DAG.getNode(ISD::SHL, DL, VT, Res,  in LowerINTRINSIC_W_CHAIN()
27037                           DAG.getShiftAmountConstant(Imm, VT, DL));  in LowerINTRINSIC_W_CHAIN()
27038       return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain);  in LowerINTRINSIC_W_CHAIN()
27048       MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();  in LowerINTRINSIC_W_CHAIN()
27050           X86ISD::CMPCCXADD, DL, Op->getVTList(), {Chain, Addr, Src1, Src2, CC},  in LowerINTRINSIC_W_CHAIN()
27066       MVT VT = Op2.getSimpleValueType();  in LowerINTRINSIC_W_CHAIN()  local
27088       MachineMemOperand *MMO = cast<MemSDNode>(Op)->getMemOperand();  in LowerINTRINSIC_W_CHAIN()
27089       return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(),  in LowerINTRINSIC_W_CHAIN()
27090                                      {Chain, Op1, Op2}, VT, MMO);  in LowerINTRINSIC_W_CHAIN()
27102       MVT VT = Op2.getSimpleValueType();  in LowerINTRINSIC_W_CHAIN()  local
27123       MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();  in LowerINTRINSIC_W_CHAIN()
27126                                   {Chain, Op1, Op2}, VT, MMO);  in LowerINTRINSIC_W_CHAIN()
27135   switch(IntrData->Type) {  in LowerINTRINSIC_W_CHAIN()
27140     SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32, MVT::Other);  in LowerINTRINSIC_W_CHAIN()
27141     SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));  in LowerINTRINSIC_W_CHAIN()
27145     SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),  in LowerINTRINSIC_W_CHAIN()
27146                      DAG.getConstant(1, dl, Op->getValueType(1)),  in LowerINTRINSIC_W_CHAIN()
27149     SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops);  in LowerINTRINSIC_W_CHAIN()
27152     return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,  in LowerINTRINSIC_W_CHAIN()
27162     return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,  in LowerINTRINSIC_W_CHAIN()
27184     return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,  in LowerINTRINSIC_W_CHAIN()
27191     unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);  in LowerINTRINSIC_W_CHAIN()
27203     getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget,  in LowerINTRINSIC_W_CHAIN()
27219     expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX,  in LowerINTRINSIC_W_CHAIN()
27225     SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);  in LowerINTRINSIC_W_CHAIN()
27226     SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));  in LowerINTRINSIC_W_CHAIN()
27229     SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);  in LowerINTRINSIC_W_CHAIN()
27230     return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),  in LowerINTRINSIC_W_CHAIN()
27244     EVT MemVT  = MemIntr->getMemoryVT();  in LowerINTRINSIC_W_CHAIN()
27246     uint16_t TruncationOp = IntrData->Opc0;  in LowerINTRINSIC_W_CHAIN()
27251                                  MemIntr->getMemOperand());  in LowerINTRINSIC_W_CHAIN()
27258                                 MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED,  in LowerINTRINSIC_W_CHAIN()
27266                                MemIntr->getMemOperand(), DAG);  in LowerINTRINSIC_W_CHAIN()
27272                                    VMask, MemVT, MemIntr->getMemOperand(), DAG);  in LowerINTRINSIC_W_CHAIN()
27296     SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT);  in LowerRETURNADDR()
27319   EVT VT = Op.getValueType();  in LowerFRAMEADDR()  local
27323   if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {  in LowerFRAMEADDR()
27327     int FrameAddrIndex = FuncInfo->getFAIndex();  in LowerFRAMEADDR()
27330       unsigned SlotSize = RegInfo->getSlotSize();  in LowerFRAMEADDR()
27333       FuncInfo->setFAIndex(FrameAddrIndex);  in LowerFRAMEADDR()
27335     return DAG.getFrameIndex(FrameAddrIndex, VT);  in LowerFRAMEADDR()
27339       RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());  in LowerFRAMEADDR()
27342   assert(((FrameReg == X86::RBP && VT == MVT::i64) ||  in LowerFRAMEADDR()
27343           (FrameReg == X86::EBP && VT == MVT::i32)) &&  in LowerFRAMEADDR()
27345   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);  in LowerFRAMEADDR()
27346   while (Depth--)  in LowerFRAMEADDR()
27347     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,  in LowerFRAMEADDR()
27354 Register X86TargetLowering::getRegisterByName(const char* RegName, LLT VT,  in getRegisterByName()  argument
27374       Register FrameReg = RegInfo->getPtrSizedFrameRegister(MF);  in getRegisterByName()
27390   return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op));  in LowerFRAME_TO_ARGS_OFFSET()
27421   Register FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());  in LowerEH_RETURN()
27429                                  DAG.getIntPtrConstant(RegInfo->getSlotSize(),  in LowerEH_RETURN()
27450     (void)TII->getGlobalBaseReg(&DAG.getMachineFunction());  in lowerEH_SJLJ_SETJMP()
27483   const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();  in LowerINIT_TRAMPOLINE()
27489     // Large code-model.  in LowerINIT_TRAMPOLINE()
27490     const unsigned char JMP64r  = 0xFF; // 64-bit jmp through register opcode.  in LowerINIT_TRAMPOLINE()
27493     const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;  in LowerINIT_TRAMPOLINE()
27494     const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;  in LowerINIT_TRAMPOLINE()
27538       cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());  in LowerINIT_TRAMPOLINE()
27539     CallingConv::ID CC = Func->getCallingConv();  in LowerINIT_TRAMPOLINE()
27552       FunctionType *FTy = Func->getFunctionType();  in LowerINIT_TRAMPOLINE()
27553       const AttributeList &Attrs = Func->getAttributes();  in LowerINIT_TRAMPOLINE()
27555       if (!Attrs.isEmpty() && !Func->isVarArg()) {  in LowerINIT_TRAMPOLINE()
27559         for (FunctionType::param_iterator I = FTy->param_begin(),  in LowerINIT_TRAMPOLINE()
27560              E = FTy->param_end(); I != E; ++I, ++Idx)  in LowerINIT_TRAMPOLINE()
27568           report_fatal_error("Nest register in use - reduce number of inreg"  in LowerINIT_TRAMPOLINE()
27594     const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;  in LowerINIT_TRAMPOLINE()
27626      01 Round to -inf  in LowerGET_ROUNDING()
27631     -1 Undefined  in LowerGET_ROUNDING()
27635      3 Round to -inf  in LowerGET_ROUNDING()
27637   To perform the conversion, we use a packed lookup table of the four 2-bit  in LowerGET_ROUNDING()
27639     0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10]  in LowerGET_ROUNDING()
27645   MVT VT = Op.getSimpleValueType();  in LowerGET_ROUNDING()  local
27679   RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT);  in LowerGET_ROUNDING()
27688   SDValue Chain = Op.getNode()->getOperand(0);  in LowerSET_ROUNDING()
27711   SDValue NewRM = Op.getNode()->getOperand(1);  in LowerSET_ROUNDING()
27714     uint64_t RM = CVal->getZExtValue();  in LowerSET_ROUNDING()
27717     // clang-format off  in LowerSET_ROUNDING()
27724     // clang-format on  in LowerSET_ROUNDING()
27729     //    0 Round to 0       -> 11  in LowerSET_ROUNDING()
27730     //    1 Round to nearest -> 00  in LowerSET_ROUNDING()
27731     //    2 Round to +inf    -> 10  in LowerSET_ROUNDING()
27732     //    3 Round to -inf    -> 01  in LowerSET_ROUNDING()
27733     // The 2-bit value needs then to be shifted so that it occupies bits 11:10.  in LowerSET_ROUNDING()
27806   SDValue Chain = Op->getOperand(0);  in LowerGET_FPENV_MEM()
27807   SDValue Ptr = Op->getOperand(1);  in LowerGET_FPENV_MEM()
27809   EVT MemVT = Node->getMemoryVT();  in LowerGET_FPENV_MEM()
27811   MachineMemOperand *MMO = cast<FPStateAccessSDNode>(Op)->getMemOperand();  in LowerGET_FPENV_MEM()
27822         (MMO->getFlags() & ~MachineMemOperand::MOStore);  in LowerGET_FPENV_MEM()
27872   SDValue Chain = Op->getOperand(0);  in LowerSET_FPENV_MEM()
27873   SDValue Ptr = Op->getOperand(1);  in LowerSET_FPENV_MEM()
27875   EVT MemVT = Node->getMemoryVT();  in LowerSET_FPENV_MEM()
27877   MachineMemOperand *MMO = cast<FPStateAccessSDNode>(Op)->getMemOperand();  in LowerSET_FPENV_MEM()
27885   SDValue Chain = Op.getNode()->getOperand(0);  in LowerRESET_FPENV()
27891   // x87 FPU Control Word: mask all floating-point exceptions, sets rounding to  in LowerRESET_FPENV()
27900   // MXCSR: mask all floating-point exceptions, sets rounding to nearest, clear  in LowerRESET_FPENV()
27924   MVT VT = Op.getSimpleValueType();  in LowerVectorCTLZ_AVX512CDI()  local
27925   MVT EltVT = VT.getVectorElementType();  in LowerVectorCTLZ_AVX512CDI()
27926   unsigned NumElems = VT.getVectorNumElements();  in LowerVectorCTLZ_AVX512CDI()
27943   SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode);  in LowerVectorCTLZ_AVX512CDI()
27944   SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT);  in LowerVectorCTLZ_AVX512CDI()
27946   return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);  in LowerVectorCTLZ_AVX512CDI()
27953   MVT VT = Op.getSimpleValueType();  in LowerVectorCTLZInRegLUT()  local
27954   int NumElts = VT.getVectorNumElements();  in LowerVectorCTLZInRegLUT()
27955   int NumBytes = NumElts * (VT.getScalarSizeInBits() / 8);  in LowerVectorCTLZInRegLUT()
27958   // Per-nibble leading zero PSHUFB lookup table.  in LowerVectorCTLZInRegLUT()
27994   // Merge result back from vXi8 back to VT, working on the lo/hi halves  in LowerVectorCTLZInRegLUT()
27999   while (CurrVT != VT) {  in LowerVectorCTLZInRegLUT()
28035   MVT VT = Op.getSimpleValueType();  in LowerVectorCTLZ()  local
28038       // vXi8 vectors need to be promoted to 512-bits for vXi32.  in LowerVectorCTLZ()
28039       (Subtarget.canExtendTo512DQ() || VT.getVectorElementType() != MVT::i8))  in LowerVectorCTLZ()
28042   // Decompose 256-bit ops into smaller 128-bit ops.  in LowerVectorCTLZ()
28043   if (VT.is256BitVector() && !Subtarget.hasInt256())  in LowerVectorCTLZ()
28046   // Decompose 512-bit ops into smaller 256-bit ops.  in LowerVectorCTLZ()
28047   if (VT.is512BitVector() && !Subtarget.hasBWI())  in LowerVectorCTLZ()
28056   MVT VT = Op.getSimpleValueType();  in LowerCTLZ()  local
28057   MVT OpVT = VT;  in LowerCTLZ()
28058   unsigned NumBits = VT.getSizeInBits();  in LowerCTLZ()
28062   if (VT.isVector())  in LowerCTLZ()
28066   if (VT == MVT::i8) {  in LowerCTLZ()
28078     SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT),  in LowerCTLZ()
28084   // Finally xor with NumBits-1.  in LowerCTLZ()
28086                    DAG.getConstant(NumBits - 1, dl, OpVT));  in LowerCTLZ()
28088   if (VT == MVT::i8)  in LowerCTLZ()
28095   MVT VT = Op.getSimpleValueType();  in LowerCTTZ()  local
28096   unsigned NumBits = VT.getScalarSizeInBits();  in LowerCTTZ()
28100   assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ &&  in LowerCTTZ()
28104   SDVTList VTs = DAG.getVTList(VT, MVT::i32);  in LowerCTTZ()
28112   SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT),  in LowerCTTZ()
28115   return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);  in LowerCTTZ()
28120   MVT VT = Op.getSimpleValueType();  in lowerAddSub()  local
28123   if (VT == MVT::i16 || VT == MVT::i32)  in lowerAddSub()
28126   if (VT == MVT::v32i16 || VT == MVT::v64i8)  in lowerAddSub()
28131          "Only handle AVX 256-bit vector integer operation");  in lowerAddSub()
28137   MVT VT = Op.getSimpleValueType();  in LowerADDSAT_SUBSAT()  local
28142   if (VT == MVT::v32i16 || VT == MVT::v64i8 ||  in LowerADDSAT_SUBSAT()
28143       (VT.is256BitVector() && !Subtarget.hasInt256())) {  in LowerADDSAT_SUBSAT()
28152       TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);  in LowerADDSAT_SUBSAT()
28154   unsigned BitWidth = VT.getScalarSizeInBits();  in LowerADDSAT_SUBSAT()
28156     if (!TLI.isOperationLegal(ISD::UMAX, VT) || useVPTERNLOG(Subtarget, VT)) {  in LowerADDSAT_SUBSAT()
28157       // Handle a special-case with a bit-hack instead of cmp+select:  in LowerADDSAT_SUBSAT()
28158       // usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)  in LowerADDSAT_SUBSAT()
28163       if (C && C->getAPIntValue().isSignMask()) {  in LowerADDSAT_SUBSAT()
28164         SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);  in LowerADDSAT_SUBSAT()
28165         SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);  in LowerADDSAT_SUBSAT()
28166         SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);  in LowerADDSAT_SUBSAT()
28167         SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);  in LowerADDSAT_SUBSAT()
28168         return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);  in LowerADDSAT_SUBSAT()
28171     if (!TLI.isOperationLegal(ISD::UMAX, VT)) {  in LowerADDSAT_SUBSAT()
28172       // usubsat X, Y --> (X >u Y) ? X - Y : 0  in LowerADDSAT_SUBSAT()
28173       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);  in LowerADDSAT_SUBSAT()
28176       if (SetCCResultType == VT &&  in LowerADDSAT_SUBSAT()
28177           DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())  in LowerADDSAT_SUBSAT()
28178         return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);  in LowerADDSAT_SUBSAT()
28179       return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));  in LowerADDSAT_SUBSAT()
28184       (!VT.isVector() || VT == MVT::v2i64)) {  in LowerADDSAT_SUBSAT()
28187     SDValue Zero = DAG.getConstant(0, DL, VT);  in LowerADDSAT_SUBSAT()
28190                     DAG.getVTList(VT, SetCCResultType), X, Y);  in LowerADDSAT_SUBSAT()
28193     SDValue SatMin = DAG.getConstant(MinVal, DL, VT);  in LowerADDSAT_SUBSAT()
28194     SDValue SatMax = DAG.getConstant(MaxVal, DL, VT);  in LowerADDSAT_SUBSAT()
28197     Result = DAG.getSelect(DL, VT, SumNeg, SatMax, SatMin);  in LowerADDSAT_SUBSAT()
28198     return DAG.getSelect(DL, VT, Overflow, Result, SumDiff);  in LowerADDSAT_SUBSAT()
28207   MVT VT = Op.getSimpleValueType();  in LowerABS()  local
28210   if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {  in LowerABS()
28211     // Since X86 does not have CMOV for 8-bit integer, we don't convert  in LowerABS()
28212     // 8-bit integer abs to NEG and CMOV.  in LowerABS()
28214     SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),  in LowerABS()
28215                               DAG.getConstant(0, DL, VT), N0);  in LowerABS()
28218     return DAG.getNode(X86ISD::CMOV, DL, VT, Ops);  in LowerABS()
28221   // ABS(vXi64 X) --> VPBLENDVPD(X, 0-X, X).  in LowerABS()
28222   if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) {  in LowerABS()
28224     SDValue Neg = DAG.getNegative(Src, DL, VT);  in LowerABS()
28225     return DAG.getNode(X86ISD::BLENDV, DL, VT, Src, Neg, Src);  in LowerABS()
28228   if (VT.is256BitVector() && !Subtarget.hasInt256()) {  in LowerABS()
28229     assert(VT.isInteger() &&  in LowerABS()
28230            "Only handle AVX 256-bit vector integer operation");  in LowerABS()
28234   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())  in LowerABS()
28243   MVT VT = Op.getSimpleValueType();  in LowerAVG()  local
28247   if (VT.is256BitVector() && !Subtarget.hasInt256())  in LowerAVG()
28250   if (VT == MVT::v32i16 || VT == MVT::v64i8)  in LowerAVG()
28259   MVT VT = Op.getSimpleValueType();  in LowerMINMAX()  local
28263   if (VT.is256BitVector() && !Subtarget.hasInt256())  in LowerMINMAX()
28266   if (VT == MVT::v32i16 || VT == MVT::v64i8)  in LowerMINMAX()
28278   EVT VT = Op.getValueType();  in LowerFMINIMUM_FMAXIMUM()  local
28282   uint64_t SizeInBits = VT.getScalarSizeInBits();  in LowerFMINIMUM_FMAXIMUM()
28285   EVT IVT = VT.changeTypeToInteger();  in LowerFMINIMUM_FMAXIMUM()
28295       TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);  in LowerFMINIMUM_FMAXIMUM()
28301   //             Num   xNaN              +0     -0  in LowerFMINIMUM_FMAXIMUM()
28302   //          ---------------         ---------------  in LowerFMINIMUM_FMAXIMUM()
28304   // X        ---------------  X      ---------------  in LowerFMINIMUM_FMAXIMUM()
28305   //    xNaN  |   X  |  X/Y |     -0  |  +0  |  -0  |  in LowerFMINIMUM_FMAXIMUM()
28306   //          ---------------         ---------------  in LowerFMINIMUM_FMAXIMUM()
28317       return CstOp->getValueAPF().bitcastToAPInt() == Zero;  in LowerFMINIMUM_FMAXIMUM()
28319       return CstOp->getAPIntValue() == Zero;  in LowerFMINIMUM_FMAXIMUM()
28320     if (Op->getOpcode() == ISD::BUILD_VECTOR ||  in LowerFMINIMUM_FMAXIMUM()
28321         Op->getOpcode() == ISD::SPLAT_VECTOR) {  in LowerFMINIMUM_FMAXIMUM()
28322       for (const SDValue &OpVal : Op->op_values()) {  in LowerFMINIMUM_FMAXIMUM()
28328         if (!CstOp->getValueAPF().isZero())  in LowerFMINIMUM_FMAXIMUM()
28330         if (CstOp->getValueAPF().bitcastToAPInt() != Zero)  in LowerFMINIMUM_FMAXIMUM()
28341                           Op->getFlags().hasNoSignedZeros() ||  in LowerFMINIMUM_FMAXIMUM()
28353   } else if (!VT.isVector() && (VT == MVT::f16 || Subtarget.hasDQI()) &&  in LowerFMINIMUM_FMAXIMUM()
28354              (Op->getFlags().hasNoNaNs() || IsXNeverNaN || IsYNeverNaN)) {  in LowerFMINIMUM_FMAXIMUM()
28359     MVT VectorType = MVT::getVectorVT(VT.getSimpleVT(), 128 / SizeInBits);  in LowerFMINIMUM_FMAXIMUM()
28371     NewX = DAG.getSelect(DL, VT, NeedSwap, Y, X);  in LowerFMINIMUM_FMAXIMUM()
28372     NewY = DAG.getSelect(DL, VT, NeedSwap, X, Y);  in LowerFMINIMUM_FMAXIMUM()
28373     return DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());  in LowerFMINIMUM_FMAXIMUM()
28376     if (Subtarget.is64Bit() || VT != MVT::f64) {  in LowerFMINIMUM_FMAXIMUM()
28381       assert(VT == MVT::f64);  in LowerFMINIMUM_FMAXIMUM()
28395       NewX = DAG.getSelect(DL, VT, IsXSigned, X, Y);  in LowerFMINIMUM_FMAXIMUM()
28396       NewY = DAG.getSelect(DL, VT, IsXSigned, Y, X);  in LowerFMINIMUM_FMAXIMUM()
28398       NewX = DAG.getSelect(DL, VT, IsXSigned, Y, X);  in LowerFMINIMUM_FMAXIMUM()
28399       NewY = DAG.getSelect(DL, VT, IsXSigned, X, Y);  in LowerFMINIMUM_FMAXIMUM()
28404                    Op->getFlags().hasNoNaNs() || (IsXNeverNaN && IsYNeverNaN);  in LowerFMINIMUM_FMAXIMUM()
28412   SDValue MinMax = DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());  in LowerFMINIMUM_FMAXIMUM()
28418   return DAG.getSelect(DL, VT, IsNaN, NewX, MinMax);  in LowerFMINIMUM_FMAXIMUM()
28423   MVT VT = Op.getSimpleValueType();  in LowerABD()  local
28427   if (VT.is256BitVector() && !Subtarget.hasInt256())  in LowerABD()
28430   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.useBWIRegs())  in LowerABD()
28437   if (VT.isScalarInteger()) {  in LowerABD()
28438     unsigned WideBits = std::max<unsigned>(2 * VT.getScalarSizeInBits(), 32u);  in LowerABD()
28441       // abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs))))  in LowerABD()
28442       // abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs))))  in LowerABD()
28448       return DAG.getNode(ISD::TRUNCATE, dl, VT, AbsDiff);  in LowerABD()
28459   MVT VT = Op.getSimpleValueType();  in LowerMUL()  local
28461   // Decompose 256-bit ops into 128-bit ops.  in LowerMUL()
28462   if (VT.is256BitVector() && !Subtarget.hasInt256())  in LowerMUL()
28465   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())  in LowerMUL()
28471   // Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16  in LowerMUL()
28473   if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {  in LowerMUL()
28474     unsigned NumElts = VT.getVectorNumElements();  in LowerMUL()
28475     unsigned NumLanes = VT.getSizeInBits() / 128;  in LowerMUL()
28478     if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||  in LowerMUL()
28479         (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {  in LowerMUL()
28480       MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());  in LowerMUL()
28482           ISD::TRUNCATE, dl, VT,  in LowerMUL()
28497         for (auto [Idx, Val] : enumerate(B->ops())) {  in LowerMUL()
28505         SDValue Mask = DAG.getBitcast(VT, DAG.getConstant(0x00FF, dl, ExVT));  in LowerMUL()
28506         SDValue BLo = DAG.getNode(ISD::AND, dl, VT, Mask, B);  in LowerMUL()
28507         SDValue BHi = DAG.getNode(X86ISD::ANDNP, dl, VT, Mask, B);  in LowerMUL()
28510         RLo = DAG.getNode(ISD::AND, dl, VT, DAG.getBitcast(VT, RLo), Mask);  in LowerMUL()
28513         return DAG.getNode(ISD::OR, dl, VT, RLo, DAG.getBitcast(VT, RHi));  in LowerMUL()
28519     // pmullw, so it doesn't matter what's in the high byte of each 16-bit  in LowerMUL()
28521     SDValue Undef = DAG.getUNDEF(VT);  in LowerMUL()
28522     SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));  in LowerMUL()
28523     SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));  in LowerMUL()
28541       BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));  in LowerMUL()
28542       BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));  in LowerMUL()
28548     return getPack(DAG, Subtarget, dl, VT, RLo, RHi);  in LowerMUL()
28552   if (VT == MVT::v4i32) {  in LowerMUL()
28557     static const int UnpackMask[] = { 1, -1, 3, -1 };  in LowerMUL()
28558     SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);  in LowerMUL()
28559     SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);  in LowerMUL()
28570     Evens = DAG.getBitcast(VT, Evens);  in LowerMUL()
28571     Odds = DAG.getBitcast(VT, Odds);  in LowerMUL()
28576     return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);  in LowerMUL()
28579   assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&  in LowerMUL()
28603   SDValue Zero = DAG.getConstant(0, dl, VT);  in LowerMUL()
28608     AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);  in LowerMUL()
28612     SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);  in LowerMUL()
28613     AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);  in LowerMUL()
28618     SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);  in LowerMUL()
28619     AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);  in LowerMUL()
28622   SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo);  in LowerMUL()
28623   Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG);  in LowerMUL()
28625   return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi);  in LowerMUL()
28629                                      MVT VT, bool IsSigned,  in LowervXi8MulWithUNPCK()  argument
28633   unsigned NumElts = VT.getVectorNumElements();  in LowervXi8MulWithUNPCK()
28635   // For vXi8 we will unpack the low and high half of each 128 bit lane to widen  in LowervXi8MulWithUNPCK()
28641   // and use pmullw to calculate the full 16-bit product.  in LowervXi8MulWithUNPCK()
28644   // pmulhw to calculate the full 16-bit product. This trick means we don't  in LowervXi8MulWithUNPCK()
28648   SDValue Zero = DAG.getConstant(0, dl, VT);  in LowervXi8MulWithUNPCK()
28652     ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, A));  in LowervXi8MulWithUNPCK()
28653     AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A));  in LowervXi8MulWithUNPCK()
28655     ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero));  in LowervXi8MulWithUNPCK()
28656     AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero));  in LowervXi8MulWithUNPCK()
28688     BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, B));  in LowervXi8MulWithUNPCK()
28689     BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, B));  in LowervXi8MulWithUNPCK()
28691     BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Zero));  in LowervXi8MulWithUNPCK()
28692     BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Zero));  in LowervXi8MulWithUNPCK()
28702     *Low = getPack(DAG, Subtarget, dl, VT, RLo, RHi);  in LowervXi8MulWithUNPCK()
28704   return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf*/ true);  in LowervXi8MulWithUNPCK()
28710   MVT VT = Op.getSimpleValueType();  in LowerMULH()  local
28711   bool IsSigned = Op->getOpcode() == ISD::MULHS;  in LowerMULH()
28712   unsigned NumElts = VT.getVectorNumElements();  in LowerMULH()
28716   // Decompose 256-bit ops into 128-bit ops.  in LowerMULH()
28717   if (VT.is256BitVector() && !Subtarget.hasInt256())  in LowerMULH()
28720   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())  in LowerMULH()
28723   if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {  in LowerMULH()
28724     assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||  in LowerMULH()
28725            (VT == MVT::v8i32 && Subtarget.hasInt256()) ||  in LowerMULH()
28726            (VT == MVT::v16i32 && Subtarget.hasAVX512()));  in LowerMULH()
28740     const int Mask[] = {1, -1,  3, -1,  5, -1,  7, -1,  in LowerMULH()
28741                         9, -1, 11, -1, 13, -1, 15, -1};  in LowerMULH()
28744         DAG.getVectorShuffle(VT, dl, A, A, ArrayRef(&Mask[0], NumElts));  in LowerMULH()
28747         DAG.getVectorShuffle(VT, dl, B, B, ArrayRef(&Mask[0], NumElts));  in LowerMULH()
28756     SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,  in LowerMULH()
28761     SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,  in LowerMULH()
28770     SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);  in LowerMULH()
28775       SDValue Zero = DAG.getConstant(0, dl, VT);  in LowerMULH()
28776       SDValue T1 = DAG.getNode(ISD::AND, dl, VT,  in LowerMULH()
28777                                DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B);  in LowerMULH()
28778       SDValue T2 = DAG.getNode(ISD::AND, dl, VT,  in LowerMULH()
28779                                DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A);  in LowerMULH()
28781       SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);  in LowerMULH()
28782       Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);  in LowerMULH()
28789   assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||  in LowerMULH()
28790          (VT == MVT::v64i8 && Subtarget.hasBWI())) &&  in LowerMULH()
28796   // With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack  in LowerMULH()
28799   if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||  in LowerMULH()
28800       (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {  in LowerMULH()
28807     return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);  in LowerMULH()
28810   return LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG);  in LowerMULH()
28816   MVT VT = Op.getSimpleValueType();  in LowerMULO()  local
28819   if (!VT.isVector())  in LowerMULO()
28823   bool IsSigned = Op->getOpcode() == ISD::SMULO;  in LowerMULO()
28826   EVT OvfVT = Op->getValueType(1);  in LowerMULO()
28828   if ((VT == MVT::v32i8 && !Subtarget.hasInt256()) ||  in LowerMULO()
28829       (VT == MVT::v64i8 && !Subtarget.hasBWI())) {  in LowerMULO()
28848     SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);  in LowerMULO()
28857       TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);  in LowerMULO()
28859   if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||  in LowerMULO()
28860       (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {  in LowerMULO()
28861     unsigned NumElts = VT.getVectorNumElements();  in LowerMULO()
28868     SDValue Low = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);  in LowerMULO()
28872       SDValue High, LowSign;  in LowerMULO()  local
28876         // Shift the high down filling with sign bits.  in LowerMULO()
28877         High = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Mul, 8, DAG);  in LowerMULO()
28886           High = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v16i32, High);  in LowerMULO()
28891         High = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);  in LowerMULO()
28892         High = DAG.getNode(ISD::TRUNCATE, dl, VT, High);  in LowerMULO()
28894             DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT));  in LowerMULO()
28897       Ovf = DAG.getSetCC(dl, SetccVT, LowSign, High, ISD::SETNE);  in LowerMULO()
28899       SDValue High =  in LowerMULO()  local
28907           High = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, High);  in LowerMULO()
28911         High = DAG.getNode(ISD::TRUNCATE, dl, VT, High);  in LowerMULO()
28915           DAG.getSetCC(dl, SetccVT, High,  in LowerMULO()
28916                        DAG.getConstant(0, dl, High.getValueType()), ISD::SETNE);  in LowerMULO()
28925   SDValue High =  in LowerMULO()  local
28926       LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG, &Low);  in LowerMULO()
28930     // SMULO overflows if the high bits don't match the sign of the low.  in LowerMULO()
28932         DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT));  in LowerMULO()
28933     Ovf = DAG.getSetCC(dl, SetccVT, LowSign, High, ISD::SETNE);  in LowerMULO()
28935     // UMULO overflows if the high bits are non-zero.  in LowerMULO()
28937         DAG.getSetCC(dl, SetccVT, High, DAG.getConstant(0, dl, VT), ISD::SETNE);  in LowerMULO()
28947   EVT VT = Op.getValueType();  in LowerWin64_i128OP()  local
28948   assert(VT.isInteger() && VT.getSizeInBits() == 128 &&  in LowerWin64_i128OP()
28951   if (isa<ConstantSDNode>(Op->getOperand(1))) {  in LowerWin64_i128OP()
28954       return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), VT, Result[0], Result[1]);  in LowerWin64_i128OP()
28959   switch (Op->getOpcode()) {  in LowerWin64_i128OP()
28960   // clang-format off  in LowerWin64_i128OP()
28966   // clang-format on  in LowerWin64_i128OP()
28974   for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {  in LowerWin64_i128OP()
28975     EVT ArgVT = Op->getOperand(i).getValueType();  in LowerWin64_i128OP()
28979     int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();  in LowerWin64_i128OP()
28984         DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MPI, Align(16));  in LowerWin64_i128OP()
29007   return DAG.getBitcast(VT, CallInfo.first);  in LowerWin64_i128OP()
29014   EVT VT = Op.getValueType();  in LowerWin64_FP_TO_INT128()  local
29015   bool IsStrict = Op->isStrictFPOpcode();  in LowerWin64_FP_TO_INT128()
29020   assert(VT.isInteger() && VT.getSizeInBits() == 128 &&  in LowerWin64_FP_TO_INT128()
29024   if (Op->getOpcode() == ISD::FP_TO_SINT ||  in LowerWin64_FP_TO_INT128()
29025       Op->getOpcode() == ISD::STRICT_FP_TO_SINT)  in LowerWin64_FP_TO_INT128()
29026     LC = RTLIB::getFPTOSINT(ArgVT, VT);  in LowerWin64_FP_TO_INT128()
29028     LC = RTLIB::getFPTOUINT(ArgVT, VT);  in LowerWin64_FP_TO_INT128()
29037   // expected VT (i128).  in LowerWin64_FP_TO_INT128()
29040   Result = DAG.getBitcast(VT, Result);  in LowerWin64_FP_TO_INT128()
29047   EVT VT = Op.getValueType();  in LowerWin64_INT128_TO_FP()  local
29048   bool IsStrict = Op->isStrictFPOpcode();  in LowerWin64_INT128_TO_FP()
29057   if (Op->getOpcode() == ISD::SINT_TO_FP ||  in LowerWin64_INT128_TO_FP()
29058       Op->getOpcode() == ISD::STRICT_SINT_TO_FP)  in LowerWin64_INT128_TO_FP()
29059     LC = RTLIB::getSINTTOFP(ArgVT, VT);  in LowerWin64_INT128_TO_FP()
29061     LC = RTLIB::getUINTTOFP(ArgVT, VT);  in LowerWin64_INT128_TO_FP()
29070   int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();  in LowerWin64_INT128_TO_FP()
29077       makeLibCall(DAG, LC, VT, StackPtr, CallOptions, dl, Chain);  in LowerWin64_INT128_TO_FP()
29095             (0x8080808080808080ULL >> (64 - (8 * Amt))));  in getGFNICtrlImm()
29097     return getGFNICtrlImm(ISD::SRL, 8 - Amt) | getGFNICtrlImm(ISD::SHL, Amt);  in getGFNICtrlImm()
29099     return getGFNICtrlImm(ISD::SHL, 8 - Amt) | getGFNICtrlImm(ISD::SRL, Amt);  in getGFNICtrlImm()
29105 SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL, MVT VT,  in getGFNICtrlMask()  argument
29107   assert(VT.getVectorElementType() == MVT::i8 &&  in getGFNICtrlMask()
29108          (VT.getSizeInBits() % 64) == 0 && "Illegal GFNI control type");  in getGFNICtrlMask()
29111   for (unsigned I = 0, E = VT.getSizeInBits(); I != E; I += 8) {  in getGFNICtrlMask()
29115   return DAG.getBuildVector(VT, DL, MaskBits);  in getGFNICtrlMask()
29118 // Return true if the required (according to Opcode) shift-imm form is natively
29120 static bool supportedVectorShiftWithImm(EVT VT, const X86Subtarget &Subtarget,  in supportedVectorShiftWithImm()  argument
29125   if (!VT.isSimple())  in supportedVectorShiftWithImm()
29128   if (!(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))  in supportedVectorShiftWithImm()
29131   if (VT.getScalarSizeInBits() < 16)  in supportedVectorShiftWithImm()
29134   if (VT.is512BitVector() && Subtarget.useAVX512Regs() &&  in supportedVectorShiftWithImm()
29135       (VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))  in supportedVectorShiftWithImm()
29138   bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) ||  in supportedVectorShiftWithImm()
29139                 (VT.is256BitVector() && Subtarget.hasInt256());  in supportedVectorShiftWithImm()
29142                            (VT != MVT::v2i64 && VT != MVT::v4i64));  in supportedVectorShiftWithImm()
29147 // These instructions are defined together with shift-immediate.
29149 bool supportedVectorShiftWithBaseAmnt(EVT VT, const X86Subtarget &Subtarget,  in supportedVectorShiftWithBaseAmnt()  argument
29151   return supportedVectorShiftWithImm(VT, Subtarget, Opcode);  in supportedVectorShiftWithBaseAmnt()
29154 // Return true if the required (according to Opcode) variable-shift form is
29156 static bool supportedVectorVarShift(EVT VT, const X86Subtarget &Subtarget,  in supportedVectorVarShift()  argument
29161   if (!VT.isSimple())  in supportedVectorVarShift()
29164   if (!(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))  in supportedVectorVarShift()
29167   if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16)  in supportedVectorVarShift()
29170   // vXi16 supported only on AVX-512, BWI  in supportedVectorVarShift()
29171   if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())  in supportedVectorVarShift()
29175       (Subtarget.useAVX512Regs() || !VT.is512BitVector()))  in supportedVectorVarShift()
29178   bool LShift = VT.is128BitVector() || VT.is256BitVector();  in supportedVectorVarShift()
29179   bool AShift = LShift &&  VT != MVT::v2i64 && VT != MVT::v4i64;  in supportedVectorVarShift()
29185   MVT VT = Op.getSimpleValueType();  in LowerShiftByScalarImmediate()  local
29190   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in LowerShiftByScalarImmediate()
29193     assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");  in LowerShiftByScalarImmediate()
29194     MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);  in LowerShiftByScalarImmediate()
29199       assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&  in LowerShiftByScalarImmediate()
29201       return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R);  in LowerShiftByScalarImmediate()
29209                                                  ShiftAmt - 32, DAG);  in LowerShiftByScalarImmediate()
29210       if (VT == MVT::v2i64)  in LowerShiftByScalarImmediate()
29212       if (VT == MVT::v4i64)  in LowerShiftByScalarImmediate()
29220           getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG);  in LowerShiftByScalarImmediate()
29222       if (VT == MVT::v2i64)  in LowerShiftByScalarImmediate()
29224       if (VT == MVT::v4i64)  in LowerShiftByScalarImmediate()
29228     return DAG.getBitcast(VT, Ex);  in LowerShiftByScalarImmediate()
29238     return DAG.getUNDEF(VT);  in LowerShiftByScalarImmediate()
29242   if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) {  in LowerShiftByScalarImmediate()
29245     // shl: (shl V, 1) -> (add (freeze V), (freeze V))  in LowerShiftByScalarImmediate()
29247       // R may be undef at run-time, but (shl R, 1) must be an even number (LSB  in LowerShiftByScalarImmediate()
29253       return DAG.getNode(ISD::ADD, dl, VT, R, R);  in LowerShiftByScalarImmediate()
29256     return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);  in LowerShiftByScalarImmediate()
29260   if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||  in LowerShiftByScalarImmediate()
29261        (Subtarget.hasInt256() && VT == MVT::v4i64)) &&  in LowerShiftByScalarImmediate()
29265   // If we're logical shifting an all-signbits value then we can just perform as  in LowerShiftByScalarImmediate()
29269     SDValue Mask = DAG.getAllOnesConstant(dl, VT);  in LowerShiftByScalarImmediate()
29270     Mask = DAG.getNode(Op.getOpcode(), dl, VT, Mask, Amt);  in LowerShiftByScalarImmediate()
29271     return DAG.getNode(ISD::AND, dl, VT, R, Mask);  in LowerShiftByScalarImmediate()
29274   if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||  in LowerShiftByScalarImmediate()
29275       (Subtarget.hasBWI() && VT == MVT::v64i8)) {  in LowerShiftByScalarImmediate()
29276     unsigned NumElts = VT.getVectorNumElements();  in LowerShiftByScalarImmediate()
29281       // R may be undef at run-time, but (shl R, 1) must be an even number (LSB  in LowerShiftByScalarImmediate()
29287       return DAG.getNode(ISD::ADD, dl, VT, R, R);  in LowerShiftByScalarImmediate()
29292       SDValue Zeros = DAG.getConstant(0, dl, VT);  in LowerShiftByScalarImmediate()
29293       if (VT.is512BitVector()) {  in LowerShiftByScalarImmediate()
29294         assert(VT == MVT::v64i8 && "Unexpected element type!");  in LowerShiftByScalarImmediate()
29296         return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);  in LowerShiftByScalarImmediate()
29298       return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);  in LowerShiftByScalarImmediate()
29302     if (VT == MVT::v16i8 && Subtarget.hasXOP())  in LowerShiftByScalarImmediate()
29306       SDValue Mask = getGFNICtrlMask(Op.getOpcode(), DAG, dl, VT, ShiftAmt);  in LowerShiftByScalarImmediate()
29307       return DAG.getNode(X86ISD::GF2P8AFFINEQB, dl, VT, R, Mask,  in LowerShiftByScalarImmediate()
29315       SHL = DAG.getBitcast(VT, SHL);  in LowerShiftByScalarImmediate()
29317       APInt Mask = APInt::getHighBitsSet(8, 8 - ShiftAmt);  in LowerShiftByScalarImmediate()
29318       return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getConstant(Mask, dl, VT));  in LowerShiftByScalarImmediate()
29324       SRL = DAG.getBitcast(VT, SRL);  in LowerShiftByScalarImmediate()
29326       APInt Mask = APInt::getLowBitsSet(8, 8 - ShiftAmt);  in LowerShiftByScalarImmediate()
29327       return DAG.getNode(ISD::AND, dl, VT, SRL, DAG.getConstant(Mask, dl, VT));  in LowerShiftByScalarImmediate()
29331       SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);  in LowerShiftByScalarImmediate()
29333       SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);  in LowerShiftByScalarImmediate()
29334       Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);  in LowerShiftByScalarImmediate()
29335       Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);  in LowerShiftByScalarImmediate()
29346   MVT VT = Op.getSimpleValueType();  in LowerShiftByScalarVariable()  local
29353   int BaseShAmtIdx = -1;  in LowerShiftByScalarVariable()
29355     if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode))  in LowerShiftByScalarVariable()
29356       return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, BaseShAmtIdx,  in LowerShiftByScalarVariable()
29359     // vXi8 shifts - shift as v8i16 + mask result.  in LowerShiftByScalarVariable()
29360     if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||  in LowerShiftByScalarVariable()
29361          (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||  in LowerShiftByScalarVariable()
29362          VT == MVT::v64i8) &&  in LowerShiftByScalarVariable()
29364       unsigned NumElts = VT.getVectorNumElements();  in LowerShiftByScalarVariable()
29370         // Create the mask using vXi16 shifts. For shift-rights we need to move  in LowerShiftByScalarVariable()
29372         SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);  in LowerShiftByScalarVariable()
29378         BitMask = DAG.getBitcast(VT, BitMask);  in LowerShiftByScalarVariable()
29379         BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask,  in LowerShiftByScalarVariable()
29385         Res = DAG.getBitcast(VT, Res);  in LowerShiftByScalarVariable()
29386         Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);  in LowerShiftByScalarVariable()
29390           // SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW.  in LowerShiftByScalarVariable()
29395           SignMask = DAG.getBitcast(VT, SignMask);  in LowerShiftByScalarVariable()
29396           Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);  in LowerShiftByScalarVariable()
29397           Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);  in LowerShiftByScalarVariable()
29411   MVT VT = Amt.getSimpleValueType();  in convertShiftLeftToScale()  local
29412   if (!(VT == MVT::v8i16 || VT == MVT::v4i32 ||  in convertShiftLeftToScale()
29413         (Subtarget.hasInt256() && VT == MVT::v16i16) ||  in convertShiftLeftToScale()
29414         (Subtarget.hasAVX512() && VT == MVT::v32i16) ||  in convertShiftLeftToScale()
29415         (!Subtarget.hasAVX512() && VT == MVT::v16i8) ||  in convertShiftLeftToScale()
29416         (Subtarget.hasInt256() && VT == MVT::v32i8) ||  in convertShiftLeftToScale()
29417         (Subtarget.hasBWI() && VT == MVT::v64i8)))  in convertShiftLeftToScale()
29420   MVT SVT = VT.getVectorElementType();  in convertShiftLeftToScale()
29422   unsigned NumElems = VT.getVectorNumElements();  in convertShiftLeftToScale()
29435     return DAG.getBuildVector(VT, dl, Elts);  in convertShiftLeftToScale()
29440   if (VT == MVT::v4i32) {  in convertShiftLeftToScale()
29441     Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT));  in convertShiftLeftToScale()
29442     Amt = DAG.getNode(ISD::ADD, dl, VT, Amt,  in convertShiftLeftToScale()
29443                       DAG.getConstant(0x3f800000U, dl, VT));  in convertShiftLeftToScale()
29445     return DAG.getNode(ISD::FP_TO_SINT, dl, VT, Amt);  in convertShiftLeftToScale()
29449   if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) {  in convertShiftLeftToScale()
29450     SDValue Z = DAG.getConstant(0, dl, VT);  in convertShiftLeftToScale()
29451     SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z));  in convertShiftLeftToScale()
29452     SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z));  in convertShiftLeftToScale()
29456       return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);  in convertShiftLeftToScale()
29457     return getPack(DAG, Subtarget, dl, VT, Lo, Hi);  in convertShiftLeftToScale()
29465   MVT VT = Op.getSimpleValueType();  in LowerShift()  local
29469   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in LowerShift()
29476   assert(VT.isVector() && "Custom lowering only for vector shifts!");  in LowerShift()
29485   if (supportedVectorVarShift(VT, Subtarget, Opc))  in LowerShift()
29491   if (((VT == MVT::v2i64 && !Subtarget.hasXOP()) ||  in LowerShift()
29492        (VT == MVT::v4i64 && Subtarget.hasInt256())) &&  in LowerShift()
29494     SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);  in LowerShift()
29495     SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);  in LowerShift()
29496     R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);  in LowerShift()
29497     R = DAG.getNode(ISD::XOR, dl, VT, R, M);  in LowerShift()
29498     R = DAG.getNode(ISD::SUB, dl, VT, R, M);  in LowerShift()
29502   // XOP has 128-bit variable logical/arithmetic shifts.  in LowerShift()
29503   // +ve/-ve Amt = shift left/right.  in LowerShift()
29504   if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||  in LowerShift()
29505                              VT == MVT::v8i16 || VT == MVT::v16i8)) {  in LowerShift()
29507       Amt = DAG.getNegative(Amt, dl, VT);  in LowerShift()
29509       return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);  in LowerShift()
29511       return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);  in LowerShift()
29514   // 2i64 vector logical shifts can efficiently avoid scalarization - do the  in LowerShift()
29515   // shifts per-lane and then shuffle the partial results back together.  in LowerShift()
29516   if (VT == MVT::v2i64 && Opc != ISD::SRA) {  in LowerShift()
29518     SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});  in LowerShift()
29519     SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});  in LowerShift()
29520     SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);  in LowerShift()
29521     SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);  in LowerShift()
29522     return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});  in LowerShift()
29535   if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32 ||  in LowerShift()
29536                       (VT == MVT::v16i16 && Subtarget.hasInt256()))) {  in LowerShift()
29538     unsigned NumElts = VT.getVectorNumElements();  in LowerShift()
29541       SDValue A = Amt->getOperand(i);  in LowerShift()
29561         (VT != MVT::v16i16 ||  in LowerShift()
29562          is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) &&  in LowerShift()
29563         (VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL ||  in LowerShift()
29567       if (Cst1 && Cst2 && Cst1->getAPIntValue().ult(EltSizeInBits) &&  in LowerShift()
29568           Cst2->getAPIntValue().ult(EltSizeInBits)) {  in LowerShift()
29569         SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,  in LowerShift()
29570                                                     Cst1->getZExtValue(), DAG);  in LowerShift()
29571         SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R,  in LowerShift()
29572                                                     Cst2->getZExtValue(), DAG);  in LowerShift()
29573         return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask);  in LowerShift()
29581   if (Opc == ISD::SHL && !(VT == MVT::v32i8 && (Subtarget.hasXOP() ||  in LowerShift()
29584       return DAG.getNode(ISD::MUL, dl, VT, R, Scale);  in LowerShift()
29587   // can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt).  in LowerShift()
29589       (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256()))) {  in LowerShift()
29590     SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);  in LowerShift()
29591     SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);  in LowerShift()
29593       SDValue Zero = DAG.getConstant(0, dl, VT);  in LowerShift()
29594       SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ);  in LowerShift()
29595       SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale);  in LowerShift()
29596       return DAG.getSelect(dl, VT, ZAmt, R, Res);  in LowerShift()
29601   // can replace with ISD::MULHS, creating scale factor from (NumEltBits - Amt).  in LowerShift()
29603   // of these cases in pre-SSE41/XOP/AVX512 but not both.  in LowerShift()
29605       (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256())) &&  in LowerShift()
29609     SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT);  in LowerShift()
29610     SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt);  in LowerShift()
29613           DAG.getSetCC(dl, VT, Amt, DAG.getConstant(0, dl, VT), ISD::SETEQ);  in LowerShift()
29615           DAG.getSetCC(dl, VT, Amt, DAG.getConstant(1, dl, VT), ISD::SETEQ);  in LowerShift()
29617           getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, 1, DAG);  in LowerShift()
29618       SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale);  in LowerShift()
29619       Res = DAG.getSelect(dl, VT, Amt0, R, Res);  in LowerShift()
29620       return DAG.getSelect(dl, VT, Amt1, Sra1, Res);  in LowerShift()
29626   // immediate shifts, else we need to zero-extend each lane to the lower i64  in LowerShift()
29629   if (VT == MVT::v4i32) {  in LowerShift()
29632       Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});  in LowerShift()
29633       Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1});  in LowerShift()
29634       Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});  in LowerShift()
29635       Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});  in LowerShift()
29639       // just zero-extending, but for SSE just duplicating the top 16-bits is  in LowerShift()
29642         SDValue Z = DAG.getConstant(0, dl, VT);  in LowerShift()
29643         Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});  in LowerShift()
29644         Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});  in LowerShift()
29645         Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});  in LowerShift()
29646         Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1});  in LowerShift()
29650                                              {4, 5, 6, 7, -1, -1, -1, -1});  in LowerShift()
29661     SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0));  in LowerShift()
29662     SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1));  in LowerShift()
29663     SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2));  in LowerShift()
29664     SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3));  in LowerShift()
29667     // TODO - ideally shuffle combining would handle this.  in LowerShift()
29669       SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1});  in LowerShift()
29670       SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7});  in LowerShift()
29671       return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});  in LowerShift()
29673     SDValue R01 = DAG.getVectorShuffle(VT, dl, R0, R1, {0, -1, -1, 5});  in LowerShift()
29674     SDValue R23 = DAG.getVectorShuffle(VT, dl, R2, R3, {2, -1, -1, 7});  in LowerShift()
29675     return DAG.getVectorShuffle(VT, dl, R01, R23, {0, 3, 4, 7});  in LowerShift()
29681   // NOTE: We honor preferred vector width before promoting to 512-bits.  in LowerShift()
29682   if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||  in LowerShift()
29683       (Subtarget.canExtendTo512DQ() && VT == MVT::v16i16) ||  in LowerShift()
29684       (Subtarget.canExtendTo512DQ() && VT == MVT::v16i8) ||  in LowerShift()
29685       (Subtarget.canExtendTo512BW() && VT == MVT::v32i8) ||  in LowerShift()
29686       (Subtarget.hasBWI() && Subtarget.hasVLX() && VT == MVT::v16i8)) {  in LowerShift()
29687     assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) &&  in LowerShift()
29690     MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());  in LowerShift()
29694     return DAG.getNode(ISD::TRUNCATE, dl, VT,  in LowerShift()
29701       (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||  in LowerShift()
29702        (VT == MVT::v64i8 && Subtarget.hasBWI())) &&  in LowerShift()
29704     int NumElts = VT.getVectorNumElements();  in LowerShift()
29716     if (VT == MVT::v16i8 && Subtarget.hasInt256()) {  in LowerShift()
29721       return DAG.getZExtOrTrunc(R, dl, VT);  in LowerShift()
29736     SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R));  in LowerShift()
29737     SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R));  in LowerShift()
29744     return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR);  in LowerShift()
29747   if (VT == MVT::v16i8 ||  in LowerShift()
29748       (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) ||  in LowerShift()
29749       (VT == MVT::v64i8 && Subtarget.hasBWI())) {  in LowerShift()
29750     MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);  in LowerShift()
29753       if (VT.is512BitVector()) {  in LowerShift()
29757         MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());  in LowerShift()
29758         V0 = DAG.getBitcast(VT, V0);  in LowerShift()
29759         V1 = DAG.getBitcast(VT, V1);  in LowerShift()
29760         Sel = DAG.getBitcast(VT, Sel);  in LowerShift()
29761         Sel = DAG.getSetCC(dl, MaskVT, DAG.getConstant(0, dl, VT), Sel,  in LowerShift()
29763         return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));  in LowerShift()
29767         V0 = DAG.getBitcast(VT, V0);  in LowerShift()
29768         V1 = DAG.getBitcast(VT, V1);  in LowerShift()
29769         Sel = DAG.getBitcast(VT, Sel);  in LowerShift()
29771                               DAG.getNode(X86ISD::BLENDV, dl, VT, Sel, V0, V1));  in LowerShift()
29773       // On pre-SSE41 targets we test for the sign bit by comparing to  in LowerShift()
29774       // zero - a negative value will set all bits of the lanes to true  in LowerShift()
29786     Amt = DAG.getBitcast(VT, Amt);  in LowerShift()
29790       SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT));  in LowerShift()
29791       R = SignBitSelect(VT, Amt, M, R);  in LowerShift()
29794       Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);  in LowerShift()
29797       M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT));  in LowerShift()
29798       R = SignBitSelect(VT, Amt, M, R);  in LowerShift()
29801       Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);  in LowerShift()
29804       M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT));  in LowerShift()
29805       R = SignBitSelect(VT, Amt, M, R);  in LowerShift()
29813       SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt);  in LowerShift()
29814       SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt);  in LowerShift()
29815       SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R);  in LowerShift()
29816       SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R);  in LowerShift()
29852       return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);  in LowerShift()
29856   if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {  in LowerShift()
29858     SDValue Z = DAG.getConstant(0, dl, VT);  in LowerShift()
29859     SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z);  in LowerShift()
29860     SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z);  in LowerShift()
29861     SDValue RLo = getUnpackl(DAG, dl, VT, Z, R);  in LowerShift()
29862     SDValue RHi = getUnpackh(DAG, dl, VT, Z, R);  in LowerShift()
29871     return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);  in LowerShift()
29874   if (VT == MVT::v8i16) {  in LowerShift()
29875     // If we have a constant shift amount, the non-SSE41 path is best as  in LowerShift()
29884         MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2);  in LowerShift()
29889             VT, DAG.getNode(X86ISD::BLENDV, dl, ExtVT, Sel, V0, V1));  in LowerShift()
29891       // On pre-SSE41 targets we splat the sign bit - a negative value will  in LowerShift()
29895           getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Sel, 15, DAG);  in LowerShift()
29896       return DAG.getSelect(dl, VT, C, V0, V1);  in LowerShift()
29904           ISD::OR, dl, VT,  in LowerShift()
29905           getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 4, DAG),  in LowerShift()
29906           getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG));  in LowerShift()
29908       Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG);  in LowerShift()
29912     SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG);  in LowerShift()
29916     Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);  in LowerShift()
29919     M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 4, DAG);  in LowerShift()
29923     Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);  in LowerShift()
29926     M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 2, DAG);  in LowerShift()
29930     Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt);  in LowerShift()
29933     M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 1, DAG);  in LowerShift()
29938   // Decompose 256-bit shifts into 128-bit shifts.  in LowerShift()
29939   if (VT.is256BitVector())  in LowerShift()
29942   if (VT == MVT::v32i16 || VT == MVT::v64i8)  in LowerShift()
29950   MVT VT = Op.getSimpleValueType();  in LowerFunnelShift()  local
29958   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in LowerFunnelShift()
29961   if (VT.isVector()) {  in LowerFunnelShift()
29964     unsigned NumElts = VT.getVectorNumElements();  in LowerFunnelShift()
29973         return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,  in LowerFunnelShift()
29976       return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,  in LowerFunnelShift()
29979     assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||  in LowerFunnelShift()
29980             VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 ||  in LowerFunnelShift()
29981             VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) &&  in LowerFunnelShift()
29984     // fshl(x,y,z) -> (unpack(y,x) << (z & (bw-1))) >> bw.  in LowerFunnelShift()
29985     // fshr(x,y,z) -> (unpack(y,x) >> (z & (bw-1))).  in LowerFunnelShift()
29991       uint64_t ShXAmt = IsFSHR ? (EltSizeInBits - ShiftAmt) : ShiftAmt;  in LowerFunnelShift()
29992       uint64_t ShYAmt = IsFSHR ? ShiftAmt : (EltSizeInBits - ShiftAmt);  in LowerFunnelShift()
29998            (useVPTERNLOG(Subtarget, VT) &&  in LowerFunnelShift()
30001         // bit-select - lower using vXi16 shifts and then perform the bitmask at  in LowerFunnelShift()
30003         APInt MaskX = APInt::getHighBitsSet(8, 8 - ShXAmt);  in LowerFunnelShift()
30004         APInt MaskY = APInt::getLowBitsSet(8, 8 - ShYAmt);  in LowerFunnelShift()
30011         ShX = DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, ShX),  in LowerFunnelShift()
30012                           DAG.getConstant(MaskX, DL, VT));  in LowerFunnelShift()
30013         ShY = DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, ShY),  in LowerFunnelShift()
30014                           DAG.getConstant(MaskY, DL, VT));  in LowerFunnelShift()
30015         return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);  in LowerFunnelShift()
30018       SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, Op0,  in LowerFunnelShift()
30019                                 DAG.getShiftAmountConstant(ShXAmt, VT, DL));  in LowerFunnelShift()
30020       SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Op1,  in LowerFunnelShift()
30021                                 DAG.getShiftAmountConstant(ShYAmt, VT, DL));  in LowerFunnelShift()
30022       return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);  in LowerFunnelShift()
30025     SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);  in LowerFunnelShift()
30026     SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);  in LowerFunnelShift()
30037     // Split 256-bit integers on XOP/pre-AVX2 targets.  in LowerFunnelShift()
30038     // Split 512-bit integers on non 512-bit BWI targets.  in LowerFunnelShift()
30039     if ((VT.is256BitVector() && ((Subtarget.hasXOP() && EltSizeInBits < 16) ||  in LowerFunnelShift()
30041         (VT.is512BitVector() && !Subtarget.useBWIRegs() &&  in LowerFunnelShift()
30043       // Pre-mask the amount modulo using the wider vector.  in LowerFunnelShift()
30044       Op = DAG.getNode(Op.getOpcode(), DL, VT, Op0, Op1, AmtMod);  in LowerFunnelShift()
30050       int ScalarAmtIdx = -1;  in LowerFunnelShift()
30056         SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));  in LowerFunnelShift()
30057         SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));  in LowerFunnelShift()
30062         return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR);  in LowerFunnelShift()
30070     // If per-element shifts are legal, fallback to generic expansion.  in LowerFunnelShift()
30071     if (supportedVectorVarShift(VT, Subtarget, ShiftOpc) || Subtarget.hasXOP())  in LowerFunnelShift()
30075     // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.  in LowerFunnelShift()
30076     // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).  in LowerFunnelShift()
30089       return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);  in LowerFunnelShift()
30092     // Attempt to fold per-element (ExtVT) shift as unpack(y,x) << zext(z)  in LowerFunnelShift()
30095       SDValue Z = DAG.getConstant(0, DL, VT);  in LowerFunnelShift()
30096       SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));  in LowerFunnelShift()
30097       SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));  in LowerFunnelShift()
30098       SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z));  in LowerFunnelShift()
30099       SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z));  in LowerFunnelShift()
30102       return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR);  in LowerFunnelShift()
30109       (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&  in LowerFunnelShift()
30116   // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.  in LowerFunnelShift()
30117   // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).  in LowerFunnelShift()
30118   if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&  in LowerFunnelShift()
30120     SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType());  in LowerFunnelShift()
30133     return DAG.getZExtOrTrunc(Res, DL, VT);  in LowerFunnelShift()
30136   if (VT == MVT::i8 || ExpandFunnel)  in LowerFunnelShift()
30140   if (VT == MVT::i16) {  in LowerFunnelShift()
30144     return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);  in LowerFunnelShift()
30152   MVT VT = Op.getSimpleValueType();  in LowerRotate()  local
30153   assert(VT.isVector() && "Custom lowering only for vector rotates!");  in LowerRotate()
30159   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in LowerRotate()
30160   int NumElts = VT.getVectorNumElements();  in LowerRotate()
30179       return DAG.getNode(RotOpc, DL, VT, R,  in LowerRotate()
30183     // Else, fall-back on VPROLV/VPRORV.  in LowerRotate()
30187   // AVX512 VBMI2 vXi16 - lower to funnel shifts.  in LowerRotate()
30190     return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);  in LowerRotate()
30193   SDValue Z = DAG.getConstant(0, DL, VT);  in LowerRotate()
30198     if (SDValue NegAmt = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {Z, Amt}))  in LowerRotate()
30199       return DAG.getNode(ISD::ROTL, DL, VT, R, NegAmt);  in LowerRotate()
30203       return DAG.getNode(ISD::ROTL, DL, VT, R,  in LowerRotate()
30204                          DAG.getNode(ISD::SUB, DL, VT, Z, Amt));  in LowerRotate()
30208   if (IsCstSplat && Subtarget.hasGFNI() && VT.getScalarType() == MVT::i8 &&  in LowerRotate()
30209       DAG.getTargetLoweringInfo().isTypeLegal(VT)) {  in LowerRotate()
30211     SDValue Mask = getGFNICtrlMask(Opcode, DAG, DL, VT, RotAmt);  in LowerRotate()
30212     return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, R, Mask,  in LowerRotate()
30216   // Split 256-bit integers on XOP/pre-AVX2 targets.  in LowerRotate()
30217   if (VT.is256BitVector() && (Subtarget.hasXOP() || !Subtarget.hasAVX2()))  in LowerRotate()
30220   // XOP has 128-bit vector variable + immediate rotates.  in LowerRotate()
30221   // +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.  in LowerRotate()
30225     assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");  in LowerRotate()
30230       return DAG.getNode(X86ISD::VROTLI, DL, VT, R,  in LowerRotate()
30234     // Use general rotate by variable (per-element).  in LowerRotate()
30238   // Rotate by a uniform constant - expand back to shifts.  in LowerRotate()
30243     uint64_t ShlAmt = IsROTL ? RotAmt : (EltSizeInBits - RotAmt);  in LowerRotate()
30244     uint64_t SrlAmt = IsROTL ? (EltSizeInBits - RotAmt) : RotAmt;  in LowerRotate()
30245     SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, R,  in LowerRotate()
30246                               DAG.getShiftAmountConstant(ShlAmt, VT, DL));  in LowerRotate()
30247     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, R,  in LowerRotate()
30248                               DAG.getShiftAmountConstant(SrlAmt, VT, DL));  in LowerRotate()
30249     return DAG.getNode(ISD::OR, DL, VT, Shl, Srl);  in LowerRotate()
30252   // Split 512-bit integers on non 512-bit BWI targets.  in LowerRotate()
30253   if (VT.is512BitVector() && !Subtarget.useBWIRegs())  in LowerRotate()
30257       (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||  in LowerRotate()
30258        ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) &&  in LowerRotate()
30260        ((VT == MVT::v32i16 || VT == MVT::v64i8) && Subtarget.useBWIRegs())) &&  in LowerRotate()
30266   SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);  in LowerRotate()
30267   SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);  in LowerRotate()
30270   // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.  in LowerRotate()
30271   // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).  in LowerRotate()
30273     int BaseRotAmtIdx = -1;  in LowerRotate()
30277         return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);  in LowerRotate()
30280       SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));  in LowerRotate()
30281       SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));  in LowerRotate()
30286       return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL);  in LowerRotate()
30294   // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.  in LowerRotate()
30295   // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).  in LowerRotate()
30296   // Const vXi16/vXi32 are excluded in favor of MUL-based lowering.  in LowerRotate()
30298       !supportedVectorVarShift(VT, Subtarget, ShiftOpc) &&  in LowerRotate()
30300     SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));  in LowerRotate()
30301     SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));  in LowerRotate()
30302     SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z));  in LowerRotate()
30303     SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z));  in LowerRotate()
30306     return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL);  in LowerRotate()
30317     // rotl(x,y) -> (((aext(x) << bw) | zext(x)) << (y & (bw-1))) >> bw.  in LowerRotate()
30318     // rotr(x,y) -> (((aext(x) << bw) | zext(x)) >> (y & (bw-1))).  in LowerRotate()
30333       return DAG.getNode(ISD::TRUNCATE, DL, VT, R);  in LowerRotate()
30341         V0 = DAG.getBitcast(VT, V0);  in LowerRotate()
30342         V1 = DAG.getBitcast(VT, V1);  in LowerRotate()
30343         Sel = DAG.getBitcast(VT, Sel);  in LowerRotate()
30345                               DAG.getNode(X86ISD::BLENDV, DL, VT, Sel, V0, V1));  in LowerRotate()
30347       // On pre-SSE41 targets we test for the sign bit by comparing to  in LowerRotate()
30348       // zero - a negative value will set all bits of the lanes to true  in LowerRotate()
30356     if (!IsROTL && !useVPTERNLOG(Subtarget, VT)) {  in LowerRotate()
30357       Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt);  in LowerRotate()
30369     Amt = DAG.getBitcast(VT, Amt);  in LowerRotate()
30374         ISD::OR, DL, VT,  in LowerRotate()
30375         DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(4, DL, VT)),  in LowerRotate()
30376         DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(4, DL, VT)));  in LowerRotate()
30377     R = SignBitSelect(VT, Amt, M, R);  in LowerRotate()
30380     Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt);  in LowerRotate()
30384         ISD::OR, DL, VT,  in LowerRotate()
30385         DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(2, DL, VT)),  in LowerRotate()
30386         DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(6, DL, VT)));  in LowerRotate()
30387     R = SignBitSelect(VT, Amt, M, R);  in LowerRotate()
30390     Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt);  in LowerRotate()
30394         ISD::OR, DL, VT,  in LowerRotate()
30395         DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(1, DL, VT)),  in LowerRotate()
30396         DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(7, DL, VT)));  in LowerRotate()
30397     return SignBitSelect(VT, Amt, M, R);  in LowerRotate()
30401   bool LegalVarShifts = supportedVectorVarShift(VT, Subtarget, ISD::SHL) &&  in LowerRotate()
30402                         supportedVectorVarShift(VT, Subtarget, ISD::SRL);  in LowerRotate()
30405   // Fallback for non-constant AVX2 vXi16 as well.  in LowerRotate()
30407     Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);  in LowerRotate()
30408     SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);  in LowerRotate()
30409     AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);  in LowerRotate()
30410     SDValue SHL = DAG.getNode(IsROTL ? ISD::SHL : ISD::SRL, DL, VT, R, Amt);  in LowerRotate()
30411     SDValue SRL = DAG.getNode(IsROTL ? ISD::SRL : ISD::SHL, DL, VT, R, AmtR);  in LowerRotate()
30412     return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);  in LowerRotate()
30417     Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt);  in LowerRotate()
30422   Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);  in LowerRotate()
30434     SDValue Lo = DAG.getNode(ISD::MUL, DL, VT, R, Scale);  in LowerRotate()
30435     SDValue Hi = DAG.getNode(ISD::MULHU, DL, VT, R, Scale);  in LowerRotate()
30436     return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);  in LowerRotate()
30440   // to v2i64 results at a time. The upper 32-bits contain the wrapped bits  in LowerRotate()
30441   // that can then be OR'd with the lower 32-bits.  in LowerRotate()
30442   assert(VT == MVT::v4i32 && "Only v4i32 vector rotate expected");  in LowerRotate()
30443   static const int OddMask[] = {1, -1, 3, -1};  in LowerRotate()
30444   SDValue R13 = DAG.getVectorShuffle(VT, DL, R, R, OddMask);  in LowerRotate()
30445   SDValue Scale13 = DAG.getVectorShuffle(VT, DL, Scale, Scale, OddMask);  in LowerRotate()
30453   Res02 = DAG.getBitcast(VT, Res02);  in LowerRotate()
30454   Res13 = DAG.getBitcast(VT, Res13);  in LowerRotate()
30456   return DAG.getNode(ISD::OR, DL, VT,  in LowerRotate()
30457                      DAG.getVectorShuffle(VT, DL, Res02, Res13, {0, 4, 2, 6}),  in LowerRotate()
30458                      DAG.getVectorShuffle(VT, DL, Res02, Res13, {1, 5, 3, 7}));  in LowerRotate()
30466   unsigned OpWidth = MemType->getPrimitiveSizeInBits();  in needsCmpXchgNb()
30478   Type *MemType = SI->getValueOperand()->getType();  in shouldExpandAtomicStoreInIR()
30480   if (!SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&  in shouldExpandAtomicStoreInIR()
30482     if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&  in shouldExpandAtomicStoreInIR()
30486     if (MemType->getPrimitiveSizeInBits() == 128 && Subtarget.is64Bit() &&  in shouldExpandAtomicStoreInIR()
30498   Type *MemType = LI->getType();  in shouldExpandAtomicLoadInIR()
30500   if (!LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&  in shouldExpandAtomicLoadInIR()
30502     // If this is a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we  in shouldExpandAtomicLoadInIR()
30503     // can use movq to do the load. If we have X87 we can load into an 80-bit  in shouldExpandAtomicLoadInIR()
30505     if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&  in shouldExpandAtomicLoadInIR()
30509     // If this is a 128-bit load with AVX, 128-bit SSE loads/stores are atomic.  in shouldExpandAtomicLoadInIR()
30510     if (MemType->getPrimitiveSizeInBits() == 128 && Subtarget.is64Bit() &&  in shouldExpandAtomicLoadInIR()
30533     if (isPowerOf2_64(C->getZExtValue()))  in FindSingleBitChange()
30535     else if (isPowerOf2_64((~C->getValue()).getZExtValue()))  in FindSingleBitChange()
30540   // Check if V is some power of 2 pattern known to be non-zero  in FindSingleBitChange()
30559     if (I->getOpcode() == Instruction::Shl) {  in FindSingleBitChange()
30561       // -X` and some other provable power of 2 patterns that we can use CTZ on  in FindSingleBitChange()
30564       // non-zero even where C != 1. Likewise LShr(C, X) and AShr(C, X) may also  in FindSingleBitChange()
30565       // be provably a non-zero power of 2.  in FindSingleBitChange()
30568       auto *ShiftVal = dyn_cast<ConstantInt>(I->getOperand(0));  in FindSingleBitChange()
30571       if (ShiftVal->equalsInt(1))  in FindSingleBitChange()
30577       Value *BitV = I->getOperand(1);  in FindSingleBitChange()
30583         if (*AndC == (I->getType()->getPrimitiveSizeInBits() - 1))  in FindSingleBitChange()
30597   if (AI->use_empty())  in shouldExpandLogicAtomicRMWInIR()
30600   if (AI->getOperation() == AtomicRMWInst::Xor) {  in shouldExpandLogicAtomicRMWInIR()
30601     // A ^ SignBit -> A + SignBit. This allows us to use `xadd` which is  in shouldExpandLogicAtomicRMWInIR()
30603     if (match(AI->getOperand(1), m_SignMask()))  in shouldExpandLogicAtomicRMWInIR()
30609   // Note: InstCombinePass can cause a de-optimization here. It replaces the  in shouldExpandLogicAtomicRMWInIR()
30613   Instruction *I = AI->user_back();  in shouldExpandLogicAtomicRMWInIR()
30614   auto BitChange = FindSingleBitChange(AI->getValOperand());  in shouldExpandLogicAtomicRMWInIR()
30615   if (BitChange.second == UndefBit || !AI->hasOneUse() ||  in shouldExpandLogicAtomicRMWInIR()
30616       I->getOpcode() != Instruction::And ||  in shouldExpandLogicAtomicRMWInIR()
30617       AI->getType()->getPrimitiveSizeInBits() == 8 ||  in shouldExpandLogicAtomicRMWInIR()
30618       AI->getParent() != I->getParent())  in shouldExpandLogicAtomicRMWInIR()
30621   unsigned OtherIdx = I->getOperand(0) == AI ? 1 : 0;  in shouldExpandLogicAtomicRMWInIR()
30624   if (AI == I->getOperand(OtherIdx))  in shouldExpandLogicAtomicRMWInIR()
30629     auto *C1 = cast<ConstantInt>(AI->getValOperand());  in shouldExpandLogicAtomicRMWInIR()
30630     auto *C2 = dyn_cast<ConstantInt>(I->getOperand(OtherIdx));  in shouldExpandLogicAtomicRMWInIR()
30631     if (!C2 || !isPowerOf2_64(C2->getZExtValue())) {  in shouldExpandLogicAtomicRMWInIR()
30634     if (AI->getOperation() == AtomicRMWInst::And) {  in shouldExpandLogicAtomicRMWInIR()
30635       return ~C1->getValue() == C2->getValue()  in shouldExpandLogicAtomicRMWInIR()
30645   auto BitTested = FindSingleBitChange(I->getOperand(OtherIdx));  in shouldExpandLogicAtomicRMWInIR()
30657   if (AI->getOperation() == AtomicRMWInst::And)  in shouldExpandLogicAtomicRMWInIR()
30673   switch (AI->getOperation()) {  in emitBitTestAtomicRMWIntrinsic()
30689   Instruction *I = AI->user_back();  in emitBitTestAtomicRMWIntrinsic()
30690   LLVMContext &Ctx = AI->getContext();  in emitBitTestAtomicRMWIntrinsic()
30691   Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),  in emitBitTestAtomicRMWIntrinsic()
30695   auto BitTested = FindSingleBitChange(AI->getValOperand());  in emitBitTestAtomicRMWIntrinsic()
30699     auto *C = cast<ConstantInt>(I->getOperand(I->getOperand(0) == AI ? 1 : 0));  in emitBitTestAtomicRMWIntrinsic()
30701     BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_C, AI->getType());  in emitBitTestAtomicRMWIntrinsic()
30703     unsigned Imm = llvm::countr_zero(C->getZExtValue());  in emitBitTestAtomicRMWIntrinsic()
30706     BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_I, AI->getType());  in emitBitTestAtomicRMWIntrinsic()
30715     unsigned ShiftBits = SI->getType()->getPrimitiveSizeInBits();  in emitBitTestAtomicRMWIntrinsic()
30717         Builder.CreateAnd(SI, Builder.getIntN(ShiftBits, ShiftBits - 1));  in emitBitTestAtomicRMWIntrinsic()
30725     Result = Builder.CreateZExtOrTrunc(Result, AI->getType());  in emitBitTestAtomicRMWIntrinsic()
30727     // If the result is only used for zero/non-zero status then we don't need to  in emitBitTestAtomicRMWIntrinsic()
30729     for (auto It = I->user_begin(); It != I->user_end(); ++It) {  in emitBitTestAtomicRMWIntrinsic()
30731         if (ICmp->isEquality()) {  in emitBitTestAtomicRMWIntrinsic()
30732           auto *C0 = dyn_cast<ConstantInt>(ICmp->getOperand(0));  in emitBitTestAtomicRMWIntrinsic()
30733           auto *C1 = dyn_cast<ConstantInt>(ICmp->getOperand(1));  in emitBitTestAtomicRMWIntrinsic()
30736             if ((C0 ? C0 : C1)->isZero())  in emitBitTestAtomicRMWIntrinsic()
30746   I->replaceAllUsesWith(Result);  in emitBitTestAtomicRMWIntrinsic()
30747   I->eraseFromParent();  in emitBitTestAtomicRMWIntrinsic()
30748   AI->eraseFromParent();  in emitBitTestAtomicRMWIntrinsic()
30753   if (!AI->hasOneUse())  in shouldExpandCmpArithRMWInIR()
30756   Value *Op = AI->getOperand(1);  in shouldExpandCmpArithRMWInIR()
30758   Instruction *I = AI->user_back();  in shouldExpandCmpArithRMWInIR()
30759   AtomicRMWInst::BinOp Opc = AI->getOperation();  in shouldExpandCmpArithRMWInIR()
30764       if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))  in shouldExpandCmpArithRMWInIR()
30766       if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))  in shouldExpandCmpArithRMWInIR()
30775       if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))  in shouldExpandCmpArithRMWInIR()
30777       if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))  in shouldExpandCmpArithRMWInIR()
30786     if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))  in shouldExpandCmpArithRMWInIR()
30789     if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))  in shouldExpandCmpArithRMWInIR()
30797       if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))  in shouldExpandCmpArithRMWInIR()
30799       if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes())))  in shouldExpandCmpArithRMWInIR()
30813   LLVMContext &Ctx = AI->getContext();  in emitCmpArithAtomicRMWIntrinsic()
30814   ICmpInst *ICI = dyn_cast<ICmpInst>(AI->user_back());  in emitCmpArithAtomicRMWIntrinsic()
30816     TempI = AI->user_back();  in emitCmpArithAtomicRMWIntrinsic()
30817     assert(TempI->hasOneUse() && "Must have one use");  in emitCmpArithAtomicRMWIntrinsic()
30818     ICI = cast<ICmpInst>(TempI->user_back());  in emitCmpArithAtomicRMWIntrinsic()
30821   ICmpInst::Predicate Pred = ICI->getPredicate();  in emitCmpArithAtomicRMWIntrinsic()
30839   switch (AI->getOperation()) {  in emitCmpArithAtomicRMWIntrinsic()
30859       Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());  in emitCmpArithAtomicRMWIntrinsic()
30860   Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(),  in emitCmpArithAtomicRMWIntrinsic()
30863       CmpArith, {Addr, AI->getValOperand(), Builder.getInt32((unsigned)CC)});  in emitCmpArithAtomicRMWIntrinsic()
30865   ICI->replaceAllUsesWith(Result);  in emitCmpArithAtomicRMWIntrinsic()
30866   ICI->eraseFromParent();  in emitCmpArithAtomicRMWIntrinsic()
30868     TempI->eraseFromParent();  in emitCmpArithAtomicRMWIntrinsic()
30869   AI->eraseFromParent();  in emitCmpArithAtomicRMWIntrinsic()
30875   Type *MemType = AI->getType();  in shouldExpandAtomicRMWInIR()
30879   if (MemType->getPrimitiveSizeInBits() > NativeWidth) {  in shouldExpandAtomicRMWInIR()
30884   AtomicRMWInst::BinOp Op = AI->getOperation();  in shouldExpandAtomicRMWInIR()
30912     // These always require a non-trivial set of data operations on x86. We must  in shouldExpandAtomicRMWInIR()
30921   Type *MemType = AI->getType();  in lowerIdempotentRMWIntoFencedLoad()
30925   if (MemType->getPrimitiveSizeInBits() > NativeWidth)  in lowerIdempotentRMWIntoFencedLoad()
30931   if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))  in lowerIdempotentRMWIntoFencedLoad()
30932     if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&  in lowerIdempotentRMWIntoFencedLoad()
30933         AI->use_empty())  in lowerIdempotentRMWIntoFencedLoad()
30938   Module *M = Builder.GetInsertBlock()->getParent()->getParent();  in lowerIdempotentRMWIntoFencedLoad()
30939   auto SSID = AI->getSyncScopeID();  in lowerIdempotentRMWIntoFencedLoad()
30942   auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());  in lowerIdempotentRMWIntoFencedLoad()
30945   // http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence  in lowerIdempotentRMWIntoFencedLoad()
30967     // different cache-line to prevent cache-line bouncing. In practice it  in lowerIdempotentRMWIntoFencedLoad()
30978       AI->getType(), AI->getPointerOperand(), AI->getAlign());  in lowerIdempotentRMWIntoFencedLoad()
30979   Loaded->setAtomic(Order, SSID);  in lowerIdempotentRMWIntoFencedLoad()
30980   AI->replaceAllUsesWith(Loaded);  in lowerIdempotentRMWIntoFencedLoad()
30981   AI->eraseFromParent();  in lowerIdempotentRMWIntoFencedLoad()
30996   // See: Intel® 64 and IA-32 ArchitecturesSoftware Developer’s Manual,  in emitLockedStackOp()
31007   //   c) To minimize concerns about cross thread stack usage - in particular,  in emitLockedStackOp()
31009   //      captures state in the TOS frame and accesses it from many threads -  in emitLockedStackOp()
31014   // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/  in emitLockedStackOp()
31018   const unsigned SPOffset = TFL.has128ByteRedZone(MF) ? -64 : 0;  in emitLockedStackOp()
31058   // The only fence that needs an instruction is a sequentially-consistent  in LowerATOMIC_FENCE()
31059   // cross-thread fence.  in LowerATOMIC_FENCE()
31069   // MEMBARRIER is a compiler barrier; it codegens to a no-op.  in LowerATOMIC_FENCE()
31097   MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();  in LowerCMP_SWAP()
31107   return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),  in LowerCMP_SWAP()
31149     assert(!Subtarget.is64Bit() && "Expected 32-bit mode");  in LowerBITCAST()
31161     assert(!Subtarget.hasAVX512() && "Should use K-registers with AVX512");  in LowerBITCAST()
31170           SrcVT == MVT::i64) && "Unexpected VT!");  in LowerBITCAST()
31202 /// Compute the horizontal sum of bytes in V for the elements of VT.
31204 /// Requires V to be a byte vector and VT to be an integer vector type with
31205 /// wider elements than V's type. The width of the elements of VT determines
31208 static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,  in LowerHorizontalByteSum()  argument
31213   MVT EltVT = VT.getVectorElementType();  in LowerHorizontalByteSum()
31218   unsigned VecSize = VT.getSizeInBits();  in LowerHorizontalByteSum()
31227     return DAG.getBitcast(VT, V);  in LowerHorizontalByteSum()
31231     // We unpack the low half and high half into i32s interleaved with zeros so  in LowerHorizontalByteSum()
31236     SDValue Zeros = DAG.getConstant(0, DL, VT);  in LowerHorizontalByteSum()
31237     SDValue V32 = DAG.getBitcast(VT, V);  in LowerHorizontalByteSum()
31238     SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros);  in LowerHorizontalByteSum()
31239     SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);  in LowerHorizontalByteSum()  local
31246     High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,  in LowerHorizontalByteSum()
31247                        DAG.getBitcast(ByteVecVT, High), Zeros);  in LowerHorizontalByteSum()
31253                     DAG.getBitcast(ShortVecVT, High));  in LowerHorizontalByteSum()
31255     return DAG.getBitcast(VT, V);  in LowerHorizontalByteSum()
31265   SDValue ShifterV = DAG.getConstant(8, DL, VT);  in LowerHorizontalByteSum()
31266   SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), ShifterV);  in LowerHorizontalByteSum()
31269   return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), ShifterV);  in LowerHorizontalByteSum()
31275   MVT VT = Op.getSimpleValueType();  in LowerVectorCTPOPInRegLUT()  local
31276   MVT EltVT = VT.getVectorElementType();  in LowerVectorCTPOPInRegLUT()
31277   int NumElts = VT.getVectorNumElements();  in LowerVectorCTPOPInRegLUT()
31282   // http://wm.ite.pl/articles/sse-popcount.html  in LowerVectorCTPOPInRegLUT()
31285   // index into a in-register pre-computed pop count table. We then split up the  in LowerVectorCTPOPInRegLUT()
31286   // input vector in two new ones: (1) a vector with only the shifted-right  in LowerVectorCTPOPInRegLUT()
31289   // to index the in-register table. Next, both are added and the result is a  in LowerVectorCTPOPInRegLUT()
31299   SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec);  in LowerVectorCTPOPInRegLUT()
31300   SDValue M0F = DAG.getConstant(0x0F, DL, VT);  in LowerVectorCTPOPInRegLUT()
31302   // High nibbles  in LowerVectorCTPOPInRegLUT()
31303   SDValue FourV = DAG.getConstant(4, DL, VT);  in LowerVectorCTPOPInRegLUT()
31304   SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV);  in LowerVectorCTPOPInRegLUT()
31307   SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F);  in LowerVectorCTPOPInRegLUT()
31310   // LUT. After counting low and high nibbles, add the vector to obtain the  in LowerVectorCTPOPInRegLUT()
31312   SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles);  in LowerVectorCTPOPInRegLUT()
31313   SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles);  in LowerVectorCTPOPInRegLUT()
31314   return DAG.getNode(ISD::ADD, DL, VT, HiPopCnt, LoPopCnt);  in LowerVectorCTPOPInRegLUT()
31322   MVT VT = Op.getSimpleValueType();  in LowerVectorCTPOP()  local
31323   assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&  in LowerVectorCTPOP()
31329     unsigned NumElems = VT.getVectorNumElements();  in LowerVectorCTPOP()
31330     assert((VT.getVectorElementType() == MVT::i8 ||  in LowerVectorCTPOP()
31331             VT.getVectorElementType() == MVT::i16) && "Unexpected type");  in LowerVectorCTPOP()
31336       return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);  in LowerVectorCTPOP()
31340   // Decompose 256-bit ops into smaller 128-bit ops.  in LowerVectorCTPOP()
31341   if (VT.is256BitVector() && !Subtarget.hasInt256())  in LowerVectorCTPOP()
31344   // Decompose 512-bit ops into smaller 256-bit ops.  in LowerVectorCTPOP()
31345   if (VT.is512BitVector() && !Subtarget.hasBWI())  in LowerVectorCTPOP()
31349   if (VT.getScalarType() != MVT::i8) {  in LowerVectorCTPOP()
31350     MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);  in LowerVectorCTPOP()
31353     return LowerHorizontalByteSum(PopCnt8, VT, Subtarget, DAG);  in LowerVectorCTPOP()
31365   MVT VT = N.getSimpleValueType();  in LowerCTPOP()  local
31369   if (VT.isScalarInteger()) {  in LowerCTPOP()
31377     unsigned ActiveBits = Known.getBitWidth() - LZ;  in LowerCTPOP()
31378     unsigned ShiftedActiveBits = Known.getBitWidth() - (LZ + TZ);  in LowerCTPOP()
31380     // i2 CTPOP - "ctpop(x) --> sub(x, (x >> 1))".  in LowerCTPOP()
31383         Op = DAG.getNode(ISD::SRL, DL, VT, Op,  in LowerCTPOP()
31384                          DAG.getShiftAmountConstant(TZ, VT, DL));  in LowerCTPOP()
31388                                    DAG.getShiftAmountConstant(1, VT, DL)));  in LowerCTPOP()
31389       return DAG.getZExtOrTrunc(Op, DL, VT);  in LowerCTPOP()
31392     // i3 CTPOP - perform LUT into i32 integer.  in LowerCTPOP()
31395         Op = DAG.getNode(ISD::SRL, DL, VT, Op,  in LowerCTPOP()
31396                          DAG.getShiftAmountConstant(TZ, VT, DL));  in LowerCTPOP()
31399                        DAG.getShiftAmountConstant(1, VT, DL));  in LowerCTPOP()
31404       return DAG.getZExtOrTrunc(Op, DL, VT);  in LowerCTPOP()
31407     // i4 CTPOP - perform LUT into i64 integer.  in LowerCTPOP()
31412         Op = DAG.getNode(ISD::SRL, DL, VT, Op,  in LowerCTPOP()
31413                          DAG.getShiftAmountConstant(TZ, VT, DL));  in LowerCTPOP()
31421       return DAG.getZExtOrTrunc(Op, DL, VT);  in LowerCTPOP()
31424     // i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.  in LowerCTPOP()
31428         Op = DAG.getNode(ISD::SRL, DL, VT, Op,  in LowerCTPOP()
31429                          DAG.getShiftAmountConstant(TZ, VT, DL));  in LowerCTPOP()
31439       return DAG.getZExtOrTrunc(Op, DL, VT);  in LowerCTPOP()
31445   assert(VT.isVector() &&  in LowerCTPOP()
31451   MVT VT = Op.getSimpleValueType();  in LowerBITREVERSE_XOP()  local
31457   if (!VT.isVector()) {  in LowerBITREVERSE_XOP()
31458     MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());  in LowerBITREVERSE_XOP()
31461     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Res,  in LowerBITREVERSE_XOP()
31465   int NumElts = VT.getVectorNumElements();  in LowerBITREVERSE_XOP()
31466   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;  in LowerBITREVERSE_XOP()
31468   // Decompose 256-bit ops into smaller 128-bit ops.  in LowerBITREVERSE_XOP()
31469   if (VT.is256BitVector())  in LowerBITREVERSE_XOP()
31472   assert(VT.is128BitVector() &&  in LowerBITREVERSE_XOP()
31473          "Only 128-bit vector bitreverse lowering supported.");  in LowerBITREVERSE_XOP()
31481     for (int j = ScalarSizeInBytes - 1; j >= 0; --j) {  in LowerBITREVERSE_XOP()
31492   return DAG.getBitcast(VT, Res);  in LowerBITREVERSE_XOP()
31497   MVT VT = Op.getSimpleValueType();  in LowerBITREVERSE()  local
31499   if (Subtarget.hasXOP() && !VT.is512BitVector())  in LowerBITREVERSE()
31507   // Split 512-bit ops without BWI so that we can still use the PSHUFB lowering.  in LowerBITREVERSE()
31508   if (VT.is512BitVector() && !Subtarget.hasBWI())  in LowerBITREVERSE()
31511   // Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.  in LowerBITREVERSE()
31512   if (VT.is256BitVector() && !Subtarget.hasInt256())  in LowerBITREVERSE()
31516   if (!VT.isVector()) {  in LowerBITREVERSE()
31518         (VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8) &&  in LowerBITREVERSE()
31520     MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());  in LowerBITREVERSE()
31524     Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,  in LowerBITREVERSE()
31526     return (VT == MVT::i8) ? Res : DAG.getNode(ISD::BSWAP, DL, VT, Res);  in LowerBITREVERSE()
31529   assert(VT.isVector() && VT.getSizeInBits() >= 128);  in LowerBITREVERSE()
31532   if (VT.getScalarType() != MVT::i8) {  in LowerBITREVERSE()
31533     MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);  in LowerBITREVERSE()
31534     SDValue Res = DAG.getNode(ISD::BSWAP, DL, VT, In);  in LowerBITREVERSE()
31537     return DAG.getBitcast(VT, Res);  in LowerBITREVERSE()
31539   assert(VT.isVector() && VT.getScalarType() == MVT::i8 &&  in LowerBITREVERSE()
31542   unsigned NumElts = VT.getVectorNumElements();  in LowerBITREVERSE()
31546     SDValue Matrix = getGFNICtrlMask(ISD::BITREVERSE, DAG, DL, VT);  in LowerBITREVERSE()
31547     return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, In, Matrix,  in LowerBITREVERSE()
31553   // 0-15 value (moved to the other nibble).  in LowerBITREVERSE()
31554   SDValue NibbleMask = DAG.getConstant(0xF, DL, VT);  in LowerBITREVERSE()
31555   SDValue Lo = DAG.getNode(ISD::AND, DL, VT, In, NibbleMask);  in LowerBITREVERSE()
31556   SDValue Hi = DAG.getNode(ISD::SRL, DL, VT, In, DAG.getConstant(4, DL, VT));  in LowerBITREVERSE()
31575   SDValue LoMask = DAG.getBuildVector(VT, DL, LoMaskElts);  in LowerBITREVERSE()
31576   SDValue HiMask = DAG.getBuildVector(VT, DL, HiMaskElts);  in LowerBITREVERSE()
31577   Lo = DAG.getNode(X86ISD::PSHUFB, DL, VT, LoMask, Lo);  in LowerBITREVERSE()
31578   Hi = DAG.getNode(X86ISD::PSHUFB, DL, VT, HiMask, Hi);  in LowerBITREVERSE()
31579   return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);  in LowerBITREVERSE()
31586   MVT VT = Op.getSimpleValueType();  in LowerPARITY()  local
31588   // Special case. If the input fits in 8-bits we can use a single 8-bit TEST.  in LowerPARITY()
31589   if (VT == MVT::i8 ||  in LowerPARITY()
31590       DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) {  in LowerPARITY()
31597     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);  in LowerPARITY()
31604   if (VT == MVT::i64) {  in LowerPARITY()
31605     // Xor the high and low 16-bits together using a 32-bit operation.  in LowerPARITY()
31613   if (VT != MVT::i16) {  in LowerPARITY()
31614     // Xor the high and low 16-bits together using a 32-bit operation.  in LowerPARITY()
31619     // If the input is 16-bits, we need to extend to use an i32 shift below.  in LowerPARITY()
31623   // Finally xor the low 2 bytes together and use a 8-bit flag setting xor.  in LowerPARITY()
31624   // This should allow an h-reg to be used to save a shift.  in LowerPARITY()
31635   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);  in LowerPARITY()
31641   switch (N->getOpcode()) {  in lowerAtomicArithWithLOCK()
31661   MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();  in lowerAtomicArithWithLOCK()
31665       {N->getOperand(0), N->getOperand(1), N->getOperand(2)},  in lowerAtomicArithWithLOCK()
31666       /*MemVT=*/N->getSimpleValueType(0), MMO);  in lowerAtomicArithWithLOCK()
31669 /// Lower atomic_load_ops into LOCK-prefixed operations.
31673   SDValue Chain = N->getOperand(0);  in lowerAtomicArith()
31674   SDValue LHS = N->getOperand(1);  in lowerAtomicArith()
31675   SDValue RHS = N->getOperand(2);  in lowerAtomicArith()
31676   unsigned Opc = N->getOpcode();  in lowerAtomicArith()
31677   MVT VT = N->getSimpleValueType(0);  in lowerAtomicArith()  local
31683   if (N->hasAnyUseOfValue(0)) {  in lowerAtomicArith()
31684     // Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to  in lowerAtomicArith()
31690       return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,  in lowerAtomicArith()
31691                            DAG.getNegative(RHS, DL, VT), AN->getMemOperand());  in lowerAtomicArith()
31705   if (Opc == ISD::ATOMIC_LOAD_OR && isNullConstant(RHS) && !AN->isVolatile()) {  in lowerAtomicArith()
31711     if (AN->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent &&  in lowerAtomicArith()
31712         AN->getSyncScopeID() == SyncScope::System) {  in lowerAtomicArith()
31717       assert(!N->hasAnyUseOfValue(0));  in lowerAtomicArith()
31719       return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),  in lowerAtomicArith()
31720                          DAG.getUNDEF(VT), NewChain);  in lowerAtomicArith()
31722     // MEMBARRIER is a compiler barrier; it codegens to a no-op.  in lowerAtomicArith()
31724     assert(!N->hasAnyUseOfValue(0));  in lowerAtomicArith()
31726     return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),  in lowerAtomicArith()
31727                        DAG.getUNDEF(VT), NewChain);  in lowerAtomicArith()
31732   assert(!N->hasAnyUseOfValue(0));  in lowerAtomicArith()
31734   return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),  in lowerAtomicArith()
31735                      DAG.getUNDEF(VT), LockOp.getValue(1));  in lowerAtomicArith()
31742   EVT VT = Node->getMemoryVT();  in LowerATOMIC_STORE()  local
31745       Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent;  in LowerATOMIC_STORE()
31746   bool IsTypeLegal = DAG.getTargetLoweringInfo().isTypeLegal(VT);  in LowerATOMIC_STORE()
31759     if (VT == MVT::i128 && Subtarget.is64Bit() && Subtarget.hasAVX()) {  in LowerATOMIC_STORE()
31760       SDValue VecVal = DAG.getBitcast(MVT::v2i64, Node->getVal());  in LowerATOMIC_STORE()
31761       Chain = DAG.getStore(Node->getChain(), dl, VecVal, Node->getBasePtr(),  in LowerATOMIC_STORE()
31762                            Node->getMemOperand());  in LowerATOMIC_STORE()
31767     if (VT == MVT::i64) {  in LowerATOMIC_STORE()
31770             DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getVal());  in LowerATOMIC_STORE()
31774         SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()};  in LowerATOMIC_STORE()
31776                                         MVT::i64, Node->getMemOperand());  in LowerATOMIC_STORE()
31778         // First load this into an 80-bit X87 register using a stack temporary.  in LowerATOMIC_STORE()
31781         int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();  in LowerATOMIC_STORE()
31784         Chain = DAG.getStore(Node->getChain(), dl, Node->getVal(), StackPtr,  in LowerATOMIC_STORE()
31794         SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()};  in LowerATOMIC_STORE()
31797                                     StoreOps, MVT::i64, Node->getMemOperand());  in LowerATOMIC_STORE()
31811   // Convert seq_cst store -> xchg  in LowerATOMIC_STORE()
31812   // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b)  in LowerATOMIC_STORE()
31813   // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment.  in LowerATOMIC_STORE()
31814   SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, Node->getMemoryVT(),  in LowerATOMIC_STORE()
31815                                Node->getOperand(0), Node->getOperand(2),  in LowerATOMIC_STORE()
31816                                Node->getOperand(1), Node->getMemOperand());  in LowerATOMIC_STORE()
31822   MVT VT = N->getSimpleValueType(0);  in LowerADDSUBO_CARRY()  local
31826   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))  in LowerADDSUBO_CARRY()
31829   SDVTList VTs = DAG.getVTList(VT, MVT::i32);  in LowerADDSUBO_CARRY()
31846   if (N->getValueType(1) == MVT::i1)  in LowerADDSUBO_CARRY()
31849   return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);  in LowerADDSUBO_CARRY()
31947     for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i)  in ExtendToType()
31960          "MGATHER/MSCATTER are supported on AVX-512 arch only");  in LowerMSCATTER()
31963   SDValue Src = N->getValue();  in LowerMSCATTER()
31964   MVT VT = Src.getSimpleValueType();  in LowerMSCATTER()  local
31965   assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");  in LowerMSCATTER()
31968   SDValue Scale = N->getScale();  in LowerMSCATTER()
31969   SDValue Index = N->getIndex();  in LowerMSCATTER()
31970   SDValue Mask = N->getMask();  in LowerMSCATTER()
31971   SDValue Chain = N->getChain();  in LowerMSCATTER()
31972   SDValue BasePtr = N->getBasePtr();  in LowerMSCATTER()
31974   if (VT == MVT::v2f32 || VT == MVT::v2i32) {  in LowerMSCATTER()
31979       EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);  in LowerMSCATTER()
31980       Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT));  in LowerMSCATTER()
31984                                      N->getMemoryVT(), N->getMemOperand());  in LowerMSCATTER()
31996   // If we don't have VLX and neither the passthru or index is 512-bits, we  in LowerMSCATTER()
31998   if (!Subtarget.hasVLX() && !VT.is512BitVector() &&  in LowerMSCATTER()
32000     // Determine how much we need to widen by to get a 512-bit type.  in LowerMSCATTER()
32001     unsigned Factor = std::min(512/VT.getSizeInBits(),  in LowerMSCATTER()
32003     unsigned NumElts = VT.getVectorNumElements() * Factor;  in LowerMSCATTER()
32005     VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);  in LowerMSCATTER()
32009     Src = ExtendToType(Src, VT, DAG);  in LowerMSCATTER()
32017                                  N->getMemoryVT(), N->getMemOperand());  in LowerMSCATTER()
32024   MVT VT = Op.getSimpleValueType();  in LowerMLOAD()  local
32025   MVT ScalarVT = VT.getScalarType();  in LowerMLOAD()
32026   SDValue Mask = N->getMask();  in LowerMLOAD()
32028   SDValue PassThru = N->getPassThru();  in LowerMLOAD()
32038         VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,  in LowerMLOAD()
32039         getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(),  in LowerMLOAD()
32040         N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(),  in LowerMLOAD()
32041         N->isExpandingLoad());  in LowerMLOAD()
32043     SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);  in LowerMLOAD()
32047   assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&  in LowerMLOAD()
32048          "Expanding masked load is supported on AVX-512 target only!");  in LowerMLOAD()
32050   assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) &&  in LowerMLOAD()
32051          "Expanding masked load is supported for 32 and 64-bit types only!");  in LowerMLOAD()
32053   assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&  in LowerMLOAD()
32063   unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();  in LowerMLOAD()
32075       WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,  in LowerMLOAD()
32076       PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),  in LowerMLOAD()
32077       N->getExtensionType(), N->isExpandingLoad());  in LowerMLOAD()
32080       DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0),  in LowerMLOAD()
32089   SDValue DataToStore = N->getValue();  in LowerMSTORE()
32090   MVT VT = DataToStore.getSimpleValueType();  in LowerMSTORE()  local
32091   MVT ScalarVT = VT.getScalarType();  in LowerMSTORE()
32092   SDValue Mask = N->getMask();  in LowerMSTORE()
32095   assert((!N->isCompressingStore() || Subtarget.hasAVX512()) &&  in LowerMSTORE()
32096          "Expanding masked load is supported on AVX-512 target only!");  in LowerMSTORE()
32098   assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) &&  in LowerMSTORE()
32099          "Expanding masked load is supported for 32 and 64-bit types only!");  in LowerMSTORE()
32101   assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&  in LowerMSTORE()
32111   unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();  in LowerMSTORE()
32122   return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),  in LowerMSTORE()
32123                             N->getOffset(), Mask, N->getMemoryVT(),  in LowerMSTORE()
32124                             N->getMemOperand(), N->getAddressingMode(),  in LowerMSTORE()
32125                             N->isTruncatingStore(), N->isCompressingStore());  in LowerMSTORE()
32131          "MGATHER/MSCATTER are supported on AVX-512/AVX-2 arch only");  in LowerMGATHER()
32135   MVT VT = Op.getSimpleValueType();  in LowerMGATHER()  local
32136   SDValue Index = N->getIndex();  in LowerMGATHER()
32137   SDValue Mask = N->getMask();  in LowerMGATHER()
32138   SDValue PassThru = N->getPassThru();  in LowerMGATHER()
32141   assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");  in LowerMGATHER()
32147   // If we don't have VLX and neither the passthru or index is 512-bits, we  in LowerMGATHER()
32149   MVT OrigVT = VT;  in LowerMGATHER()
32150   if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&  in LowerMGATHER()
32152     // Determine how much we need to widen by to get a 512-bit type.  in LowerMGATHER()
32153     unsigned Factor = std::min(512/VT.getSizeInBits(),  in LowerMGATHER()
32156     unsigned NumElts = VT.getVectorNumElements() * Factor;  in LowerMGATHER()
32158     VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);  in LowerMGATHER()
32162     PassThru = ExtendToType(PassThru, VT, DAG);  in LowerMGATHER()
32169     PassThru = getZeroVector(VT, Subtarget, DAG, dl);  in LowerMGATHER()
32171   SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,  in LowerMGATHER()
32172                     N->getScale() };  in LowerMGATHER()
32174       X86ISD::MGATHER, dl, DAG.getVTList(VT, MVT::Other), Ops, N->getMemoryVT(),  in LowerMGATHER()
32175       N->getMemOperand());  in LowerMGATHER()
32187   unsigned SrcAS = N->getSrcAddressSpace();  in LowerADDRSPACECAST()
32189   assert(SrcAS != N->getDestAddressSpace() &&  in LowerADDRSPACECAST()
32209   // no-ops in the case of a null GC strategy (or a GC strategy which does not  in LowerGC_TRANSITION()
32214   if (Op->getGluedNode())  in LowerGC_TRANSITION()
32215     Ops.push_back(Op->getOperand(Op->getNumOperands() - 1));  in LowerGC_TRANSITION()
32224   EVT VT = Op.getValueType();  in LowerCVTPS2PH()  local
32228   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);  in LowerCVTPS2PH()
32232   return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);  in LowerCVTPS2PH()
32239   // We don't support non-data prefetch without PREFETCHI.  in LowerPREFETCH()
32259     // sub-string, e.g. "$12" contain "$1"  in getInstrStrFromOpNo()
32261       I = AsmStr.size() - OpNoStr1.size();  in getInstrStrFromOpNo()
32316   // ->  in visitMaskedLoad()
32337   // ->  in visitMaskedStore()
32352   // clang-format off  in LowerOperation()
32504   // clang-format on  in LowerOperation()
32514   switch (N->getOpcode()) {  in ReplaceNodeResults()
32518     N->dump(&DAG);  in ReplaceNodeResults()
32522     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
32526     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);  in ReplaceNodeResults()
32529     SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);  in ReplaceNodeResults()
32534     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
32538     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);  in ReplaceNodeResults()
32540                      {N->getOperand(0), Lo});  in ReplaceNodeResults()
32542                      {N->getOperand(0), Hi});  in ReplaceNodeResults()
32545     SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);  in ReplaceNodeResults()
32554     assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");  in ReplaceNodeResults()
32557     KnownBits Known = DAG.computeKnownBits(N->getOperand(0));  in ReplaceNodeResults()
32561       SDValue Op = DAG.getNode(ISD::SRL, dl, MVT::i64, N->getOperand(0),  in ReplaceNodeResults()
32575           DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, N->getOperand(0));  in ReplaceNodeResults()
32577       // Bit count should fit in 32-bits, extract it as that and then zero  in ReplaceNodeResults()
32588     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
32589     assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&  in ReplaceNodeResults()
32590            VT.getVectorElementType() == MVT::i8 && "Unexpected VT!");  in ReplaceNodeResults()
32591     // Pre-promote these to vXi16 to avoid op legalization thinking all 16  in ReplaceNodeResults()
32593     MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());  in ReplaceNodeResults()
32594     SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0));  in ReplaceNodeResults()
32595     SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1));  in ReplaceNodeResults()
32597     Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);  in ReplaceNodeResults()
32598     unsigned NumConcats = 16 / VT.getVectorNumElements();  in ReplaceNodeResults()
32599     SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));  in ReplaceNodeResults()
32607     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
32608     assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&  in ReplaceNodeResults()
32609            VT == MVT::v2i32 && "Unexpected VT!");  in ReplaceNodeResults()
32610     bool IsSigned = N->getOpcode() == ISD::SMULO;  in ReplaceNodeResults()
32612     SDValue Op0 = DAG.getNode(ExtOpc, dl, MVT::v2i64, N->getOperand(0));  in ReplaceNodeResults()
32613     SDValue Op1 = DAG.getNode(ExtOpc, dl, MVT::v2i64, N->getOperand(1));  in ReplaceNodeResults()
32615     // Extract the high 32 bits from each result using PSHUFD.  in ReplaceNodeResults()
32618     Hi = DAG.getVectorShuffle(MVT::v4i32, dl, Hi, Hi, {1, 3, -1, -1});  in ReplaceNodeResults()
32619     Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Hi,  in ReplaceNodeResults()
32623     Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);  in ReplaceNodeResults()
32627       // SMULO overflows if the high bits don't match the sign of the low.  in ReplaceNodeResults()
32628       HiCmp = DAG.getNode(ISD::SRA, dl, VT, Res, DAG.getConstant(31, dl, VT));  in ReplaceNodeResults()
32630       // UMULO overflows if the high bits are non-zero.  in ReplaceNodeResults()
32631       HiCmp = DAG.getConstant(0, dl, VT);  in ReplaceNodeResults()
32633     SDValue Ovf = DAG.getSetCC(dl, N->getValueType(1), Hi, HiCmp, ISD::SETNE);  in ReplaceNodeResults()
32637                       DAG.getUNDEF(VT));  in ReplaceNodeResults()
32646     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
32647     EVT InVT = N->getOperand(0).getValueType();  in ReplaceNodeResults()
32648     assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 &&  in ReplaceNodeResults()
32649            "Expected a VT that divides into 128 bits.");  in ReplaceNodeResults()
32650     assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&  in ReplaceNodeResults()
32658                                   VT.getVectorElementType(),  in ReplaceNodeResults()
32659                                   NumConcat * VT.getVectorNumElements());  in ReplaceNodeResults()
32662     Ops[0] = N->getOperand(0);  in ReplaceNodeResults()
32664     Ops[0] = N->getOperand(1);  in ReplaceNodeResults()
32667     SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);  in ReplaceNodeResults()
32676     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
32677     assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX.");  in ReplaceNodeResults()
32678     SDValue UNDEF = DAG.getUNDEF(VT);  in ReplaceNodeResults()
32680                               N->getOperand(0), UNDEF);  in ReplaceNodeResults()
32682                               N->getOperand(1), UNDEF);  in ReplaceNodeResults()
32683     Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS));  in ReplaceNodeResults()
32690     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
32691     if (VT.isVector()) {  in ReplaceNodeResults()
32692       assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&  in ReplaceNodeResults()
32696       // TODO: Can we do something for non-splat?  in ReplaceNodeResults()
32698       if (ISD::isConstantSplatVector(N->getOperand(1).getNode(), SplatVal)) {  in ReplaceNodeResults()
32699         unsigned NumConcats = 128 / VT.getSizeInBits();  in ReplaceNodeResults()
32700         SmallVector<SDValue, 8> Ops0(NumConcats, DAG.getUNDEF(VT));  in ReplaceNodeResults()
32701         Ops0[0] = N->getOperand(0);  in ReplaceNodeResults()
32702         EVT ResVT = getTypeToTransformTo(*DAG.getContext(), VT);  in ReplaceNodeResults()
32705         SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1);  in ReplaceNodeResults()
32716     MVT VT = N->getSimpleValueType(0);  in ReplaceNodeResults()  local
32717     if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)  in ReplaceNodeResults()
32723     MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();  in ReplaceNodeResults()
32724     SDValue In = N->getOperand(0);  in ReplaceNodeResults()
32727     EVT EltVT = VT.getVectorElementType();  in ReplaceNodeResults()
32728     unsigned MinElts = VT.getVectorNumElements();  in ReplaceNodeResults()
32735             matchTruncateWithPACK(PackOpcode, VT, In, dl, DAG, Subtarget)) {  in ReplaceNodeResults()
32736       if (SDValue Res = truncateVectorWithPACK(PackOpcode, VT, Src,  in ReplaceNodeResults()
32749         SmallVector<int, 16> TruncMask(WidenNumElts, -1);  in ReplaceNodeResults()
32772       if (InVT == MVT::v4i64 && VT == MVT::v4i8 && isTypeLegal(MVT::v8i64)) {  in ReplaceNodeResults()
32779     if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 &&  in ReplaceNodeResults()
32791                                           -1, -1, -1, -1, -1, -1, -1, -1 });  in ReplaceNodeResults()
32814     assert(N->getValueType(0) == MVT::v8i8 &&  in ReplaceNodeResults()
32819     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
32820     SDValue In = N->getOperand(0);  in ReplaceNodeResults()
32822     if (!Subtarget.hasSSE41() && VT == MVT::v4i64 &&  in ReplaceNodeResults()
32826       assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode");  in ReplaceNodeResults()
32827       // Custom split this so we can extend i8/i16->i32 invec. This is better  in ReplaceNodeResults()
32828       // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using  in ReplaceNodeResults()
32846       SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);  in ReplaceNodeResults()
32851     if (VT == MVT::v16i32 || VT == MVT::v8i64) {  in ReplaceNodeResults()
32863         In = DAG.getNode(N->getOpcode(), dl, InVT, In);  in ReplaceNodeResults()
32869       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));  in ReplaceNodeResults()
32870       assert(isTypeLegal(LoVT) && "Split VT not legal?");  in ReplaceNodeResults()
32872       SDValue Lo = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, LoVT, In, DAG);  in ReplaceNodeResults()
32882       Hi = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, HiVT, Hi, DAG);  in ReplaceNodeResults()
32884       SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);  in ReplaceNodeResults()
32893     bool IsStrict = N->isStrictFPOpcode();  in ReplaceNodeResults()
32894     bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||  in ReplaceNodeResults()
32895                     N->getOpcode() == ISD::STRICT_FP_TO_SINT;  in ReplaceNodeResults()
32896     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
32897     SDValue Src = N->getOperand(IsStrict ? 1 : 0);  in ReplaceNodeResults()
32898     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();  in ReplaceNodeResults()
32903       EVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;  in ReplaceNodeResults()
32906             DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other},  in ReplaceNodeResults()
32911         Res = DAG.getNode(N->getOpcode(), dl, VT,  in ReplaceNodeResults()
32921     if (VT.isVector() && Subtarget.hasFP16() &&  in ReplaceNodeResults()
32923       EVT EleVT = VT.getVectorElementType();  in ReplaceNodeResults()
32938             DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {N->getOperand(0), Src});  in ReplaceNodeResults()
32965     if (VT.isVector() && VT.getScalarSizeInBits() < 32) {  in ReplaceNodeResults()
32966       assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&  in ReplaceNodeResults()
32970       unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U);  in ReplaceNodeResults()
32972                                        VT.getVectorNumElements());  in ReplaceNodeResults()
32977                           {N->getOperand(0), Src});  in ReplaceNodeResults()
32990                         DAG.getValueType(VT.getVectorElementType()));  in ReplaceNodeResults()
32997       Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);  in ReplaceNodeResults()
33000       unsigned NumConcats = 128 / VT.getSizeInBits();  in ReplaceNodeResults()
33001       MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),  in ReplaceNodeResults()
33002                                       VT.getVectorNumElements() * NumConcats);  in ReplaceNodeResults()
33003       SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT));  in ReplaceNodeResults()
33013     if (VT == MVT::v2i32) {  in ReplaceNodeResults()
33017       assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&  in ReplaceNodeResults()
33037           // legalization to v8i32<-v8f64.  in ReplaceNodeResults()
33044           Opc = N->getOpcode();  in ReplaceNodeResults()
33050                             {N->getOperand(0), Src});  in ReplaceNodeResults()
33061       // Custom widen strict v2f32->v2i32 by padding with zeros.  in ReplaceNodeResults()
33066         SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4i32, MVT::Other},  in ReplaceNodeResults()
33067                                   {N->getOperand(0), Src});  in ReplaceNodeResults()
33078     assert(!VT.isVector() && "Vectors should have been handled above!");  in ReplaceNodeResults()
33080     if ((Subtarget.hasDQI() && VT == MVT::i64 &&  in ReplaceNodeResults()
33085       // If we use a 128-bit result we might need to use a target specific node.  in ReplaceNodeResults()
33090       unsigned Opc = N->getOpcode();  in ReplaceNodeResults()
33105         Res = DAG.getNode(Opc, SDLoc(N), Tys, N->getOperand(0), Res);  in ReplaceNodeResults()
33109       Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx);  in ReplaceNodeResults()
33116     if (VT == MVT::i128 && Subtarget.isTargetWin64()) {  in ReplaceNodeResults()
33143     bool IsStrict = N->isStrictFPOpcode();  in ReplaceNodeResults()
33144     bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||  in ReplaceNodeResults()
33145                     N->getOpcode() == ISD::STRICT_SINT_TO_FP;  in ReplaceNodeResults()
33146     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
33147     SDValue Src = N->getOperand(IsStrict ? 1 : 0);  in ReplaceNodeResults()
33148     if (VT.getVectorElementType() == MVT::f16 && Subtarget.hasFP16() &&  in ReplaceNodeResults()
33153       if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32)  in ReplaceNodeResults()
33161                                   {N->getOperand(0), Src});  in ReplaceNodeResults()
33170     if (VT != MVT::v2f32)  in ReplaceNodeResults()
33178                                   {N->getOperand(0), Src});  in ReplaceNodeResults()
33203                           {N->getOperand(0), Elt});  in ReplaceNodeResults()
33220           DAG.getVectorShuffle(MVT::v4i32, dl, IsNeg, IsNeg, {1, 3, -1, -1});  in ReplaceNodeResults()
33235       // Custom widen strict v2i32->v2f32 to avoid scalarization.  in ReplaceNodeResults()
33239       SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other},  in ReplaceNodeResults()
33240                                 {N->getOperand(0), Src});  in ReplaceNodeResults()
33255                                 {N->getOperand(0), Or, VBias});  in ReplaceNodeResults()
33262       // TODO: Are there any fast-math-flags to propagate here?  in ReplaceNodeResults()
33270     bool IsStrict = N->isStrictFPOpcode();  in ReplaceNodeResults()
33271     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();  in ReplaceNodeResults()
33272     SDValue Src = N->getOperand(IsStrict ? 1 : 0);  in ReplaceNodeResults()
33273     SDValue Rnd = N->getOperand(IsStrict ? 2 : 1);  in ReplaceNodeResults()
33275     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
33277     if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) {  in ReplaceNodeResults()
33282     if (!Subtarget.hasFP16() && VT.getVectorElementType() == MVT::f16) {  in ReplaceNodeResults()
33300     EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32;  in ReplaceNodeResults()
33315     assert(N->getValueType(0) == MVT::v2f32 &&  in ReplaceNodeResults()
33319     bool IsStrict = N->isStrictFPOpcode();  in ReplaceNodeResults()
33320     SDValue Src = N->getOperand(IsStrict ? 1 : 0);  in ReplaceNodeResults()
33328                       {N->getOperand(0), V});  in ReplaceNodeResults()
33337     unsigned IntNo = N->getConstantOperandVal(1);  in ReplaceNodeResults()
33365     EVT T = N->getValueType(0);  in ReplaceNodeResults()
33369            "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B");  in ReplaceNodeResults()
33373         DAG.SplitScalar(N->getOperand(2), dl, HalfT, HalfT);  in ReplaceNodeResults()
33374     cpInL = DAG.getCopyToReg(N->getOperand(0), dl,  in ReplaceNodeResults()
33381         DAG.SplitScalar(N->getOperand(3), dl, HalfT, HalfT);  in ReplaceNodeResults()
33386     // In 64-bit mode we might need the base pointer in RBX, but we can't know  in ReplaceNodeResults()
33391     // live-range.  in ReplaceNodeResults()
33394     MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();  in ReplaceNodeResults()
33396       SDValue Ops[] = {swapInH.getValue(0), N->getOperand(1), swapInL,  in ReplaceNodeResults()
33403       SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1),  in ReplaceNodeResults()
33420     Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1));  in ReplaceNodeResults()
33429         (N->getValueType(0) == MVT::i64 || N->getValueType(0) == MVT::i128) &&  in ReplaceNodeResults()
33430         "Unexpected VT!");  in ReplaceNodeResults()
33437       if (N->getValueType(0) == MVT::i128) {  in ReplaceNodeResults()
33439           SDValue Ld = DAG.getLoad(MVT::v2i64, dl, Node->getChain(),  in ReplaceNodeResults()
33440                                    Node->getBasePtr(), Node->getMemOperand());  in ReplaceNodeResults()
33445           Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0),  in ReplaceNodeResults()
33454         // Then extract the lower 64-bits.  in ReplaceNodeResults()
33457         SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };  in ReplaceNodeResults()
33459                                              MVT::i64, Node->getMemOperand());  in ReplaceNodeResults()
33468         // then casts to i64. This avoids a 128-bit stack temporary being  in ReplaceNodeResults()
33469         // created by type legalization if we were to cast v4f32->v2i64.  in ReplaceNodeResults()
33478         // First load this into an 80-bit X87 register. This will put the whole  in ReplaceNodeResults()
33481         SDValue Ops[] = { Node->getChain(), Node->getBasePtr() };  in ReplaceNodeResults()
33484                                                  Node->getMemOperand());  in ReplaceNodeResults()
33492         int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();  in ReplaceNodeResults()
33531     EVT DstVT = N->getValueType(0);  in ReplaceNodeResults()
33532     EVT SrcVT = N->getOperand(0).getValueType();  in ReplaceNodeResults()
33534     // If this is a bitcast from a v64i1 k-register to a i64 on a 32-bit target  in ReplaceNodeResults()
33535     // we can split using the k-register rather than memory.  in ReplaceNodeResults()
33537       assert(!Subtarget.is64Bit() && "Expected 32-bit mode");  in ReplaceNodeResults()
33554                                 N->getOperand(0));  in ReplaceNodeResults()
33563     EVT VT = N->getValueType(0);  in ReplaceNodeResults()  local
33564     if ((VT == MVT::v2f32 || VT == MVT::v2i32) &&  in ReplaceNodeResults()
33567       SDValue Index = Gather->getIndex();  in ReplaceNodeResults()
33570       assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&  in ReplaceNodeResults()
33572       EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);  in ReplaceNodeResults()
33573       SDValue Mask = Gather->getMask();  in ReplaceNodeResults()
33576                                      Gather->getPassThru(),  in ReplaceNodeResults()
33577                                      DAG.getUNDEF(VT));  in ReplaceNodeResults()
33585       SDValue Ops[] = { Gather->getChain(), PassThru, Mask,  in ReplaceNodeResults()
33586                         Gather->getBasePtr(), Index, Gather->getScale() };  in ReplaceNodeResults()
33589           Gather->getMemoryVT(), Gather->getMemOperand());  in ReplaceNodeResults()
33598     // avoids scalarizing in 32-bit mode. In 64-bit mode this avoids a int->fp  in ReplaceNodeResults()
33600     MVT VT = N->getSimpleValueType(0);  in ReplaceNodeResults()  local
33601     assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT");  in ReplaceNodeResults()
33602     assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&  in ReplaceNodeResults()
33608       MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;  in ReplaceNodeResults()
33609       SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),  in ReplaceNodeResults()
33610                                 Ld->getPointerInfo(), Ld->getOriginalAlign(),  in ReplaceNodeResults()
33611                                 Ld->getMemOperand()->getFlags());  in ReplaceNodeResults()
33615       EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);  in ReplaceNodeResults()
33623     SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};  in ReplaceNodeResults()
33625                                           MVT::i64, Ld->getMemOperand());  in ReplaceNodeResults()
33636     assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");  in ReplaceNodeResults()
33645     assert(N->getSimpleValueType(0) == MVT::f16 &&  in ReplaceNodeResults()
33648     SDValue VecOp = N->getOperand(0);  in ReplaceNodeResults()
33650     SDValue Split = DAG.getBitcast(ExtVT, N->getOperand(0));  in ReplaceNodeResults()
33652                         N->getOperand(1));  in ReplaceNodeResults()
34081   // X86 allows a sign-extended 32-bit immediate field as a displacement.  in isLegalAddressingMode()
34097     // If lower 4G is not available, then we must use rip-relative addressing.  in isLegalAddressingMode()
34127   unsigned Bits = Ty->getScalarSizeInBits();  in isVectorShiftByScalarCheap()
34151   // These are non-commutative binops.  in isBinOp()
34187   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())  in isTruncateFree()
34189   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();  in isTruncateFree()
34190   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();  in isTruncateFree()
34195   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())  in allowTruncateForTailCall()
34201   assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");  in allowTruncateForTailCall()
34230   // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.  in isZExtFree()
34231   return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit();  in isZExtFree()
34235   // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.  in isZExtFree()
34256     // X86 has 8, 16, and 32-bit zero-extending loads.  in isZExtFree()
34267   FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());  in shouldSinkOperands()
34271   if (I->getOpcode() == Instruction::Mul &&  in shouldSinkOperands()
34272       VTy->getElementType()->isIntegerTy(64)) {  in shouldSinkOperands()
34273     for (auto &Op : I->operands()) {  in shouldSinkOperands()
34275       if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))  in shouldSinkOperands()
34283         Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));  in shouldSinkOperands()
34298   int ShiftAmountOpNum = -1;  in shouldSinkOperands()
34299   if (I->isShift())  in shouldSinkOperands()
34302     if (II->getIntrinsicID() == Intrinsic::fshl ||  in shouldSinkOperands()
34303         II->getIntrinsicID() == Intrinsic::fshr)  in shouldSinkOperands()
34307   if (ShiftAmountOpNum == -1)  in shouldSinkOperands()
34310   auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));  in shouldSinkOperands()
34311   if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&  in shouldSinkOperands()
34312       isVectorShiftByScalarCheap(I->getType())) {  in shouldSinkOperands()
34313     Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));  in shouldSinkOperands()
34340                                                    EVT VT) const {  in isFMAFasterThanFMulAndFAdd()
34344   VT = VT.getScalarType();  in isFMAFasterThanFMulAndFAdd()
34346   if (!VT.isSimple())  in isFMAFasterThanFMulAndFAdd()
34349   switch (VT.getSimpleVT().SimpleTy) {  in isFMAFasterThanFMulAndFAdd()
34368                                                              EVT VT) const {  in shouldFoldSelectWithIdentityConstant()
34369   // TODO: This is too general. There are cases where pre-AVX512 codegen would  in shouldFoldSelectWithIdentityConstant()
34373   if (!Subtarget.hasVLX() && !VT.is512BitVector())  in shouldFoldSelectWithIdentityConstant()
34375   if (!VT.isVector() || VT.getScalarType() == MVT::i1)  in shouldFoldSelectWithIdentityConstant()
34385 bool X86TargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const {  in isShuffleMaskLegal()
34386   if (!VT.isSimple())  in isShuffleMaskLegal()
34390   if (VT.getSimpleVT().getScalarType() == MVT::i1)  in isShuffleMaskLegal()
34393   // Very little shuffling can be done for 64-bit vectors right now.  in isShuffleMaskLegal()
34394   if (VT.getSimpleVT().getSizeInBits() == 64)  in isShuffleMaskLegal()
34399   return isTypeLegal(VT.getSimpleVT());  in isShuffleMaskLegal()
34403                                                EVT VT) const {  in isVectorClearMaskLegal()
34405   // vpblendw and vpshufb for 256-bit vectors are not available on AVX1.  in isVectorClearMaskLegal()
34407     if (VT == MVT::v32i8 || VT == MVT::v16i16)  in isVectorClearMaskLegal()
34411   return isShuffleMaskLegal(Mask, VT);  in isVectorClearMaskLegal()
34426   // zero-extensions.  in getPreferredSwitchConditionType()
34433 //===----------------------------------------------------------------------===//
34435 //===----------------------------------------------------------------------===//
34442   for (const MachineInstr &mi : llvm::make_range(std::next(Itr), BB->end())) {  in isEFLAGSLiveAfter()
34452   for (MachineBasicBlock *Succ : BB->successors())  in isEFLAGSLiveAfter()
34453     if (Succ->isLiveIn(X86::EFLAGS))  in isEFLAGSLiveAfter()
34464   const BasicBlock *BB = MBB->getBasicBlock();  in emitXBegin()
34465   MachineFunction::iterator I = ++MBB->getIterator();  in emitXBegin()
34473   //  s0 = -1  in emitXBegin()
34483   MachineFunction *MF = MBB->getParent();  in emitXBegin()
34484   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);  in emitXBegin()
34485   MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);  in emitXBegin()
34486   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);  in emitXBegin()
34487   MF->insert(I, mainMBB);  in emitXBegin()
34488   MF->insert(I, fallMBB);  in emitXBegin()
34489   MF->insert(I, sinkMBB);  in emitXBegin()
34492     mainMBB->addLiveIn(X86::EFLAGS);  in emitXBegin()
34493     fallMBB->addLiveIn(X86::EFLAGS);  in emitXBegin()
34494     sinkMBB->addLiveIn(X86::EFLAGS);  in emitXBegin()
34498   sinkMBB->splice(sinkMBB->begin(), MBB,  in emitXBegin()
34499                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());  in emitXBegin()
34500   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);  in emitXBegin()
34502   MachineRegisterInfo &MRI = MF->getRegInfo();  in emitXBegin()
34512   BuildMI(thisMBB, MIMD, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);  in emitXBegin()
34513   thisMBB->addSuccessor(mainMBB);  in emitXBegin()
34514   thisMBB->addSuccessor(fallMBB);  in emitXBegin()
34517   //  mainDstReg := -1  in emitXBegin()
34518   BuildMI(mainMBB, MIMD, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);  in emitXBegin()
34519   BuildMI(mainMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB);  in emitXBegin()
34520   mainMBB->addSuccessor(sinkMBB);  in emitXBegin()
34526   BuildMI(fallMBB, MIMD, TII->get(X86::XABORT_DEF));  in emitXBegin()
34527   BuildMI(fallMBB, MIMD, TII->get(TargetOpcode::COPY), fallDstReg)  in emitXBegin()
34529   fallMBB->addSuccessor(sinkMBB);  in emitXBegin()
34533   BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)  in emitXBegin()
34544   // Emit va_arg instruction on X86-64.  in EmitVAARGWithCustomInserter()
34546   // Operands to this pseudo-instruction:  in EmitVAARGWithCustomInserter()
34548   // 1-5) Input         : va_list address (addr, i64mem)  in EmitVAARGWithCustomInserter()
34552   // 9  ) EFLAGS (implicit-def)  in EmitVAARGWithCustomInserter()
34567   MachineFunction *MF = MBB->getParent();  in EmitVAARGWithCustomInserter()
34575   MachineMemOperand *LoadOnlyMMO = MF->getMachineMemOperand(  in EmitVAARGWithCustomInserter()
34576       OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOStore);  in EmitVAARGWithCustomInserter()
34577   MachineMemOperand *StoreOnlyMMO = MF->getMachineMemOperand(  in EmitVAARGWithCustomInserter()
34578       OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOLoad);  in EmitVAARGWithCustomInserter()
34582   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();  in EmitVAARGWithCustomInserter()
34584       getRegClassFor(getPointerTy(MBB->getParent()->getDataLayout()));  in EmitVAARGWithCustomInserter()
34643     const BasicBlock *LLVM_BB = MBB->getBasicBlock();  in EmitVAARGWithCustomInserter()
34644     overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);  in EmitVAARGWithCustomInserter()
34645     offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);  in EmitVAARGWithCustomInserter()
34646     endMBB = MF->CreateMachineBasicBlock(LLVM_BB);  in EmitVAARGWithCustomInserter()
34648     MachineFunction::iterator MBBIter = ++MBB->getIterator();  in EmitVAARGWithCustomInserter()
34651     MF->insert(MBBIter, offsetMBB);  in EmitVAARGWithCustomInserter()
34652     MF->insert(MBBIter, overflowMBB);  in EmitVAARGWithCustomInserter()
34653     MF->insert(MBBIter, endMBB);  in EmitVAARGWithCustomInserter()
34656     endMBB->splice(endMBB->begin(), thisMBB,  in EmitVAARGWithCustomInserter()
34657                    std::next(MachineBasicBlock::iterator(MI)), thisMBB->end());  in EmitVAARGWithCustomInserter()
34658     endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);  in EmitVAARGWithCustomInserter()
34661     thisMBB->addSuccessor(offsetMBB);  in EmitVAARGWithCustomInserter()
34662     thisMBB->addSuccessor(overflowMBB);  in EmitVAARGWithCustomInserter()
34665     offsetMBB->addSuccessor(endMBB);  in EmitVAARGWithCustomInserter()
34666     overflowMBB->addSuccessor(endMBB);  in EmitVAARGWithCustomInserter()
34670     BuildMI(thisMBB, MIMD, TII->get(X86::MOV32rm), OffsetReg)  in EmitVAARGWithCustomInserter()
34679     BuildMI(thisMBB, MIMD, TII->get(X86::CMP32ri))  in EmitVAARGWithCustomInserter()
34681       .addImm(MaxOffset + 8 - ArgSizeA8);  in EmitVAARGWithCustomInserter()
34685     BuildMI(thisMBB, MIMD, TII->get(X86::JCC_1))  in EmitVAARGWithCustomInserter()
34697         TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),  in EmitVAARGWithCustomInserter()
34707       // Zero-extend the offset  in EmitVAARGWithCustomInserter()
34709       BuildMI(offsetMBB, MIMD, TII->get(X86::SUBREG_TO_REG), OffsetReg64)  in EmitVAARGWithCustomInserter()
34715       BuildMI(offsetMBB, MIMD, TII->get(X86::ADD64rr), OffsetDestReg)  in EmitVAARGWithCustomInserter()
34720       BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32rr), OffsetDestReg)  in EmitVAARGWithCustomInserter()
34727     BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32ri), NextOffsetReg)  in EmitVAARGWithCustomInserter()
34732     BuildMI(offsetMBB, MIMD, TII->get(X86::MOV32mr))  in EmitVAARGWithCustomInserter()
34742     BuildMI(offsetMBB, MIMD, TII->get(X86::JMP_1))  in EmitVAARGWithCustomInserter()
34753           TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),  in EmitVAARGWithCustomInserter()
34768     // aligned_addr = (addr + (align-1)) & ~(align-1)  in EmitVAARGWithCustomInserter()
34771         TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),  in EmitVAARGWithCustomInserter()
34774         .addImm(Alignment.value() - 1);  in EmitVAARGWithCustomInserter()
34778         TII->get(Subtarget.isTarget64BitLP64() ? X86::AND64ri32 : X86::AND32ri),  in EmitVAARGWithCustomInserter()
34781         .addImm(~(uint64_t)(Alignment.value() - 1));  in EmitVAARGWithCustomInserter()
34783     BuildMI(overflowMBB, MIMD, TII->get(TargetOpcode::COPY), OverflowDestReg)  in EmitVAARGWithCustomInserter()
34788   // (the overflow address should be kept 8-byte aligned)  in EmitVAARGWithCustomInserter()
34792       TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),  in EmitVAARGWithCustomInserter()
34799           TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64mr : X86::MOV32mr))  in EmitVAARGWithCustomInserter()
34810     BuildMI(*endMBB, endMBB->begin(), MIMD,  in EmitVAARGWithCustomInserter()
34811             TII->get(X86::PHI), DestReg)  in EmitVAARGWithCustomInserter()
34835   SelectItr->addRegisterKilled(X86::EFLAGS, TRI);  in checkAndUpdateEFLAGSKill()
34839 // Return true if it is OK for this CMOV pseudo-opcode to be cascaded
34840 // together with other CMOV pseudo-opcodes into a single basic-block with
34885   MachineFunction *MF = TrueMBB->getParent();  in createPHIsForCMOVsInSinkBB()
34886   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();  in createPHIsForCMOVsInSinkBB()
34889   X86::CondCode CC = X86::CondCode(MIItBegin->getOperand(3).getImm());  in createPHIsForCMOVsInSinkBB()
34892   MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();  in createPHIsForCMOVsInSinkBB()
34904     Register DestReg = MIIt->getOperand(0).getReg();  in createPHIsForCMOVsInSinkBB()
34905     Register Op1Reg = MIIt->getOperand(1).getReg();  in createPHIsForCMOVsInSinkBB()
34906     Register Op2Reg = MIIt->getOperand(2).getReg();  in createPHIsForCMOVsInSinkBB()
34911     if (MIIt->getOperand(3).getImm() == OppCC)  in createPHIsForCMOVsInSinkBB()
34921         BuildMI(*SinkMBB, SinkInsertionPoint, MIMD, TII->get(X86::PHI), DestReg)  in createPHIsForCMOVsInSinkBB()
34966   // because this custom-inserter would have generated:  in EmitLoweredCascadedSelect()
35013   const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();  in EmitLoweredCascadedSelect()
35014   MachineFunction *F = ThisMBB->getParent();  in EmitLoweredCascadedSelect()
35015   MachineBasicBlock *FirstInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredCascadedSelect()
35016   MachineBasicBlock *SecondInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredCascadedSelect()
35017   MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredCascadedSelect()
35019   MachineFunction::iterator It = ++ThisMBB->getIterator();  in EmitLoweredCascadedSelect()
35020   F->insert(It, FirstInsertedMBB);  in EmitLoweredCascadedSelect()
35021   F->insert(It, SecondInsertedMBB);  in EmitLoweredCascadedSelect()
35022   F->insert(It, SinkMBB);  in EmitLoweredCascadedSelect()
35027   FirstInsertedMBB->addLiveIn(X86::EFLAGS);  in EmitLoweredCascadedSelect()
35034     SecondInsertedMBB->addLiveIn(X86::EFLAGS);  in EmitLoweredCascadedSelect()
35035     SinkMBB->addLiveIn(X86::EFLAGS);  in EmitLoweredCascadedSelect()
35039   SinkMBB->splice(SinkMBB->begin(), ThisMBB,  in EmitLoweredCascadedSelect()
35041                   ThisMBB->end());  in EmitLoweredCascadedSelect()
35042   SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);  in EmitLoweredCascadedSelect()
35045   ThisMBB->addSuccessor(FirstInsertedMBB);  in EmitLoweredCascadedSelect()
35047   ThisMBB->addSuccessor(SinkMBB);  in EmitLoweredCascadedSelect()
35049   FirstInsertedMBB->addSuccessor(SecondInsertedMBB);  in EmitLoweredCascadedSelect()
35051   FirstInsertedMBB->addSuccessor(SinkMBB);  in EmitLoweredCascadedSelect()
35053   SecondInsertedMBB->addSuccessor(SinkMBB);  in EmitLoweredCascadedSelect()
35057   BuildMI(ThisMBB, MIMD, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC);  in EmitLoweredCascadedSelect()
35061   BuildMI(FirstInsertedMBB, MIMD, TII->get(X86::JCC_1))  in EmitLoweredCascadedSelect()
35071       BuildMI(*SinkMBB, SinkMBB->begin(), MIMD, TII->get(X86::PHI), DestReg)  in EmitLoweredCascadedSelect()
35095   // diamond control-flow pattern.  The incoming instruction knows the  in EmitLoweredSelect()
35104   //   fallthrough --> FalseMBB  in EmitLoweredSelect()
35106   // This code lowers all pseudo-CMOV instructions. Generally it lowers these  in EmitLoweredSelect()
35139   // function - EmitLoweredCascadedSelect.  in EmitLoweredSelect()
35153     while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) &&  in EmitLoweredSelect()
35154            (NextMIIt->getOperand(3).getImm() == CC ||  in EmitLoweredSelect()
35155             NextMIIt->getOperand(3).getImm() == OppCC)) {  in EmitLoweredSelect()
35157       NextMIIt = next_nodbg(NextMIIt, ThisMBB->end());  in EmitLoweredSelect()
35163   if (LastCMOV == &MI && NextMIIt != ThisMBB->end() &&  in EmitLoweredSelect()
35164       NextMIIt->getOpcode() == MI.getOpcode() &&  in EmitLoweredSelect()
35165       NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() &&  in EmitLoweredSelect()
35166       NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() &&  in EmitLoweredSelect()
35167       NextMIIt->getOperand(1).isKill()) {  in EmitLoweredSelect()
35171   const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();  in EmitLoweredSelect()
35172   MachineFunction *F = ThisMBB->getParent();  in EmitLoweredSelect()
35173   MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredSelect()
35174   MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredSelect()
35176   MachineFunction::iterator It = ++ThisMBB->getIterator();  in EmitLoweredSelect()
35177   F->insert(It, FalseMBB);  in EmitLoweredSelect()
35178   F->insert(It, SinkMBB);  in EmitLoweredSelect()
35181   unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);  in EmitLoweredSelect()
35182   FalseMBB->setCallFrameSize(CallFrameSize);  in EmitLoweredSelect()
35183   SinkMBB->setCallFrameSize(CallFrameSize);  in EmitLoweredSelect()
35188   if (!LastCMOV->killsRegister(X86::EFLAGS, /*TRI=*/nullptr) &&  in EmitLoweredSelect()
35190     FalseMBB->addLiveIn(X86::EFLAGS);  in EmitLoweredSelect()
35191     SinkMBB->addLiveIn(X86::EFLAGS);  in EmitLoweredSelect()
35199       SinkMBB->push_back(MI.removeFromParent());  in EmitLoweredSelect()
35202   SinkMBB->splice(SinkMBB->end(), ThisMBB,  in EmitLoweredSelect()
35204                   ThisMBB->end());  in EmitLoweredSelect()
35205   SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);  in EmitLoweredSelect()
35208   ThisMBB->addSuccessor(FalseMBB);  in EmitLoweredSelect()
35210   ThisMBB->addSuccessor(SinkMBB);  in EmitLoweredSelect()
35212   FalseMBB->addSuccessor(SinkMBB);  in EmitLoweredSelect()
35215   BuildMI(ThisMBB, MIMD, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);  in EmitLoweredSelect()
35226   ThisMBB->erase(MIItBegin, MIItEnd);  in EmitLoweredSelect()
35241   MachineFunction *MF = MBB->getParent();  in EmitLoweredProbedAlloca()
35245   const BasicBlock *LLVM_BB = MBB->getBasicBlock();  in EmitLoweredProbedAlloca()
35249   MachineRegisterInfo &MRI = MF->getRegInfo();  in EmitLoweredProbedAlloca()
35250   MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredProbedAlloca()
35251   MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredProbedAlloca()
35252   MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredProbedAlloca()
35254   MachineFunction::iterator MBBIter = ++MBB->getIterator();  in EmitLoweredProbedAlloca()
35255   MF->insert(MBBIter, testMBB);  in EmitLoweredProbedAlloca()
35256   MF->insert(MBBIter, blockMBB);  in EmitLoweredProbedAlloca()
35257   MF->insert(MBBIter, tailMBB);  in EmitLoweredProbedAlloca()
35268   BuildMI(*MBB, {MI}, MIMD, TII->get(TargetOpcode::COPY), TmpStackPtr)  in EmitLoweredProbedAlloca()
35272     BuildMI(*MBB, {MI}, MIMD, TII->get(Opc), FinalStackPtr)  in EmitLoweredProbedAlloca()
35280           TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))  in EmitLoweredProbedAlloca()
35284   BuildMI(testMBB, MIMD, TII->get(X86::JCC_1))  in EmitLoweredProbedAlloca()
35287   testMBB->addSuccessor(blockMBB);  in EmitLoweredProbedAlloca()
35288   testMBB->addSuccessor(tailMBB);  in EmitLoweredProbedAlloca()
35294   //       + ---- <- ------------ <- ------------- <- ------------ +  in EmitLoweredProbedAlloca()
35296 …// [free probe] -> [page alloc] -> [alloc probe] -> [tail alloc] + -> [dyn probe] -> [page alloc] …  in EmitLoweredProbedAlloca()
35298 …                                           + <- ----------- <- ------------ <- ----------- <- ----…  in EmitLoweredProbedAlloca()
35304   addRegOffset(BuildMI(blockMBB, MIMD, TII->get(XORMIOpc)), physSPReg, false, 0)  in EmitLoweredProbedAlloca()
35307   BuildMI(blockMBB, MIMD, TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr)),  in EmitLoweredProbedAlloca()
35312   BuildMI(blockMBB, MIMD, TII->get(X86::JMP_1)).addMBB(testMBB);  in EmitLoweredProbedAlloca()
35313   blockMBB->addSuccessor(testMBB);  in EmitLoweredProbedAlloca()
35316   BuildMI(tailMBB, MIMD, TII->get(TargetOpcode::COPY),  in EmitLoweredProbedAlloca()
35320   tailMBB->splice(tailMBB->end(), MBB,  in EmitLoweredProbedAlloca()
35321                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());  in EmitLoweredProbedAlloca()
35322   tailMBB->transferSuccessorsAndUpdatePHIs(MBB);  in EmitLoweredProbedAlloca()
35323   MBB->addSuccessor(testMBB);  in EmitLoweredProbedAlloca()
35335   MachineFunction *MF = BB->getParent();  in EmitLoweredSegAlloca()
35338   const BasicBlock *LLVM_BB = BB->getBasicBlock();  in EmitLoweredSegAlloca()
35340   assert(MF->shouldSplitStack());  in EmitLoweredSegAlloca()
35364   MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredSegAlloca()
35365   MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredSegAlloca()
35366   MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);  in EmitLoweredSegAlloca()
35368   MachineRegisterInfo &MRI = MF->getRegInfo();  in EmitLoweredSegAlloca()
35370       getRegClassFor(getPointerTy(MF->getDataLayout()));  in EmitLoweredSegAlloca()
35380   MachineFunction::iterator MBBIter = ++BB->getIterator();  in EmitLoweredSegAlloca()
35382   MF->insert(MBBIter, bumpMBB);  in EmitLoweredSegAlloca()
35383   MF->insert(MBBIter, mallocMBB);  in EmitLoweredSegAlloca()
35384   MF->insert(MBBIter, continueMBB);  in EmitLoweredSegAlloca()
35386   continueMBB->splice(continueMBB->begin(), BB,  in EmitLoweredSegAlloca()
35387                       std::next(MachineBasicBlock::iterator(MI)), BB->end());  in EmitLoweredSegAlloca()
35388   continueMBB->transferSuccessorsAndUpdatePHIs(BB);  in EmitLoweredSegAlloca()
35392   BuildMI(BB, MIMD, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);  in EmitLoweredSegAlloca()
35393   BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)  in EmitLoweredSegAlloca()
35395   BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))  in EmitLoweredSegAlloca()
35398   BuildMI(BB, MIMD, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);  in EmitLoweredSegAlloca()
35402   BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), physSPReg)  in EmitLoweredSegAlloca()
35404   BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)  in EmitLoweredSegAlloca()
35406   BuildMI(bumpMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);  in EmitLoweredSegAlloca()
35410       Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);  in EmitLoweredSegAlloca()
35412     BuildMI(mallocMBB, MIMD, TII->get(X86::MOV64rr), X86::RDI)  in EmitLoweredSegAlloca()
35414     BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))  in EmitLoweredSegAlloca()
35420     BuildMI(mallocMBB, MIMD, TII->get(X86::MOV32rr), X86::EDI)  in EmitLoweredSegAlloca()
35422     BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))  in EmitLoweredSegAlloca()
35428     BuildMI(mallocMBB, MIMD, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)  in EmitLoweredSegAlloca()
35430     BuildMI(mallocMBB, MIMD, TII->get(X86::PUSH32r)).addReg(sizeVReg);  in EmitLoweredSegAlloca()
35431     BuildMI(mallocMBB, MIMD, TII->get(X86::CALLpcrel32))  in EmitLoweredSegAlloca()
35438     BuildMI(mallocMBB, MIMD, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)  in EmitLoweredSegAlloca()
35441   BuildMI(mallocMBB, MIMD, TII->get(TargetOpcode::COPY), mallocPtrVReg)  in EmitLoweredSegAlloca()
35443   BuildMI(mallocMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);  in EmitLoweredSegAlloca()
35446   BB->addSuccessor(bumpMBB);  in EmitLoweredSegAlloca()
35447   BB->addSuccessor(mallocMBB);  in EmitLoweredSegAlloca()
35448   mallocMBB->addSuccessor(continueMBB);  in EmitLoweredSegAlloca()
35449   bumpMBB->addSuccessor(continueMBB);  in EmitLoweredSegAlloca()
35452   BuildMI(*continueMBB, continueMBB->begin(), MIMD, TII->get(X86::PHI),  in EmitLoweredSegAlloca()
35469   MachineFunction *MF = BB->getParent();  in EmitLoweredCatchRet()
35475              classifyEHPersonality(MF->getFunction().getPersonalityFn())) &&  in EmitLoweredCatchRet()
35478   // Only 32-bit EH needs to worry about manually restoring stack pointers.  in EmitLoweredCatchRet()
35485       MF->CreateMachineBasicBlock(BB->getBasicBlock());  in EmitLoweredCatchRet()
35486   assert(BB->succ_size() == 1);  in EmitLoweredCatchRet()
35487   MF->insert(std::next(BB->getIterator()), RestoreMBB);  in EmitLoweredCatchRet()
35488   RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);  in EmitLoweredCatchRet()
35489   BB->addSuccessor(RestoreMBB);  in EmitLoweredCatchRet()
35494   RestoreMBB->setIsEHPad(true);  in EmitLoweredCatchRet()
35496   auto RestoreMBBI = RestoreMBB->begin();  in EmitLoweredCatchRet()
35505   // adjust_stackdown -> TLSADDR -> adjust_stackup.  in EmitLoweredTLSAddr()
35507   // inside MC, therefore without the two markers shrink-wrapping  in EmitLoweredTLSAddr()
35511   MachineFunction &MF = *BB->getParent();  in EmitLoweredTLSAddr()
35518   BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);  in EmitLoweredTLSAddr()
35526   BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);  in EmitLoweredTLSAddr()
35535   // our load from the relocation, sticking it in either RDI (x86-64)  in EmitLoweredTLSCall()
35538   MachineFunction *F = BB->getParent();  in EmitLoweredTLSCall()
35546   // FIXME: The 32-bit calls have non-standard calling conventions. Use a  in EmitLoweredTLSCall()
35550       Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :  in EmitLoweredTLSCall()
35551       Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);  in EmitLoweredTLSCall()
35554         BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI)  in EmitLoweredTLSCall()
35561     MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL64m));  in EmitLoweredTLSCall()
35566         BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)  in EmitLoweredTLSCall()
35573     MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m));  in EmitLoweredTLSCall()
35578         BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)  in EmitLoweredTLSCall()
35579             .addReg(TII->getGlobalBaseReg(F))  in EmitLoweredTLSCall()
35585     MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m));  in EmitLoweredTLSCall()
35614     // aliases and are doing non-trivial configuration of the thunk's body. For  in getIndirectThunkSymbol()
35615     // example, the Linux kernel will do boot-time hot patching of the thunk  in getIndirectThunkSymbol()
35621     // LLVM will generate calls to specific thunks, we merely make a best-effort  in getIndirectThunkSymbol()
35626       assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");  in getIndirectThunkSymbol()
35629       assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");  in getIndirectThunkSymbol()
35632       assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");  in getIndirectThunkSymbol()
35635       assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");  in getIndirectThunkSymbol()
35638       assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");  in getIndirectThunkSymbol()
35646     // When targeting an internal COMDAT thunk use an LLVM-specific name.  in getIndirectThunkSymbol()
35649       assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");  in getIndirectThunkSymbol()
35652       assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");  in getIndirectThunkSymbol()
35655       assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");  in getIndirectThunkSymbol()
35658       assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");  in getIndirectThunkSymbol()
35661       assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");  in getIndirectThunkSymbol()
35668     assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");  in getIndirectThunkSymbol()
35684   // Find an available scratch register to hold the callee. On 64-bit, we can  in EmitLoweredIndirectThunk()
35686   // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't  in EmitLoweredIndirectThunk()
35704   // Choose the first remaining non-zero available register.  in EmitLoweredIndirectThunk()
35718   BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), AvailableReg)  in EmitLoweredIndirectThunk()
35721   MI.setDesc(TII->get(Opc));  in EmitLoweredIndirectThunk()
35722   MachineInstrBuilder(*BB->getParent(), &MI)  in EmitLoweredIndirectThunk()
35742   MachineFunction *MF = MBB->getParent();  in emitSetJmpShadowStackFix()
35744   MachineRegisterInfo &MRI = MF->getRegInfo();  in emitSetJmpShadowStackFix()
35752   MVT PVT = getPointerTy(MF->getDataLayout());  in emitSetJmpShadowStackFix()
35756   BuildMI(*MBB, MI, MIMD, TII->get(XorRROpc))  in emitSetJmpShadowStackFix()
35764   BuildMI(*MBB, MI, MIMD, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);  in emitSetJmpShadowStackFix()
35768   MIB = BuildMI(*MBB, MI, MIMD, TII->get(PtrStoreOpc));  in emitSetJmpShadowStackFix()
35785   MachineFunction *MF = MBB->getParent();  in emitEHSjLjSetJmp()
35788   MachineRegisterInfo &MRI = MF->getRegInfo();  in emitEHSjLjSetJmp()
35790   const BasicBlock *BB = MBB->getBasicBlock();  in emitEHSjLjSetJmp()
35791   MachineFunction::iterator I = ++MBB->getIterator();  in emitEHSjLjSetJmp()
35804   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");  in emitEHSjLjSetJmp()
35811   MVT PVT = getPointerTy(MF->getDataLayout());  in emitEHSjLjSetJmp()
35818   //  buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB  in emitEHSjLjSetJmp()
35832   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);  in emitEHSjLjSetJmp()
35833   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);  in emitEHSjLjSetJmp()
35834   MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);  in emitEHSjLjSetJmp()
35835   MF->insert(I, mainMBB);  in emitEHSjLjSetJmp()
35836   MF->insert(I, sinkMBB);  in emitEHSjLjSetJmp()
35837   MF->push_back(restoreMBB);  in emitEHSjLjSetJmp()
35838   restoreMBB->setMachineBlockAddressTaken();  in emitEHSjLjSetJmp()
35843   sinkMBB->splice(sinkMBB->begin(), MBB,  in emitEHSjLjSetJmp()
35844                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());  in emitEHSjLjSetJmp()
35845   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);  in emitEHSjLjSetJmp()
35851   bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&  in emitEHSjLjSetJmp()
35860       MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA64r), LabelReg)  in emitEHSjLjSetJmp()
35868       MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA32r), LabelReg)  in emitEHSjLjSetJmp()
35869               .addReg(XII->getGlobalBaseReg(MF))  in emitEHSjLjSetJmp()
35878   MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrStoreOpc));  in emitEHSjLjSetJmp()
35891   if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) {  in emitEHSjLjSetJmp()
35896   MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup))  in emitEHSjLjSetJmp()
35900   MIB.addRegMask(RegInfo->getNoPreservedMask());  in emitEHSjLjSetJmp()
35901   thisMBB->addSuccessor(mainMBB);  in emitEHSjLjSetJmp()
35902   thisMBB->addSuccessor(restoreMBB);  in emitEHSjLjSetJmp()
35906   BuildMI(mainMBB, MIMD, TII->get(X86::MOV32r0), mainDstReg);  in emitEHSjLjSetJmp()
35907   mainMBB->addSuccessor(sinkMBB);  in emitEHSjLjSetJmp()
35910   BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)  in emitEHSjLjSetJmp()
35917   if (RegInfo->hasBasePointer(*MF)) {  in emitEHSjLjSetJmp()
35920     X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();  in emitEHSjLjSetJmp()
35921     X86FI->setRestoreBasePointer(MF);  in emitEHSjLjSetJmp()
35922     Register FramePtr = RegInfo->getFrameRegister(*MF);  in emitEHSjLjSetJmp()
35923     Register BasePtr = RegInfo->getBaseRegister();  in emitEHSjLjSetJmp()
35925     addRegOffset(BuildMI(restoreMBB, MIMD, TII->get(Opm), BasePtr),  in emitEHSjLjSetJmp()
35926                  FramePtr, true, X86FI->getRestoreBasePointerOffset())  in emitEHSjLjSetJmp()
35929   BuildMI(restoreMBB, MIMD, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);  in emitEHSjLjSetJmp()
35930   BuildMI(restoreMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB);  in emitEHSjLjSetJmp()
35931   restoreMBB->addSuccessor(sinkMBB);  in emitEHSjLjSetJmp()
35946   MachineFunction *MF = MBB->getParent();  in emitLongJmpShadowStackFix()
35948   MachineRegisterInfo &MRI = MF->getRegInfo();  in emitLongJmpShadowStackFix()
35954   MVT PVT = getPointerTy(MF->getDataLayout());  in emitLongJmpShadowStackFix()
35981   MachineFunction::iterator I = ++MBB->getIterator();  in emitLongJmpShadowStackFix()
35982   const BasicBlock *BB = MBB->getBasicBlock();  in emitLongJmpShadowStackFix()
35984   MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB);  in emitLongJmpShadowStackFix()
35985   MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);  in emitLongJmpShadowStackFix()
35986   MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB);  in emitLongJmpShadowStackFix()
35987   MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB);  in emitLongJmpShadowStackFix()
35988   MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB);  in emitLongJmpShadowStackFix()
35989   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);  in emitLongJmpShadowStackFix()
35990   MF->insert(I, checkSspMBB);  in emitLongJmpShadowStackFix()
35991   MF->insert(I, fallMBB);  in emitLongJmpShadowStackFix()
35992   MF->insert(I, fixShadowMBB);  in emitLongJmpShadowStackFix()
35993   MF->insert(I, fixShadowLoopPrepareMBB);  in emitLongJmpShadowStackFix()
35994   MF->insert(I, fixShadowLoopMBB);  in emitLongJmpShadowStackFix()
35995   MF->insert(I, sinkMBB);  in emitLongJmpShadowStackFix()
35998   sinkMBB->splice(sinkMBB->begin(), MBB, MachineBasicBlock::iterator(MI),  in emitLongJmpShadowStackFix()
35999                   MBB->end());  in emitLongJmpShadowStackFix()
36000   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);  in emitLongJmpShadowStackFix()
36002   MBB->addSuccessor(checkSspMBB);  in emitLongJmpShadowStackFix()
36006   BuildMI(checkSspMBB, MIMD, TII->get(X86::MOV32r0), ZReg);  in emitLongJmpShadowStackFix()
36010     BuildMI(checkSspMBB, MIMD, TII->get(X86::SUBREG_TO_REG), TmpZReg)  in emitLongJmpShadowStackFix()
36020   BuildMI(checkSspMBB, MIMD, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);  in emitLongJmpShadowStackFix()
36025   BuildMI(checkSspMBB, MIMD, TII->get(TestRROpc))  in emitLongJmpShadowStackFix()
36028   BuildMI(checkSspMBB, MIMD, TII->get(X86::JCC_1))  in emitLongJmpShadowStackFix()
36031   checkSspMBB->addSuccessor(sinkMBB);  in emitLongJmpShadowStackFix()
36032   checkSspMBB->addSuccessor(fallMBB);  in emitLongJmpShadowStackFix()
36039       BuildMI(fallMBB, MIMD, TII->get(PtrLoadOpc), PrevSSPReg);  in emitLongJmpShadowStackFix()
36055   BuildMI(fallMBB, MIMD, TII->get(SubRROpc), SspSubReg)  in emitLongJmpShadowStackFix()
36060   BuildMI(fallMBB, MIMD, TII->get(X86::JCC_1))  in emitLongJmpShadowStackFix()
36063   fallMBB->addSuccessor(sinkMBB);  in emitLongJmpShadowStackFix()
36064   fallMBB->addSuccessor(fixShadowMBB);  in emitLongJmpShadowStackFix()
36070   BuildMI(fixShadowMBB, MIMD, TII->get(ShrRIOpc), SspFirstShrReg)  in emitLongJmpShadowStackFix()
36076   BuildMI(fixShadowMBB, MIMD, TII->get(IncsspOpc)).addReg(SspFirstShrReg);  in emitLongJmpShadowStackFix()
36080   BuildMI(fixShadowMBB, MIMD, TII->get(ShrRIOpc), SspSecondShrReg)  in emitLongJmpShadowStackFix()
36085   BuildMI(fixShadowMBB, MIMD, TII->get(X86::JCC_1))  in emitLongJmpShadowStackFix()
36088   fixShadowMBB->addSuccessor(sinkMBB);  in emitLongJmpShadowStackFix()
36089   fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB);  in emitLongJmpShadowStackFix()
36094   BuildMI(fixShadowLoopPrepareMBB, MIMD, TII->get(ShlR1Opc), SspAfterShlReg)  in emitLongJmpShadowStackFix()
36101   BuildMI(fixShadowLoopPrepareMBB, MIMD, TII->get(MovRIOpc), Value128InReg)  in emitLongJmpShadowStackFix()
36103   fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB);  in emitLongJmpShadowStackFix()
36109   BuildMI(fixShadowLoopMBB, MIMD, TII->get(X86::PHI), CounterReg)  in emitLongJmpShadowStackFix()
36116   BuildMI(fixShadowLoopMBB, MIMD, TII->get(IncsspOpc)).addReg(Value128InReg);  in emitLongJmpShadowStackFix()
36120   BuildMI(fixShadowLoopMBB, MIMD, TII->get(DecROpc), DecReg).addReg(CounterReg);  in emitLongJmpShadowStackFix()
36123   BuildMI(fixShadowLoopMBB, MIMD, TII->get(X86::JCC_1))  in emitLongJmpShadowStackFix()
36126   fixShadowLoopMBB->addSuccessor(sinkMBB);  in emitLongJmpShadowStackFix()
36127   fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB);  in emitLongJmpShadowStackFix()
36136   MachineFunction *MF = MBB->getParent();  in emitEHSjLjLongJmp()
36138   MachineRegisterInfo &MRI = MF->getRegInfo();  in emitEHSjLjLongJmp()
36144   MVT PVT = getPointerTy(MF->getDataLayout());  in emitEHSjLjLongJmp()
36154   Register SP = RegInfo->getStackRegister();  in emitEHSjLjLongJmp()
36167   if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) {  in emitEHSjLjLongJmp()
36172   MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), FP);  in emitEHSjLjLongJmp()
36184   MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), Tmp);  in emitEHSjLjLongJmp()
36198   MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), SP);  in emitEHSjLjLongJmp()
36209   BuildMI(*thisMBB, MI, MIMD, TII->get(IJmpOpc)).addReg(Tmp);  in emitEHSjLjLongJmp()
36220   MachineFunction *MF = MBB->getParent();  in SetupEntryBlockForSjLj()
36221   MachineRegisterInfo *MRI = &MF->getRegInfo();  in SetupEntryBlockForSjLj()
36224   MVT PVT = getPointerTy(MF->getDataLayout());  in SetupEntryBlockForSjLj()
36230   bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&  in SetupEntryBlockForSjLj()
36238     VR = MRI->createVirtualRegister(TRC);  in SetupEntryBlockForSjLj()
36242       BuildMI(*MBB, MI, MIMD, TII->get(X86::LEA64r), VR)  in SetupEntryBlockForSjLj()
36249       BuildMI(*MBB, MI, MIMD, TII->get(X86::LEA32r), VR)  in SetupEntryBlockForSjLj()
36250           .addReg(0) /* TII->getGlobalBaseReg(MF) */  in SetupEntryBlockForSjLj()
36257   MachineInstrBuilder MIB = BuildMI(*MBB, MI, MIMD, TII->get(Op));  in SetupEntryBlockForSjLj()
36269   MachineFunction *MF = BB->getParent();  in EmitSjLjDispatchBlock()
36270   MachineRegisterInfo *MRI = &MF->getRegInfo();  in EmitSjLjDispatchBlock()
36272   int FI = MF->getFrameInfo().getFunctionContextIndex();  in EmitSjLjDispatchBlock()
36292     if (!MF->hasCallSiteLandingPad(Sym))  in EmitSjLjDispatchBlock()
36295     for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {  in EmitSjLjDispatchBlock()
36309       InvokeBBs.insert(LP->pred_begin(), LP->pred_end());  in EmitSjLjDispatchBlock()
36319   MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();  in EmitSjLjDispatchBlock()
36320   DispatchBB->setIsEHPad(true);  in EmitSjLjDispatchBlock()
36322   MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();  in EmitSjLjDispatchBlock()
36323   BuildMI(TrapBB, MIMD, TII->get(X86::TRAP));  in EmitSjLjDispatchBlock()
36324   DispatchBB->addSuccessor(TrapBB);  in EmitSjLjDispatchBlock()
36326   MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();  in EmitSjLjDispatchBlock()
36327   DispatchBB->addSuccessor(DispContBB);  in EmitSjLjDispatchBlock()
36330   MF->push_back(DispatchBB);  in EmitSjLjDispatchBlock()
36331   MF->push_back(DispContBB);  in EmitSjLjDispatchBlock()
36332   MF->push_back(TrapBB);  in EmitSjLjDispatchBlock()
36340   MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);  in EmitSjLjDispatchBlock()
36341   unsigned MJTI = JTI->createJumpTableIndex(LPadList);  in EmitSjLjDispatchBlock()
36343   const X86RegisterInfo &RI = TII->getRegisterInfo();  in EmitSjLjDispatchBlock()
36349     X86MachineFunctionInfo *MFI = MF->getInfo<X86MachineFunctionInfo>();  in EmitSjLjDispatchBlock()
36350     MFI->setRestoreBasePointer(MF);  in EmitSjLjDispatchBlock()
36355     addRegOffset(BuildMI(DispatchBB, MIMD, TII->get(Op), BP), FP, true,  in EmitSjLjDispatchBlock()
36356                  MFI->getRestoreBasePointerOffset())  in EmitSjLjDispatchBlock()
36359     BuildMI(DispatchBB, MIMD, TII->get(X86::NOOP))  in EmitSjLjDispatchBlock()
36364   Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass);  in EmitSjLjDispatchBlock()
36365   addFrameReference(BuildMI(DispatchBB, MIMD, TII->get(X86::MOV32rm), IReg), FI,  in EmitSjLjDispatchBlock()
36367   BuildMI(DispatchBB, MIMD, TII->get(X86::CMP32ri))  in EmitSjLjDispatchBlock()
36370   BuildMI(DispatchBB, MIMD, TII->get(X86::JCC_1))  in EmitSjLjDispatchBlock()
36375     Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass);  in EmitSjLjDispatchBlock()
36376     Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);  in EmitSjLjDispatchBlock()
36379     BuildMI(DispContBB, MIMD, TII->get(X86::LEA64r), BReg)  in EmitSjLjDispatchBlock()
36386     BuildMI(DispContBB, MIMD, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64)  in EmitSjLjDispatchBlock()
36394       BuildMI(DispContBB, MIMD, TII->get(X86::JMP64m))  in EmitSjLjDispatchBlock()
36402       Register OReg = MRI->createVirtualRegister(&X86::GR32RegClass);  in EmitSjLjDispatchBlock()
36403       Register OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass);  in EmitSjLjDispatchBlock()
36404       Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass);  in EmitSjLjDispatchBlock()
36407       BuildMI(DispContBB, MIMD, TII->get(X86::MOV32rm), OReg)  in EmitSjLjDispatchBlock()
36414       BuildMI(DispContBB, MIMD, TII->get(X86::MOVSX64rr32), OReg64)  in EmitSjLjDispatchBlock()
36417       BuildMI(DispContBB, MIMD, TII->get(X86::ADD64rr), TReg)  in EmitSjLjDispatchBlock()
36421       BuildMI(DispContBB, MIMD, TII->get(X86::JMP64r)).addReg(TReg);  in EmitSjLjDispatchBlock()
36429     BuildMI(DispContBB, MIMD, TII->get(X86::JMP32m))  in EmitSjLjDispatchBlock()
36441       DispContBB->addSuccessor(LP);  in EmitSjLjDispatchBlock()
36445   const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();  in EmitSjLjDispatchBlock()
36450     SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),  in EmitSjLjDispatchBlock()
36451                                                    MBB->succ_rend());  in EmitSjLjDispatchBlock()
36454       if (MBBS->isEHPad()) {  in EmitSjLjDispatchBlock()
36455         MBB->removeSuccessor(MBBS);  in EmitSjLjDispatchBlock()
36460     MBB->addSuccessor(DispatchBB);  in EmitSjLjDispatchBlock()
36462     // Find the invoke call and mark all of the callee-saved registers as  in EmitSjLjDispatchBlock()
36486   // Mark all former landing pads as non-landing pads.  The dispatch is the only  in EmitSjLjDispatchBlock()
36489     LP->setIsEHPad(false);  in EmitSjLjDispatchBlock()
36503   MachineFunction &MF = *BB->getParent();  in emitPatchableEventCall()
36510   BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);  in emitPatchableEventCall()
36516   BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);  in emitPatchableEventCall()
36524   MachineFunction *MF = BB->getParent();  in EmitInstrWithCustomInserter()
36591         MF->getFrameInfo().CreateStackObject(2, Align(2), false);  in EmitInstrWithCustomInserter()
36592     addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FNSTCW16m)),  in EmitInstrWithCustomInserter()
36596     Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);  in EmitInstrWithCustomInserter()
36597     addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOVZX32rm16), OldCW),  in EmitInstrWithCustomInserter()
36602     Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);  in EmitInstrWithCustomInserter()
36603     BuildMI(*BB, MI, MIMD, TII->get(X86::OR32ri), NewCW)  in EmitInstrWithCustomInserter()
36609         MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);  in EmitInstrWithCustomInserter()
36610     BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16)  in EmitInstrWithCustomInserter()
36615         MF->getFrameInfo().CreateStackObject(2, Align(2), false);  in EmitInstrWithCustomInserter()
36616     addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOV16mr)),  in EmitInstrWithCustomInserter()
36621     addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),  in EmitInstrWithCustomInserter()
36626       BuildMI(*BB, MI, MIMD, TII->get(X86::ADD_Fp80))  in EmitInstrWithCustomInserter()
36631       BuildMI(*BB, MI, MIMD, TII->get(X86::ADD_Fp80m32))  in EmitInstrWithCustomInserter()
36642     addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),  in EmitInstrWithCustomInserter()
36661         MF->getFrameInfo().CreateStackObject(2, Align(2), false);  in EmitInstrWithCustomInserter()
36662     addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FNSTCW16m)),  in EmitInstrWithCustomInserter()
36666     Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);  in EmitInstrWithCustomInserter()
36667     addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOVZX32rm16), OldCW),  in EmitInstrWithCustomInserter()
36671     Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);  in EmitInstrWithCustomInserter()
36672     BuildMI(*BB, MI, MIMD, TII->get(X86::OR32ri), NewCW)  in EmitInstrWithCustomInserter()
36677         MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);  in EmitInstrWithCustomInserter()
36678     BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16)  in EmitInstrWithCustomInserter()
36683         MF->getFrameInfo().CreateStackObject(2, Align(2), false);  in EmitInstrWithCustomInserter()
36684     addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOV16mr)),  in EmitInstrWithCustomInserter()
36690                               TII->get(X86::FLDCW16m)), NewCWFrameIdx);  in EmitInstrWithCustomInserter()
36695     // clang-format off  in EmitInstrWithCustomInserter()
36706     // clang-format on  in EmitInstrWithCustomInserter()
36710     addFullAddress(BuildMI(*BB, MI, MIMD, TII->get(Opc)), AM)  in EmitInstrWithCustomInserter()
36714     addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),  in EmitInstrWithCustomInserter()
36758     // - which is ESI for i686 - register allocator would not be able to  in EmitInstrWithCustomInserter()
36760     // - there never would be enough unreserved registers during regalloc  in EmitInstrWithCustomInserter()
36765     // If it is not i686 or there is no base pointer - nothing to do here.  in EmitInstrWithCustomInserter()
36766     if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))  in EmitInstrWithCustomInserter()
36773     assert(TRI->getBaseRegister() == X86::ESI &&  in EmitInstrWithCustomInserter()
36777     MachineRegisterInfo &MRI = MF->getRegInfo();  in EmitInstrWithCustomInserter()
36778     MVT SPTy = getPointerTy(MF->getDataLayout());  in EmitInstrWithCustomInserter()
36792     while (RMBBI != BB->rend() &&  in EmitInstrWithCustomInserter()
36793            (RMBBI->definesRegister(X86::EAX, /*TRI=*/nullptr) ||  in EmitInstrWithCustomInserter()
36794             RMBBI->definesRegister(X86::EBX, /*TRI=*/nullptr) ||  in EmitInstrWithCustomInserter()
36795             RMBBI->definesRegister(X86::ECX, /*TRI=*/nullptr) ||  in EmitInstrWithCustomInserter()
36796             RMBBI->definesRegister(X86::EDX, /*TRI=*/nullptr))) {  in EmitInstrWithCustomInserter()
36801         BuildMI(*BB, *MBBI, MIMD, TII->get(X86::LEA32r), computedAddrVReg), AM);  in EmitInstrWithCustomInserter()
36809     Register BasePtr = TRI->getBaseRegister();  in EmitInstrWithCustomInserter()
36810     if (TRI->hasBasePointer(*MF) &&  in EmitInstrWithCustomInserter()
36812       if (!BB->isLiveIn(BasePtr))  in EmitInstrWithCustomInserter()
36813         BB->addLiveIn(BasePtr);  in EmitInstrWithCustomInserter()
36816           MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);  in EmitInstrWithCustomInserter()
36817       BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), SaveRBX)  in EmitInstrWithCustomInserter()
36819       Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);  in EmitInstrWithCustomInserter()
36821           BuildMI(*BB, MI, MIMD, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);  in EmitInstrWithCustomInserter()
36828       BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::RBX)  in EmitInstrWithCustomInserter()
36831           BuildMI(*BB, MI, MIMD, TII->get(X86::LCMPXCHG16B));  in EmitInstrWithCustomInserter()
36840     Register BasePtr = TRI->getBaseRegister();  in EmitInstrWithCustomInserter()
36844     if (!IsRBX || !TRI->hasBasePointer(*MF)) {  in EmitInstrWithCustomInserter()
36845       BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::ECX)  in EmitInstrWithCustomInserter()
36847       BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EAX)  in EmitInstrWithCustomInserter()
36849       BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EBX)  in EmitInstrWithCustomInserter()
36851       BuildMI(*BB, MI, MIMD, TII->get(X86::MWAITXrrr));  in EmitInstrWithCustomInserter()
36854       if (!BB->isLiveIn(BasePtr)) {  in EmitInstrWithCustomInserter()
36855         BB->addLiveIn(BasePtr);  in EmitInstrWithCustomInserter()
36858       BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::ECX)  in EmitInstrWithCustomInserter()
36860       BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EAX)  in EmitInstrWithCustomInserter()
36862       assert(Subtarget.is64Bit() && "Expected 64-bit mode!");  in EmitInstrWithCustomInserter()
36865           MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);  in EmitInstrWithCustomInserter()
36866       BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), SaveRBX)  in EmitInstrWithCustomInserter()
36869       Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);  in EmitInstrWithCustomInserter()
36870       BuildMI(*BB, MI, MIMD, TII->get(X86::MWAITX_SAVE_RBX))  in EmitInstrWithCustomInserter()
36879     assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");  in EmitInstrWithCustomInserter()
36880     auto *MFI = MF->getInfo<X86MachineFunctionInfo>();  in EmitInstrWithCustomInserter()
36881     MFI->setHasPreallocatedCall(true);  in EmitInstrWithCustomInserter()
36883     size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);  in EmitInstrWithCustomInserter()
36887     BuildMI(*BB, MI, MIMD, TII->get(X86::SUB32ri), X86::ESP)  in EmitInstrWithCustomInserter()
36894     assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");  in EmitInstrWithCustomInserter()
36897     auto *MFI = MF->getInfo<X86MachineFunctionInfo>();  in EmitInstrWithCustomInserter()
36898     size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];  in EmitInstrWithCustomInserter()
36902     addRegOffset(BuildMI(*BB, MI, MIMD, TII->get(X86::LEA32r),  in EmitInstrWithCustomInserter()
36916     // clang-format off  in EmitInstrWithCustomInserter()
36924     // clang-format on  in EmitInstrWithCustomInserter()
36927     MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));  in EmitInstrWithCustomInserter()
36938     BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));  in EmitInstrWithCustomInserter()
36940     auto *MFI = MF->getInfo<X86MachineFunctionInfo>();  in EmitInstrWithCustomInserter()
36941     MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);  in EmitInstrWithCustomInserter()
36945     auto *MFI = MF->getInfo<X86MachineFunctionInfo>();  in EmitInstrWithCustomInserter()
36946     MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);  in EmitInstrWithCustomInserter()
36968     MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));  in EmitInstrWithCustomInserter()
36976     MIB.add(MI.getOperand(CurOp++)); // index -- stride  in EmitInstrWithCustomInserter()
36992     // clang-format off  in EmitInstrWithCustomInserter()
36996     // clang-format on  in EmitInstrWithCustomInserter()
36998     MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));  in EmitInstrWithCustomInserter()
37009 //===----------------------------------------------------------------------===//
37011 //===----------------------------------------------------------------------===//
37018   EVT VT = Op.getValueType();  in targetShrinkDemandedConstant()  local
37020   unsigned EltSize = VT.getScalarSizeInBits();  in targetShrinkDemandedConstant()
37022   if (VT.isVector()) {  in targetShrinkDemandedConstant()
37039     // For vectors - if we have a constant, then try to sign extend.  in targetShrinkDemandedConstant()
37042     if (EltSize > ActiveBits && EltSize > 1 && isTypeLegal(VT) &&  in targetShrinkDemandedConstant()
37047                                    VT.getVectorNumElements());  in targetShrinkDemandedConstant()
37049           TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(Op), VT,  in targetShrinkDemandedConstant()
37052           TLO.DAG.getNode(Opcode, SDLoc(Op), VT, Op.getOperand(0), NewC);  in targetShrinkDemandedConstant()
37068   const APInt &Mask = C->getAPIntValue();  in targetShrinkDemandedConstant()
37070   // Clear all non-demanded bits initially.  in targetShrinkDemandedConstant()
37094   // and non-demanded bits.  in targetShrinkDemandedConstant()
37100   SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT);  in targetShrinkDemandedConstant()
37101   SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);  in targetShrinkDemandedConstant()
37205   EVT VT = Op.getValueType();  in computeKnownBitsForTargetNode()  local
37246     if (ShAmt >= VT.getScalarSizeInBits()) {  in computeKnownBitsForTargetNode()
37254       ShAmt = VT.getScalarSizeInBits() - 1;  in computeKnownBitsForTargetNode()
37266       // High bits are known zero.  in computeKnownBitsForTargetNode()
37277     getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);  in computeKnownBitsForTargetNode()
37303     // PSHUFB is being used as a LUT (ctpop etc.) - the target shuffle handling  in computeKnownBitsForTargetNode()
37348     assert(VT.getScalarType() == MVT::i64 &&  in computeKnownBitsForTargetNode()
37375     assert(VT.getVectorElementType() == MVT::i32 &&  in computeKnownBitsForTargetNode()
37385     assert(VT.getVectorElementType() == MVT::i16 &&  in computeKnownBitsForTargetNode()
37419       unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);  in computeKnownBitsForTargetNode()
37420       unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);  in computeKnownBitsForTargetNode()
37442     // The result will have at least as many trailing zeros as the non-mask  in computeKnownBitsForTargetNode()
37474     // Truncations/Conversions - upper elements are known zero.  in computeKnownBitsForTargetNode()
37489     // Strict Conversions - upper elements are known zero.  in computeKnownBitsForTargetNode()
37538     switch (Op->getConstantOperandVal(0)) {  in computeKnownBitsForTargetNode()
37544       assert(VT.getScalarType() == MVT::i32 &&  in computeKnownBitsForTargetNode()
37556       assert(VT.getScalarType() == MVT::i16 &&  in computeKnownBitsForTargetNode()
37568       assert(VT.getScalarType() == MVT::i64 &&  in computeKnownBitsForTargetNode()
37581   // TODO - use resolveTargetShuffleInputs once we can limit recursive depth.  in computeKnownBitsForTargetNode()
37587       unsigned NumElts = VT.getVectorNumElements();  in computeKnownBitsForTargetNode()
37610           if (Ops[OpIdx].getValueType() != VT) {  in computeKnownBitsForTargetNode()
37611             // TODO - handle target shuffle ops with different value types.  in computeKnownBitsForTargetNode()
37633   EVT VT = Op.getValueType();  in ComputeNumSignBitsForTargetNode()  local
37634   unsigned VTBits = VT.getScalarSizeInBits();  in ComputeNumSignBitsForTargetNode()
37648     if (Tmp > (NumSrcBits - VTBits))  in ComputeNumSignBitsForTargetNode()
37649       return Tmp - (NumSrcBits - VTBits);  in ComputeNumSignBitsForTargetNode()
37661     auto NumSignBitsPACKSS = [&](SDValue V, const APInt &Elts) -> unsigned {  in ComputeNumSignBitsForTargetNode()
37684     if (Tmp > (SrcBits - VTBits))  in ComputeNumSignBitsForTargetNode()
37685       return Tmp - (SrcBits - VTBits);  in ComputeNumSignBitsForTargetNode()
37700       return VTBits; // Shifted all bits out --> zero.  in ComputeNumSignBitsForTargetNode()
37703       return 1; // Shifted all sign bits out --> unknown.  in ComputeNumSignBitsForTargetNode()
37704     return Tmp - ShiftVal.getZExtValue();  in ComputeNumSignBitsForTargetNode()
37710     if (ShiftVal.uge(VTBits - 1))  in ComputeNumSignBitsForTargetNode()
37718     // cmpss/cmpsd return zero/all-bits result values in the bottom element.  in ComputeNumSignBitsForTargetNode()
37719     if (VT == MVT::f32 || VT == MVT::f64 ||  in ComputeNumSignBitsForTargetNode()
37720         ((VT == MVT::v4f32 || VT == MVT::v2f64) && DemandedElts == 1))  in ComputeNumSignBitsForTargetNode()
37729     // Vector compares return zero/all-bits result values.  in ComputeNumSignBitsForTargetNode()
37750   // TODO - use resolveTargetShuffleInputs once we can limit recursive depth.  in ComputeNumSignBitsForTargetNode()
37756       unsigned NumElts = VT.getVectorNumElements();  in ComputeNumSignBitsForTargetNode()
37776           if (Ops[OpIdx].getValueType() != VT) {  in ComputeNumSignBitsForTargetNode()
37777             // TODO - handle target shuffle ops with different value types.  in ComputeNumSignBitsForTargetNode()
37800   if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP)  in unwrapAddress()
37801     return N->getOperand(0);  in unwrapAddress()
37806 // specified VT and memory VT. Returns SDValue() on failure.
37807 static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT,  in narrowLoadToVZLoad()  argument
37810   if (!LN->isSimple())  in narrowLoadToVZLoad()
37813   SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in narrowLoadToVZLoad()
37814   SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};  in narrowLoadToVZLoad()
37816                                  LN->getPointerInfo(), LN->getOriginalAlign(),  in narrowLoadToVZLoad()
37817                                  LN->getMemOperand()->getFlags());  in narrowLoadToVZLoad()
37831   // Match against a VZEXT_MOVL vXi32 and vXi16 zero-extending instruction.  in matchUnaryShuffle()
37834     if ((isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) ||  in matchUnaryShuffle()
37836          isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {  in matchUnaryShuffle()
37847   // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).  in matchUnaryShuffle()
37865         unsigned Len = Scale - 1;  in matchUnaryShuffle()
37891   // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).  in matchUnaryShuffle()
37895       isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {  in matchUnaryShuffle()
37926     assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");  in matchUnaryShuffle()
37948            "AVX512 required for 512-bit vector shuffles");  in matchUnaryShuffle()
37994       // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).  in matchUnaryPermuteShuffle()
38011       // VPERMILPD can permute with a non-repeating shuffle.  in matchUnaryPermuteShuffle()
38032       // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).  in matchUnaryPermuteShuffle()
38037           // Narrow the repeated mask to create 32-bit element permutes.  in matchUnaryPermuteShuffle()
38075               OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);  in matchUnaryPermuteShuffle()
38189     // Use (SSE41) PACKUSWD if the leading zerobits goto the lowest 16-bits.  in matchBinaryShuffle()
38198     // Use PACKUSBW if the leading zerobits goto the lowest 8-bits.  in matchBinaryShuffle()
38205     // Use PACKSSWD if the signbits extend to the lowest 16-bits.  in matchBinaryShuffle()
38231   // non-blended source element is zero in each case.  in matchBinaryShuffle()
38441           S0 = (SM_SentinelUndef == M0 ? -1 : 0);  in matchBinaryPermuteShuffle()
38442           S1 = (SM_SentinelUndef == M1 ? -1 : 1);  in matchBinaryPermuteShuffle()
38445           S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);  in matchBinaryPermuteShuffle()
38446           S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);  in matchBinaryPermuteShuffle()
38449           S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);  in matchBinaryPermuteShuffle()
38450           S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);  in matchBinaryPermuteShuffle()
38457       int ShufMask[4] = {-1, -1, -1, -1};  in matchBinaryPermuteShuffle()
38494 /// chain of single-use x86 shuffle instructions and accumulated the combined
38516   auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) {  in combineX86ShuffleChain()  argument
38517     if (VT.getSizeInBits() > Op.getValueSizeInBits())  in combineX86ShuffleChain()
38518       Op = widenSubVector(Op, false, Subtarget, DAG, DL, VT.getSizeInBits());  in combineX86ShuffleChain()
38519     else if (VT.getSizeInBits() < Op.getValueSizeInBits())  in combineX86ShuffleChain()
38520       Op = extractSubVector(Op, 0, DAG, DL, VT.getSizeInBits());  in combineX86ShuffleChain()
38521     return DAG.getBitcast(VT, Op);  in combineX86ShuffleChain()
38551   // is different from the root element size - this would prevent writemasks  in combineX86ShuffleChain()
38555     if (Root.hasOneUse() && Root->use_begin()->getOpcode() == ISD::VSELECT &&  in combineX86ShuffleChain()
38556         Root->use_begin()->getOperand(0).getScalarValueSizeInBits() == 1) {  in combineX86ShuffleChain()
38573   // See if the shuffle is a hidden identity shuffle - repeated args in HOPs  in combineX86ShuffleChain()
38588   // Handle 128/256-bit lane shuffles of 512-bit vectors.  in combineX86ShuffleChain()
38594     if (isUndefOrZeroInRange(Mask, 1, NumBaseMaskElts - 1)) {  in combineX86ShuffleChain()
38615       int PermMask[4] = {-1, -1, -1, -1};  in combineX86ShuffleChain()
38619         assert(ScaledMask[i] >= -1 && "Illegal shuffle sentinel value");  in combineX86ShuffleChain()
38660   // Handle 128-bit lane shuffles of 256-bit vectors.  in combineX86ShuffleChain()
38675     // If we're inserting the low subvector, an insert-subvector 'concat'  in combineX86ShuffleChain()
38708     // TODO - handle AVX512VL cases with X86ISD::SHUF128.  in combineX86ShuffleChain()
38727   // For masks that have been widened to 128-bit elements or more,  in combineX86ShuffleChain()
38728   // narrow back down to 64-bit elements.  in combineX86ShuffleChain()
38739   // TODO - variable shuffles might need this to be widened again.  in combineX86ShuffleChain()
38779     // Attempt to match against broadcast-from-vector.  in combineX86ShuffleChain()
38965   // Don't try to re-form single instruction chains under any circumstances now  in combineX86ShuffleChain()
38987     // If we have a single input lane-crossing shuffle then lower to VPERMV.  in combineX86ShuffleChain()
38996       // AVX512 variants (non-VLX will pad to 512-bit shuffles).  in combineX86ShuffleChain()
39011     // Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero  in combineX86ShuffleChain()
39012     // vector as the second source (non-VLX will pad to 512-bit shuffles).  in combineX86ShuffleChain()
39023       // Adjust shuffle mask - replace SM_SentinelZero with second source index.  in combineX86ShuffleChain()
39041     // If we have a dual input lane-crossing shuffle then lower to VPERMV3,  in combineX86ShuffleChain()
39042     // (non-VLX will pad to 512-bit shuffles).  in combineX86ShuffleChain()
39061   // See if we can combine a single input shuffle with zeros to a bit-mask,  in combineX86ShuffleChain()
39089   // the 128-bit lanes use the variable mask to VPERMILPS.  in combineX86ShuffleChain()
39106   // With XOP, binary shuffles of 128/256-bit floating point vectors can combine  in combineX86ShuffleChain()
39112     // Bits[3] - Match Bit.  in combineX86ShuffleChain()
39113     // Bits[2:1] - (Per Lane) PD Shuffle Mask.  in combineX86ShuffleChain()
39114     // Bits[2:0] - (Per Lane) PS Shuffle Mask.  in combineX86ShuffleChain()
39121         VPerm2Idx.push_back(-1);  in combineX86ShuffleChain()
39174   // With XOP, if we have a 128-bit binary input shuffle we can always combine  in combineX86ShuffleChain()
39175   // to VPPERM. We match the depth requirement of PSHUFB - VPPERM is never  in combineX86ShuffleChain()
39180     // Bits[4:0] - Byte Index (0 - 31)  in combineX86ShuffleChain()
39181     // Bits[7:5] - Permute Operation (0 - Source byte, 4 - ZERO)  in combineX86ShuffleChain()
39214   // (non-VLX will pad to 512-bit shuffles)  in combineX86ShuffleChain()
39244 // -->
39286   WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);  in combineX86ShuffleChainWithExtract()
39325   // elements, and shrink them to the half-width mask. It does this in a loop  in combineX86ShuffleChainWithExtract()
39413       // the HOP args are pre-shuffled.  in canonicalizeShuffleMaskWithHorizOp()
39425           if (Src1.getOpcode() == Opcode0 && Src0->isOnlyUserOf(Src1.getNode()))  in canonicalizeShuffleMaskWithHorizOp()
39439       // shuffle(hop(x,y),hop(z,w)) -> permute(hop(x,z)) etc.  in canonicalizeShuffleMaskWithHorizOp()
39459         int PostMask[4] = {-1, -1, -1, -1};  in canonicalizeShuffleMaskWithHorizOp()
39481   SDValue BC1 = BC[BC.size() - 1];  in canonicalizeShuffleMaskWithHorizOp()
39505           M -= NumElts + (SubLane * NumHalfEltsPerLane);  in canonicalizeShuffleMaskWithHorizOp()
39519         M -= NumHalfEltsPerLane;  in canonicalizeShuffleMaskWithHorizOp()
39522         M -= NumHalfEltsPerLane;  in canonicalizeShuffleMaskWithHorizOp()
39552   // If we are post-shuffling a 256-bit hop and not requiring the upper  in canonicalizeShuffleMaskWithHorizOp()
39553   // elements, then try to narrow to a 128-bit hop directly.  in canonicalizeShuffleMaskWithHorizOp()
39577 static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops,  in combineX86ShufflesConstants()  argument
39582   unsigned SizeInBits = VT.getSizeInBits();  in combineX86ShufflesConstants()
39602       llvm::none_of(Ops, [](SDValue SrcOp) { return SrcOp->hasOneUse(); }))  in combineX86ShufflesConstants()
39645     return getZeroVector(VT, Subtarget, DAG, DL);  in combineX86ShufflesConstants()
39649   if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))  in combineX86ShufflesConstants()
39659   return DAG.getBitcast(VT, CstOp);  in combineX86ShufflesConstants()
39674 /// of single-use shuffle instructions, build a generic model of the cumulative
39681 ///    special-purpose shuffle.
39697 /// combine-ordering. To fix this, we should do the redundant instruction
39722   EVT VT = Op.getValueType();  in combineX86ShufflesRecursively()  local
39723   if (!VT.isVector() || !VT.isSimple())  in combineX86ShufflesRecursively()
39724     return SDValue(); // Bail if we hit a non-simple non-vector.  in combineX86ShufflesRecursively()
39727   if (VT.getVectorElementType() == MVT::f16)  in combineX86ShufflesRecursively()
39730   assert((RootSizeInBits % VT.getSizeInBits()) == 0 &&  in combineX86ShufflesRecursively()
39738       OpDemandedElts.setBit(M - BaseIdx);  in combineX86ShufflesRecursively()
39740   if (RootSizeInBits != VT.getSizeInBits()) {  in combineX86ShufflesRecursively()
39741     // Op is smaller than Root - extract the demanded elts for the subvector.  in combineX86ShufflesRecursively()
39742     unsigned Scale = RootSizeInBits / VT.getSizeInBits();  in combineX86ShufflesRecursively()
39746                .extractBits(RootMask.size() - NumOpMaskElts, NumOpMaskElts)  in combineX86ShufflesRecursively()
39752       APIntOps::ScaleBitMask(OpDemandedElts, VT.getVectorNumElements());  in combineX86ShufflesRecursively()
39763     if (llvm::any_of(OpInputs, [VT](SDValue OpInput) {  in combineX86ShufflesRecursively()
39764           return OpInput.getValueSizeInBits() > VT.getSizeInBits();  in combineX86ShufflesRecursively()
39772     unsigned NumElts = VT.getVectorNumElements();  in combineX86ShufflesRecursively()
39783   if (RootSizeInBits > VT.getSizeInBits()) {  in combineX86ShufflesRecursively()
39784     unsigned NumSubVecs = RootSizeInBits / VT.getSizeInBits();  in combineX86ShufflesRecursively()
39798     OpMask.append((NumSubVecs - 1) * OpMaskSize, SM_SentinelUndef);  in combineX86ShufflesRecursively()
39834     auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int {  in combineX86ShufflesRecursively()
39840       // Match failed - should we replace an existing Op?  in combineX86ShufflesRecursively()
39847       return Ops.size() - 1;  in combineX86ShufflesRecursively()
39853           AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1));  in combineX86ShufflesRecursively()
39862     // This function can be performance-critical, so we rely on the power-of-2  in combineX86ShufflesRecursively()
39864     // bit-masks and shifts.  in combineX86ShufflesRecursively()
39866            "Non-power-of-2 shuffle mask sizes");  in combineX86ShufflesRecursively()
39868            "Non-power-of-2 shuffle mask sizes");  in combineX86ShufflesRecursively()
39879     assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes");  in combineX86ShufflesRecursively()
39880     assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes");  in combineX86ShufflesRecursively()
39881     assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes");  in combineX86ShufflesRecursively()
39902               : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1));  in combineX86ShufflesRecursively()
39912       RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1);  in combineX86ShufflesRecursively()
39921       // Ok, we have non-zero lanes, map them through to one of the Op's inputs.  in combineX86ShufflesRecursively()
39924                                                 (RootMaskedIdx & (OpRatio - 1));  in combineX86ShufflesRecursively()
39926       OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1);  in combineX86ShufflesRecursively()
39987   if (Ops.size() < (MaxDepth - Depth)) {  in combineX86ShufflesRecursively()
39996       if (Ops[i].getNode()->hasOneUse() ||  in combineX86ShufflesRecursively()
40014   // If constant fold failed and we only have constants - then we have  in combineX86ShufflesRecursively()
40015   // multiple uses by a single non-variable shuffle - just bail.  in combineX86ShufflesRecursively()
40047         int OpEltIdx = MaskElt - Lo;  in combineX86ShufflesRecursively()
40059                  NumExpectedVectorElts - NumOpVectorElts, NumOpVectorElts) &&  in combineX86ShufflesRecursively()
40064     // The Op itself may be of different VT, so we need to scale the mask.  in combineX86ShufflesRecursively()
40086     // Reresolve - we might have repeated subvector sources.  in combineX86ShufflesRecursively()
40094     // elements, and shrink them to the half-width mask. It does this in a loop  in combineX86ShufflesRecursively()
40146 /// Get the PSHUF-style mask from PSHUF node.
40149 /// PSHUF-style masks that can be reused with such instructions.
40151   MVT VT = N.getSimpleValueType();  in getPSHUFShuffleMask()  local
40158   // If we have more than 128-bits, only the low 128-bits of shuffle mask  in getPSHUFShuffleMask()
40160   if (VT.getSizeInBits() > 128) {  in getPSHUFShuffleMask()
40161     int LaneElts = 128 / VT.getScalarSizeInBits();  in getPSHUFShuffleMask()
40163     for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)  in getPSHUFShuffleMask()
40165         assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) &&  in getPSHUFShuffleMask()
40166                "Mask doesn't repeat in high 128-bit lanes!");  in getPSHUFShuffleMask()
40180       M -= 4;  in getPSHUFShuffleMask()
40197          "Called with something other than an x86 128-bit half shuffle!");  in combineRedundantDWordShuffle()
40199   // Walk up a single-use chain looking for a combinable shuffle. Keep a stack  in combineRedundantDWordShuffle()
40220       // dword shuffle, and the high words are self-contained.  in combineRedundantDWordShuffle()
40229       // Check that the high words (being shuffled) are the identity in the  in combineRedundantDWordShuffle()
40230       // dword shuffle, and the low words are self-contained.  in combineRedundantDWordShuffle()
40240       // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword  in combineRedundantDWordShuffle()
40246       // Search for a half-shuffle which we can combine with.  in combineRedundantDWordShuffle()
40250           !V->isOnlyUserOf(V.getOperand(0).getNode()))  in combineRedundantDWordShuffle()
40321 // permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
40322 static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,  in combineCommutableSHUFP()  argument
40325   if (VT != MVT::v4f32 && VT != MVT::v8f32 && VT != MVT::v16f32)  in combineCommutableSHUFP()
40328   // SHUFP(LHS, RHS) -> SHUFP(RHS, LHS) iff LHS is foldable + RHS is not.  in combineCommutableSHUFP()
40329   auto commuteSHUFP = [&VT, &DL, &DAG](SDValue Parent, SDValue V) {  in combineCommutableSHUFP()
40330     if (V.getOpcode() != X86ISD::SHUFP || !Parent->isOnlyUserOf(V.getNode()))  in combineCommutableSHUFP()
40340     return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0,  in combineCommutableSHUFP()
40348       return DAG.getNode(X86ISD::VPERMILPI, DL, VT, NewSHUFP,  in combineCommutableSHUFP()
40358         return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, NewSHUFP,  in combineCommutableSHUFP()
40361       return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, N1,  in combineCommutableSHUFP()
40364       return DAG.getNode(X86ISD::SHUFP, DL, VT, N0, NewSHUFP,  in combineCommutableSHUFP()
40374 // Attempt to fold BLEND(PERMUTE(X),PERMUTE(Y)) -> PERMUTE(BLEND(X,Y))
40377 combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1, ArrayRef<int> BlendMask,  in combineBlendOfPermutes()  argument
40384   unsigned NumElts = VT.getVectorNumElements();  in combineBlendOfPermutes()
40444   // the blend mask is the same in the 128-bit subvectors (or can widen to  in combineBlendOfPermutes()
40446   if (VT == MVT::v16i16) {  in combineBlendOfPermutes()
40447     if (!is128BitLaneRepeatedShuffleMask(VT, NewBlendMask) &&  in combineBlendOfPermutes()
40454   // Don't introduce lane-crossing permutes without AVX2, unless it can be  in combineBlendOfPermutes()
40456   if (VT.is256BitVector() && !Subtarget.hasAVX2() &&  in combineBlendOfPermutes()
40457       isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(),  in combineBlendOfPermutes()
40463       DAG.getVectorShuffle(VT, DL, DAG.getBitcast(VT, Ops0[0]),  in combineBlendOfPermutes()
40464                            DAG.getBitcast(VT, Ops1[0]), NewBlendMask);  in combineBlendOfPermutes()
40465   return DAG.getVectorShuffle(VT, DL, NewBlend, DAG.getUNDEF(VT),  in combineBlendOfPermutes()
40469 // TODO - move this to TLI like isBinOp?
40480 // Canonicalize SHUFFLE(UNARYOP(X)) -> UNARYOP(SHUFFLE(X)).
40481 // Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
40499            (Op.getOpcode() == Opc && Op->hasOneUse()) ||  in canonicalizeShuffleWithOp()
40500            (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op->hasOneUse()) ||  in canonicalizeShuffleWithOp()
40501            (FoldShuf && isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||  in canonicalizeShuffleWithOp()
40530         N->isOnlyUserOf(N.getOperand(0).getNode())) {  in canonicalizeShuffleWithOp()
40575     if (N->isOnlyUserOf(N.getOperand(0).getNode()) &&  in canonicalizeShuffleWithOp()
40576         N->isOnlyUserOf(N.getOperand(1).getNode())) {  in canonicalizeShuffleWithOp()
40639 /// Attempt to fold vpermf128(op(),op()) -> op(vpermf128(),vpermf128()).
40645   MVT VT = V.getSimpleValueType();  in canonicalizeLaneShuffleWithRepeatedOps()  local
40663     return DAG.getBitcast(VT, Res);  in canonicalizeLaneShuffleWithRepeatedOps()
40666     // TODO: Handle v4f64 permutes with different low/high lane masks.  in canonicalizeLaneShuffleWithRepeatedOps()
40683       return DAG.getBitcast(VT, Res);  in canonicalizeLaneShuffleWithRepeatedOps()
40696   MVT VT = N.getSimpleValueType();  in combineTargetShuffle()  local
40701   if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG))  in combineTargetShuffle()
40708     // Turn a 128-bit MOVDDUP of a full vector load into movddup+vzload.  in combineTargetShuffle()
40709     if (VT == MVT::v2f64 && Src.hasOneUse() &&  in combineTargetShuffle()
40730     // TODO - we really need a general SimplifyDemandedVectorElts mechanism.  in combineTargetShuffle()
40732         VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) {  in combineTargetShuffle()
40733       unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits();  in combineTargetShuffle()
40743         return DAG.getNode(X86ISD::VBROADCAST, DL, VT,  in combineTargetShuffle()
40747     // broadcast(bitcast(src)) -> bitcast(broadcast(src))  in combineTargetShuffle()
40748     // 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.  in combineTargetShuffle()
40755                                    VT.getVectorNumElements());  in combineTargetShuffle()
40756       return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));  in combineTargetShuffle()
40759     // vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(src))  in combineTargetShuffle()
40760     // If we're re-broadcasting a smaller type then broadcast with that type and  in combineTargetShuffle()
40766         (VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits()) == 0 &&  in combineTargetShuffle()
40767         (VT.getSizeInBits() % BCVT.getSizeInBits()) == 0) {  in combineTargetShuffle()
40770                            VT.getSizeInBits() / BCVT.getScalarSizeInBits());  in combineTargetShuffle()
40771       return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));  in combineTargetShuffle()
40774     // Reduce broadcast source vector to lowest 128-bits.  in combineTargetShuffle()
40776       return DAG.getNode(X86ISD::VBROADCAST, DL, VT,  in combineTargetShuffle()
40779     // broadcast(scalar_to_vector(x)) -> broadcast(x).  in combineTargetShuffle()
40782       return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));  in combineTargetShuffle()
40784     // broadcast(extract_vector_elt(x, 0)) -> broadcast(x).  in combineTargetShuffle()
40790       return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));  in combineTargetShuffle()
40794     for (SDNode *User : Src->uses())  in combineTargetShuffle()
40795       if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&  in combineTargetShuffle()
40796           Src == User->getOperand(0) &&  in combineTargetShuffle()
40797           User->getValueSizeInBits(0).getFixedValue() >  in combineTargetShuffle()
40798               VT.getFixedSizeInBits()) {  in combineTargetShuffle()
40800                                 VT.getSizeInBits());  in combineTargetShuffle()
40803     // vbroadcast(scalarload X) -> vbroadcast_load X  in combineTargetShuffle()
40805     if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) &&  in combineTargetShuffle()
40808       SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in combineTargetShuffle()
40809       SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };  in combineTargetShuffle()
40812                                   LN->getMemoryVT(), LN->getMemOperand());  in combineTargetShuffle()
40839         if (LN->isSimple()) {  in combineTargetShuffle()
40840           SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in combineTargetShuffle()
40841           SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };  in combineTargetShuffle()
40844               LN->getPointerInfo(), LN->getOriginalAlign(),  in combineTargetShuffle()
40845               LN->getMemOperand()->getFlags());  in combineTargetShuffle()
40857         if (LN->getMemoryVT().getSizeInBits() == 16) {  in combineTargetShuffle()
40858           SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in combineTargetShuffle()
40859           SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };  in combineTargetShuffle()
40862                                       LN->getMemoryVT(), LN->getMemOperand());  in combineTargetShuffle()
40881             LN->isSimple()) {  in combineTargetShuffle()
40883           SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in combineTargetShuffle()
40885               LN->getBasePtr(), TypeSize::getFixed(Offset), DL);  in combineTargetShuffle()
40886           SDValue Ops[] = { LN->getChain(), Ptr };  in combineTargetShuffle()
40889               LN->getPointerInfo().getWithOffset(Offset),  in combineTargetShuffle()
40890               LN->getOriginalAlign(),  in combineTargetShuffle()
40891               LN->getMemOperand()->getFlags());  in combineTargetShuffle()
40900     // vbroadcast(vzload X) -> vbroadcast_load X  in combineTargetShuffle()
40903       if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) {  in combineTargetShuffle()
40904         SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in combineTargetShuffle()
40905         SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };  in combineTargetShuffle()
40908                                     LN->getMemoryVT(), LN->getMemOperand());  in combineTargetShuffle()
40916     // vbroadcast(vector load X) -> vbroadcast_load  in combineTargetShuffle()
40922       if (LN->isSimple()) {  in combineTargetShuffle()
40923         SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in combineTargetShuffle()
40924         SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};  in combineTargetShuffle()
40927             LN->getPointerInfo(), LN->getOriginalAlign(),  in combineTargetShuffle()
40928             LN->getMemOperand()->getFlags());  in combineTargetShuffle()
40946               narrowLoadToVZLoad(LN, VT.getVectorElementType(), VT, DAG)) {  in combineTargetShuffle()
40959       if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) {  in combineTargetShuffle()
40960         SDVTList Tys = DAG.getVTList(VT, MVT::Other);  in combineTargetShuffle()
40961         SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};  in combineTargetShuffle()
40964                                     LN->getMemoryVT(), LN->getMemOperand());  in combineTargetShuffle()
40982         MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);  in combineTargetShuffle()
40985         return DAG.getBitcast(VT, Movl);  in combineTargetShuffle()
40991     // vzext_movl (scalar_to_vector C) --> load [C,0...]  in combineTargetShuffle()
40994         // Create a vector constant - scalar constant followed by zeros.  in combineTargetShuffle()
40997         unsigned NumElts = VT.getVectorNumElements();  in combineTargetShuffle()
41000         ConstantVec[0] = const_cast<ConstantInt *>(C->getConstantIntValue());  in combineTargetShuffle()
41007         Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();  in combineTargetShuffle()
41008         return DAG.getLoad(VT, DL, DAG.getEntryNode(), CP, MPI, Alignment,  in combineTargetShuffle()
41016     // 128-bit scalar_to_vector. This reduces the number of isel patterns.  in combineTargetShuffle()
41023         MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),  in combineTargetShuffle()
41025                                          VT.getScalarSizeInBits());  in combineTargetShuffle()
41028         return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,  in combineTargetShuffle()
41029                            getZeroVector(VT, Subtarget, DAG, DL), Movl,  in combineTargetShuffle()
41039     unsigned EltBits = VT.getScalarSizeInBits();  in combineTargetShuffle()
41042       // blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) to narrower types.  in combineTargetShuffle()
41048           unsigned Size = VT.getVectorNumElements();  in combineTargetShuffle()
41053               VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),  in combineTargetShuffle()
41061       // --> m3 = blend(m1,m2)  in combineTargetShuffle()
41087             return DAG.getNode(X86ISD::BLENDI, DL, VT,  in combineTargetShuffle()
41088                                DAG.getBitcast(VT, NewLHS),  in combineTargetShuffle()
41089                                DAG.getBitcast(VT, NewRHS), N.getOperand(2));  in combineTargetShuffle()
41097     // Fold shufps(shuffle(x),shuffle(y)) -> shufps(x,y).  in combineTargetShuffle()
41100     if (VT == MVT::v4f32) {  in combineTargetShuffle()
41116             Ops[i] = DAG.getBitcast(VT, SubOps[0]);  in combineTargetShuffle()
41125         return DAG.getNode(X86ISD::SHUFP, DL, VT, Ops);  in combineTargetShuffle()
41131     // vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same number of elements.  in combineTargetShuffle()
41135     unsigned EltSizeInBits = VT.getScalarSizeInBits();  in combineTargetShuffle()
41141       return DAG.getBitcast(VT, Res);  in combineTargetShuffle()
41146     // If we're permuting the upper 256-bits subvectors of a concatenation, then  in combineTargetShuffle()
41148     if (VT.is512BitVector()) {  in combineTargetShuffle()
41149       // 512-bit mask uses 4 x i2 indices - if the msb is always set then only the  in combineTargetShuffle()
41151       SDValue LHS = N->getOperand(0);  in combineTargetShuffle()
41152       SDValue RHS = N->getOperand(1);  in combineTargetShuffle()
41153       uint64_t Mask = N->getConstantOperandVal(2);  in combineTargetShuffle()
41167         return DAG.getNode(X86ISD::SHUF128, DL, VT, NewLHS ? NewLHS : LHS,  in combineTargetShuffle()
41174     // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)).  in combineTargetShuffle()
41175     SDValue LHS = N->getOperand(0);  in combineTargetShuffle()
41176     SDValue RHS = N->getOperand(1);  in combineTargetShuffle()
41181         return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT,  in combineTargetShuffle()
41184                                               N->getOperand(2)));  in combineTargetShuffle()
41188     // Fold vperm2x128(op(),op()) -> op(vperm2x128(),vperm2x128()).  in combineTargetShuffle()
41193     // vperm2x128(concat(X,Y),concat(Z,W)) --> concat X,Y etc.  in combineTargetShuffle()
41212         MVT SubVT = VT.getHalfNumVectorElementsVT();  in combineTargetShuffle()
41215         return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi);  in combineTargetShuffle()
41225     if (N0->hasOneUse()) {  in combineTargetShuffle()
41237         if (InnerVT.getScalarSizeInBits() <= VT.getScalarSizeInBits()) {  in combineTargetShuffle()
41238           SDValue Res = DAG.getNode(Opcode, DL, VT,  in combineTargetShuffle()
41239                                     DAG.getBitcast(VT, V.getOperand(0)), N1);  in combineTargetShuffle()
41242           return DAG.getBitcast(VT, Res);  in combineTargetShuffle()
41260     // MOVS*(N0, OP(N0, N1)) --> MOVS*(N0, SCALAR_TO_VECTOR(OP(N0[0], N1[0])))  in combineTargetShuffle()
41271         MVT SVT = VT.getVectorElementType();  in combineTargetShuffle()
41276         SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);  in combineTargetShuffle()
41277         return DAG.getNode(Opcode, DL, VT, N0, SclVec);  in combineTargetShuffle()
41284     assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32");  in combineTargetShuffle()
41294       return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,  in combineTargetShuffle()
41299       return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),  in combineTargetShuffle()
41309         // Zero/UNDEF insertion - zero out element and remove dependency.  in combineTargetShuffle()
41311         return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),  in combineTargetShuffle()
41319       return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,  in combineTargetShuffle()
41366         return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,  in combineTargetShuffle()
41375       if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) {  in combineTargetShuffle()
41376         SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(),  in combineTargetShuffle()
41377                                    MemIntr->getBasePtr(),  in combineTargetShuffle()
41378                                    MemIntr->getMemOperand());  in combineTargetShuffle()
41379         SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,  in combineTargetShuffle()
41380                            DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,  in combineTargetShuffle()
41405             VT.getScalarType(), NVT.getSizeInBits() / VT.getScalarSizeInBits());  in combineTargetShuffle()
41410         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm,  in combineTargetShuffle()
41420   // Nuke no-op shuffles that show up after combining.  in combineTargetShuffle()
41431     assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");  in combineTargetShuffle()
41435     // dwords as otherwise it would have been removed as a no-op.  in combineTargetShuffle()
41441       MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);  in combineTargetShuffle()
41445       return DAG.getBitcast(VT, V);  in combineTargetShuffle()
41450     // only works when we have a PSHUFD followed by two half-shuffles.  in combineTargetShuffle()
41474           V = DAG.getBitcast(VT, D.getOperand(0));  in combineTargetShuffle()
41477                              DL, VT, V, V);  in combineTargetShuffle()
41499   int ParitySrc[2] = {-1, -1};  in isAddSubOrSubAddMask()
41538   EVT VT = N->getValueType(0);  in isAddSubOrSubAdd()  local
41540   if (!Subtarget.hasSSE3() || !TLI.isTypeLegal(VT) ||  in isAddSubOrSubAdd()
41541       !VT.getSimpleVT().isFloatingPoint())  in isAddSubOrSubAdd()
41544   // We only handle target-independent shuffles.  in isAddSubOrSubAdd()
41547   if (N->getOpcode() != ISD::VECTOR_SHUFFLE)  in isAddSubOrSubAdd()
41550   SDValue V1 = N->getOperand(0);  in isAddSubOrSubAdd()
41551   SDValue V2 = N->getOperand(1);  in isAddSubOrSubAdd()
41560   if (!V1->hasOneUse() || !V2->hasOneUse())  in isAddSubOrSubAdd()
41567     LHS = V1->getOperand(0); RHS = V1->getOperand(1);  in isAddSubOrSubAdd()
41568     if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&  in isAddSubOrSubAdd()
41569         (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))  in isAddSubOrSubAdd()
41573     LHS = V2->getOperand(0); RHS = V2->getOperand(1);  in isAddSubOrSubAdd()
41574     if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) &&  in isAddSubOrSubAdd()
41575         (V1->getOperand(0) != RHS || V1->getOperand(1) != LHS))  in isAddSubOrSubAdd()
41579   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();  in isAddSubOrSubAdd()
41585   IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD  in isAddSubOrSubAdd()
41586                      : V2->getOpcode() == ISD::FADD;  in isAddSubOrSubAdd()
41597   // We only handle target-independent shuffles.  in combineShuffleToFMAddSub()
41600   if (N->getOpcode() != ISD::VECTOR_SHUFFLE)  in combineShuffleToFMAddSub()
41603   MVT VT = N->getSimpleValueType(0);  in combineShuffleToFMAddSub()  local
41605   if (!Subtarget.hasAnyFMA() || !TLI.isTypeLegal(VT))  in combineShuffleToFMAddSub()
41609   SDValue Op0 = N->getOperand(0);  in combineShuffleToFMAddSub()
41610   SDValue Op1 = N->getOperand(1);  in combineShuffleToFMAddSub()
41622   ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();  in combineShuffleToFMAddSub()
41630   return DAG.getNode(Opcode, DL, VT, FMAdd.getOperand(0), FMAdd.getOperand(1),  in combineShuffleToFMAddSub()
41634 /// Try to combine a shuffle into a target-specific add-sub or
41635 /// mul-add-sub node.
41647   MVT VT = N->getSimpleValueType(0);  in combineShuffleToAddSubOrFMAddSub()  local
41653     return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);  in combineShuffleToAddSubOrFMAddSub()
41659   // Do not generate X86ISD::ADDSUB node for 512-bit types even though  in combineShuffleToAddSubOrFMAddSub()
41661   // X86 targets with 512-bit ADDSUB instructions!  in combineShuffleToAddSubOrFMAddSub()
41662   if (VT.is512BitVector())  in combineShuffleToAddSubOrFMAddSub()
41668   if (VT.getVectorElementType() == MVT::f16)  in combineShuffleToAddSubOrFMAddSub()
41671   return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);  in combineShuffleToAddSubOrFMAddSub()
41676 // if we can express this as a single-source shuffle, that's preferable.
41683   EVT VT = N->getValueType(0);  in combineShuffleOfConcatUndef()  local
41685   // We only care about shuffles of 128/256-bit vectors of 32/64-bit values.  in combineShuffleOfConcatUndef()
41686   if (!VT.is128BitVector() && !VT.is256BitVector())  in combineShuffleOfConcatUndef()
41689   if (VT.getVectorElementType() != MVT::i32 &&  in combineShuffleOfConcatUndef()
41690       VT.getVectorElementType() != MVT::i64 &&  in combineShuffleOfConcatUndef()
41691       VT.getVectorElementType() != MVT::f32 &&  in combineShuffleOfConcatUndef()
41692       VT.getVectorElementType() != MVT::f64)  in combineShuffleOfConcatUndef()
41695   SDValue N0 = N->getOperand(0);  in combineShuffleOfConcatUndef()
41696   SDValue N1 = N->getOperand(1);  in combineShuffleOfConcatUndef()
41708   int NumElts = VT.getVectorNumElements();  in combineShuffleOfConcatUndef()
41711   for (int Elt : SVOp->getMask())  in combineShuffleOfConcatUndef()
41712     Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2));  in combineShuffleOfConcatUndef()
41714   SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0),  in combineShuffleOfConcatUndef()
41716   return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask);  in combineShuffleOfConcatUndef()
41720 /// low half of each source vector and does not set any high half elements in
41723   EVT VT = Shuf->getValueType(0);  in narrowShuffle()  local
41724   if (!DAG.getTargetLoweringInfo().isTypeLegal(Shuf->getValueType(0)))  in narrowShuffle()
41726   if (!VT.is256BitVector() && !VT.is512BitVector())  in narrowShuffle()
41729   // See if we can ignore all of the high elements of the shuffle.  in narrowShuffle()
41730   ArrayRef<int> Mask = Shuf->getMask();  in narrowShuffle()
41735   // (half-index output is 0 or 2).  in narrowShuffle()
41742   // Create a half-width shuffle to replace the unnecessarily wide shuffle.  in narrowShuffle()
41744   // subregister (zmm<->ymm or ymm<->xmm) ops. That leaves us with a shuffle  in narrowShuffle()
41747   return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),  in narrowShuffle()
41748                                Shuf->getOperand(1), HalfMask, HalfIdx1,  in narrowShuffle()
41762   EVT VT = N->getValueType(0);  in combineShuffle()  local
41764   if (TLI.isTypeLegal(VT) && !isSoftF16(VT, Subtarget))  in combineShuffle()
41771           VT, SDValue(N, 0), dl, DAG, Subtarget, /*IsAfterLegalize*/ true))  in combineShuffle()
41783   if (isTargetShuffle(N->getOpcode())) {  in combineShuffle()
41789     // instructions into higher-order shuffles. We do this after combining  in combineShuffle()
41797     // TODO - merge this into combineX86ShufflesRecursively.  in combineShuffle()
41798     APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());  in combineShuffle()
41802     // Canonicalize SHUFFLE(UNARYOP(X)) -> UNARYOP(SHUFFLE(X)).  in combineShuffle()
41803     // Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).  in combineShuffle()
41838   if (!Load || !Load->getBasePtr().hasOneUse())  in SimplifyDemandedVectorEltsForTargetShuffle()
41845   Type *CTy = C->getType();  in SimplifyDemandedVectorEltsForTargetShuffle()
41846   if (!CTy->isVectorTy() ||  in SimplifyDemandedVectorEltsForTargetShuffle()
41847       CTy->getPrimitiveSizeInBits() != Mask.getValueSizeInBits())  in SimplifyDemandedVectorEltsForTargetShuffle()
41850   // Handle scaling for i64 elements on 32-bit targets.  in SimplifyDemandedVectorEltsForTargetShuffle()
41851   unsigned NumCstElts = cast<FixedVectorType>(CTy)->getNumElements();  in SimplifyDemandedVectorEltsForTargetShuffle()
41860     Constant *Elt = C->getAggregateElement(i);  in SimplifyDemandedVectorEltsForTargetShuffle()
41862       ConstVecOps.push_back(UndefValue::get(Elt->getType()));  in SimplifyDemandedVectorEltsForTargetShuffle()
41878       Load->getAlign());  in SimplifyDemandedVectorEltsForTargetShuffle()
41887   EVT VT = Op.getValueType();  in SimplifyDemandedVectorEltsForTargetNode()  local
41938     assert(VT.getScalarType() == MVT::i64 &&  in SimplifyDemandedVectorEltsForTargetNode()
41955             Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS));  in SimplifyDemandedVectorEltsForTargetNode()
41963     // We only need the bottom 64-bits of the (128-bit) shift amount.  in SimplifyDemandedVectorEltsForTargetNode()
41969     // only the bottom 64-bits are only ever used.  in SimplifyDemandedVectorEltsForTargetNode()
41970     bool AssumeSingleUse = llvm::all_of(Amt->uses(), [&Amt](SDNode *Use) {  in SimplifyDemandedVectorEltsForTargetNode()
41971       unsigned UseOpc = Use->getOpcode();  in SimplifyDemandedVectorEltsForTargetNode()
41974              Use->getOperand(0) != Amt;  in SimplifyDemandedVectorEltsForTargetNode()
41994     // Fold shift(0,x) -> 0  in SimplifyDemandedVectorEltsForTargetNode()
41997           Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));  in SimplifyDemandedVectorEltsForTargetNode()
42004             Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1)));  in SimplifyDemandedVectorEltsForTargetNode()
42020     // Fold shift(0,x) -> 0  in SimplifyDemandedVectorEltsForTargetNode()
42023           Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));  in SimplifyDemandedVectorEltsForTargetNode()
42049     assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");  in SimplifyDemandedVectorEltsForTargetNode()
42050     unsigned ShiftAmt = Amt->getZExtValue();  in SimplifyDemandedVectorEltsForTargetNode()
42062         int Diff = ShiftAmt - C1;  in SimplifyDemandedVectorEltsForTargetNode()
42064           Diff = -Diff;  in SimplifyDemandedVectorEltsForTargetNode()
42071             Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));  in SimplifyDemandedVectorEltsForTargetNode()
42088     assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");  in SimplifyDemandedVectorEltsForTargetNode()
42089     unsigned ShiftAmt = Amt->getZExtValue();  in SimplifyDemandedVectorEltsForTargetNode()
42101         int Diff = ShiftAmt - C1;  in SimplifyDemandedVectorEltsForTargetNode()
42103           Diff = -Diff;  in SimplifyDemandedVectorEltsForTargetNode()
42110             Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA));  in SimplifyDemandedVectorEltsForTargetNode()
42132       int NumElts = VT.getVectorNumElements();  in SimplifyDemandedVectorEltsForTargetNode()
42133       int EltSizeInBits = VT.getScalarSizeInBits();  in SimplifyDemandedVectorEltsForTargetNode()
42144             // We can't assume an undef src element gives an undef dst - the  in SimplifyDemandedVectorEltsForTargetNode()
42180             Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS));  in SimplifyDemandedVectorEltsForTargetNode()
42204     getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);  in SimplifyDemandedVectorEltsForTargetNode()
42215     // TODO - pass on known zero/undef.  in SimplifyDemandedVectorEltsForTargetNode()
42218     // TODO - we should do this for all target/faux shuffles ops.  in SimplifyDemandedVectorEltsForTargetNode()
42228                              TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1));  in SimplifyDemandedVectorEltsForTargetNode()
42241     getHorizDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);  in SimplifyDemandedVectorEltsForTargetNode()
42252     // TODO - pass on known zero/undef.  in SimplifyDemandedVectorEltsForTargetNode()
42265                              TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1));  in SimplifyDemandedVectorEltsForTargetNode()
42288             VT.getSimpleVT(), Op.getOperand(0), Op.getOperand(1), BlendMask,  in SimplifyDemandedVectorEltsForTargetNode()
42326     MVT SVT = VT.getSimpleVT().getVectorElementType();  in SimplifyDemandedVectorEltsForTargetNode()
42330       SDValue Elt = TLO.DAG.getLoad(SVT, DL, Mem->getChain(), Mem->getBasePtr(),  in SimplifyDemandedVectorEltsForTargetNode()
42331                                     Mem->getMemOperand());  in SimplifyDemandedVectorEltsForTargetNode()
42332       SDValue Vec = TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Elt);  in SimplifyDemandedVectorEltsForTargetNode()
42333       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Vec));  in SimplifyDemandedVectorEltsForTargetNode()
42344       if (Src.getValueType() != VT)  in SimplifyDemandedVectorEltsForTargetNode()
42345         Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG,  in SimplifyDemandedVectorEltsForTargetNode()
42355     // TODO - we should do this for all target/faux shuffles ops.  in SimplifyDemandedVectorEltsForTargetNode()
42358       return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));  in SimplifyDemandedVectorEltsForTargetNode()
42381   // For 256/512-bit ops that are 128/256-bit ops glued together, if we do not  in SimplifyDemandedVectorEltsForTargetNode()
42382   // demand any of the high elements, then narrow the op to 128/256-bits: e.g.  in SimplifyDemandedVectorEltsForTargetNode()
42383   // (op ymm0, ymm1) --> insert undef, (op xmm0, xmm1), 0  in SimplifyDemandedVectorEltsForTargetNode()
42384   if ((VT.is256BitVector() || VT.is512BitVector()) &&  in SimplifyDemandedVectorEltsForTargetNode()
42386     unsigned SizeInBits = VT.getSizeInBits();  in SimplifyDemandedVectorEltsForTargetNode()
42389     // See if 512-bit ops only use the bottom 128-bits.  in SimplifyDemandedVectorEltsForTargetNode()
42390     if (VT.is512BitVector() && DemandedElts.lshr(NumElts / 4) == 0)  in SimplifyDemandedVectorEltsForTargetNode()
42400       EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),  in SimplifyDemandedVectorEltsForTargetNode()
42401                                     ExtSizeInBits / VT.getScalarSizeInBits());  in SimplifyDemandedVectorEltsForTargetNode()
42403       return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,  in SimplifyDemandedVectorEltsForTargetNode()
42409       EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),  in SimplifyDemandedVectorEltsForTargetNode()
42410                                     ExtSizeInBits / VT.getScalarSizeInBits());  in SimplifyDemandedVectorEltsForTargetNode()
42412       SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};  in SimplifyDemandedVectorEltsForTargetNode()
42414           X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MemIntr->getMemoryVT(),  in SimplifyDemandedVectorEltsForTargetNode()
42415           MemIntr->getMemOperand());  in SimplifyDemandedVectorEltsForTargetNode()
42418       return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,  in SimplifyDemandedVectorEltsForTargetNode()
42424       EVT MemVT = MemIntr->getMemoryVT();  in SimplifyDemandedVectorEltsForTargetNode()
42428             TLO.DAG.getLoad(MemVT, DL, MemIntr->getChain(),  in SimplifyDemandedVectorEltsForTargetNode()
42429                             MemIntr->getBasePtr(), MemIntr->getMemOperand());  in SimplifyDemandedVectorEltsForTargetNode()
42432         return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Ld, 0,  in SimplifyDemandedVectorEltsForTargetNode()
42436         EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),  in SimplifyDemandedVectorEltsForTargetNode()
42437                                       ExtSizeInBits / VT.getScalarSizeInBits());  in SimplifyDemandedVectorEltsForTargetNode()
42441                                insertSubVector(TLO.DAG.getUNDEF(VT), BcstLd, 0,  in SimplifyDemandedVectorEltsForTargetNode()
42462       SDValue UndefVec = TLO.DAG.getUNDEF(VT);  in SimplifyDemandedVectorEltsForTargetNode()
42470       if (VT == MVT::v4f64 || VT == MVT::v4i64) {  in SimplifyDemandedVectorEltsForTargetNode()
42476           SDValue UndefVec = TLO.DAG.getUNDEF(VT);  in SimplifyDemandedVectorEltsForTargetNode()
42489             Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, DL));  in SimplifyDemandedVectorEltsForTargetNode()
42494       SDValue UndefVec = TLO.DAG.getUNDEF(VT);  in SimplifyDemandedVectorEltsForTargetNode()
42506       // (Non-Lane Crossing) Target Shuffles.  in SimplifyDemandedVectorEltsForTargetNode()
42544       MVT ExtVT = VT.getSimpleVT();  in SimplifyDemandedVectorEltsForTargetNode()
42548       SDValue UndefVec = TLO.DAG.getUNDEF(VT);  in SimplifyDemandedVectorEltsForTargetNode()
42572       llvm::any_of(OpInputs, [VT](SDValue V) {  in SimplifyDemandedVectorEltsForTargetNode()
42573         return VT.getSizeInBits() != V.getValueSizeInBits() ||  in SimplifyDemandedVectorEltsForTargetNode()
42589     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));  in SimplifyDemandedVectorEltsForTargetNode()
42594         Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));  in SimplifyDemandedVectorEltsForTargetNode()
42598       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, OpInputs[Src]));  in SimplifyDemandedVectorEltsForTargetNode()
42603     if (OpInputs[Src].getValueType() != VT)  in SimplifyDemandedVectorEltsForTargetNode()
42610         int M = OpMask[i] - Lo;  in SimplifyDemandedVectorEltsForTargetNode()
42615     // TODO - Propagate input undef/zero elts.  in SimplifyDemandedVectorEltsForTargetNode()
42625   // can handle - so pretend its Depth == 0 again, and reduce the max depth  in SimplifyDemandedVectorEltsForTargetNode()
42638         {Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth,  in SimplifyDemandedVectorEltsForTargetNode()
42653   EVT VT = Op.getValueType();  in SimplifyDemandedBitsForTargetNode()  local
42676     // Don't mask bits on 32-bit AVX512 targets which might lose a broadcast.  in SimplifyDemandedBitsForTargetNode()
42695     // PMULUDQ(X,1) -> AND(X,(1<<32)-1) 'getZeroExtendInReg'.  in SimplifyDemandedBitsForTargetNode()
42700       SDValue Mask = TLO.DAG.getConstant(DemandedMask, DL, VT);  in SimplifyDemandedBitsForTargetNode()
42701       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, DL, VT, LHS, Mask));  in SimplifyDemandedBitsForTargetNode()
42713           Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, DemandedLHS, DemandedRHS));  in SimplifyDemandedBitsForTargetNode()
42756         int Diff = ShAmt - Shift2Amt;  in SimplifyDemandedBitsForTargetNode()
42762             NewOpc, SDLoc(Op), VT, Op0.getOperand(0),  in SimplifyDemandedBitsForTargetNode()
42771     unsigned UpperDemandedBits = BitWidth - OriginalDemandedBits.countr_zero();  in SimplifyDemandedBitsForTargetNode()
42772     if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)  in SimplifyDemandedBitsForTargetNode()
42800     // High bits known zero.  in SimplifyDemandedBitsForTargetNode()
42808     unsigned ShAmt = Op1->getAsZExtVal();  in SimplifyDemandedBitsForTargetNode()
42818     // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1  in SimplifyDemandedBitsForTargetNode()
42842     if (Known.Zero[BitWidth - ShAmt - 1] ||  in SimplifyDemandedBitsForTargetNode()
42845           Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1));  in SimplifyDemandedBitsForTargetNode()
42847     // High bits are known one.  in SimplifyDemandedBitsForTargetNode()
42848     if (Known.One[BitWidth - ShAmt - 1])  in SimplifyDemandedBitsForTargetNode()
42869       return TLO.CombineTo(Op, TLO.DAG.getNode(X86ISD::BLENDV, SDLoc(Op), VT,  in SimplifyDemandedBitsForTargetNode()
42881     if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) {  in SimplifyDemandedBitsForTargetNode()
42882       unsigned Idx = CIdx->getZExtValue();  in SimplifyDemandedBitsForTargetNode()
42886       // bits from the implict zext - simplify to zero.  in SimplifyDemandedBitsForTargetNode()
42889         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));  in SimplifyDemandedBitsForTargetNode()
42905             Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1)));  in SimplifyDemandedBitsForTargetNode()
42919     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {  in SimplifyDemandedBitsForTargetNode()
42920       unsigned Idx = CIdx->getZExtValue();  in SimplifyDemandedBitsForTargetNode()
42946     // TODO - add known bits handling.  in SimplifyDemandedBitsForTargetNode()
42949       getPackDemandedElts(VT, OriginalDemandedElts, DemandedLHS, DemandedRHS);  in SimplifyDemandedBitsForTargetNode()
42960       // Attempt to avoid multi-use ops if we don't need anything from them.  in SimplifyDemandedBitsForTargetNode()
42968         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, Op0, Op1));  in SimplifyDemandedBitsForTargetNode()
42971     // TODO - add general PACKSS/PACKUS SimplifyDemandedBits support.  in SimplifyDemandedBitsForTargetNode()
42986         Src->hasOneUse()) {  in SimplifyDemandedBitsForTargetNode()
42990       MVT NewVT = MVT::getVectorVT(NewSrcVT, VT.getVectorNumElements() * 2);  in SimplifyDemandedBitsForTargetNode()
42993       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, NewBcst));  in SimplifyDemandedBitsForTargetNode()
42998     // icmp sgt(0, R) == ashr(R, BitWidth-1).  in SimplifyDemandedBitsForTargetNode()
43012       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));  in SimplifyDemandedBitsForTargetNode()
43014     // See if we only demand bits from the lower 128-bit vector.  in SimplifyDemandedBitsForTargetNode()
43018       return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));  in SimplifyDemandedBitsForTargetNode()
43029     Known.Zero.setHighBits(BitWidth - NumElts);  in SimplifyDemandedBitsForTargetNode()
43038     if (KnownSrc.One[SrcBits - 1])  in SimplifyDemandedBitsForTargetNode()
43040     else if (KnownSrc.Zero[SrcBits - 1])  in SimplifyDemandedBitsForTargetNode()
43043     // Attempt to avoid multi-use os if we don't need anything from it.  in SimplifyDemandedBitsForTargetNode()
43046       return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));  in SimplifyDemandedBitsForTargetNode()
43060     bool AssumeSingleUse = (Op0 == Op1) && Op->isOnlyUserOf(Op0.getNode());  in SimplifyDemandedBitsForTargetNode()
43084     // Only bottom 16-bits of the control bits are required.  in SimplifyDemandedBitsForTargetNode()
43087       uint64_t Val1 = Cst1->getZExtValue();  in SimplifyDemandedBitsForTargetNode()
43092             Op, TLO.DAG.getNode(X86ISD::BEXTR, DL, VT, Op0,  in SimplifyDemandedBitsForTargetNode()
43093                                 TLO.DAG.getConstant(MaskedVal1, DL, VT)));  in SimplifyDemandedBitsForTargetNode()
43096       unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);  in SimplifyDemandedBitsForTargetNode()
43097       unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);  in SimplifyDemandedBitsForTargetNode()
43124         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));  in SimplifyDemandedBitsForTargetNode()
43134     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);  in SimplifyDemandedBitsForTargetNode()
43152     // The result will have at least as many trailing zeros as the non-mask  in SimplifyDemandedBitsForTargetNode()
43168   EVT VT = Op.getValueType();  in SimplifyMultipleUseDemandedBitsForTargetNode()  local
43177     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&  in SimplifyMultipleUseDemandedBitsForTargetNode()
43178         !DemandedElts[CIdx->getZExtValue()])  in SimplifyMultipleUseDemandedBitsForTargetNode()
43189     unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();  in SimplifyMultipleUseDemandedBitsForTargetNode()
43190     if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)  in SimplifyMultipleUseDemandedBitsForTargetNode()
43201     // icmp sgt(0, R) == ashr(R, BitWidth-1).  in SimplifyMultipleUseDemandedBitsForTargetNode()
43246         llvm::all_of(ShuffleOps, [VT](SDValue V) {  in SimplifyMultipleUseDemandedBitsForTargetNode()
43247           return VT.getSizeInBits() == V.getValueSizeInBits();  in SimplifyMultipleUseDemandedBitsForTargetNode()
43251         return DAG.getUNDEF(VT);  in SimplifyMultipleUseDemandedBitsForTargetNode()
43253         return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op));  in SimplifyMultipleUseDemandedBitsForTargetNode()
43275         return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countr_zero()]);  in SimplifyMultipleUseDemandedBitsForTargetNode()
43394   // clang-format off  in getAltBitOpcode()
43399   // clang-format on  in getAltBitOpcode()
43404 // Helper to adjust v4i32 MOVMSK expansion to work with SSE1-only targets.
43415         cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT) {  in adjustBitcastSrcVectorSSE1()
43466 // ->
43470 static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,  in combineBitcastvxi1()  argument
43483       return DAG.getZExtOrTrunc(V, DL, VT);  in combineBitcastvxi1()
43499       cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT &&  in combineBitcastvxi1()
43508   // With AVX512 vxi1 types are legal and we prefer using k-regs.  in combineBitcastvxi1()
43522       EVT SubVT = VT.getIntegerVT(  in combineBitcastvxi1()
43525         EVT IntVT = VT.getIntegerVT(*DAG.getContext(), VT.getSizeInBits());  in combineBitcastvxi1()
43526         return DAG.getBitcast(VT, DAG.getNode(ISD::ANY_EXTEND, DL, IntVT, V));  in combineBitcastvxi1()
43532   // v8f64. So all legal 128-bit and 256-bit vectors are covered except for  in combineBitcastvxi1()
43538   // avoid sign-extending to this type entirely.  in combineBitcastvxi1()
43552     // sign-extend to a 256-bit operation to avoid truncation.  in combineBitcastvxi1()
43562     // sign-extend to a 256-bit operation to match the compare.  in combineBitcastvxi1()
43563     // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over  in combineBitcastvxi1()
43564     // 256-bit because the shuffle is cheaper than sign extending the result of  in combineBitcastvxi1()
43575     // it is not profitable to sign-extend to 256-bit because this will  in combineBitcastvxi1()
43576     // require an extra cross-lane shuffle which is more expensive than  in combineBitcastvxi1()
43577     // truncating the result of the compare to 128-bits.  in combineBitcastvxi1()
43615   return DAG.getBitcast(VT, V);  in combineBitcastvxi1()
43629     if (!In.isUndef() && (In->getAsZExtVal() & 0x1))  in combinevXi1ConstantToInteger()
43639   assert(N->getOpcode() == ISD::BITCAST && "Expected a bitcast");  in combineCastedMaskArithmetic()
43644   // Only do this if we have k-registers.  in combineCastedMaskArithmetic()
43648   EVT DstVT = N->getValueType(0);  in combineCastedMaskArithmetic()
43649   SDValue Op = N->getOperand(0);  in combineCastedMaskArithmetic()
43695   unsigned NumElts = BV->getNumOperands();  in createMMXBuildVector()
43696   SDValue Splat = BV->getSplatValue();  in createMMXBuildVector()
43720   // Broadcast - use (PUNPCKL+)PSHUFW to broadcast single element.  in createMMXBuildVector()
43728       // Unpack v8i8 to splat i8 elements to lowest 16-bits.  in createMMXBuildVector()
43736       // Use PSHUFW to repeat 16-bit elements.  in createMMXBuildVector()
43747       Ops.push_back(CreateMMXElement(BV->getOperand(i)));  in createMMXBuildVector()
43772 static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL,  in combineBitcastToBoolVector()  argument
43783     // Bitcast from a vector/float/double, we can cheaply bitcast to VT.  in combineBitcastToBoolVector()
43787       return DAG.getBitcast(VT, Src);  in combineBitcastToBoolVector()
43792     if (C->isZero())  in combineBitcastToBoolVector()
43793       return DAG.getConstant(0, DL, VT);  in combineBitcastToBoolVector()
43794     if (C->isAllOnes())  in combineBitcastToBoolVector()
43795       return DAG.getAllOnesConstant(DL, VT);  in combineBitcastToBoolVector()
43806         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N0,  in combineBitcastToBoolVector()
43819         return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,  in combineBitcastToBoolVector()
43820                            Opc == ISD::ANY_EXTEND ? DAG.getUNDEF(VT)  in combineBitcastToBoolVector()
43821                                                   : DAG.getConstant(0, DL, VT),  in combineBitcastToBoolVector()
43829     if (SDValue N0 = combineBitcastToBoolVector(VT, V.getOperand(0), DL, DAG,  in combineBitcastToBoolVector()
43831       if (SDValue N1 = combineBitcastToBoolVector(VT, V.getOperand(1), DL, DAG,  in combineBitcastToBoolVector()
43833         return DAG.getNode(Opc, DL, VT, N0, N1);  in combineBitcastToBoolVector()
43839     if ((VT == MVT::v8i1 && !Subtarget.hasDQI()) ||  in combineBitcastToBoolVector()
43840         ((VT == MVT::v32i1 || VT == MVT::v64i1) && !Subtarget.hasBWI()))  in combineBitcastToBoolVector()
43844       if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget,  in combineBitcastToBoolVector()
43847             X86ISD::KSHIFTL, DL, VT, N0,  in combineBitcastToBoolVector()
43848             DAG.getTargetConstant(Amt->getZExtValue(), DL, MVT::i8));  in combineBitcastToBoolVector()
43855     if (SDNode *Alt = DAG.getNodeIfExists(ISD::BITCAST, DAG.getVTList(VT), {V}))  in combineBitcastToBoolVector()
43864   SDValue N0 = N->getOperand(0);  in combineBitcast()
43865   EVT VT = N->getValueType(0);  in combineBitcast()  local
43871   // ->  in combineBitcast()
43877     if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget))  in combineBitcast()
43882     if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&  in combineBitcast()
43886       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0,  in combineBitcast()
43892     if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&  in combineBitcast()
43902         SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1);  in combineBitcast()
43906           SmallVector<SDValue, 4> Ops(N0->op_begin(), N0->op_end());  in combineBitcast()
43910           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);  in combineBitcast()
43919       return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);  in combineBitcast()
43924     if (VT.isVector() && VT.getScalarType() == MVT::i1 &&  in combineBitcast()
43925         SrcVT.isScalarInteger() && TLI.isTypeLegal(VT)) {  in combineBitcast()
43927               combineBitcastToBoolVector(VT, N0, SDLoc(N), DAG, Subtarget))  in combineBitcast()
43937   if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.hasAVX512() &&  in combineBitcast()
43941     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,  in combineBitcast()
43948        VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) {  in combineBitcast()
43951     unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits();  in combineBitcast()
43954       MVT MemVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(MemSize)  in combineBitcast()
43956       MVT LoadVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(SrcVTSize)  in combineBitcast()
43961       SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() };  in combineBitcast()
43964                                   MemVT, BCast->getMemOperand());  in combineBitcast()
43966       return DAG.getBitcast(VT, ResNode);  in combineBitcast()
43972   // avoiding store-load conversions.  in combineBitcast()
43973   if (VT == MVT::x86mmx) {  in combineBitcast()
43981       // Handle zero-extension of i32 with MOVD.  in combineBitcast()
43983         return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT,  in combineBitcast()
43986       // TODO - investigate supporting sext 32-bit immediates on x86_64.  in combineBitcast()
43988       return DAG.getBitcast(VT, DAG.getConstantFP(F64, DL, MVT::f64));  in combineBitcast()
44006         return DAG.getNode(X86ISD::MMX_MOVW2D, dl, VT, N00);  in combineBitcast()
44010     // Detect bitcasts of 64-bit build vectors and convert to a  in combineBitcast()
44024         return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT,  in combineBitcast()
44033       return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT,  in combineBitcast()
44040   if (Subtarget.hasAVX512() && VT.isScalarInteger() &&  in combineBitcast()
44046   if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && VT.isVector() &&  in combineBitcast()
44047       VT.getVectorElementType() == MVT::i1) {  in combineBitcast()
44049       if (C->isAllOnes())  in combineBitcast()
44050         return DAG.getConstant(1, SDLoc(N0), VT);  in combineBitcast()
44051       if (C->isZero())  in combineBitcast()
44052         return DAG.getConstant(0, SDLoc(N0), VT);  in combineBitcast()
44057   // Turn it into a sign bit compare that produces a k-register. This avoids  in combineBitcast()
44060       VT.isVector() && VT.getVectorElementType() == MVT::i1 &&  in combineBitcast()
44061       isPowerOf2_32(VT.getVectorNumElements())) {  in combineBitcast()
44062     unsigned NumElts = VT.getVectorNumElements();  in combineBitcast()
44084         if (EVT(CmpVT) == VT)  in combineBitcast()
44087         // Pad with zeroes up to original VT to replace the zeroes that were  in combineBitcast()
44092         return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Ops);  in combineBitcast()
44098   // remove GPR<->K-register crossings.  in combineBitcast()
44103   // floating-point operand into a floating-point logic operation. This may  in combineBitcast()
44109   // clang-format off  in combineBitcast()
44114   // clang-format on  in combineBitcast()
44118   if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||  in combineBitcast()
44119         (Subtarget.hasSSE2() && VT == MVT::f64) ||  in combineBitcast()
44120         (Subtarget.hasFP16() && VT == MVT::f16) ||  in combineBitcast()
44121         (Subtarget.hasSSE2() && VT.isInteger() && VT.isVector() &&  in combineBitcast()
44122          TLI.isTypeLegal(VT))))  in combineBitcast()
44129   // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))  in combineBitcast()
44132       LogicOp0.getOperand(0).getValueType() == VT &&  in combineBitcast()
44134     SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);  in combineBitcast()
44135     unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode();  in combineBitcast()
44136     return DAG.getNode(Opcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);  in combineBitcast()
44138   // bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)  in combineBitcast()
44141       LogicOp1.getOperand(0).getValueType() == VT &&  in combineBitcast()
44143     SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);  in combineBitcast()
44144     unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode();  in combineBitcast()
44145     return DAG.getNode(Opcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);  in combineBitcast()
44161   auto IsFreeTruncation = [](SDValue &Op) -> bool {  in detectExtMul()
44168     return (BV && BV->isConstant());  in detectExtMul()
44186   SDValue AbsOp1 = Abs->getOperand(0);  in detectZextAbsDiff()
44193   // Check if the operands of the sub are zero-extended from vectors of i8.  in detectZextAbsDiff()
44223   // "Zero-extend" the i8 vectors. This is not a per-element zext, rather we  in createVPDPBUSD()
44237     MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);  in createVPDPBUSD()  local
44238     return DAG.getNode(X86ISD::VPDPBUSD, DL, VT, Ops);  in createVPDPBUSD()
44256   // "Zero-extend" the i8 vectors. This is not a per-element zext, rather we  in createPSADBW()
44269     MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64);  in createPSADBW()  local
44270     return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops);  in createPSADBW()
44285   EVT ExtractVT = Extract->getValueType(0);  in combineMinMaxReduction()
44304   // First, reduce the source down to 128-bit, applying BinOp to lo/hi.  in combineMinMaxReduction()
44331   // v16i8 UMIN will leave the upper element as zero, performing zero-extension  in combineMinMaxReduction()
44359   EVT ExtractVT = Extract->getValueType(0);  in combinePredicateReduction()
44387     // Special case for (pre-legalization) vXi1 reductions.  in combinePredicateReduction()
44391       ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get();  in combinePredicateReduction()
44394         // For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y.  in combinePredicateReduction()
44395         // For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y.  in combinePredicateReduction()
44425     // FIXME: Better handling of k-registers or 512-bit vectors?  in combinePredicateReduction()
44466     // parity -> (PARITY(MOVMSK X))  in combinePredicateReduction()
44474     // any_of -> MOVMSK != 0  in combinePredicateReduction()
44478     // all_of -> MOVMSK == ((1 << NumElts) - 1)  in combinePredicateReduction()
44485   // negate to get the final 0/-1 mask value.  in combinePredicateReduction()
44497   EVT ExtractVT = Extract->getValueType(0);  in combineVPDPBUSDPattern()
44503   EVT VT = Extract->getOperand(0).getValueType();  in combineVPDPBUSDPattern()  local
44504   if (!isPowerOf2_32(VT.getVectorNumElements()))  in combineVPDPBUSDPattern()
44512   // done by vpdpbusd compute a signed 16-bit product that will be sign extended  in combineVPDPBUSDPattern()
44537   unsigned Stages = Log2_32(VT.getVectorNumElements());  in combineVPDPBUSDPattern()
44543     for (unsigned i = Stages - StageBias; i > 0; --i) {  in combineVPDPBUSDPattern()
44544       SmallVector<int, 16> Mask(DpElems, -1);  in combineVPDPBUSDPattern()
44545       for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)  in combineVPDPBUSDPattern()
44560                      Extract->getOperand(1));  in combineVPDPBUSDPattern()
44569   EVT ExtractVT = Extract->getValueType(0);  in combineBasicSADPattern()
44575   EVT VT = Extract->getOperand(0).getValueType();  in combineBasicSADPattern()  local
44576   if (!isPowerOf2_32(VT.getVectorNumElements()))  in combineBasicSADPattern()
44597   // abs-diff pattern.  in combineBasicSADPattern()
44601   // Check whether we have an abs-diff pattern feeding into the select.  in combineBasicSADPattern()
44612   unsigned Stages = Log2_32(VT.getVectorNumElements());  in combineBasicSADPattern()
44617     for(unsigned i = Stages - 3; i > 0; --i) {  in combineBasicSADPattern()
44618       SmallVector<int, 16> Mask(SadElems, -1);  in combineBasicSADPattern()
44619       for(unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)  in combineBasicSADPattern()
44634                      Extract->getOperand(1));  in combineBasicSADPattern()
44638 // integer, that requires a potentially expensive XMM -> GPR transfer.
44643 //       to a single-use of the loaded vector. For the reasons above, we
44649   assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&  in combineExtractFromVectorLoad()
44653   EVT VT = N->getValueType(0);  in combineExtractFromVectorLoad()  local
44655   bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {  in combineExtractFromVectorLoad()
44656     return Use->getOpcode() == ISD::STORE ||  in combineExtractFromVectorLoad()
44657            Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||  in combineExtractFromVectorLoad()
44658            Use->getOpcode() == ISD::SCALAR_TO_VECTOR;  in combineExtractFromVectorLoad()
44662   if (LoadVec && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&  in combineExtractFromVectorLoad()
44663       VecVT.getVectorElementType() == VT &&  in combineExtractFromVectorLoad()
44665       DCI.isAfterLegalizeDAG() && !LikelyUsedAsVector && LoadVec->isSimple()) {  in combineExtractFromVectorLoad()
44667         DAG, LoadVec->getBasePtr(), VecVT, DAG.getVectorIdxConstant(Idx, dl));  in combineExtractFromVectorLoad()
44668     unsigned PtrOff = VT.getSizeInBits() * Idx / 8;  in combineExtractFromVectorLoad()
44669     MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);  in combineExtractFromVectorLoad()
44670     Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);  in combineExtractFromVectorLoad()
44672         DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,  in combineExtractFromVectorLoad()
44673                     LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());  in combineExtractFromVectorLoad()
44690   SDValue Src = N->getOperand(0);  in combineExtractWithShuffle()
44691   SDValue Idx = N->getOperand(1);  in combineExtractWithShuffle()
44693   EVT VT = N->getValueType(0);  in combineExtractWithShuffle()  local
44703   const APInt &IdxC = N->getConstantOperandAPInt(1);  in combineExtractWithShuffle()
44713     if (SrcOpVT.isScalarInteger() && VT.isInteger() &&  in combineExtractWithShuffle()
44717       // TODO support non-zero offsets.  in combineExtractWithShuffle()
44720         SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT);  in combineExtractWithShuffle()
44731     if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&  in combineExtractWithShuffle()
44732         VT.getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) {  in combineExtractWithShuffle()
44733       SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(),  in combineExtractWithShuffle()
44734                                  MemIntr->getBasePtr(),  in combineExtractWithShuffle()
44735                                  MemIntr->getPointerInfo(),  in combineExtractWithShuffle()
44736                                  MemIntr->getOriginalAlign(),  in combineExtractWithShuffle()
44737                                  MemIntr->getMemOperand()->getFlags());  in combineExtractWithShuffle()
44745   if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&  in combineExtractWithShuffle()
44760       Scl = DAG.getZExtOrTrunc(Scl, dl, VT);  in combineExtractWithShuffle()
44772     return DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(ExtractVT, Src),  in combineExtractWithShuffle()
44776   // We can only legally extract other elements from 128-bit vectors and in  in combineExtractWithShuffle()
44777   // certain circumstances, depending on SSE-level.  in combineExtractWithShuffle()
44787       unsigned LaneOffset = (Idx & ~(NumEltsPerLane - 1)) * EltSizeInBits;  in combineExtractWithShuffle()
44791       Idx &= (NumEltsPerLane - 1);  in combineExtractWithShuffle()
44844   // If narrowing/widening failed, see if we can extract+zero-extend.  in combineExtractWithShuffle()
44855     if (!isUndefOrZeroInRange(Mask, ScaledIdx + 1, Scale - 1))  in combineExtractWithShuffle()
44866     return DAG.getUNDEF(VT);  in combineExtractWithShuffle()
44869     return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT)  in combineExtractWithShuffle()
44870                                 : DAG.getConstant(0, dl, VT);  in combineExtractWithShuffle()
44875     return DAG.getZExtOrTrunc(V, dl, VT);  in combineExtractWithShuffle()
44877   if (N->getOpcode() == ISD::EXTRACT_VECTOR_ELT && ExtractVT == SrcVT)  in combineExtractWithShuffle()
44889   assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");  in scalarizeExtEltFP()
44890   SDValue Vec = ExtElt->getOperand(0);  in scalarizeExtEltFP()
44891   SDValue Index = ExtElt->getOperand(1);  in scalarizeExtEltFP()
44892   EVT VT = ExtElt->getValueType(0);  in scalarizeExtEltFP()  local
44896   // non-zero element because the shuffle+scalar op will be cheaper?  in scalarizeExtEltFP()
44897   if (!Vec.hasOneUse() || !isNullConstant(Index) || VecVT.getScalarType() != VT)  in scalarizeExtEltFP()
44901   // extract, the condition code), so deal with those as a special-case.  in scalarizeExtEltFP()
44902   if (Vec.getOpcode() == ISD::SETCC && VT == MVT::i1) {  in scalarizeExtEltFP()
44907     // extract (setcc X, Y, CC), 0 --> setcc (extract X, 0), (extract Y, 0), CC  in scalarizeExtEltFP()
44913     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1, Vec.getOperand(2));  in scalarizeExtEltFP()
44916   if (!(VT == MVT::f16 && Subtarget.hasFP16()) && VT != MVT::f32 &&  in scalarizeExtEltFP()
44917       VT != MVT::f64)  in scalarizeExtEltFP()
44930     // ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)  in scalarizeExtEltFP()
44935     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,  in scalarizeExtEltFP()
44937     SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,  in scalarizeExtEltFP()
44939     return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);  in scalarizeExtEltFP()
44942   // TODO: This switch could include FNEG and the x86-specific FP logic ops  in scalarizeExtEltFP()
44973     // extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ...  in scalarizeExtEltFP()
44976     for (SDValue Op : Vec->ops())  in scalarizeExtEltFP()
44977       ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index));  in scalarizeExtEltFP()
44978     return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps);  in scalarizeExtEltFP()
44990   assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");  in combineArithReduction()
45002   SDValue Index = ExtElt->getOperand(1);  in combineArithReduction()
45006   EVT VT = ExtElt->getValueType(0);  in combineArithReduction()  local
45008   if (VecVT.getScalarType() != VT)  in combineArithReduction()
45015   // Extend v4i8/v8i8 vector to v16i8, with undef upper 64-bits.  in combineArithReduction()
45033   // vXi8 mul reduction - promote to vXi16 mul reduction.  in combineArithReduction()
45035     if (VT != MVT::i8 || NumElts < 4 || !isPowerOf2_32(NumElts))  in combineArithReduction()
45056                                              {4, 5, 6, 7, -1, -1, -1, -1}));  in combineArithReduction()
45059                                            {2, 3, -1, -1, -1, -1, -1, -1}));  in combineArithReduction()
45062                                            {1, -1, -1, -1, -1, -1, -1, -1}));  in combineArithReduction()
45064     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);  in combineArithReduction()
45067   // vXi8 add reduction - sub 128-bit vector.  in combineArithReduction()
45073     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);  in combineArithReduction()
45076   // Must be a >=128-bit vector with pow2 elements.  in combineArithReduction()
45080   // vXi8 add reduction - sum lo/hi halves then use PSADBW.  in combineArithReduction()
45081   if (VT == MVT::i8) {  in combineArithReduction()
45092         {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});  in combineArithReduction()
45097     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);  in combineArithReduction()
45101   // If the source vector values are 0-255, then we can use PSADBW to  in combineArithReduction()
45121       MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64);  in combineArithReduction()  local
45123       return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops[0], Zero);  in combineArithReduction()
45138       SDValue RdxHi = DAG.getVectorShuffle(MVT::v2i64, DL, Rdx, Rdx, {1, -1});  in combineArithReduction()
45142     VecVT = MVT::getVectorVT(VT.getSimpleVT(), 128 / VT.getSizeInBits());  in combineArithReduction()
45144     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);  in combineArithReduction()
45153   // 256-bit horizontal instructions operate on 128-bit chunks rather than  in combineArithReduction()
45156   // TODO: We could extend this to handle 512-bit or even longer vectors.  in combineArithReduction()
45169   // extract (add (shuf X), X), 0 --> extract (hadd X, X), 0  in combineArithReduction()
45174   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);  in combineArithReduction()
45180 /// scalars back, while for x64 we should use 64-bit extracts and shifts.
45187   SDValue InputVector = N->getOperand(0);  in combineExtractVectorElt()
45188   SDValue EltIdx = N->getOperand(1);  in combineExtractVectorElt()
45192   EVT VT = N->getValueType(0);  in combineExtractVectorElt()  local
45194   bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT;  in combineExtractVectorElt()
45196   unsigned NumEltBits = VT.getScalarSizeInBits();  in combineExtractVectorElt()
45199   if (CIdx && CIdx->getAPIntValue().uge(NumSrcElts))  in combineExtractVectorElt()
45200     return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);  in combineExtractVectorElt()
45203   if (CIdx && VT.isInteger()) {  in combineExtractVectorElt()
45210       uint64_t Idx = CIdx->getZExtValue();  in combineExtractVectorElt()
45212         return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);  in combineExtractVectorElt()
45213       return DAG.getConstant(EltBits[Idx].zext(NumEltBits), dl, VT);  in combineExtractVectorElt()
45216     // Convert extract_element(bitcast(<X x i1>) -> bitcast(extract_subvector()).  in combineExtractVectorElt()
45224             DAG.getIntPtrConstant(CIdx->getZExtValue() * NumEltBits, dl));  in combineExtractVectorElt()
45225         return DAG.getBitcast(VT, Sub);  in combineExtractVectorElt()
45235     // PEXTR*(PINSR*(v, s, c), c) -> s (with implicit zext handling).  in combineExtractVectorElt()
45243       return DAG.getZExtOrTrunc(Scl, dl, VT);  in combineExtractVectorElt()
45246     // TODO - Remove this once we can handle the implicit zero-extension of  in combineExtractVectorElt()
45253   if (VT == MVT::i64 && SrcVT == MVT::v1i64 &&  in combineExtractVectorElt()
45257     return DAG.getBitcast(VT, InputVector);  in combineExtractVectorElt()
45260   if (VT == MVT::i32 && SrcVT == MVT::v2i32 &&  in combineExtractVectorElt()
45269   // pre-legalization,  in combineExtractVectorElt()
45293             N, InputVector.getValueType(), InputVector, CIdx->getZExtValue(),  in combineExtractVectorElt()
45312       if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&  in combineExtractVectorElt()
45313           Use->getOperand(0).getResNo() == ResNo &&  in combineExtractVectorElt()
45314           Use->getValueType(0) == MVT::i1) {  in combineExtractVectorElt()
45316         IsVar |= !isa<ConstantSDNode>(Use->getOperand(1));  in combineExtractVectorElt()
45322     if (all_of(InputVector->uses(), IsBoolExtract) &&  in combineExtractVectorElt()
45328           // extractelement vXi1 X, MaskIdx --> ((movmsk X) & Mask) == Mask  in combineExtractVectorElt()
45330           SDValue MaskIdx = DAG.getZExtOrTrunc(Use->getOperand(1), dl, MVT::i8);  in combineExtractVectorElt()
45342   // Attempt to fold extract(trunc(x),c) -> trunc(extract(x,c)).  in combineExtractVectorElt()
45349       return DAG.getAnyExtOrTrunc(NewExt, dl, VT);  in combineExtractVectorElt()
45359     unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG,  in combineToExtendBoolVectorInReg()  argument
45369   EVT SVT = VT.getScalarType();  in combineToExtendBoolVectorInReg()
45373   // Input type must be extending a bool vector (bit-casted from a scalar  in combineToExtendBoolVectorInReg()
45375   if (!VT.isVector())  in combineToExtendBoolVectorInReg()
45389   unsigned NumElts = VT.getVectorNumElements();  in combineToExtendBoolVectorInReg()
45395     // must split it down into sub-sections for broadcasting. For example:  in combineToExtendBoolVectorInReg()
45396     //   i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections.  in combineToExtendBoolVectorInReg()
45397     //   i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections.  in combineToExtendBoolVectorInReg()
45402     Vec = DAG.getBitcast(VT, Vec);  in combineToExtendBoolVectorInReg()
45406     Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);  in combineToExtendBoolVectorInReg()
45420     Vec = DAG.getBitcast(VT, Vec);  in combineToExtendBoolVectorInReg()
45422     // For smaller scalar integers, we can simply any-extend it to the vector  in combineToExtendBoolVectorInReg()
45426     Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);  in combineToExtendBoolVectorInReg()
45428     Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);  in combineToExtendBoolVectorInReg()
45438   SDValue BitMask = DAG.getBuildVector(VT, DL, Bits);  in combineToExtendBoolVectorInReg()
45439   Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask);  in combineToExtendBoolVectorInReg()
45444   Vec = DAG.getSExtOrTrunc(Vec, DL, VT);  in combineToExtendBoolVectorInReg()
45447   // zero-extension.  in combineToExtendBoolVectorInReg()
45450   return DAG.getNode(ISD::SRL, DL, VT, Vec,  in combineToExtendBoolVectorInReg()
45451                      DAG.getConstant(EltSizeInBits - 1, DL, VT));  in combineToExtendBoolVectorInReg()
45454 /// If a vector select has an operand that is -1 or 0, try to simplify the
45461   SDValue Cond = N->getOperand(0);  in combineVSelectWithAllOnesOrZeros()
45462   SDValue LHS = N->getOperand(1);  in combineVSelectWithAllOnesOrZeros()
45463   SDValue RHS = N->getOperand(2);  in combineVSelectWithAllOnesOrZeros()
45464   EVT VT = LHS.getValueType();  in combineVSelectWithAllOnesOrZeros()  local
45468   if (N->getOpcode() != ISD::VSELECT)  in combineVSelectWithAllOnesOrZeros()
45482     if (VT.isFloatingPoint())  in combineVSelectWithAllOnesOrZeros()
45483       return DAG.getConstantFP(0.0, DL, VT);  in combineVSelectWithAllOnesOrZeros()
45484     return DAG.getConstant(0, DL, VT);  in combineVSelectWithAllOnesOrZeros()
45491   // vector floating-point selects.  in combineVSelectWithAllOnesOrZeros()
45492   if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())  in combineVSelectWithAllOnesOrZeros()
45502       TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==  in combineVSelectWithAllOnesOrZeros()
45509           cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());  in combineVSelectWithAllOnesOrZeros()
45522   // vselect Cond, 111..., 000... -> Cond  in combineVSelectWithAllOnesOrZeros()
45524     return DAG.getBitcast(VT, Cond);  in combineVSelectWithAllOnesOrZeros()
45529   // vselect Cond, 111..., X -> or Cond, X  in combineVSelectWithAllOnesOrZeros()
45533     return DAG.getBitcast(VT, Or);  in combineVSelectWithAllOnesOrZeros()
45536   // vselect Cond, X, 000... -> and Cond, X  in combineVSelectWithAllOnesOrZeros()
45540     return DAG.getBitcast(VT, And);  in combineVSelectWithAllOnesOrZeros()
45543   // vselect Cond, 000..., X -> andn Cond, X  in combineVSelectWithAllOnesOrZeros()
45547     // The canonical form differs for i1 vectors - x86andnp is not used  in combineVSelectWithAllOnesOrZeros()
45553     return DAG.getBitcast(VT, AndN);  in combineVSelectWithAllOnesOrZeros()
45560 /// and concatenate the result to eliminate a wide (256-bit) vector instruction:
45561 ///   vselect Cond, (concat T0, T1), (concat F0, F1) -->
45565   unsigned Opcode = N->getOpcode();  in narrowVectorSelect()
45569   // TODO: Split 512-bit vectors too?  in narrowVectorSelect()
45570   EVT VT = N->getValueType(0);  in narrowVectorSelect()  local
45571   if (!VT.is256BitVector())  in narrowVectorSelect()
45575   SDValue Cond = N->getOperand(0);  in narrowVectorSelect()
45576   SDValue TVal = N->getOperand(1);  in narrowVectorSelect()
45577   SDValue FVal = N->getOperand(2);  in narrowVectorSelect()
45587   return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Cond, TVal, FVal}, makeBlend,  in narrowVectorSelect()
45593   SDValue Cond = N->getOperand(0);  in combineSelectOfTwoConstants()
45594   SDValue LHS = N->getOperand(1);  in combineSelectOfTwoConstants()
45595   SDValue RHS = N->getOperand(2);  in combineSelectOfTwoConstants()
45603   EVT VT = N->getValueType(0);  in combineSelectOfTwoConstants()  local
45604   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))  in combineSelectOfTwoConstants()
45608   // this with a wider condition value (post-legalization it becomes an i8),  in combineSelectOfTwoConstants()
45613   // A power-of-2 multiply is just a shift. LEA also cheaply handles multiply by  in combineSelectOfTwoConstants()
45615   // TODO: For constants that overflow or do not differ by power-of-2 or small  in combineSelectOfTwoConstants()
45617   const APInt &TrueVal = TrueC->getAPIntValue();  in combineSelectOfTwoConstants()
45618   const APInt &FalseVal = FalseC->getAPIntValue();  in combineSelectOfTwoConstants()
45620   // We have a more efficient lowering for "(X == 0) ? Y : -1" using SBB.  in combineSelectOfTwoConstants()
45623     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();  in combineSelectOfTwoConstants()
45635       ((VT == MVT::i32 || VT == MVT::i64) &&  in combineSelectOfTwoConstants()
45646     // select Cond, TC, FC --> (zext(Cond) * (TC - FC)) + FC  in combineSelectOfTwoConstants()
45647     SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);  in combineSelectOfTwoConstants()
45649     // Multiply condition by the difference if non-one.  in combineSelectOfTwoConstants()
45651       R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT));  in combineSelectOfTwoConstants()
45653     // Add the base if non-zero.  in combineSelectOfTwoConstants()
45654     if (!FalseC->isZero())  in combineSelectOfTwoConstants()
45655       R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0));  in combineSelectOfTwoConstants()
45663 /// If this is a *dynamic* select (non-constant condition) and we can match
45665 /// condition so that blends can use the high (sign) bit of each element.
45672   SDValue Cond = N->getOperand(0);  in combineVSelectToBLENDV()
45673   if ((N->getOpcode() != ISD::VSELECT &&  in combineVSelectToBLENDV()
45674        N->getOpcode() != X86ISD::BLENDV) ||  in combineVSelectToBLENDV()
45680   EVT VT = N->getValueType(0);  in combineVSelectToBLENDV()  local
45686   // cases where a *dynamic* blend will fail even though a constant-condition  in combineVSelectToBLENDV()
45689   // Potentially, we should combine constant-condition vselect nodes  in combineVSelectToBLENDV()
45690   // pre-legalization into shuffles and not mark as many types as custom  in combineVSelectToBLENDV()
45692   if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))  in combineVSelectToBLENDV()
45694   // FIXME: We don't support i16-element blends currently. We could and  in combineVSelectToBLENDV()
45696   // rather than just the high bit and using an i8-element blend.  in combineVSelectToBLENDV()
45697   if (VT.getVectorElementType() == MVT::i16)  in combineVSelectToBLENDV()
45700   if (VT.is128BitVector() && !Subtarget.hasSSE41())  in combineVSelectToBLENDV()
45703   if (VT == MVT::v32i8 && !Subtarget.hasAVX2())  in combineVSelectToBLENDV()
45705   // There are no 512-bit blend instructions that use sign bits.  in combineVSelectToBLENDV()
45706   if (VT.is512BitVector())  in combineVSelectToBLENDV()
45710   // and don't ever optimize vector selects that map to AVX512 mask-registers.  in combineVSelectToBLENDV()
45715     for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();  in combineVSelectToBLENDV()
45717       if ((UI->getOpcode() != ISD::VSELECT &&  in combineVSelectToBLENDV()
45718            UI->getOpcode() != X86ISD::BLENDV) ||  in combineVSelectToBLENDV()
45739     for (SDNode *U : Cond->uses()) {  in combineVSelectToBLENDV()
45740       if (U->getOpcode() == X86ISD::BLENDV)  in combineVSelectToBLENDV()
45743       SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),  in combineVSelectToBLENDV()
45744                                Cond, U->getOperand(1), U->getOperand(2));  in combineVSelectToBLENDV()
45754     return DAG.getNode(X86ISD::BLENDV, DL, N->getValueType(0), V,  in combineVSelectToBLENDV()
45755                        N->getOperand(1), N->getOperand(2));  in combineVSelectToBLENDV()
45767 //   (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
45770 //   ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
45771 //   ( M      ? -X : X) == ((X ^   M     ) + (M & 1))
45777     EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL,  in combineLogicBlendIntoConditionalNegate()  argument
45782          "Mask must be zero/all-bits");  in combineLogicBlendIntoConditionalNegate()
45790     return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&  in combineLogicBlendIntoConditionalNegate()
45791            ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());  in combineLogicBlendIntoConditionalNegate()
45808   // (vselect M, (sub 0, X), X)  -> (sub (xor X, M), M)  in combineLogicBlendIntoConditionalNegate()
45811   // above, -(vselect M, (sub 0, X), X), and therefore the replacement  in combineLogicBlendIntoConditionalNegate()
45813   // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the  in combineLogicBlendIntoConditionalNegate()
45819   return DAG.getBitcast(VT, Res);  in combineLogicBlendIntoConditionalNegate()
45826   if (N->getOpcode() != ISD::VSELECT)  in commuteSelect()
45829   SDValue Cond = N->getOperand(0);  in commuteSelect()
45830   SDValue LHS = N->getOperand(1);  in commuteSelect()
45831   SDValue RHS = N->getOperand(2);  in commuteSelect()
45843   // (vselect M, L, R) -> (vselect ~M, R, L)  in commuteSelect()
45845       ISD::getSetCCInverse(cast<CondCodeSDNode>(Cond.getOperand(2))->get(),  in commuteSelect()
45852 /// Do target-specific dag combines on SELECT and VSELECT nodes.
45857   SDValue Cond = N->getOperand(0);  in combineSelect()
45858   SDValue LHS = N->getOperand(1);  in combineSelect()
45859   SDValue RHS = N->getOperand(2);  in combineSelect()
45873   EVT VT = LHS.getValueType();  in combineSelect()  local
45878   // Attempt to combine (select M, (sub 0, X), X) -> (sub (xor X, M), M).  in combineSelect()
45879   // Limit this to cases of non-constant masks that createShuffleMaskFromVSELECT  in combineSelect()
45882       CondVT.getScalarSizeInBits() == VT.getScalarSizeInBits() &&  in combineSelect()
45885     if (SDValue V = combineLogicBlendIntoConditionalNegate(VT, Cond, RHS, LHS,  in combineSelect()
45891       (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::BLENDV)) {  in combineSelect()
45894                                      N->getOpcode() == X86ISD::BLENDV))  in combineSelect()
45895       return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);  in combineSelect()
45898   // fold vselect(cond, pshufb(x), pshufb(y)) -> or (pshufb(x), pshufb(y))  in combineSelect()
45901   if (N->getOpcode() == ISD::VSELECT && CondVT.isVector() &&  in combineSelect()
45904     MVT SimpleVT = VT.getSimpleVT();  in combineSelect()
45910       int NumElts = VT.getVectorNumElements();  in combineSelect()
45922       LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0),  in combineSelect()
45924       RHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, RHS.getOperand(0),  in combineSelect()
45926       return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);  in combineSelect()
45933   // ignored in unsafe-math mode).  in combineSelect()
45935   if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&  in combineSelect()
45936       VT != MVT::f80 && VT != MVT::f128 && !isSoftF16(VT, Subtarget) &&  in combineSelect()
45937       (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&  in combineSelect()
45939        (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {  in combineSelect()
45940     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();  in combineSelect()
46012     // Check for x CC y ? y : x -- a min/max with reversed arms.  in combineSelect()
46080       return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);  in combineSelect()
46087   // fold (select (setcc (and (X, 1), 0, seteq), Y, Z)) -> select(and(X, 1),Z,Y)  in combineSelect()
46088   if (Subtarget.hasAVX512() && N->getOpcode() == ISD::SELECT &&  in combineSelect()
46089       Cond.getOpcode() == ISD::SETCC && (VT == MVT::f32 || VT == MVT::f64)) {  in combineSelect()
46090     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();  in combineSelect()
46098       return DAG.getNode(ISD::SELECT, DL, VT, AndNode, RHS, LHS);  in combineSelect()
46111       (VT.getVectorElementType() == MVT::i8 ||  in combineSelect()
46112        VT.getVectorElementType() == MVT::i16)) {  in combineSelect()
46113     Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);  in combineSelect()
46114     return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS);  in combineSelect()
46117   // AVX512 - Extend select with zero to merge with target shuffle.  in combineSelect()
46118   // select(mask, extract_subvector(shuffle(x)), zero) -->  in combineSelect()
46120   // TODO - support non target shuffles as well.  in combineSelect()
46141                             VT.getSizeInBits());  in combineSelect()
46143                             VT.getSizeInBits());  in combineSelect()
46148       return extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());  in combineSelect()
46155   if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&  in combineSelect()
46160     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();  in combineSelect()
46163     // (x > 0) ? x : 0 -> (x >= 0) ? x : 0  in combineSelect()
46164     // (x < -1) ? x : -1 -> (x <= -1) ? x : -1  in combineSelect()
46167     // (a - b) > 0 ? (a - b) : 0 -> (a - b) >= 0 ? (a - b) : 0  in combineSelect()
46178     //  (x s> 1) ? x : 1 -> (x s>= 1) ? x : 1 -> (x s> 0) ? x : 1  in combineSelect()
46179     //  (x u> 1) ? x : 1 -> (x u>= 1) ? x : 1 -> (x != 0) ? x : 1  in combineSelect()
46186         return DAG.getSelect(DL, VT, Cond, LHS, RHS);  in combineSelect()
46191         return DAG.getSelect(DL, VT, Cond, LHS, RHS);  in combineSelect()
46198     // --> (select (cmpuge Cond0, Cond1), LHS, Y)  in combineSelect()
46200     // --> (select (cmpsle Cond0, Cond1), LHS, Y)  in combineSelect()
46206           cast<CondCodeSDNode>(InnerSetCC.getOperand(2))->get();  in combineSelect()
46212         // clang-format off  in combineSelect()
46218         // clang-format on  in combineSelect()
46222           return DAG.getSelect(DL, VT, Cond, LHS, RHS.getOperand(2));  in combineSelect()
46231   if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&  in combineSelect()
46238     return DAG.getSelect(DL, VT, CondNew, RHS, LHS);  in combineSelect()
46243   if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::BITCAST &&  in combineSelect()
46245       TLI.isTypeLegal(VT.getScalarType())) {  in combineSelect()
46246     EVT ExtCondVT = VT.changeVectorElementTypeToInteger();  in combineSelect()
46250       return DAG.getSelect(DL, VT, ExtCond, LHS, RHS);  in combineSelect()
46255   // with out-of-bounds clamping.  in combineSelect()
46259   // to bitwidth-1 for unsigned shifts, effectively performing a maximum left  in combineSelect()
46260   // shift of bitwidth-1 positions, and returns zero for unsigned right shifts  in combineSelect()
46261   // exceeding bitwidth-1.  in combineSelect()
46262   if (N->getOpcode() == ISD::VSELECT) {  in combineSelect()
46264     // fold select(icmp_ult(amt,BW),shl(x,amt),0) -> avx2 psllv(x,amt)  in combineSelect()
46265     // fold select(icmp_ult(amt,BW),srl(x,amt),0) -> avx2 psrlv(x,amt)  in combineSelect()
46267         supportedVectorVarShift(VT, Subtarget, LHS.getOpcode()) &&  in combineSelect()
46270                                m_SpecificInt(VT.getScalarSizeInBits()),  in combineSelect()
46274                          DL, VT, LHS.getOperand(0), LHS.getOperand(1));  in combineSelect()
46276     // fold select(icmp_uge(amt,BW),0,shl(x,amt)) -> avx2 psllv(x,amt)  in combineSelect()
46277     // fold select(icmp_uge(amt,BW),0,srl(x,amt)) -> avx2 psrlv(x,amt)  in combineSelect()
46279         supportedVectorVarShift(VT, Subtarget, RHS.getOpcode()) &&  in combineSelect()
46282                                m_SpecificInt(VT.getScalarSizeInBits()),  in combineSelect()
46286                          DL, VT, RHS.getOperand(0), RHS.getOperand(1));  in combineSelect()
46291   if (!TLI.isTypeLegal(VT) || isSoftF16(VT, Subtarget))  in combineSelect()
46303   // select(~Cond, X, Y) -> select(Cond, Y, X)  in combineSelect()
46306       return DAG.getNode(N->getOpcode(), DL, VT,  in combineSelect()
46309     // pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the  in combineSelect()
46316       return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);  in combineSelect()
46325   if (N->getOpcode() == ISD::SELECT && VT.isVector() &&  in combineSelect()
46326       VT.getVectorElementType() == MVT::i1 &&  in combineSelect()
46327       (DCI.isBeforeLegalize() || (VT != MVT::v64i1 || Subtarget.is64Bit()))) {  in combineSelect()
46328     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());  in combineSelect()
46348         return DAG.getBitcast(VT, Select);  in combineSelect()
46355   // This can lower using a vector shift bit-hack rather than mask and compare.  in combineSelect()
46357       N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&  in combineSelect()
46361       cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&  in combineSelect()
46362       Cond.getOperand(0).getValueType() == VT) {  in combineSelect()
46363     // The 'and' mask must be composed of power-of-2 constants.  in combineSelect()
46366     if (C && C->getAPIntValue().isPowerOf2()) {  in combineSelect()
46367       // vselect (X & C == 0), LHS, RHS --> vselect (X & C != 0), RHS, LHS  in combineSelect()
46370       return DAG.getSelect(DL, VT, NotCond, RHS, LHS);  in combineSelect()
46373     // If we have a non-splat but still powers-of-2 mask, AVX1 can use pmulld  in combineSelect()
46374     // and AVX2 can use vpsllv{dq}. 8-bit lacks a proper shift or multiply.  in combineSelect()
46375     // 16-bit lacks a proper blendv.  in combineSelect()
46376     unsigned EltBitWidth = VT.getScalarSizeInBits();  in combineSelect()
46378         TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) ||  in combineSelect()
46383           return C->getAPIntValue().isPowerOf2();  in combineSelect()
46385       // Create a left-shift constant to get the mask bits over to the sign-bit.  in combineSelect()
46388       for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {  in combineSelect()
46390         ShlVals.push_back(EltBitWidth - 1 -  in combineSelect()
46391                           MaskVal->getAPIntValue().exactLogBase2());  in combineSelect()
46393       // vsel ((X & C) == 0), LHS, RHS --> vsel ((shl X, C') < 0), RHS, LHS  in combineSelect()
46394       SDValue ShlAmt = getConstVector(ShlVals, VT.getSimpleVT(), DAG, DL);  in combineSelect()
46395       SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And.getOperand(0), ShlAmt);  in combineSelect()
46398       return DAG.getSelect(DL, VT, NewCond, RHS, LHS);  in combineSelect()
46414   // This combine only operates on CMP-like nodes.  in combineSetCCAtomicArith()
46416         (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))  in combineSetCCAtomicArith()
46426   //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)  in combineSetCCAtomicArith()
46427   //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)  in combineSetCCAtomicArith()
46428   //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)  in combineSetCCAtomicArith()
46429   //   (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0)  in combineSetCCAtomicArith()
46433   // - XOR/OR/AND (if they were made to survive AtomicExpand)  in combineSetCCAtomicArith()
46434   // - LHS != 1  in combineSetCCAtomicArith()
46453   APInt Addend = OpRHSC->getAPIntValue();  in combineSetCCAtomicArith()
46455     Addend = -Addend;  in combineSetCCAtomicArith()
46461   APInt Comparison = CmpRHSC->getAPIntValue();  in combineSetCCAtomicArith()
46462   APInt NegAddend = -Addend;  in combineSetCCAtomicArith()
46477     APInt DecComparison = Comparison - 1;  in combineSetCCAtomicArith()
46499         AN->getMemOperand());  in combineSetCCAtomicArith()
46515   else if (CC == X86::COND_G && Addend == -1)  in combineSetCCAtomicArith()
46517   else if (CC == X86::COND_LE && Addend == -1)  in combineSetCCAtomicArith()
46540     // CMP(X,0) -> signbit test  in checkSignTestSetCCCombine()
46545     // TODO: Remove one use limit once sdiv-fix regressions are fixed.  in checkSignTestSetCCCombine()
46551     // OR(X,Y) -> see if only one operand contributes to the signbit.  in checkSignTestSetCCCombine()
46552     // TODO: XOR(X,Y) -> see if only one operand contributes to the signbit.  in checkSignTestSetCCCombine()
46600   // This combine only operates on CMP-like nodes.  in checkBoolTestSetCCCombine()
46602         (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))  in checkBoolTestSetCCCombine()
46626   if (C->getZExtValue() == 1) {  in checkBoolTestSetCCCombine()
46629   } else if (C->getZExtValue() != 0)  in checkBoolTestSetCCCombine()
46639       int OpIdx = -1;  in checkBoolTestSetCCCombine()
46691     if (FVal && FVal->getZExtValue() != 0) {  in checkBoolTestSetCCCombine()
46692       if (FVal->getZExtValue() != 1)  in checkBoolTestSetCCCombine()
46699     if (FValIsFalse && TVal->getZExtValue() != 1)  in checkBoolTestSetCCCombine()
46701     if (!FValIsFalse && TVal->getZExtValue() != 0)  in checkBoolTestSetCCCombine()
46720   if (Cond->getOpcode() == X86ISD::CMP) {  in checkBoolTestAndOrSetCCCombine()
46721     if (!isNullConstant(Cond->getOperand(1)))  in checkBoolTestAndOrSetCCCombine()
46724     Cond = Cond->getOperand(0);  in checkBoolTestAndOrSetCCCombine()
46730   switch (Cond->getOpcode()) {  in checkBoolTestAndOrSetCCCombine()
46738     SetCC0 = Cond->getOperand(0);  in checkBoolTestAndOrSetCCCombine()
46739     SetCC1 = Cond->getOperand(1);  in checkBoolTestAndOrSetCCCombine()
46746       SetCC0->getOperand(1) != SetCC1->getOperand(1))  in checkBoolTestAndOrSetCCCombine()
46749   CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0);  in checkBoolTestAndOrSetCCCombine()
46750   CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0);  in checkBoolTestAndOrSetCCCombine()
46751   Flags = SetCC0->getOperand(1);  in checkBoolTestAndOrSetCCCombine()
46755 // When legalizing carry, we create carries via add X, -1
46785               CarryOp1.getNode()->hasOneUse() &&  in combineCarryThroughADD()
46789                 DAG.getNode(X86ISD::SUB, SDLoc(CarryOp1), CarryOp1->getVTList(),  in combineCarryThroughADD()
46829   MVT VT = EFLAGS.getSimpleValueType();  in combinePTESTCC()  local
46840       // testc -> testz.  in combinePTESTCC()
46844       // !testc -> !testz.  in combinePTESTCC()
46848       // testz -> testc.  in combinePTESTCC()
46852       // !testz -> !testc.  in combinePTESTCC()
46857       // testnzc -> testnzc (no change).  in combinePTESTCC()
46867       return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,  in combinePTESTCC()
46873     // TESTC(X,~X) == TESTC(X,-1)  in combinePTESTCC()
46878             EFLAGS.getOpcode(), DL, VT, DAG.getBitcast(OpVT, NotOp1),  in combinePTESTCC()
46889       return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,  in combinePTESTCC()
46899         return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,  in combinePTESTCC()
46907         return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,  in combinePTESTCC()
46912       // If every element is an all-sign value, see if we can use TESTP/MOVMSK  in combinePTESTCC()
46920           assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result");  in combinePTESTCC()
46932               return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Res, Res);  in combinePTESTCC()
46949     // TESTZ(-1,X) == TESTZ(X,X)  in combinePTESTCC()
46951       return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op1, Op1);  in combinePTESTCC()
46953     // TESTZ(X,-1) == TESTZ(X,X)  in combinePTESTCC()
46955       return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);  in combinePTESTCC()
46957     // TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y)  in combinePTESTCC()
46969           return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,  in combinePTESTCC()
46985   // Handle eq/ne against -1 (all_of).  in combineSetCCMOVMSK()
46996   const APInt &CmpVal = CmpConstant->getAPIntValue();  in combineSetCCMOVMSK()
47027   bool IsOneUse = CmpOp.getNode()->hasOneUse();  in combineSetCCMOVMSK()
47030   // signbits extend down to all the sub-elements as well.  in combineSetCCMOVMSK()
47044         DAG.ComputeNumSignBits(BC) > (BCNumEltBits - NumEltBits)) {  in combineSetCCMOVMSK()
47053   // MOVMSK(CONCAT(X,Y)) == 0 ->  MOVMSK(OR(X,Y)).  in combineSetCCMOVMSK()
47054   // MOVMSK(CONCAT(X,Y)) != 0 ->  MOVMSK(OR(X,Y)).  in combineSetCCMOVMSK()
47055   // MOVMSK(CONCAT(X,Y)) == -1 ->  MOVMSK(AND(X,Y)).  in combineSetCCMOVMSK()
47056   // MOVMSK(CONCAT(X,Y)) != -1 ->  MOVMSK(AND(X,Y)).  in combineSetCCMOVMSK()
47074   // MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X).  in combineSetCCMOVMSK()
47075   // MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).  in combineSetCCMOVMSK()
47076   // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(XOR(X,Y),XOR(X,Y)).  in combineSetCCMOVMSK()
47077   // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(XOR(X,Y),XOR(X,Y)).  in combineSetCCMOVMSK()
47089       // Check for 256-bit split vector cases.  in combineSetCCMOVMSK()
47117     // PMOVMSKB(PACKSSBW(X, undef)) -> PMOVMSKB(BITCAST_v16i8(X)) & 0xAAAA.  in combineSetCCMOVMSK()
47131     // -> PMOVMSKB(BITCAST_v32i8(X)) & 0xAAAAAAAA.  in combineSetCCMOVMSK()
47159   // MOVMSK(SHUFFLE(X,u)) -> MOVMSK(X) iff every element is referenced.  in combineSetCCMOVMSK()
47162   // all the elements are demanded by the shuffle mask, only the "high"  in combineSetCCMOVMSK()
47172   // element width (this will ensure "high" elements match). It's slightly overly  in combineSetCCMOVMSK()
47190   // MOVMSKPS(V) !=/== 0 -> TESTPS(V,V)  in combineSetCCMOVMSK()
47191   // MOVMSKPD(V) !=/== 0 -> TESTPD(V,V)  in combineSetCCMOVMSK()
47192   // MOVMSKPS(V) !=/== -1 -> TESTPS(V,V)  in combineSetCCMOVMSK()
47193   // MOVMSKPD(V) !=/== -1 -> TESTPD(V,V)  in combineSetCCMOVMSK()
47244   SDValue FalseOp = N->getOperand(0);  in combineCMov()
47245   SDValue TrueOp = N->getOperand(1);  in combineCMov()
47246   X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);  in combineCMov()
47247   SDValue Cond = N->getOperand(3);  in combineCMov()
47249   // cmov X, X, ?, ? --> X  in combineCMov()
47262       return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);  in combineCMov()
47273       if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {  in combineCMov()
47279       // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3.  Likewise for any pow2/0.  in combineCMov()
47282       if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {  in combineCMov()
47286         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);  in combineCMov()
47288         unsigned ShAmt = TrueC->getAPIntValue().logBase2();  in combineCMov()
47294       // Optimize Cond ? cst+1 : cst -> zext(setcc(C))+cst.  This is efficient  in combineCMov()
47296       if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {  in combineCMov()
47301                            FalseC->getValueType(0), Cond);  in combineCMov()
47309       if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {  in combineCMov()
47310         APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();  in combineCMov()
47311         assert(Diff.getBitWidth() == N->getValueType(0).getSizeInBits() &&  in combineCMov()
47333           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),  in combineCMov()
47340           // Add the base if non-zero.  in combineCMov()
47341           if (FalseC->getAPIntValue() != 0)  in combineCMov()
47351   //   (select (x != c), e, c) -> (select (x != c), e, x),  in combineCMov()
47352   //   (select (x == c), c, e) -> (select (x == c), x, e)  in combineCMov()
47356   // The rationale for this change is that the conditional-move from a constant  in combineCMov()
47357   // needs two instructions, however, conditional-move from a register needs  in combineCMov()
47361   //  some instruction-combining opportunities. This opt needs to be  in combineCMov()
47383         return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);  in combineCMov()
47396       Cond.getOpcode() == X86ISD::SUB && Cond->hasOneUse()) {  in combineCMov()
47401     if (Cond0 == TrueOp && Sub1C && Sub1C->getZExtValue() == 2) {  in combineCMov()
47402       EVT CondVT = Cond->getValueType(0);  in combineCMov()
47403       EVT OuterVT = N->getValueType(0);  in combineCMov()
47406           DAG.getNode(X86ISD::SUB, DL, Cond->getVTList(), Cond.getOperand(0),  in combineCMov()
47415   //   (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)  in combineCMov()
47416   //   (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)  in combineCMov()
47444       SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps);  in combineCMov()
47447       SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops);  in combineCMov()
47452   // Fold (CMOV C1, (ADD (CTTZ X), C2), (X != 0)) ->  in combineCMov()
47453   //      (ADD (CMOV C1-C2, (CTTZ X), (X != 0)), C2)  in combineCMov()
47454   // Or (CMOV (ADD (CTTZ X), C2), C1, (X == 0)) ->  in combineCMov()
47455   //    (ADD (CMOV (CTTZ X), C1-C2, (X == 0)), C2)  in combineCMov()
47475       EVT VT = N->getValueType(0);  in combineCMov()  local
47477       SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1));  in combineCMov()
47479           DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0),  in combineCMov()
47481       return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1));  in combineCMov()
47492   EVT VT = N->getOperand(0).getValueType();  in canReduceVMulWidth()  local
47493   if (VT.getScalarSizeInBits() != 32)  in canReduceVMulWidth()
47496   assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2");  in canReduceVMulWidth()
47500     SDValue Opd = N->getOperand(i);  in canReduceVMulWidth()
47508   // When ranges are from -128 ~ 127, use MULS8 mode.  in canReduceVMulWidth()
47514   // When ranges are from -32768 ~ 32767, use MULS16 mode.  in canReduceVMulWidth()
47542 /// -128 to 127, and the scalar value range of %4 is also -128 to 127,
47548 /// -32768 to 32767, and the scalar value range of %4 is also -32768 to 32767,
47572   SDValue N0 = N->getOperand(0);  in reduceVMULWidth()
47573   SDValue N1 = N->getOperand(1);  in reduceVMULWidth()
47574   EVT VT = N->getOperand(0).getValueType();  in reduceVMULWidth()  local
47575   unsigned NumElts = VT.getVectorNumElements();  in reduceVMULWidth()
47591                        DL, VT, MulLo);  in reduceVMULWidth()
47619   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);  in reduceVMULWidth()
47623                                  EVT VT, const SDLoc &DL) {  in combineMulSpecial()  argument
47626     SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),  in combineMulSpecial()
47627                                  DAG.getConstant(Mult, DL, VT));  in combineMulSpecial()
47628     Result = DAG.getNode(ISD::SHL, DL, VT, Result,  in combineMulSpecial()
47630     Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,  in combineMulSpecial()
47631                          N->getOperand(0));  in combineMulSpecial()
47636     SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),  in combineMulSpecial()
47637                                  DAG.getConstant(Mul1, DL, VT));  in combineMulSpecial()
47638     Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, Result,  in combineMulSpecial()
47639                          DAG.getConstant(Mul2, DL, VT));  in combineMulSpecial()
47640     Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,  in combineMulSpecial()
47641                          N->getOperand(0));  in combineMulSpecial()
47659     return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),  in combineMulSpecial()
47684     return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),  in combineMulSpecial()
47693   if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {  in combineMulSpecial()
47696       unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));  in combineMulSpecial()
47697       SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),  in combineMulSpecial()
47699       SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),  in combineMulSpecial()
47701       return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);  in combineMulSpecial()
47720   EVT VT = N->getValueType(0);  in combineMulToPMADDWD()  local
47723   if (!VT.isVector() || VT.getVectorElementType() != MVT::i32)  in combineMulToPMADDWD()
47728   unsigned NumElts = VT.getVectorNumElements();  in combineMulToPMADDWD()
47736   SDValue N0 = N->getOperand(0);  in combineMulToPMADDWD()
47737   SDValue N1 = N->getOperand(1);  in combineMulToPMADDWD()
47772     // Mask off upper 16-bits of sign-extended constants.  in combineMulToPMADDWD()
47774       return DAG.getNode(ISD::AND, DL, VT, Op, DAG.getConstant(0xFFFF, DL, VT));  in combineMulToPMADDWD()
47775     if (Op.getOpcode() == ISD::SIGN_EXTEND && N->isOnlyUserOf(Op.getNode())) {  in combineMulToPMADDWD()
47778       if (Src.getScalarValueSizeInBits() == 16 && VT.getSizeInBits() <= 128)  in combineMulToPMADDWD()
47779         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Src);  in combineMulToPMADDWD()
47780       // Convert sext(vXi8) to zext(vXi16 sext(vXi8)) on pre-SSE41 targets  in combineMulToPMADDWD()
47783         EVT ExtVT = VT.changeVectorElementType(MVT::i16);  in combineMulToPMADDWD()
47785         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Src);  in combineMulToPMADDWD()
47790         N->isOnlyUserOf(Op.getNode())) {  in combineMulToPMADDWD()
47793         return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Src);  in combineMulToPMADDWD()
47797         N->isOnlyUserOf(Op.getNode())) {  in combineMulToPMADDWD()
47798       return DAG.getNode(X86ISD::VSRLI, DL, VT, Op.getOperand(0),  in combineMulToPMADDWD()
47819   return SplitOpsAndApply(DAG, Subtarget, DL, VT, {N0, N1}, PMADDWDBuilder);  in combineMulToPMADDWD()
47827   EVT VT = N->getValueType(0);  in combineMulToPMULDQ()  local
47830   if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 ||  in combineMulToPMULDQ()
47831       VT.getVectorNumElements() < 2 ||  in combineMulToPMULDQ()
47832       !isPowerOf2_32(VT.getVectorNumElements()))  in combineMulToPMULDQ()
47835   SDValue N0 = N->getOperand(0);  in combineMulToPMULDQ()
47836   SDValue N1 = N->getOperand(1);  in combineMulToPMULDQ()
47838   // MULDQ returns the 64-bit result of the signed multiplication of the lower  in combineMulToPMULDQ()
47839   // 32-bits. We can lower with this if the sign bits stretch that far.  in combineMulToPMULDQ()
47846     return SplitOpsAndApply(DAG, Subtarget, DL, VT, {N0, N1}, PMULDQBuilder,  in combineMulToPMULDQ()
47857     return SplitOpsAndApply(DAG, Subtarget, DL, VT, {N0, N1}, PMULUDQBuilder,  in combineMulToPMULDQ()
47867   EVT VT = N->getValueType(0);  in combineMul()  local
47876   if (DCI.isBeforeLegalize() && VT.isVector())  in combineMul()
47891   if (VT != MVT::i64 && VT != MVT::i32 &&  in combineMul()
47892       (!VT.isVector() || !VT.isSimple() || !VT.isInteger()))  in combineMul()
47896       N->getOperand(1), /*AllowUndefs*/ true, /*AllowTrunc*/ false);  in combineMul()
47899     if (VT.isVector())  in combineMul()
47900       if (auto *RawC = getTargetConstantFromNode(N->getOperand(1)))  in combineMul()
47901         if (auto *SplatC = RawC->getSplatValue())  in combineMul()
47903             C = &(SplatCI->getValue());  in combineMul()
47905     if (!C || C->getBitWidth() != VT.getScalarSizeInBits())  in combineMul()
47908     C = &(CNode->getAPIntValue());  in combineMul()
47911   if (isPowerOf2_64(C->getZExtValue()))  in combineMul()
47914   int64_t SignMulAmt = C->getSExtValue();  in combineMul()
47916   uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;  in combineMul()
47919   if (VT == MVT::i64 || VT == MVT::i32) {  in combineMul()
47921       NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),  in combineMul()
47922                            DAG.getConstant(AbsMulAmt, DL, VT));  in combineMul()
47924         NewMul = DAG.getNegative(NewMul, DL, VT);  in combineMul()
47947       if (isPowerOf2_64(MulAmt2) && !(SignMulAmt >= 0 && N->hasOneUse() &&  in combineMul()
47948                                       N->use_begin()->getOpcode() == ISD::ADD))  in combineMul()
47956         NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),  in combineMul()
47959         NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),  in combineMul()
47960                              DAG.getConstant(MulAmt1, DL, VT));  in combineMul()
47963         NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,  in combineMul()
47966         NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,  in combineMul()
47967                              DAG.getConstant(MulAmt2, DL, VT));  in combineMul()
47971         NewMul = DAG.getNegative(NewMul, DL, VT);  in combineMul()
47973       NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);  in combineMul()
47976     EVT ShiftVT = VT.isVector() ? VT : MVT::i8;  in combineMul()
47977     assert(C->getZExtValue() != 0 &&  in combineMul()
47978            C->getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&  in combineMul()
47981     if (isPowerOf2_64(AbsMulAmt - 1)) {  in combineMul()
47984           ISD::ADD, DL, VT, N->getOperand(0),  in combineMul()
47985           DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),  in combineMul()
47986                       DAG.getConstant(Log2_64(AbsMulAmt - 1), DL, ShiftVT)));  in combineMul()
47988         NewMul = DAG.getNegative(NewMul, DL, VT);  in combineMul()
47990       // (mul x, 2^N - 1) => (sub (shl x, N), x)  in combineMul()
47992           DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),  in combineMul()
47996         NewMul = DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), NewMul);  in combineMul()
47998         NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0));  in combineMul()
47999     } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2) &&  in combineMul()
48000                (!VT.isVector() || Subtarget.fastImmVectorShift())) {  in combineMul()
48003           DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),  in combineMul()
48004                       DAG.getConstant(Log2_64(AbsMulAmt - 2), DL, ShiftVT));  in combineMul()
48006           ISD::ADD, DL, VT, NewMul,  in combineMul()
48007           DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), N->getOperand(0)));  in combineMul()
48009                (!VT.isVector() || Subtarget.fastImmVectorShift())) {  in combineMul()
48010       // (mul x, 2^N - 2) => (sub (shl x, N), (add x, x))  in combineMul()
48012           DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),  in combineMul()
48015           ISD::SUB, DL, VT, NewMul,  in combineMul()
48016           DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), N->getOperand(0)));  in combineMul()
48017     } else if (SignMulAmt >= 0 && VT.isVector() &&  in combineMul()
48019       uint64_t AbsMulAmtLowBit = AbsMulAmt & (-AbsMulAmt);  in combineMul()
48022       if (isPowerOf2_64(AbsMulAmt - AbsMulAmtLowBit)) {  in combineMul()
48023         ShiftAmt1 = AbsMulAmt - AbsMulAmtLowBit;  in combineMul()
48032             DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),  in combineMul()
48035             DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),  in combineMul()
48037         NewMul = DAG.getNode(*Opc, DL, VT, Shift1, Shift2);  in combineMul()
48055   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&  in combineShiftToPMULH()
48062   SDValue ShiftOperand = N->getOperand(0);  in combineShiftToPMULH()
48067   EVT VT = N->getValueType(0);  in combineShiftToPMULH()  local
48068   if (!VT.isVector() || VT.getVectorElementType().getSizeInBits() < 32)  in combineShiftToPMULH()
48073   if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), ShiftAmt) ||  in combineShiftToPMULH()
48097   ExtOpc = N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;  in combineShiftToPMULH()
48098   return DAG.getNode(ExtOpc, DL, VT, Mulh);  in combineShiftToPMULH()
48104   SDValue N0 = N->getOperand(0);  in combineShiftLeft()
48105   SDValue N1 = N->getOperand(1);  in combineShiftLeft()
48107   EVT VT = N0.getValueType();  in combineShiftLeft()  local
48108   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in combineShiftLeft()
48112   // with out-of-bounds clamping.  in combineShiftLeft()
48114       supportedVectorVarShift(VT, Subtarget, ISD::SHL)) {  in combineShiftLeft()
48118     // fold shl(select(icmp_ult(amt,BW),x,0),amt) -> avx2 psllv(x,amt)  in combineShiftLeft()
48122       return DAG.getNode(X86ISD::VSHLV, DL, VT, N00, N1);  in combineShiftLeft()
48124     // fold shl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psllv(x,amt)  in combineShiftLeft()
48128       return DAG.getNode(X86ISD::VSHLV, DL, VT, N01, N1);  in combineShiftLeft()
48132   // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))  in combineShiftLeft()
48134   if (VT.isInteger() && !VT.isVector() &&  in combineShiftLeft()
48139     Mask <<= N1C->getAPIntValue();  in combineShiftLeft()
48141     // We can handle cases concerning bit-widening nodes containing setcc_c if  in combineShiftLeft()
48147     //   zext(setcc_c)                 -> i32 0x0000FFFF  in combineShiftLeft()
48148     //   c1                            -> i32 0x0000FFFF  in combineShiftLeft()
48149     //   c2                            -> i32 0x00000001  in combineShiftLeft()
48150     //   (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE  in combineShiftLeft()
48151     //   (and setcc_c, (c1 << c2))     -> i32 0x0000FFFE  in combineShiftLeft()
48163       return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT));  in combineShiftLeft()
48172   SDValue N0 = N->getOperand(0);  in combineShiftRightArithmetic()
48173   SDValue N1 = N->getOperand(1);  in combineShiftRightArithmetic()
48174   EVT VT = N0.getValueType();  in combineShiftRightArithmetic()  local
48175   unsigned Size = VT.getSizeInBits();  in combineShiftRightArithmetic()
48181   // fold sra(x,umin(amt,bw-1)) -> avx2 psrav(x,amt)  in combineShiftRightArithmetic()
48182   if (supportedVectorVarShift(VT, Subtarget, ISD::SRA)) {  in combineShiftRightArithmetic()
48185                             m_SpecificInt(VT.getScalarSizeInBits() - 1))))  in combineShiftRightArithmetic()
48186       return DAG.getNode(X86ISD::VSRAV, DL, VT, N0, ShrAmtVal);  in combineShiftRightArithmetic()
48190   // into (SHL (sext_in_reg X), ShlConst - SraConst)  in combineShiftRightArithmetic()
48192   //   or (SRA (sext_in_reg X), SraConst - ShlConst)  in combineShiftRightArithmetic()
48194   // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows  in combineShiftRightArithmetic()
48203   if (VT.isVector() || N1.getOpcode() != ISD::Constant ||  in combineShiftRightArithmetic()
48210   APInt ShlConst = N01->getAsAPIntVal();  in combineShiftRightArithmetic()
48211   APInt SraConst = N1->getAsAPIntVal();  in combineShiftRightArithmetic()
48221     // Only deal with (Size - ShlConst) being equal to 8, 16 or 32.  in combineShiftRightArithmetic()
48222     if (ShiftSize >= Size || ShlConst != Size - ShiftSize)  in combineShiftRightArithmetic()
48225         DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));  in combineShiftRightArithmetic()
48229       return DAG.getNode(ISD::SHL, DL, VT, NN,  in combineShiftRightArithmetic()
48230                          DAG.getConstant(ShlConst - SraConst, DL, CVT));  in combineShiftRightArithmetic()
48231     return DAG.getNode(ISD::SRA, DL, VT, NN,  in combineShiftRightArithmetic()
48232                        DAG.getConstant(SraConst - ShlConst, DL, CVT));  in combineShiftRightArithmetic()
48241   SDValue N0 = N->getOperand(0);  in combineShiftRightLogical()
48242   SDValue N1 = N->getOperand(1);  in combineShiftRightLogical()
48243   EVT VT = N0.getValueType();  in combineShiftRightLogical()  local
48244   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in combineShiftRightLogical()
48251   // with out-of-bounds clamping.  in combineShiftRightLogical()
48253       supportedVectorVarShift(VT, Subtarget, ISD::SRL)) {  in combineShiftRightLogical()
48257     // fold srl(select(icmp_ult(amt,BW),x,0),amt) -> avx2 psrlv(x,amt)  in combineShiftRightLogical()
48261       return DAG.getNode(X86ISD::VSRLV, DL, VT, N00, N1);  in combineShiftRightLogical()
48263     // fold srl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psrlv(x,amt)  in combineShiftRightLogical()
48267       return DAG.getNode(X86ISD::VSRLV, DL, VT, N01, N1);  in combineShiftRightLogical()
48277   // TODO: This is a generic DAG combine that became an x86-only combine to  in combineShiftRightLogical()
48278   // avoid shortcomings in other folds such as bswap, bit-test ('bt'), and  in combineShiftRightLogical()
48279   // and-not ('andn').  in combineShiftRightLogical()
48288   // If we can shrink the constant mask below 8-bits or 32-bits, then this  in combineShiftRightLogical()
48290   // from improved known-bits analysis or instruction selection.  in combineShiftRightLogical()
48291   APInt MaskVal = AndC->getAPIntValue();  in combineShiftRightLogical()
48300   APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());  in combineShiftRightLogical()
48305     // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC)  in combineShiftRightLogical()
48306     SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT);  in combineShiftRightLogical()
48307     SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);  in combineShiftRightLogical()
48308     return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask);  in combineShiftRightLogical()
48315   unsigned Opcode = N->getOpcode();  in combineHorizOpWithShuffle()
48319   EVT VT = N->getValueType(0);  in combineHorizOpWithShuffle()  local
48320   SDValue N0 = N->getOperand(0);  in combineHorizOpWithShuffle()
48321   SDValue N1 = N->getOperand(1);  in combineHorizOpWithShuffle()
48325       N->isOnlyUserOf(N0.getNode()) ? peekThroughOneUseBitcasts(N0) : N0;  in combineHorizOpWithShuffle()
48327       N->isOnlyUserOf(N1.getNode()) ? peekThroughOneUseBitcasts(N1) : N1;  in combineHorizOpWithShuffle()
48334   if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) {  in combineHorizOpWithShuffle()
48342         // shuffle to a v4X64 width - we can probably relax this in the future.  in combineHorizOpWithShuffle()
48347           MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;  in combineHorizOpWithShuffle()
48351           SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);  in combineHorizOpWithShuffle()
48354           return DAG.getBitcast(VT, Res);  in combineHorizOpWithShuffle()
48360   // Attempt to fold HOP(SHUFFLE(X,Y),SHUFFLE(Z,W)) -> SHUFFLE(HOP()).  in combineHorizOpWithShuffle()
48361   if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) {  in combineHorizOpWithShuffle()
48385       int PostShuffle[4] = {-1, -1, -1, -1};  in combineHorizOpWithShuffle()
48408         MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;  in combineHorizOpWithShuffle()
48409         SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);  in combineHorizOpWithShuffle()
48412         return DAG.getBitcast(VT, Res);  in combineHorizOpWithShuffle()
48417   // Attempt to fold HOP(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(HOP(X,Y)).  in combineHorizOpWithShuffle()
48418   if (VT.is256BitVector() && Subtarget.hasInt256()) {  in combineHorizOpWithShuffle()
48444         MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;  in combineHorizOpWithShuffle()
48445         SDValue Res = DAG.getNode(Opcode, DL, VT, DAG.getBitcast(SrcVT, Op00),  in combineHorizOpWithShuffle()
48449         return DAG.getBitcast(VT, Res);  in combineHorizOpWithShuffle()
48460   unsigned Opcode = N->getOpcode();  in combineVectorPack()
48464   EVT VT = N->getValueType(0);  in combineVectorPack()  local
48465   SDValue N0 = N->getOperand(0);  in combineVectorPack()
48466   SDValue N1 = N->getOperand(1);  in combineVectorPack()
48467   unsigned NumDstElts = VT.getVectorNumElements();  in combineVectorPack()
48468   unsigned DstBitsPerElt = VT.getScalarSizeInBits();  in combineVectorPack()
48479   if ((N0.isUndef() || N->isOnlyUserOf(N0.getNode())) &&  in combineVectorPack()
48480       (N1.isUndef() || N->isOnlyUserOf(N1.getNode())) &&  in combineVectorPack()
48487     unsigned NumLanes = VT.getSizeInBits() / 128;  in combineVectorPack()
48527     return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));  in combineVectorPack()
48530   // Try to fold PACK(SHUFFLE(),SHUFFLE()) -> SHUFFLE(PACK()).  in combineVectorPack()
48534   // Try to fold PACKSS(NOT(X),NOT(Y)) -> NOT(PACKSS(X,Y)).  in combineVectorPack()
48545           DAG.getNode(X86ISD::PACKSS, DL, VT, DAG.getBitcast(SrcVT, Not0),  in combineVectorPack()
48547       return DAG.getNOT(DL, Pack, VT);  in combineVectorPack()
48554       N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&  in combineVectorPack()
48560         return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));  in combineVectorPack()
48566       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat);  in combineVectorPack()
48570   // Try to fold PACK(EXTEND(X),EXTEND(Y)) -> CONCAT(X,Y) subvectors.  in combineVectorPack()
48571   if (VT.is128BitVector()) {  in combineVectorPack()
48588       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1);  in combineVectorPack()
48596       return getEXTEND_VECTOR_INREG(ExtOpc, SDLoc(N), VT, N0.getOperand(0),  in combineVectorPack()
48611   assert((X86ISD::HADD == N->getOpcode() || X86ISD::FHADD == N->getOpcode() ||  in combineVectorHADDSUB()
48612           X86ISD::HSUB == N->getOpcode() || X86ISD::FHSUB == N->getOpcode()) &&  in combineVectorHADDSUB()
48616     MVT VT = N->getSimpleValueType(0);  in combineVectorHADDSUB()  local
48617     SDValue LHS = N->getOperand(0);  in combineVectorHADDSUB()
48618     SDValue RHS = N->getOperand(1);  in combineVectorHADDSUB()
48620     // HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y)).  in combineVectorHADDSUB()
48621     if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&  in combineVectorHADDSUB()
48624         N->isOnlyUserOf(LHS.getNode()) && N->isOnlyUserOf(RHS.getNode())) {  in combineVectorHADDSUB()
48635         MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);  in combineVectorHADDSUB()
48643         return DAG.getNode(N->getOpcode(), DL, VT, DAG.getBitcast(VT, NewLHS),  in combineVectorHADDSUB()
48644                            DAG.getBitcast(VT, NewRHS));  in combineVectorHADDSUB()
48649   // Try to fold HOP(SHUFFLE(),SHUFFLE()) -> SHUFFLE(HOP()).  in combineVectorHADDSUB()
48659   assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() ||  in combineVectorShiftVar()
48660           X86ISD::VSRL == N->getOpcode()) &&  in combineVectorShiftVar()
48662   EVT VT = N->getValueType(0);  in combineVectorShiftVar()  local
48663   SDValue N0 = N->getOperand(0);  in combineVectorShiftVar()
48664   SDValue N1 = N->getOperand(1);  in combineVectorShiftVar()
48666   // Shift zero -> zero.  in combineVectorShiftVar()
48668     return DAG.getConstant(0, SDLoc(N), VT);  in combineVectorShiftVar()
48676     unsigned X86Opc = getTargetVShiftUniformOpcode(N->getOpcode(), false);  in combineVectorShiftVar()
48677     return getTargetVShiftByConstNode(X86Opc, SDLoc(N), VT.getSimpleVT(), N0,  in combineVectorShiftVar()
48682   APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());  in combineVectorShiftVar()
48692   unsigned Opcode = N->getOpcode();  in combineVectorShiftImm()
48697   EVT VT = N->getValueType(0);  in combineVectorShiftImm()  local
48698   SDValue N0 = N->getOperand(0);  in combineVectorShiftImm()
48699   SDValue N1 = N->getOperand(1);  in combineVectorShiftImm()
48700   unsigned NumBitsPerElt = VT.getScalarSizeInBits();  in combineVectorShiftImm()
48701   assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&  in combineVectorShiftImm()
48705   // (shift undef, X) -> 0  in combineVectorShiftImm()
48707     return DAG.getConstant(0, SDLoc(N), VT);  in combineVectorShiftImm()
48711   unsigned ShiftVal = N->getConstantOperandVal(1);  in combineVectorShiftImm()
48714       return DAG.getConstant(0, SDLoc(N), VT);  in combineVectorShiftImm()
48715     ShiftVal = NumBitsPerElt - 1;  in combineVectorShiftImm()
48718   // (shift X, 0) -> X  in combineVectorShiftImm()
48722   // (shift 0, C) -> 0  in combineVectorShiftImm()
48726     return DAG.getConstant(0, SDLoc(N), VT);  in combineVectorShiftImm()
48728   // (VSRAI -1, C) -> -1  in combineVectorShiftImm()
48732     return DAG.getConstant(-1, SDLoc(N), VT);  in combineVectorShiftImm()
48740         return DAG.getConstant(0, SDLoc(N), VT);  in combineVectorShiftImm()
48741       NewShiftVal = NumBitsPerElt - 1;  in combineVectorShiftImm()
48743     return DAG.getNode(Opcode, SDLoc(N), VT, N0.getOperand(0),  in combineVectorShiftImm()
48747   // (shift (shift X, C2), C1) -> (shift X, (C1 + C2))  in combineVectorShiftImm()
48751   // (shl (add X, X), C) -> (shl X, (C + 1))  in combineVectorShiftImm()
48765   // psrad(pshufd(psllq(X,63),1,1,3,3),31) ->  in combineVectorShiftImm()
48770       N0->hasOneUse()) {  in combineVectorShiftImm()
48777       Src = DAG.getBitcast(VT, Src);  in combineVectorShiftImm()
48778       Src = DAG.getNode(X86ISD::PSHUFD, DL, VT, Src,  in combineVectorShiftImm()
48780       Src = DAG.getNode(X86ISD::VSHLI, DL, VT, Src, N1);  in combineVectorShiftImm()
48781       Src = DAG.getNode(X86ISD::VSRAI, DL, VT, Src, N1);  in combineVectorShiftImm()
48793     assert(EltBits.size() == VT.getVectorNumElements() &&  in combineVectorShiftImm()
48811     return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));  in combineVectorShiftImm()
48815   if (N->isOnlyUserOf(N0.getNode())) {  in combineVectorShiftImm()
48819     // Fold (shift (logic X, C2), C1) -> (logic (shift X, C1), (shift C2, C1))  in combineVectorShiftImm()
48823         BC->isOnlyUserOf(BC.getOperand(1).getNode()) &&  in combineVectorShiftImm()
48827         SDValue LHS = DAG.getNode(Opcode, DL, VT,  in combineVectorShiftImm()
48828                                   DAG.getBitcast(VT, BC.getOperand(0)), N1);  in combineVectorShiftImm()
48829         return DAG.getNode(BC.getOpcode(), DL, VT, LHS, RHS);  in combineVectorShiftImm()
48845   EVT VT = N->getValueType(0);  in combineVectorInsert()  local
48846   unsigned Opcode = N->getOpcode();  in combineVectorInsert()
48847   assert(((Opcode == X86ISD::PINSRB && VT == MVT::v16i8) ||  in combineVectorInsert()
48848           (Opcode == X86ISD::PINSRW && VT == MVT::v8i16) ||  in combineVectorInsert()
48852   SDValue Vec = N->getOperand(0);  in combineVectorInsert()
48853   SDValue Scl = N->getOperand(1);  in combineVectorInsert()
48854   SDValue Idx = N->getOperand(2);  in combineVectorInsert()
48856   // Fold insert_vector_elt(undef, elt, 0) --> scalar_to_vector(elt).  in combineVectorInsert()
48858     return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Scl);  in combineVectorInsert()
48861     unsigned NumBitsPerElt = VT.getScalarSizeInBits();  in combineVectorInsert()
48869   if (VT.isSimple() && DCI.isAfterLegalizeDAG()) {  in combineVectorInsert()
48880 /// OR -> CMPNEQSS.
48889     SDValue N0 = N->getOperand(0);  in combineCompareEqual()
48890     SDValue N1 = N->getOperand(1);  in combineCompareEqual()
48899     SDValue CMP00 = CMP0->getOperand(0);  in combineCompareEqual()
48900     SDValue CMP01 = CMP0->getOperand(1);  in combineCompareEqual()
48901     EVT     VT    = CMP00.getValueType();  in combineCompareEqual()  local
48903     if (VT == MVT::f32 || VT == MVT::f64 ||  in combineCompareEqual()
48904         (VT == MVT::f16 && Subtarget.hasFP16())) {  in combineCompareEqual()
48907       for (const SDNode *U : N->uses()) {  in combineCompareEqual()
48911         switch (U->getOpcode()) {  in combineCompareEqual()
48951                                       N->getSimpleValueType(0));  in combineCompareEqual()
48961             // On a 32-bit target, we cannot bitcast the 64-bit float to a  in combineCompareEqual()
48962             // 64-bit integer, since that's not a legal type. Since  in combineCompareEqual()
48987 /// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
48989   assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP");  in combineAndNotIntoANDNP()
48991   MVT VT = N->getSimpleValueType(0);  in combineAndNotIntoANDNP()  local
48992   if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())  in combineAndNotIntoANDNP()
48996   SDValue N0 = N->getOperand(0);  in combineAndNotIntoANDNP()
48997   SDValue N1 = N->getOperand(1);  in combineAndNotIntoANDNP()
49008   X = DAG.getBitcast(VT, X);  in combineAndNotIntoANDNP()
49009   Y = DAG.getBitcast(VT, Y);  in combineAndNotIntoANDNP()
49010   return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y);  in combineAndNotIntoANDNP()
49015 ///            (insert_vector_elt undef, (xor X, -1), Z), undef), Y
49016 ///   ->
49021   assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP");  in combineAndShuffleNot()
49023   EVT VT = N->getValueType(0);  in combineAndShuffleNot()  local
49026   if (!((VT.is128BitVector() && Subtarget.hasSSE2()) ||  in combineAndShuffleNot()
49027         ((VT.is256BitVector() || VT.is512BitVector()) && Subtarget.hasAVX())))  in combineAndShuffleNot()
49032     // TODO: SVN->hasOneUse() is a strong condition. It can be relaxed if all  in combineAndShuffleNot()
49033     // end-users are ISD::AND including cases  in combineAndShuffleNot()
49035     if (!SVN || !SVN->hasOneUse() || !SVN->isSplat() ||  in combineAndShuffleNot()
49036         !SVN->getOperand(1).isUndef()) {  in combineAndShuffleNot()
49039     SDValue IVEN = SVN->getOperand(0);  in combineAndShuffleNot()
49044         IVEN.getConstantOperandAPInt(2) != SVN->getSplatIndex())  in combineAndShuffleNot()
49052       return DAG.getVectorShuffle(SVN->getValueType(0), SDLoc(SVN), NotIVEN,  in combineAndShuffleNot()
49053                                   SVN->getOperand(1), SVN->getMask());  in combineAndShuffleNot()
49059   SDValue N0 = N->getOperand(0);  in combineAndShuffleNot()
49060   SDValue N1 = N->getOperand(1);  in combineAndShuffleNot()
49072   X = DAG.getBitcast(VT, X);  in combineAndShuffleNot()
49073   Y = DAG.getBitcast(VT, Y);  in combineAndShuffleNot()
49078   if (!Subtarget.useAVX512Regs() && VT.is512BitVector() &&  in combineAndShuffleNot()
49079       TLI.isTypeLegal(VT.getHalfNumVectorElementsVT(*DAG.getContext()))) {  in combineAndShuffleNot()
49087     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, {LoV, HiV});  in combineAndShuffleNot()
49090   if (TLI.isTypeLegal(VT))  in combineAndShuffleNot()
49091     return DAG.getNode(X86ISD::ANDNP, DL, VT, {X, Y});  in combineAndShuffleNot()
49096 // Try to widen AND, OR and XOR nodes to VT in order to remove casts around
49100 // Given a target type \p VT, we generate
49102 // given x, y and z are of type \p VT. We can do so, if operands are either
49103 // truncates from VT types, the second operand is a vector of constants or can
49105 static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,  in PromoteMaskArithmetic()  argument
49118   if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))  in PromoteMaskArithmetic()
49121   if (SDValue NN0 = PromoteMaskArithmetic(N0, DL, VT, DAG, Depth + 1))  in PromoteMaskArithmetic()
49129     if (N0.getOperand(0).getValueType() != VT)  in PromoteMaskArithmetic()
49135   if (SDValue NN1 = PromoteMaskArithmetic(N1, DL, VT, DAG, Depth + 1))  in PromoteMaskArithmetic()
49140                     N1.getOperand(0).getValueType() == VT;  in PromoteMaskArithmetic()
49144                  DAG.FoldConstantArithmetic(ISD::ZERO_EXTEND, DL, VT, {N1}))  in PromoteMaskArithmetic()
49150   return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);  in PromoteMaskArithmetic()
49154 // register. In most cases we actually compare or select YMM-sized registers
49157 // Even with AVX-512 this is still useful for removing casts around logical
49162   EVT VT = N.getValueType();  in PromoteMaskArithmetic()  local
49163   assert(VT.isVector() && "Expected vector type");  in PromoteMaskArithmetic()
49172   SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, 0);  in PromoteMaskArithmetic()
49182     return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,  in PromoteMaskArithmetic()
49190   // clang-format off  in convertIntLogicToFPLogicOpcode()
49195   // clang-format on  in convertIntLogicToFPLogicOpcode()
49200 /// If both input operands of a logic op are being cast from floating-point
49201 /// types or FP compares, try to convert this into a floating-point logic node
49206   EVT VT = N->getValueType(0);  in convertIntLogicToFPLogic()  local
49207   SDValue N0 = N->getOperand(0);  in convertIntLogicToFPLogic()
49208   SDValue N1 = N->getOperand(1);  in convertIntLogicToFPLogic()
49227     unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode());  in convertIntLogicToFPLogic()
49229     return DAG.getBitcast(VT, FPLogic);  in convertIntLogicToFPLogic()
49232   if (VT != MVT::i1 || N0.getOpcode() != ISD::SETCC || !N0.hasOneUse() ||  in convertIntLogicToFPLogic()
49236   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0.getOperand(2))->get();  in convertIntLogicToFPLogic()
49237   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();  in convertIntLogicToFPLogic()
49247   // logic (setcc N00, N01), (setcc N10, N11) -->  in convertIntLogicToFPLogic()
49261   SDValue Logic = DAG.getNode(N->getOpcode(), DL, BoolVecVT, Setcc0, Setcc1);  in convertIntLogicToFPLogic()
49262   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Logic, ZeroIndex);  in convertIntLogicToFPLogic()
49265 // Attempt to fold BITOP(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(BITOP(X,Y))
49266 // to reduce XMM->GPR traffic.
49268   unsigned Opc = N->getOpcode();  in combineBitOpWithMOVMSK()
49272   SDValue N0 = N->getOperand(0);  in combineBitOpWithMOVMSK()
49273   SDValue N1 = N->getOperand(1);  in combineBitOpWithMOVMSK()
49299 // Attempt to fold BITOP(SHIFT(X,Z),SHIFT(Y,Z)) -> SHIFT(BITOP(X,Y),Z).
49303   unsigned Opc = N->getOpcode();  in combineBitOpWithShift()
49307   SDValue N0 = N->getOperand(0);  in combineBitOpWithShift()
49308   SDValue N1 = N->getOperand(1);  in combineBitOpWithShift()
49309   EVT VT = N->getValueType(0);  in combineBitOpWithShift()  local
49321   if (BCOpc != BC1->getOpcode() || BCVT != BC1.getValueType())  in combineBitOpWithShift()
49335     return DAG.getBitcast(VT, Shift);  in combineBitOpWithShift()
49343 // BITOP(PACKSS(X,Z),PACKSS(Y,W)) --> PACKSS(BITOP(X,Y),BITOP(Z,W)).
49346   unsigned Opc = N->getOpcode();  in combineBitOpWithPACK()
49350   SDValue N0 = N->getOperand(0);  in combineBitOpWithPACK()
49351   SDValue N1 = N->getOperand(1);  in combineBitOpWithPACK()
49352   EVT VT = N->getValueType(0);  in combineBitOpWithPACK()  local
49382   return DAG.getBitcast(VT, DAG.getNode(X86ISD::PACKSS, DL, DstVT, LHS, RHS));  in combineBitOpWithPACK()
49385 /// If this is a zero/all-bits result that is bitwise-anded with a low bits
49387 /// with a shift-right to eliminate loading the vector constant mask value.
49390   SDValue Op0 = peekThroughBitcasts(N->getOperand(0));  in combineAndMaskToShift()
49391   SDValue Op1 = peekThroughBitcasts(N->getOperand(1));  in combineAndMaskToShift()
49392   EVT VT = Op0.getValueType();  in combineAndMaskToShift()  local
49393   if (VT != Op1.getValueType() || !VT.isSimple() || !VT.isInteger())  in combineAndMaskToShift()
49397   // shift and "andn". This saves a materialization of a -1 vector constant.  in combineAndMaskToShift()
49400   // and (pcmpgt X, -1), Y --> pandn (vsrai X, BitWidth - 1), Y  in combineAndMaskToShift()
49405   if (N->getValueType(0) == VT &&  in combineAndMaskToShift()
49406       supportedVectorShiftWithImm(VT, Subtarget, ISD::SRA)) {  in combineAndMaskToShift()
49420           getTargetVShiftByConstNode(X86ISD::VSRAI, DL, VT.getSimpleVT(), X,  in combineAndMaskToShift()
49421                                      VT.getScalarSizeInBits() - 1, DAG);  in combineAndMaskToShift()
49422       return DAG.getNode(X86ISD::ANDNP, DL, VT, Sra, Y);  in combineAndMaskToShift()
49434   if (!supportedVectorShiftWithImm(VT, Subtarget, ISD::SRL))  in combineAndMaskToShift()
49437   unsigned EltBitWidth = VT.getScalarSizeInBits();  in combineAndMaskToShift()
49443   SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);  in combineAndMaskToShift()
49444   SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT, Op0, ShAmt);  in combineAndMaskToShift()
49445   return DAG.getBitcast(N->getValueType(0), Shift);  in combineAndMaskToShift()
49451   if (Ld->isIndexed())  in getIndexFromUnindexedLoad()
49454   SDValue Base = Ld->getBasePtr();  in getIndexFromUnindexedLoad()
49468 static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {  in hasBZHI()  argument
49470          (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));  in hasBZHI()
49474 // 'and-load' sequence.
49478 //   int array[SIZE] = {0x0, 0x1, 0x3, 0x7, 0xF ..., 2^(SIZE-1) - 1}
49481 // It's equivalent to performing bzhi (zero high bits) on the input, with the
49485   MVT VT = Node->getSimpleValueType(0);  in combineAndLoadToBZHI()  local
49489   if (!hasBZHI(Subtarget, VT))  in combineAndLoadToBZHI()
49494     SDValue N = Node->getOperand(i);  in combineAndLoadToBZHI()
49501     const Value *MemOp = Ld->getMemOperand()->getValue();  in combineAndLoadToBZHI()
49507       if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) {  in combineAndLoadToBZHI()
49508         if (GV->isConstant() && GV->hasDefinitiveInitializer()) {  in combineAndLoadToBZHI()
49510           Constant *Init = GV->getInitializer();  in combineAndLoadToBZHI()
49511           Type *Ty = Init->getType();  in combineAndLoadToBZHI()
49513               !Ty->getArrayElementType()->isIntegerTy() ||  in combineAndLoadToBZHI()
49514               Ty->getArrayElementType()->getScalarSizeInBits() !=  in combineAndLoadToBZHI()
49515                   VT.getSizeInBits() ||  in combineAndLoadToBZHI()
49516               Ty->getArrayNumElements() >  in combineAndLoadToBZHI()
49517                   Ty->getArrayElementType()->getScalarSizeInBits())  in combineAndLoadToBZHI()
49521           uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();  in combineAndLoadToBZHI()
49524             auto *Elem = cast<ConstantInt>(Init->getAggregateElement(j));  in combineAndLoadToBZHI()
49525             if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) {  in combineAndLoadToBZHI()
49533           // Do the transformation (For 32-bit type):  in combineAndLoadToBZHI()
49534           // -> (and (load arr[idx]), inp)  in combineAndLoadToBZHI()
49535           // <- (and (srl 0xFFFFFFFF, (sub 32, idx)))  in combineAndLoadToBZHI()
49537           SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0);  in combineAndLoadToBZHI()
49538           SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32);  in combineAndLoadToBZHI()
49549           SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);  in combineAndLoadToBZHI()
49550           SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub);  in combineAndLoadToBZHI()
49552           return DAG.getNode(ISD::AND, dl, VT, Inp, LShr);  in combineAndLoadToBZHI()
49562 // where the setcc will freely 0 upper bits of k-register. We can replace the
49567   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");  in combineScalarAndWithMaskSetcc()
49569   EVT VT = N->getValueType(0);  in combineScalarAndWithMaskSetcc()  local
49573   auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(1));  in combineScalarAndWithMaskSetcc()
49578   assert(!VT.isVector() && "Expected scalar VT!");  in combineScalarAndWithMaskSetcc()
49580   SDValue Src = N->getOperand(0);  in combineScalarAndWithMaskSetcc()
49612       !C1->getAPIntValue().isMask(SubVecVT.getVectorNumElements()))  in combineScalarAndWithMaskSetcc()
49634   // and cast it back to VT.  in combineScalarAndWithMaskSetcc()
49642   return DAG.getZExtOrTrunc(DAG.getBitcast(IntVT, Concat), dl, VT);  in combineScalarAndWithMaskSetcc()
49651   // Only do this re-ordering if op has one use.  in getBMIMatchingOp()
49666                            Op.getOperand(1 - OpIdx));  in getBMIMatchingOp()
49682     // BLSR: (and x, (add x, -1))  in getBMIMatchingOp()
49683     // BLSMSK: (xor x, (add x, -1))  in getBMIMatchingOp()
49692   EVT VT = N->getValueType(0);  in combineBMILogicOp()  local
49694   if (!Subtarget.hasBMI() || !VT.isScalarInteger() ||  in combineBMILogicOp()
49695       (VT != MVT::i32 && VT != MVT::i64))  in combineBMILogicOp()
49698   assert(N->getOpcode() == ISD::AND || N->getOpcode() == ISD::XOR);  in combineBMILogicOp()
49703             getBMIMatchingOp(N->getOpcode(), DAG, N->getOperand(OpIdx),  in combineBMILogicOp()
49704                              N->getOperand(1 - OpIdx), 0))  in combineBMILogicOp()
49715   //  ->  in combineX86SubCmpForFlags()
49721   //  ->  in combineX86SubCmpForFlags()
49727   SDValue SetCC = N->getOperand(0);  in combineX86SubCmpForFlags()
49733   SDNode *BrCond = *Flag->uses().begin();  in combineX86SubCmpForFlags()
49734   if (BrCond->getOpcode() != X86ISD::BRCOND)  in combineX86SubCmpForFlags()
49737   if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) !=  in combineX86SubCmpForFlags()
49744   if (N->getOpcode() == X86ISD::SUB)  in combineX86SubCmpForFlags()
49745     X = DAG.getMergeValues({N->getOperand(0), X}, SDLoc(N));  in combineX86SubCmpForFlags()
49749       static_cast<X86::CondCode>(CCN->getAsAPIntVal().getSExtValue());  in combineX86SubCmpForFlags()
49755   SmallVector<SDValue> Ops(BrCond->op_values());  in combineX86SubCmpForFlags()
49756   if (isNullConstant(N->getOperand(1)))  in combineX86SubCmpForFlags()
49758   else if (isOneConstant(N->getOperand(1)))  in combineX86SubCmpForFlags()
49764       DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);  in combineX86SubCmpForFlags()
49765   // Avoid self-assign error b/c CC1 can be `e/ne`.  in combineX86SubCmpForFlags()
49775   //  ->  in combineAndOrForCcmpCtest()
49779   //  ->  in combineAndOrForCcmpCtest()
49787   SDValue SetCC0 = N->getOperand(0);  in combineAndOrForCcmpCtest()
49788   SDValue SetCC1 = N->getOperand(1);  in combineAndOrForCcmpCtest()
49793   auto GetCombineToOpc = [&](SDValue V) -> unsigned {  in combineAndOrForCcmpCtest()
49819   bool IsOR = N->getOpcode() == ISD::OR;  in combineAndOrForCcmpCtest()
49830       static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());  in combineAndOrForCcmpCtest()
49854   SDValue N0 = N->getOperand(0);  in combineAnd()
49855   SDValue N1 = N->getOperand(1);  in combineAnd()
49856   EVT VT = N->getValueType(0);  in combineAnd()  local
49861   if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {  in combineAnd()
49868   // Use a 32-bit and+zext if upper bits known zero.  in combineAnd()
49869   if (VT == MVT::i64 && Subtarget.is64Bit() && !isa<ConstantSDNode>(N1)) {  in combineAnd()
49880   // Match all-of bool scalar reductions into a bitcast/movmsk + cmp.  in combineAnd()
49882   if (VT == MVT::i1) {  in combineAnd()
49903   //    `(-x << C0) & C1`  in combineAnd()
49905   //    `(x * (Pow2_Ceil(C1) - (1 << C0))) & C1`  in combineAnd()
49918       const APInt &MulC = N01C->getAPIntValue();  in combineAnd()
49919       const APInt &AndC = N1C->getAPIntValue();  in combineAnd()
49920       APInt MulCLowBit = MulC & (-MulC);  in combineAnd()
49923         SDValue Neg = DAG.getNegative(N0.getOperand(0), dl, VT);  in combineAnd()
49925         assert(MulCLowBitLog != -1 &&  in combineAnd()
49927         SDValue Shift = DAG.getNode(ISD::SHL, dl, VT, Neg,  in combineAnd()
49928                                     DAG.getConstant(MulCLowBitLog, dl, VT));  in combineAnd()
49929         return DAG.getNode(ISD::AND, dl, VT, Shift, N1);  in combineAnd()
49970   // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))  in combineAnd()
49971   // iff c2 is all/no bits mask - i.e. a select-with-zero mask.  in combineAnd()
49973   if (VT.isVector() && getTargetConstantFromNode(N1)) {  in combineAnd()
49977         DAG.ComputeNumSignBits(N1) == VT.getScalarSizeInBits() &&  in combineAnd()
49978         N0->hasOneUse() && N0.getOperand(1)->hasOneUse()) {  in combineAnd()
49979       SDValue MaskMul = DAG.getNode(ISD::AND, dl, VT, N0.getOperand(1), N1);  in combineAnd()
49980       return DAG.getNode(Opc0, dl, VT, N0.getOperand(0), MaskMul);  in combineAnd()
49984   // Fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y), COND_B) iff Y is not a constant  in combineAnd()
49986   if (isOneConstant(N1) && N0->hasOneUse()) {  in combineAnd()
49990            Src.getOperand(0)->hasOneUse())  in combineAnd()
50011       if (!(Subtarget.hasBMI2() && !ContainsNOT && VT.getSizeInBits() >= 32))  in combineAnd()
50013           return DAG.getZExtOrTrunc(getSETCC(X86CC, BT, dl, DAG), dl, VT);  in combineAnd()
50017   if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {  in combineAnd()
50028       int NumElts = VT.getVectorNumElements();  in combineAnd()
50029       int EltSizeInBits = VT.getScalarSizeInBits();  in combineAnd()
50038             // We can't assume an undef src element gives an undef dst - the  in combineAnd()
50059       if (N->getOpcode() != ISD::DELETED_NODE)  in combineAnd()
50067       return DAG.getNode(ISD::AND, dl, VT, NewN0 ? NewN0 : N0,  in combineAnd()
50072   if ((VT.getScalarSizeInBits() % 8) == 0 &&  in combineAnd()
50074       isa<ConstantSDNode>(N0.getOperand(1)) && N0->hasOneUse()) {  in combineAnd()
50082     if (VT == SrcVecVT.getScalarType() && N0->isOnlyUserOf(SrcVec.getNode()) &&  in combineAnd()
50105         return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Shuffle,  in combineAnd()
50116 // Canonicalize OR(AND(X,C),AND(Y,~C)) -> OR(AND(X,C),ANDNP(C,Y))
50119   assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");  in canonicalizeBitSelect()
50121   MVT VT = N->getSimpleValueType(0);  in canonicalizeBitSelect()  local
50122   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in canonicalizeBitSelect()
50123   if (!VT.isVector() || (EltSizeInBits % 8) != 0)  in canonicalizeBitSelect()
50126   SDValue N0 = peekThroughBitcasts(N->getOperand(0));  in canonicalizeBitSelect()
50127   SDValue N1 = peekThroughBitcasts(N->getOperand(1));  in canonicalizeBitSelect()
50133   if (!(Subtarget.hasXOP() || useVPTERNLOG(Subtarget, VT) ||  in canonicalizeBitSelect()
50150     // TODO - add UNDEF elts support.  in canonicalizeBitSelect()
50159   if (useVPTERNLOG(Subtarget, VT)) {  in canonicalizeBitSelect()
50160     // Emit a VPTERNLOG node directly - 0xCA is the imm code for A?B:C.  in canonicalizeBitSelect()
50161     // VPTERNLOG is only available as vXi32/64-bit types.  in canonicalizeBitSelect()
50164         MVT::getVectorVT(OpSVT, VT.getSizeInBits() / OpSVT.getSizeInBits());  in canonicalizeBitSelect()
50171     return DAG.getBitcast(VT, Res);  in canonicalizeBitSelect()
50174   SDValue X = N->getOperand(0);  in canonicalizeBitSelect()
50176       DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)),  in canonicalizeBitSelect()
50177                   DAG.getBitcast(VT, N1.getOperand(0)));  in canonicalizeBitSelect()
50178   return DAG.getNode(ISD::OR, DL, VT, X, Y);  in canonicalizeBitSelect()
50183   if (N->getOpcode() != ISD::OR)  in matchLogicBlend()
50186   SDValue N0 = N->getOperand(0);  in matchLogicBlend()
50187   SDValue N1 = N->getOperand(1);  in matchLogicBlend()
50208   // TODO: Attempt to match against AND(XOR(-1,M),Y) as well, waiting for  in matchLogicBlend()
50223   assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");  in combineLogicBlendIntoPBLENDV()
50225   EVT VT = N->getValueType(0);  in combineLogicBlendIntoPBLENDV()  local
50226   if (!((VT.is128BitVector() && Subtarget.hasSSE2()) ||  in combineLogicBlendIntoPBLENDV()
50227         (VT.is256BitVector() && Subtarget.hasInt256())))  in combineLogicBlendIntoPBLENDV()
50249   if (SDValue Res = combineLogicBlendIntoConditionalNegate(VT, Mask, X, Y, DL,  in combineLogicBlendIntoPBLENDV()
50261   MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8;  in combineLogicBlendIntoPBLENDV()
50267   return DAG.getBitcast(VT, Mask);  in combineLogicBlendIntoPBLENDV()
50278   EVT VT = Cmp.getOperand(0).getValueType();  in lowerX86CmpEqZeroToCtlzSrl()  local
50279   unsigned Log2b = Log2_32(VT.getSizeInBits());  in lowerX86CmpEqZeroToCtlzSrl()
50281   SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0));  in lowerX86CmpEqZeroToCtlzSrl()
50282   // The result of the shift is true or false, and on X86, the 32-bit  in lowerX86CmpEqZeroToCtlzSrl()
50300   if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast())  in combineOrCmpEqZeroToCtlzSrl()
50304     return (N->getOpcode() == ISD::OR && N->hasOneUse());  in combineOrCmpEqZeroToCtlzSrl()
50307   // Check the zero extend is extending to 32-bit or more. The code generated by  in combineOrCmpEqZeroToCtlzSrl()
50308   // srl(ctlz) for 16-bit or less variants of the pattern would require extra  in combineOrCmpEqZeroToCtlzSrl()
50310   if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) ||  in combineOrCmpEqZeroToCtlzSrl()
50311       !isORCandidate(N->getOperand(0)))  in combineOrCmpEqZeroToCtlzSrl()
50316     return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() &&  in combineOrCmpEqZeroToCtlzSrl()
50317            X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E &&  in combineOrCmpEqZeroToCtlzSrl()
50318            N->getOperand(1).getOpcode() == X86ISD::CMP &&  in combineOrCmpEqZeroToCtlzSrl()
50319            isNullConstant(N->getOperand(1).getOperand(1)) &&  in combineOrCmpEqZeroToCtlzSrl()
50320            N->getOperand(1).getValueType().bitsGE(MVT::i32);  in combineOrCmpEqZeroToCtlzSrl()
50323   SDNode *OR = N->getOperand(0).getNode();  in combineOrCmpEqZeroToCtlzSrl()
50324   SDValue LHS = OR->getOperand(0);  in combineOrCmpEqZeroToCtlzSrl()
50325   SDValue RHS = OR->getOperand(1);  in combineOrCmpEqZeroToCtlzSrl()
50332     OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode();  in combineOrCmpEqZeroToCtlzSrl()
50333     LHS = OR->getOperand(0);  in combineOrCmpEqZeroToCtlzSrl()
50334     RHS = OR->getOperand(1);  in combineOrCmpEqZeroToCtlzSrl()
50358     LHS = OR->getOperand(0);  in combineOrCmpEqZeroToCtlzSrl()
50359     RHS = OR->getOperand(1);  in combineOrCmpEqZeroToCtlzSrl()
50361     if (RHS->getOpcode() == ISD::OR)  in combineOrCmpEqZeroToCtlzSrl()
50369   return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);  in combineOrCmpEqZeroToCtlzSrl()
50375   if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())  in foldMaskedMergeImpl()
50377   SDValue NotOp = And0_L->getOperand(0);  in foldMaskedMergeImpl()
50384   // --> ((And0_R ^ And1_R) & NotOp) ^ And1_R  in foldMaskedMergeImpl()
50385   EVT VT = And1_L->getValueType(0);  in foldMaskedMergeImpl()  local
50386   SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);  in foldMaskedMergeImpl()
50387   SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);  in foldMaskedMergeImpl()
50388   SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);  in foldMaskedMergeImpl()
50389   SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);  in foldMaskedMergeImpl()
50396 /// "and-not" operation. This function is intended to be called from a
50399   // Note that masked-merge variants using XOR or ADD expressions are  in foldMaskedMerge()
50401   assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");  in foldMaskedMerge()
50402   SDValue N0 = Node->getOperand(0);  in foldMaskedMerge()
50403   if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())  in foldMaskedMerge()
50405   SDValue N1 = Node->getOperand(1);  in foldMaskedMerge()
50406   if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())  in foldMaskedMerge()
50410   SDValue N00 = N0->getOperand(0);  in foldMaskedMerge()
50411   SDValue N01 = N0->getOperand(1);  in foldMaskedMerge()
50412   SDValue N10 = N1->getOperand(0);  in foldMaskedMerge()
50413   SDValue N11 = N1->getOperand(1);  in foldMaskedMerge()
50429 static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,  in combineAddOrSubToADCOrSBB()  argument
50433   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))  in combineAddOrSubToADCOrSBB()
50436   // Look through a one-use zext.  in combineAddOrSubToADCOrSBB()
50453   // If X is -1 or 0, then we have an opportunity to avoid constants required in  in combineAddOrSubToADCOrSBB()
50457     if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||  in combineAddOrSubToADCOrSBB()
50458         (IsSub && CC == X86::COND_B && ConstantX->isZero())) {  in combineAddOrSubToADCOrSBB()
50459       // This is a complicated way to get -1 or 0 from the carry flag:  in combineAddOrSubToADCOrSBB()
50460       // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax  in combineAddOrSubToADCOrSBB()
50461       //  0 - SETB  -->  0 -  (CF) --> CF ? -1 : 0 --> SBB %eax, %eax  in combineAddOrSubToADCOrSBB()
50462       return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,  in combineAddOrSubToADCOrSBB()
50467     if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) ||  in combineAddOrSubToADCOrSBB()
50468         (IsSub && CC == X86::COND_A && ConstantX->isZero())) {  in combineAddOrSubToADCOrSBB()
50473         // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB  in combineAddOrSubToADCOrSBB()
50474         //  0 - SETA  (SUB A, B) -->  0 - SETB  (SUB B, A) --> SUB + SBB  in combineAddOrSubToADCOrSBB()
50476             X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),  in combineAddOrSubToADCOrSBB()
50479         return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,  in combineAddOrSubToADCOrSBB()
50487     // X + SETB Z --> adc X, 0  in combineAddOrSubToADCOrSBB()
50488     // X - SETB Z --> sbb X, 0  in combineAddOrSubToADCOrSBB()
50490                        DAG.getVTList(VT, MVT::i32), X,  in combineAddOrSubToADCOrSBB()
50491                        DAG.getConstant(0, DL, VT), EFLAGS);  in combineAddOrSubToADCOrSBB()
50504     if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&  in combineAddOrSubToADCOrSBB()
50508           DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),  in combineAddOrSubToADCOrSBB()
50512                          DAG.getVTList(VT, MVT::i32), X,  in combineAddOrSubToADCOrSBB()
50513                          DAG.getConstant(0, DL, VT), NewEFLAGS);  in combineAddOrSubToADCOrSBB()
50518     // X + SETAE --> sbb X, -1  in combineAddOrSubToADCOrSBB()
50519     // X - SETAE --> adc X, -1  in combineAddOrSubToADCOrSBB()
50521                        DAG.getVTList(VT, MVT::i32), X,  in combineAddOrSubToADCOrSBB()
50522                        DAG.getConstant(-1, DL, VT), EFLAGS);  in combineAddOrSubToADCOrSBB()
50526     // X + SETBE --> sbb X, -1  in combineAddOrSubToADCOrSBB()
50527     // X - SETBE --> adc X, -1  in combineAddOrSubToADCOrSBB()
50534     if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&  in combineAddOrSubToADCOrSBB()
50538           DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),  in combineAddOrSubToADCOrSBB()
50542                          DAG.getVTList(VT, MVT::i32), X,  in combineAddOrSubToADCOrSBB()
50543                          DAG.getConstant(-1, DL, VT), NewEFLAGS);  in combineAddOrSubToADCOrSBB()
50558   // If X is -1 or 0, then we have an opportunity to avoid constants required in  in combineAddOrSubToADCOrSBB()
50561     // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with  in combineAddOrSubToADCOrSBB()
50563     //  0 - (Z != 0) --> sbb %eax, %eax, (neg Z)  in combineAddOrSubToADCOrSBB()
50564     // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)  in combineAddOrSubToADCOrSBB()
50565     if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) ||  in combineAddOrSubToADCOrSBB()
50566         (!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) {  in combineAddOrSubToADCOrSBB()
50570       return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,  in combineAddOrSubToADCOrSBB()
50575     // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb'  in combineAddOrSubToADCOrSBB()
50577     //  0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)  in combineAddOrSubToADCOrSBB()
50578     // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)  in combineAddOrSubToADCOrSBB()
50579     if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) ||  in combineAddOrSubToADCOrSBB()
50580         (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) {  in combineAddOrSubToADCOrSBB()
50584       return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,  in combineAddOrSubToADCOrSBB()
50596   SDVTList VTs = DAG.getVTList(VT, MVT::i32);  in combineAddOrSubToADCOrSBB()
50598   // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)  in combineAddOrSubToADCOrSBB()
50599   // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)  in combineAddOrSubToADCOrSBB()
50602                        DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1));  in combineAddOrSubToADCOrSBB()
50604   // X - (Z == 0) --> sub X, (zext(sete  Z, 0)) --> sbb X, 0, (cmp Z, 1)  in combineAddOrSubToADCOrSBB()
50605   // X + (Z == 0) --> add X, (zext(sete  Z, 0)) --> adc X, 0, (cmp Z, 1)  in combineAddOrSubToADCOrSBB()
50607                      DAG.getConstant(0, DL, VT), Cmp1.getValue(1));  in combineAddOrSubToADCOrSBB()
50615   bool IsSub = N->getOpcode() == ISD::SUB;  in combineAddOrSubToADCOrSBB()
50616   SDValue X = N->getOperand(0);  in combineAddOrSubToADCOrSBB()
50617   SDValue Y = N->getOperand(1);  in combineAddOrSubToADCOrSBB()
50618   EVT VT = N->getValueType(0);  in combineAddOrSubToADCOrSBB()  local
50620   if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, X, Y, DAG))  in combineAddOrSubToADCOrSBB()
50624   if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, Y, X, DAG)) {  in combineAddOrSubToADCOrSBB()
50626       ADCOrSBB = DAG.getNegative(ADCOrSBB, DL, VT);  in combineAddOrSubToADCOrSBB()
50635   assert((N->getOpcode() == ISD::XOR || N->getOpcode() == ISD::OR) &&  in combineOrXorWithSETCC()
50647       bool IsSub = N->getOpcode() == ISD::XOR;  in combineOrXorWithSETCC()
50648       bool N1COdd = N1C->getZExtValue() & 1;  in combineOrXorWithSETCC()
50651         EVT VT = N->getValueType(0);  in combineOrXorWithSETCC()  local
50652         if (SDValue R = combineAddOrSubToADCOrSBB(IsSub, DL, VT, N1, N0, DAG))  in combineOrXorWithSETCC()
50658   // not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2)  in combineOrXorWithSETCC()
50659   if (N->getOpcode() == ISD::XOR && N0.getOpcode() == X86ISD::PCMPEQ &&  in combineOrXorWithSETCC()
50663     MVT VT = N->getSimpleValueType(0);  in combineOrXorWithSETCC()  local
50667                                       VT.getScalarSizeInBits(), UndefElts,  in combineOrXorWithSETCC()
50674         return DAG.getNode(X86ISD::PCMPEQ, SDLoc(N), VT, N0.getOperand(0),  in combineOrXorWithSETCC()
50685   SDValue N0 = N->getOperand(0);  in combineOr()
50686   SDValue N1 = N->getOperand(1);  in combineOr()
50687   EVT VT = N->getValueType(0);  in combineOr()  local
50692   if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {  in combineOr()
50699   // Match any-of bool scalar reductions into a bitcast/movmsk + cmp.  in combineOr()
50701   if (VT == MVT::i1) {  in combineOr()
50749   // (0 - SetCC) | C -> (zext (not SetCC)) * (C + 1) - 1 if we can get a LEA out of it.  in combineOr()
50750   if ((VT == MVT::i32 || VT == MVT::i64) &&  in combineOr()
50759         uint64_t Val = CN->getZExtValue();  in combineOr()
50765           SDValue R = DAG.getZExtOrTrunc(NotCond, dl, VT);  in combineOr()
50766           R = DAG.getNode(ISD::MUL, dl, VT, R, DAG.getConstant(Val + 1, dl, VT));  in combineOr()
50767           R = DAG.getNode(ISD::SUB, dl, VT, R, DAG.getConstant(1, dl, VT));  in combineOr()
50774   // Combine OR(X,KSHIFTL(Y,Elts/2)) -> CONCAT_VECTORS(X,Y) == KUNPCK(X,Y).  in combineOr()
50775   // Combine OR(KSHIFTL(X,Elts/2),Y) -> CONCAT_VECTORS(Y,X) == KUNPCK(Y,X).  in combineOr()
50776   // iff the upper elements of the non-shifted arg are zero.  in combineOr()
50779     unsigned NumElts = VT.getVectorNumElements();  in combineOr()
50786           ISD::CONCAT_VECTORS, dl, VT,  in combineOr()
50794           ISD::CONCAT_VECTORS, dl, VT,  in combineOr()
50800   if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {  in combineOr()
50811       int NumElts = VT.getVectorNumElements();  in combineOr()
50812       int EltSizeInBits = VT.getScalarSizeInBits();  in combineOr()
50824       if (N->getOpcode() != ISD::DELETED_NODE)  in combineOr()
50831   if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)  in combineOr()
50842 ///   XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
50844 ///   SETGT(X, -1)
50847   EVT ResultType = N->getValueType(0);  in foldXorTruncShiftIntoCmp()
50851   SDValue N0 = N->getOperand(0);  in foldXorTruncShiftIntoCmp()
50852   SDValue N1 = N->getOperand(1);  in foldXorTruncShiftIntoCmp()
50874       Shift.getConstantOperandAPInt(1) != (ShiftTy.getSizeInBits() - 1))  in foldXorTruncShiftIntoCmp()
50877   // Create a greater-than comparison against -1.  in foldXorTruncShiftIntoCmp()
50887                               DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT);  in foldXorTruncShiftIntoCmp()
50894 ///   xor (sra X, elt_size(X)-1), -1
50896 ///   pcmpgt X, -1
50902   EVT VT = N->getValueType(0);  in foldVectorXorShiftIntoCmp()  local
50903   if (!VT.isSimple())  in foldVectorXorShiftIntoCmp()
50906   switch (VT.getSimpleVT().SimpleTy) {  in foldVectorXorShiftIntoCmp()
50907   // clang-format off  in foldVectorXorShiftIntoCmp()
50917     // clang-format on  in foldVectorXorShiftIntoCmp()
50922   SDValue Shift = N->getOperand(0);  in foldVectorXorShiftIntoCmp()
50923   SDValue Ones = N->getOperand(1);  in foldVectorXorShiftIntoCmp()
50932       ShiftAmt->getAPIntValue() != (Shift.getScalarValueSizeInBits() - 1))  in foldVectorXorShiftIntoCmp()
50935   // Create a greater-than comparison against -1. We don't use the more obvious  in foldVectorXorShiftIntoCmp()
50936   // greater-than-or-equal-to-zero because SSE/AVX don't have that instruction.  in foldVectorXorShiftIntoCmp()
50937   return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT);  in foldVectorXorShiftIntoCmp()
50956 static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG,  in detectUSatPattern()  argument
50960   // Saturation with truncation. We truncate from InVT to VT.  in detectUSatPattern()
50961   assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&  in detectUSatPattern()
50965   auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue {  in detectUSatPattern()
50976     if (C2.isMask(VT.getScalarSizeInBits()))  in detectUSatPattern()
50981       if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()))  in detectUSatPattern()
50986       if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) &&  in detectUSatPattern()
51003 static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) {  in detectSSatPattern()  argument
51004   unsigned NumDstBits = VT.getScalarSizeInBits();  in detectSSatPattern()
51009                         const APInt &Limit) -> SDValue {  in detectSSatPattern()
51037 static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,  in combineTruncateWithSat()  argument
51040   if (!Subtarget.hasSSE2() || !VT.isVector())  in combineTruncateWithSat()
51043   EVT SVT = VT.getVectorElementType();  in combineTruncateWithSat()
51047   // If we're clamping a signed 32-bit vector to 0-255 and the 32-bit vector is  in combineTruncateWithSat()
51048   // split across two registers. We can use a packusdw+perm to clamp to 0-65535  in combineTruncateWithSat()
51050   // clip to 0-255.  in combineTruncateWithSat()
51052       InVT == MVT::v16i32 && VT == MVT::v16i8) {  in combineTruncateWithSat()
51053     if (SDValue USatVal = detectSSatPattern(In, VT, true)) {  in combineTruncateWithSat()
51058       return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid);  in combineTruncateWithSat()
51064   // For 256-bit or smaller vectors, we require VLX.  in combineTruncateWithSat()
51066   // If the result type is 256-bits or larger and we have disable 512-bit  in combineTruncateWithSat()
51072                       !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);  in combineTruncateWithSat()
51074   if (!PreferAVX512 && VT.getVectorNumElements() > 1 &&  in combineTruncateWithSat()
51075       isPowerOf2_32(VT.getVectorNumElements()) &&  in combineTruncateWithSat()
51078     if (SDValue USatVal = detectSSatPattern(In, VT, true)) {  in combineTruncateWithSat()
51079       // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW).  in combineTruncateWithSat()
51081         EVT MidVT = VT.changeVectorElementType(MVT::i16);  in combineTruncateWithSat()
51085         SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,  in combineTruncateWithSat()
51090         return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG,  in combineTruncateWithSat()
51093     if (SDValue SSatVal = detectSSatPattern(In, VT))  in combineTruncateWithSat()
51094       return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG,  in combineTruncateWithSat()
51104     if (SDValue SSatVal = detectSSatPattern(In, VT)) {  in combineTruncateWithSat()
51107     } else if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL)) {  in combineTruncateWithSat()
51112       unsigned ResElts = VT.getVectorNumElements();  in combineTruncateWithSat()
51129       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,  in combineTruncateWithSat()
51142   EVT RegVT = Ld->getValueType(0);  in combineConstantPoolLoads()
51143   SDValue Ptr = Ld->getBasePtr();  in combineConstantPoolLoads()
51144   SDValue Chain = Ld->getChain();  in combineConstantPoolLoads()
51145   ISD::LoadExtType Ext = Ld->getExtensionType();  in combineConstantPoolLoads()
51147   if (Ext != ISD::NON_EXTLOAD || !Subtarget.hasAVX() || !Ld->isSimple())  in combineConstantPoolLoads()
51170   for (SDNode *User : Chain->uses()) {  in combineConstantPoolLoads()
51173         (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||  in combineConstantPoolLoads()
51174          User->getOpcode() == X86ISD::VBROADCAST_LOAD ||  in combineConstantPoolLoads()
51176         UserLd->getChain() == Chain && !User->hasAnyUseOfValue(1) &&  in combineConstantPoolLoads()
51177         User->getValueSizeInBits(0).getFixedValue() >  in combineConstantPoolLoads()
51179       EVT UserVT = User->getValueType(0);  in combineConstantPoolLoads()
51180       SDValue UserPtr = UserLd->getBasePtr();  in combineConstantPoolLoads()
51186         unsigned LdSize = LdC->getType()->getPrimitiveSizeInBits();  in combineConstantPoolLoads()
51187         unsigned UserSize = UserC->getType()->getPrimitiveSizeInBits();  in combineConstantPoolLoads()
51216   EVT RegVT = Ld->getValueType(0);  in combineLoad()
51217   EVT MemVT = Ld->getMemoryVT();  in combineLoad()
51221   // For chips with slow 32-byte unaligned loads, break the 32-byte operation  in combineLoad()
51222   // into two 16-byte operations. Also split non-temporal aligned loads on  in combineLoad()
51223   // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.  in combineLoad()
51224   ISD::LoadExtType Ext = Ld->getExtensionType();  in combineLoad()
51228       ((Ld->isNonTemporal() && !Subtarget.hasInt256() &&  in combineLoad()
51229         Ld->getAlign() >= Align(16)) ||  in combineLoad()
51231                                *Ld->getMemOperand(), &Fast) &&  in combineLoad()
51238     SDValue Ptr1 = Ld->getBasePtr();  in combineLoad()
51244         DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(),  in combineLoad()
51245                     Ld->getOriginalAlign(),  in combineLoad()
51246                     Ld->getMemOperand()->getFlags());  in combineLoad()
51247     SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr2,  in combineLoad()
51248                                 Ld->getPointerInfo().getWithOffset(HalfOffset),  in combineLoad()
51249                                 Ld->getOriginalAlign(),  in combineLoad()
51250                                 Ld->getMemOperand()->getFlags());  in combineLoad()
51258   // Bool vector load - attempt to cast to an integer, as we have good  in combineLoad()
51265       SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),  in combineLoad()
51266                                     Ld->getPointerInfo(),  in combineLoad()
51267                                     Ld->getOriginalAlign(),  in combineLoad()
51268                                     Ld->getMemOperand()->getFlags());  in combineLoad()
51276   if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() &&  in combineLoad()
51278     SDValue Ptr = Ld->getBasePtr();  in combineLoad()
51279     SDValue Chain = Ld->getChain();  in combineLoad()
51280     for (SDNode *User : Chain->uses()) {  in combineLoad()
51283           User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&  in combineLoad()
51284           UserLd->getChain() == Chain && UserLd->getBasePtr() == Ptr &&  in combineLoad()
51285           UserLd->getMemoryVT().getSizeInBits() == MemVT.getSizeInBits() &&  in combineLoad()
51286           !User->hasAnyUseOfValue(1) &&  in combineLoad()
51287           User->getValueSizeInBits(0).getFixedValue() >  in combineLoad()
51301   unsigned AddrSpace = Ld->getAddressSpace();  in combineLoad()
51305     if (PtrVT != Ld->getBasePtr().getSimpleValueType()) {  in combineLoad()
51307           DAG.getAddrSpaceCast(dl, PtrVT, Ld->getBasePtr(), AddrSpace, 0);  in combineLoad()
51308       return DAG.getExtLoad(Ext, dl, RegVT, Ld->getChain(), Cast,  in combineLoad()
51309                             Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),  in combineLoad()
51310                             Ld->getMemOperand()->getFlags());  in combineLoad()
51319 /// Otherwise, return -1.
51329   if (!BV || BV->getValueType(0).getVectorElementType() != MVT::i1)  in getOneTrueElt()
51330     return -1;  in getOneTrueElt()
51332   int TrueIndex = -1;  in getOneTrueElt()
51333   unsigned NumElts = BV->getValueType(0).getVectorNumElements();  in getOneTrueElt()
51335     const SDValue &Op = BV->getOperand(i);  in getOneTrueElt()
51340       return -1;  in getOneTrueElt()
51341     if (ConstNode->getAPIntValue().countr_one() >= 1) {  in getOneTrueElt()
51344         return -1;  in getOneTrueElt()
51359   int TrueMaskElt = getOneTrueElt(MaskedOp->getMask());  in getParamsForOneTrueMaskedElt()
51365   EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType();  in getParamsForOneTrueMaskedElt()
51367   Addr = MaskedOp->getBasePtr();  in getParamsForOneTrueMaskedElt()
51375   Alignment = commonAlignment(MaskedOp->getOriginalAlign(),  in getParamsForOneTrueMaskedElt()
51380 /// If exactly one element of the mask is set for a non-extending masked load,
51382 /// Note: It is expected that the degenerate cases of an all-zeros or all-ones
51388   assert(ML->isUnindexed() && "Unexpected indexed masked load!");  in reduceMaskedLoadToScalarLoad()
51389   // TODO: This is not x86-specific, so it could be lifted to DAGCombiner.  in reduceMaskedLoadToScalarLoad()
51402   EVT VT = ML->getValueType(0);  in reduceMaskedLoadToScalarLoad()  local
51403   EVT EltVT = VT.getVectorElementType();  in reduceMaskedLoadToScalarLoad()
51405   EVT CastVT = VT;  in reduceMaskedLoadToScalarLoad()
51408     CastVT = VT.changeVectorElementType(EltVT);  in reduceMaskedLoadToScalarLoad()
51412       DAG.getLoad(EltVT, DL, ML->getChain(), Addr,  in reduceMaskedLoadToScalarLoad()
51413                   ML->getPointerInfo().getWithOffset(Offset),  in reduceMaskedLoadToScalarLoad()
51414                   Alignment, ML->getMemOperand()->getFlags());  in reduceMaskedLoadToScalarLoad()
51416   SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru());  in reduceMaskedLoadToScalarLoad()
51421   Insert = DAG.getBitcast(VT, Insert);  in reduceMaskedLoadToScalarLoad()
51428   assert(ML->isUnindexed() && "Unexpected indexed masked load!");  in combineMaskedLoadConstantMask()
51429   if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode()))  in combineMaskedLoadConstantMask()
51433   EVT VT = ML->getValueType(0);  in combineMaskedLoadConstantMask()  local
51438   unsigned NumElts = VT.getVectorNumElements();  in combineMaskedLoadConstantMask()
51439   BuildVectorSDNode *MaskBV = cast<BuildVectorSDNode>(ML->getMask());  in combineMaskedLoadConstantMask()
51440   bool LoadFirstElt = !isNullConstant(MaskBV->getOperand(0));  in combineMaskedLoadConstantMask()
51441   bool LoadLastElt = !isNullConstant(MaskBV->getOperand(NumElts - 1));  in combineMaskedLoadConstantMask()
51443     SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),  in combineMaskedLoadConstantMask()
51444                                 ML->getMemOperand());  in combineMaskedLoadConstantMask()
51445     SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,  in combineMaskedLoadConstantMask()
51446                                   ML->getPassThru());  in combineMaskedLoadConstantMask()
51452   // (for example, vblendvps -> vblendps).  in combineMaskedLoadConstantMask()
51454   // Don't try this if the pass-through operand is already undefined. That would  in combineMaskedLoadConstantMask()
51456   if (ML->getPassThru().isUndef())  in combineMaskedLoadConstantMask()
51459   if (ISD::isBuildVectorAllZeros(ML->getPassThru().getNode()))  in combineMaskedLoadConstantMask()
51462   // The new masked load has an undef pass-through operand. The select uses the  in combineMaskedLoadConstantMask()
51463   // original pass-through operand.  in combineMaskedLoadConstantMask()
51465       VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(),  in combineMaskedLoadConstantMask()
51466       DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(),  in combineMaskedLoadConstantMask()
51467       ML->getAddressingMode(), ML->getExtensionType());  in combineMaskedLoadConstantMask()
51468   SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,  in combineMaskedLoadConstantMask()
51469                                 ML->getPassThru());  in combineMaskedLoadConstantMask()
51480   if (Mld->isExpandingLoad())  in combineMaskedLoad()
51483   if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {  in combineMaskedLoad()
51494   // If the mask value has been legalized to a non-boolean vector, try to  in combineMaskedLoad()
51496   SDValue Mask = Mld->getMask();  in combineMaskedLoad()
51498     EVT VT = Mld->getValueType(0);  in combineMaskedLoad()  local
51500     APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));  in combineMaskedLoad()
51502       if (N->getOpcode() != ISD::DELETED_NODE)  in combineMaskedLoad()
51509           VT, SDLoc(N), Mld->getChain(), Mld->getBasePtr(), Mld->getOffset(),  in combineMaskedLoad()
51510           NewMask, Mld->getPassThru(), Mld->getMemoryVT(), Mld->getMemOperand(),  in combineMaskedLoad()
51511           Mld->getAddressingMode(), Mld->getExtensionType());  in combineMaskedLoad()
51517 /// If exactly one element of the mask is set for a non-truncating masked store,
51519 /// Note: It is expected that the degenerate cases of an all-zeros or all-ones
51524   // TODO: This is not x86-specific, so it could be lifted to DAGCombiner.  in reduceMaskedStoreToScalarStore()
51536   SDValue Value = MS->getValue();  in reduceMaskedStoreToScalarStore()
51537   EVT VT = Value.getValueType();  in reduceMaskedStoreToScalarStore()  local
51538   EVT EltVT = VT.getVectorElementType();  in reduceMaskedStoreToScalarStore()
51541     EVT CastVT = VT.changeVectorElementType(EltVT);  in reduceMaskedStoreToScalarStore()
51548   return DAG.getStore(MS->getChain(), DL, Extract, Addr,  in reduceMaskedStoreToScalarStore()
51549                       MS->getPointerInfo().getWithOffset(Offset),  in reduceMaskedStoreToScalarStore()
51550                       Alignment, MS->getMemOperand()->getFlags());  in reduceMaskedStoreToScalarStore()
51557   if (Mst->isCompressingStore())  in combineMaskedStore()
51560   EVT VT = Mst->getValue().getValueType();  in combineMaskedStore()  local
51564   if (Mst->isTruncatingStore())  in combineMaskedStore()
51570   // If the mask value has been legalized to a non-boolean vector, try to  in combineMaskedStore()
51572   SDValue Mask = Mst->getMask();  in combineMaskedStore()
51574     APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));  in combineMaskedStore()
51576       if (N->getOpcode() != ISD::DELETED_NODE)  in combineMaskedStore()
51582       return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Mst->getValue(),  in combineMaskedStore()
51583                                 Mst->getBasePtr(), Mst->getOffset(), NewMask,  in combineMaskedStore()
51584                                 Mst->getMemoryVT(), Mst->getMemOperand(),  in combineMaskedStore()
51585                                 Mst->getAddressingMode());  in combineMaskedStore()
51588   SDValue Value = Mst->getValue();  in combineMaskedStore()
51589   if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&  in combineMaskedStore()
51591                             Mst->getMemoryVT())) {  in combineMaskedStore()
51592     return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),  in combineMaskedStore()
51593                               Mst->getBasePtr(), Mst->getOffset(), Mask,  in combineMaskedStore()
51594                               Mst->getMemoryVT(), Mst->getMemOperand(),  in combineMaskedStore()
51595                               Mst->getAddressingMode(), true);  in combineMaskedStore()
51605   EVT StVT = St->getMemoryVT();  in combineStore()
51607   SDValue StoredVal = St->getValue();  in combineStore()
51608   EVT VT = StoredVal.getValueType();  in combineStore()  local
51612   if (!Subtarget.hasAVX512() && VT == StVT && VT.isVector() &&  in combineStore()
51613       VT.getVectorElementType() == MVT::i1) {  in combineStore()
51615     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());  in combineStore()
51618     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),  in combineStore()
51619                         St->getPointerInfo(), St->getOriginalAlign(),  in combineStore()
51620                         St->getMemOperand()->getFlags());  in combineStore()
51624   // This will avoid a copy to k-register.  in combineStore()
51625   if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&  in combineStore()
51631     return DAG.getStore(St->getChain(), dl, Val,  in combineStore()
51632                         St->getBasePtr(), St->getPointerInfo(),  in combineStore()
51633                         St->getOriginalAlign(),  in combineStore()
51634                         St->getMemOperand()->getFlags());  in combineStore()
51638   if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&  in combineStore()
51640     unsigned NumConcats = 8 / VT.getVectorNumElements();  in combineStore()
51642     SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));  in combineStore()
51645     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),  in combineStore()
51646                         St->getPointerInfo(), St->getOriginalAlign(),  in combineStore()
51647                         St->getMemOperand()->getFlags());  in combineStore()
51651   if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||  in combineStore()
51652        VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) &&  in combineStore()
51654     // If its a v64i1 store without 64-bit support, we need two stores.  in combineStore()
51655     if (!DCI.isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) {  in combineStore()
51657                                       StoredVal->ops().slice(0, 32));  in combineStore()
51660                                       StoredVal->ops().slice(32, 32));  in combineStore()
51663       SDValue Ptr0 = St->getBasePtr();  in combineStore()
51667           DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(),  in combineStore()
51668                        St->getOriginalAlign(),  in combineStore()
51669                        St->getMemOperand()->getFlags());  in combineStore()
51671           DAG.getStore(St->getChain(), dl, Hi, Ptr1,  in combineStore()
51672                        St->getPointerInfo().getWithOffset(4),  in combineStore()
51673                        St->getOriginalAlign(),  in combineStore()
51674                        St->getMemOperand()->getFlags());  in combineStore()
51679     return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),  in combineStore()
51680                         St->getPointerInfo(), St->getOriginalAlign(),  in combineStore()
51681                         St->getMemOperand()->getFlags());  in combineStore()
51684   // If we are saving a 32-byte vector and 32-byte stores are slow, such as on  in combineStore()
51685   // Sandy Bridge, perform two 16-byte stores.  in combineStore()
51687   if (VT.is256BitVector() && StVT == VT &&  in combineStore()
51688       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,  in combineStore()
51689                              *St->getMemOperand(), &Fast) &&  in combineStore()
51691     unsigned NumElems = VT.getVectorNumElements();  in combineStore()
51698   // Split under-aligned vector non-temporal stores.  in combineStore()
51699   if (St->isNonTemporal() && StVT == VT &&  in combineStore()
51700       St->getAlign().value() < VT.getStoreSize()) {  in combineStore()
51701     // ZMM/YMM nt-stores - either it can be stored as a series of shorter  in combineStore()
51703     if (VT.is256BitVector() || VT.is512BitVector()) {  in combineStore()
51704       unsigned NumElems = VT.getVectorNumElements();  in combineStore()
51710     // XMM nt-stores - scalarize this to f64 nt-stores on SSE4A, else i32/i64  in combineStore()
51712     if (VT.is128BitVector() && Subtarget.hasSSE2()) {  in combineStore()
51720   // Try to optimize v16i16->v16i8 truncating stores when BWI is not  in combineStore()
51722   if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() &&  in combineStore()
51723       St->getValue().getOpcode() == ISD::TRUNCATE &&  in combineStore()
51724       St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&  in combineStore()
51726       St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {  in combineStore()
51728                               St->getValue().getOperand(0));  in combineStore()
51729     return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),  in combineStore()
51730                              MVT::v16i8, St->getMemOperand());  in combineStore()
51734   if (!St->isTruncatingStore() &&  in combineStore()
51738       TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {  in combineStore()
51740     return EmitTruncSStore(IsSigned, St->getChain(),  in combineStore()
51741                            dl, StoredVal.getOperand(0), St->getBasePtr(),  in combineStore()
51742                            VT, St->getMemOperand(), DAG);  in combineStore()
51746   if (!St->isTruncatingStore()) {  in combineStore()
51766         if (NumTruncBits == VT.getSizeInBits() &&  in combineStore()
51768           return DAG.getTruncStore(St->getChain(), dl, Src, St->getBasePtr(),  in combineStore()
51769                                    TruncVT, St->getMemOperand());  in combineStore()
51778   if (St->isTruncatingStore() && VT.isVector()) {  in combineStore()
51779     if (TLI.isTruncStoreLegal(VT, StVT)) {  in combineStore()
51780       if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT()))  in combineStore()
51781         return EmitTruncSStore(true /* Signed saturation */, St->getChain(),  in combineStore()
51782                                dl, Val, St->getBasePtr(),  in combineStore()
51783                                St->getMemoryVT(), St->getMemOperand(), DAG);  in combineStore()
51784       if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(),  in combineStore()
51786         return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),  in combineStore()
51787                                dl, Val, St->getBasePtr(),  in combineStore()
51788                                St->getMemoryVT(), St->getMemOperand(), DAG);  in combineStore()
51795   unsigned AddrSpace = St->getAddressSpace();  in combineStore()
51799     if (PtrVT != St->getBasePtr().getSimpleValueType()) {  in combineStore()
51801           DAG.getAddrSpaceCast(dl, PtrVT, St->getBasePtr(), AddrSpace, 0);  in combineStore()
51803           St->getChain(), dl, StoredVal, Cast, St->getPointerInfo(), StVT,  in combineStore()
51804           St->getOriginalAlign(), St->getMemOperand()->getFlags(),  in combineStore()
51805           St->getAAInfo());  in combineStore()
51809   // Turn load->store of MMX types into GPR load/stores.  This avoids clobbering  in combineStore()
51814   // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.  in combineStore()
51815   if (VT.getSizeInBits() != 64)  in combineStore()
51826   if (VT == MVT::i64 && isa<LoadSDNode>(St->getValue()) &&  in combineStore()
51827       cast<LoadSDNode>(St->getValue())->isSimple() &&  in combineStore()
51828       St->getChain().hasOneUse() && St->isSimple()) {  in combineStore()
51829     auto *Ld = cast<LoadSDNode>(St->getValue());  in combineStore()
51835     if (!Ld->hasNUsesOfValue(1, 0))  in combineStore()
51841     SDValue NewLd = DAG.getLoad(MVT::f64, LdDL, Ld->getChain(),  in combineStore()
51842                                 Ld->getBasePtr(), Ld->getMemOperand());  in combineStore()
51846     return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),  in combineStore()
51847                         St->getMemOperand());  in combineStore()
51850   // This is similar to the above case, but here we handle a scalar 64-bit  in combineStore()
51851   // integer store that is extracted from a vector on a 32-bit target.  in combineStore()
51852   // If we have SSE2, then we can treat it like a floating-point double  in combineStore()
51856   if (VT == MVT::i64 &&  in combineStore()
51857       St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {  in combineStore()
51858     SDValue OldExtract = St->getOperand(1);  in combineStore()
51865     return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(),  in combineStore()
51866                         St->getPointerInfo(), St->getOriginalAlign(),  in combineStore()
51867                         St->getMemOperand()->getFlags());  in combineStore()
51878   SDValue StoredVal = N->getOperand(1);  in combineVEXTRACT_STORE()
51879   MVT VT = StoredVal.getSimpleValueType();  in combineVEXTRACT_STORE()  local
51880   EVT MemVT = St->getMemoryVT();  in combineVEXTRACT_STORE()
51883   unsigned StElts = MemVT.getSizeInBits() / VT.getScalarSizeInBits();  in combineVEXTRACT_STORE()
51884   APInt DemandedElts = APInt::getLowBitsSet(VT.getVectorNumElements(), StElts);  in combineVEXTRACT_STORE()
51888     if (N->getOpcode() != ISD::DELETED_NODE)  in combineVEXTRACT_STORE()
51905 ///   A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
51907 /// A horizontal-op B, for some already available A and B, and if so then LHS is
51925   // which is A horizontal-op B.  in isHorizontalBinOp()
51927   MVT VT = LHS.getSimpleValueType();  in isHorizontalBinOp()  local
51928   assert((VT.is128BitVector() || VT.is256BitVector()) &&  in isHorizontalBinOp()
51930   unsigned NumElts = VT.getVectorNumElements();  in isHorizontalBinOp()
51967   //   LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>  in isHorizontalBinOp()
51968   // NOTE: A default initialized SDValue represents an UNDEF of type VT.  in isHorizontalBinOp()
52024   // AVX defines horizontal add/sub to operate independently on 128-bit lanes,  in isHorizontalBinOp()
52025   // so we just repeat the inner loop if this is a 256-bit op.  in isHorizontalBinOp()
52026   unsigned Num128BitChunks = VT.getSizeInBits() / 128;  in isHorizontalBinOp()
52046       // Compute the post-shuffle mask index based on where the element  in isHorizontalBinOp()
52050                   ((Base % NumElts) & ~(NumEltsPer128BitChunk - 1));  in isHorizontalBinOp()
52052       // The  low half of the 128-bit result must choose from A.  in isHorizontalBinOp()
52053       // The high half of the 128-bit result must choose from B,  in isHorizontalBinOp()
52069   // Avoid 128-bit multi lane shuffles if pre-AVX2 and FP (integer will split).  in isHorizontalBinOp()
52070   if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() &&  in isHorizontalBinOp()
52071       isMultiLaneShuffleMask(128, VT.getScalarSizeInBits(), PostShuffleMask))  in isHorizontalBinOp()
52077     return User->getOpcode() == HOpcode && User->getValueType(0) == VT;  in isHorizontalBinOp()
52080       ForceHorizOp || (llvm::any_of(NewLHS->uses(), FoundHorizUser) &&  in isHorizontalBinOp()
52081                        llvm::any_of(NewRHS->uses(), FoundHorizUser));  in isHorizontalBinOp()
52091   LHS = DAG.getBitcast(VT, NewLHS);  in isHorizontalBinOp()
52092   RHS = DAG.getBitcast(VT, NewRHS);  in isHorizontalBinOp()
52099   EVT VT = N->getValueType(0);  in combineToHorizontalAddSub()  local
52100   unsigned Opcode = N->getOpcode();  in combineToHorizontalAddSub()
52105     return N->hasOneUse() &&  in combineToHorizontalAddSub()
52106            N->use_begin()->getOpcode() == ISD::VECTOR_SHUFFLE &&  in combineToHorizontalAddSub()
52107            (N->use_begin()->getOperand(0).getOpcode() == HorizOpcode ||  in combineToHorizontalAddSub()
52108             N->use_begin()->getOperand(1).getOpcode() == HorizOpcode);  in combineToHorizontalAddSub()
52114     if ((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||  in combineToHorizontalAddSub()
52115         (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {  in combineToHorizontalAddSub()
52116       SDValue LHS = N->getOperand(0);  in combineToHorizontalAddSub()
52117       SDValue RHS = N->getOperand(1);  in combineToHorizontalAddSub()
52121         SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);  in combineToHorizontalAddSub()
52123           HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,  in combineToHorizontalAddSub()
52124                                             DAG.getUNDEF(VT), PostShuffleMask);  in combineToHorizontalAddSub()
52131     if (Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32 ||  in combineToHorizontalAddSub()
52132                                  VT == MVT::v16i16 || VT == MVT::v8i32)) {  in combineToHorizontalAddSub()
52133       SDValue LHS = N->getOperand(0);  in combineToHorizontalAddSub()
52134       SDValue RHS = N->getOperand(1);  in combineToHorizontalAddSub()
52142         SDValue HorizBinOp = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,  in combineToHorizontalAddSub()
52145           HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,  in combineToHorizontalAddSub()
52146                                             DAG.getUNDEF(VT), PostShuffleMask);  in combineToHorizontalAddSub()
52158 //    <i32 -2147483648[float -0.000000e+00]> 0
52160 //    <(load 4 from constant-pool)> t0, t29
52171   EVT VT = N->getValueType(0);  in combineFMulcFCMulc()  local
52172   SDValue LHS = N->getOperand(0);  in combineFMulcFCMulc()
52173   SDValue RHS = N->getOperand(1);  in combineFMulcFCMulc()
52175       N->getOpcode() == X86ISD::VFCMULC ? X86ISD::VFMULC : X86ISD::VFCMULC;  in combineFMulcFCMulc()
52177     if (LHS->getOpcode() == ISD::BITCAST && RHS.hasOneUse()) {  in combineFMulcFCMulc()
52179       if (XOR->getOpcode() == ISD::XOR && XOR.hasOneUse()) {  in combineFMulcFCMulc()
52189             SDValue I2F = DAG.getBitcast(VT, LHS.getOperand(0).getOperand(0));  in combineFMulcFCMulc()
52190             SDValue FCMulC = DAG.getNode(CombineOpcode, SDLoc(N), VT, RHS, I2F);  in combineFMulcFCMulc()
52191             r = DAG.getBitcast(VT, FCMulC);  in combineFMulcFCMulc()
52228   if (N->getOpcode() != ISD::FADD || !Subtarget.hasFP16() ||  in combineFaddCFmul()
52229       !AllowContract(N->getFlags()))  in combineFaddCFmul()
52232   EVT VT = N->getValueType(0);  in combineFaddCFmul()  local
52233   if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16)  in combineFaddCFmul()
52236   SDValue LHS = N->getOperand(0);  in combineFaddCFmul()
52237   SDValue RHS = N->getOperand(1);  in combineFaddCFmul()
52242                        &HasNoSignedZero](SDValue N) -> bool {  in combineFaddCFmul()
52247     if (Op0.hasOneUse() && AllowContract(Op0->getFlags())) {  in combineFaddCFmul()
52255           ((ISD::isBuildVectorAllZeros(Op0->getOperand(2).getNode()) &&  in combineFaddCFmul()
52256             HasNoSignedZero(Op0->getFlags())) ||  in combineFaddCFmul()
52257            IsVectorAllNegativeZero(Op0->getOperand(2)))) {  in combineFaddCFmul()
52274   MVT CVT = MVT::getVectorVT(MVT::f32, VT.getVectorNumElements() / 2);  in combineFaddCFmul()
52280       DAG.getNode(NewOp, SDLoc(N), CVT, MulOp0, MulOp1, FAddOp1, N->getFlags());  in combineFaddCFmul()
52281   return DAG.getBitcast(VT, CFmul);  in combineFaddCFmul()
52284 /// Do target-specific dag combines on floating-point adds/subs.
52298   EVT VT = N->getValueType(0);  in combineLRINT_LLRINT()  local
52299   SDValue Src = N->getOperand(0);  in combineLRINT_LLRINT()
52303   if (!Subtarget.hasDQI() || !Subtarget.hasVLX() || VT != MVT::v2i64 ||  in combineLRINT_LLRINT()
52307   return DAG.getNode(X86ISD::CVTP2SI, DL, VT,  in combineLRINT_LLRINT()
52312 /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
52314 /// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
52320   assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");  in combineTruncatedArithmetic()
52321   SDValue Src = N->getOperand(0);  in combineTruncatedArithmetic()
52325   EVT VT = N->getValueType(0);  in combineTruncatedArithmetic()  local
52328   auto IsFreeTruncation = [VT](SDValue Op) {  in combineTruncatedArithmetic()
52329     unsigned TruncSizeInBits = VT.getScalarSizeInBits();  in combineTruncatedArithmetic()
52348     SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);  in combineTruncatedArithmetic()
52349     SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);  in combineTruncatedArithmetic()
52350     return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1);  in combineTruncatedArithmetic()
52359   if (!VT.isVector())  in combineTruncatedArithmetic()
52362   // In most cases its only worth pre-truncating if we're only facing the cost  in combineTruncatedArithmetic()
52367     // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its  in combineTruncatedArithmetic()
52370         TLI.isOperationLegal(SrcOpcode, VT) &&  in combineTruncatedArithmetic()
52381     if (TLI.isOperationLegal(SrcOpcode, VT) &&  in combineTruncatedArithmetic()
52398 static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,  in combinePMULH()  argument
52408   // Only handle vXi16 types that are at least 128-bits unless they will be  in combinePMULH()
52410   if (!VT.isVector() || VT.getVectorElementType() != MVT::i16)  in combinePMULH()
52427   // Count leading sign/zero bits on both inputs - if there are enough then  in combinePMULH()
52428   // truncation back to vXi16 will be cheap - either as a pack/shuffle  in combinePMULH()
52457       !(Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.is256BitVector()) &&  in combinePMULH()
52463     return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getBitcast(InVT, Res));  in combinePMULH()
52467   LHS = DAG.getNode(ISD::TRUNCATE, DL, VT, LHS);  in combinePMULH()
52468   RHS = DAG.getNode(ISD::TRUNCATE, DL, VT, RHS);  in combinePMULH()
52471   return DAG.getNode(Opc, DL, VT, LHS, RHS);  in combinePMULH()
52476 // adjacent pairs of 16-bit products, and saturates the result before
52477 // truncating to 16-bits.
52482 static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,  in detectPMADDUBSW()  argument
52485   if (!VT.isVector() || !Subtarget.hasSSSE3())  in detectPMADDUBSW()
52488   unsigned NumElems = VT.getVectorNumElements();  in detectPMADDUBSW()
52489   EVT ScalarVT = VT.getVectorElementType();  in detectPMADDUBSW()
52493   SDValue SSatVal = detectSSatPattern(In, VT);  in detectPMADDUBSW()
52570     unsigned IdxN00 = ConstN00Elt->getZExtValue();  in detectPMADDUBSW()
52571     unsigned IdxN01 = ConstN01Elt->getZExtValue();  in detectPMADDUBSW()
52572     unsigned IdxN10 = ConstN10Elt->getZExtValue();  in detectPMADDUBSW()
52573     unsigned IdxN11 = ConstN11Elt->getZExtValue();  in detectPMADDUBSW()
52620   return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },  in detectPMADDUBSW()
52626   EVT VT = N->getValueType(0);  in combineTruncate()  local
52627   SDValue Src = N->getOperand(0);  in combineTruncate()
52630   // Attempt to pre-truncate inputs to arithmetic ops instead.  in combineTruncate()
52635   if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))  in combineTruncate()
52639   if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))  in combineTruncate()
52643   if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget))  in combineTruncate()
52648   if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {  in combineTruncate()
52655   if (Src.getOpcode() == ISD::LRINT && VT.getScalarType() == MVT::i32 &&  in combineTruncate()
52657     return DAG.getNode(ISD::LRINT, DL, VT, Src.getOperand(0));  in combineTruncate()
52664   EVT VT = N->getValueType(0);  in combineVTRUNC()  local
52665   SDValue In = N->getOperand(0);  in combineVTRUNC()
52668   if (SDValue SSatVal = detectSSatPattern(In, VT))  in combineVTRUNC()
52669     return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);  in combineVTRUNC()
52670   if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL))  in combineVTRUNC()
52671     return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);  in combineVTRUNC()
52674   APInt DemandedMask(APInt::getAllOnes(VT.getScalarSizeInBits()));  in combineVTRUNC()
52683 /// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000)
52691   if (N->getOpcode() == ISD::FNEG)  in isFNEG()
52692     return N->getOperand(0);  in isFNEG()
52698   unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();  in isFNEG()
52701   EVT VT = Op->getValueType(0);  in isFNEG()  local
52704   if (VT.getScalarSizeInBits() != ScalarSize)  in isFNEG()
52711     // of this is VECTOR_SHUFFLE(-VEC1, UNDEF).  The mask can be anything here.  in isFNEG()
52715       if (NegOp0.getValueType() == VT) // FIXME: Can we do better?  in isFNEG()
52716         return DAG.getVectorShuffle(VT, SDLoc(Op), NegOp0, DAG.getUNDEF(VT),  in isFNEG()
52717                                     cast<ShuffleVectorSDNode>(Op)->getMask());  in isFNEG()
52722     // -V, INDEX).  in isFNEG()
52728       if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME  in isFNEG()
52729         return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,  in isFNEG()
52757       // Only allow bitcast from correctly-sized constant.  in isFNEG()
52773     // clang-format off  in negateFMAOpcode()
52787     // clang-format on  in negateFMAOpcode()
52793     // clang-format off  in negateFMAOpcode()
52811     // clang-format on  in negateFMAOpcode()
52818     // clang-format off  in negateFMAOpcode()
52828     // clang-format on  in negateFMAOpcode()
52835 /// Do target-specific dag combines on floating point negations.
52839   EVT OrigVT = N->getValueType(0);  in combineFneg()
52845   EVT VT = Arg.getValueType();  in combineFneg()  local
52846   EVT SVT = VT.getScalarType();  in combineFneg()
52850   if (!TLI.isTypeLegal(VT))  in combineFneg()
52854   // use of a constant by performing (-0 - A*B) instead.  in combineFneg()
52857       Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) {  in combineFneg()
52858     SDValue Zero = DAG.getConstantFP(0.0, DL, VT);  in combineFneg()
52859     SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),  in combineFneg()
52884   EVT VT = Op.getValueType();  in getNegatedExpression()  local
52885   EVT SVT = VT.getScalarType();  in getNegatedExpression()
52887   SDNodeFlags Flags = Op.getNode()->getFlags();  in getNegatedExpression()
52897     if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) ||  in getNegatedExpression()
52899         !isOperationLegal(ISD::FMA, VT))  in getNegatedExpression()
52922     // Fill in the non-negated ops with the original values.  in getNegatedExpression()
52926     return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps);  in getNegatedExpression()
52932       return DAG.getNode(Opc, SDLoc(Op), VT, NegOp0);  in getNegatedExpression()
52942   MVT VT = N->getSimpleValueType(0);  in lowerX86FPLogicOp()  local
52944   if (!VT.isVector() || !Subtarget.hasSSE2())  in lowerX86FPLogicOp()
52949   unsigned IntBits = VT.getScalarSizeInBits();  in lowerX86FPLogicOp()
52951   MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits);  in lowerX86FPLogicOp()
52953   SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));  in lowerX86FPLogicOp()
52954   SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));  in lowerX86FPLogicOp()
52956   switch (N->getOpcode()) {  in lowerX86FPLogicOp()
52957   // clang-format off  in lowerX86FPLogicOp()
52963   // clang-format on  in lowerX86FPLogicOp()
52966   return DAG.getBitcast(VT, IntOp);  in lowerX86FPLogicOp()
52970 /// Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val)
52972   if (N->getOpcode() != ISD::XOR)  in foldXor1SetCC()
52975   SDValue LHS = N->getOperand(0);  in foldXor1SetCC()
52976   if (!isOneConstant(N->getOperand(1)) || LHS->getOpcode() != X86ISD::SETCC)  in foldXor1SetCC()
52980       X86::CondCode(LHS->getConstantOperandVal(0)));  in foldXor1SetCC()
52982   return getSETCC(NewCC, LHS->getOperand(1), DL, DAG);  in foldXor1SetCC()
52987   assert((N->getOpcode() == ISD::XOR || N->getOpcode() == ISD::SUB) &&  in combineXorSubCTLZ()
52992   EVT VT = N->getValueType(0);  in combineXorSubCTLZ()  local
52993   if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32 &&  in combineXorSubCTLZ()
52994       (VT != MVT::i64 || !Subtarget.is64Bit()))  in combineXorSubCTLZ()
52997   SDValue N0 = N->getOperand(0);  in combineXorSubCTLZ()
52998   SDValue N1 = N->getOperand(1);  in combineXorSubCTLZ()
53010   } else if (N->getOpcode() == ISD::SUB) {  in combineXorSubCTLZ()
53023   if (C->getZExtValue() != uint64_t(OpCTLZ.getValueSizeInBits() - 1))  in combineXorSubCTLZ()
53025   EVT OpVT = VT;  in combineXorSubCTLZ()
53027   if (VT == MVT::i8) {  in combineXorSubCTLZ()
53035   if (VT == MVT::i8)  in combineXorSubCTLZ()
53044   SDValue N0 = N->getOperand(0);  in combineXor()
53045   SDValue N1 = N->getOperand(1);  in combineXor()
53046   EVT VT = N->getValueType(0);  in combineXor()  local
53050   if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {  in combineXor()
53087   // Fold not(iX bitcast(vXi1)) -> (iX bitcast(not(vec))) for legal boolvecs.  in combineXor()
53094         VT, DAG.getNOT(DL, N0.getOperand(0), N0.getOperand(0).getValueType()));  in combineXor()
53098   // Fold not(insert_subvector(undef,sub)) -> insert_subvector(undef,not(sub))  in combineXor()
53099   if (ISD::isBuildVectorAllOnes(N1.getNode()) && VT.isVector() &&  in combineXor()
53100       VT.getVectorElementType() == MVT::i1 &&  in combineXor()
53104         ISD::INSERT_SUBVECTOR, DL, VT, N0.getOperand(0),  in combineXor()
53109   // Fold xor(zext(xor(x,c1)),c2) -> xor(zext(x),xor(zext(c1),c2))  in combineXor()
53110   // Fold xor(truncate(xor(x,c1)),c2) -> xor(truncate(x),xor(truncate(c1),c2))  in combineXor()
53113       N0.getOperand(0).getOpcode() == N->getOpcode()) {  in combineXor()
53117     if (N1C && !N1C->isOpaque() && N001C && !N001C->isOpaque()) {  in combineXor()
53118       SDValue LHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(0), DL, VT);  in combineXor()
53119       SDValue RHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(1), DL, VT);  in combineXor()
53120       return DAG.getNode(ISD::XOR, DL, VT, LHS,  in combineXor()
53121                          DAG.getNode(ISD::XOR, DL, VT, RHS, N1));  in combineXor()
53134   SDValue N0 = N->getOperand(0);  in combineBITREVERSE()
53135   EVT VT = N->getValueType(0);  in combineBITREVERSE()  local
53137   // Convert a (iX bitreverse(bitcast(vXi1 X))) -> (iX bitcast(shuffle(X)))  in combineBITREVERSE()
53138   if (VT.isInteger() && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {  in combineBITREVERSE()
53148         ReverseMask[I] = (NumElts - 1) - I;  in combineBITREVERSE()
53151       return DAG.getBitcast(VT, Rev);  in combineBITREVERSE()
53162   unsigned Opcode = N->getOpcode();  in combineAVG()
53163   SDValue N0 = N->getOperand(0);  in combineAVG()
53164   SDValue N1 = N->getOperand(1);  in combineAVG()
53165   EVT VT = N->getValueType(0);  in combineAVG()  local
53166   EVT SVT = VT.getScalarType();  in combineAVG()
53169   // avgceils(x,y) -> flipsign(avgceilu(flipsign(x),flipsign(y)))  in combineAVG()
53171   if (Opcode == ISD::AVGCEILS && VT.isVector() && SVT == MVT::i8) {  in combineAVG()
53172     APInt SignBit = APInt::getSignMask(VT.getScalarSizeInBits());  in combineAVG()
53173     SDValue SignMask = DAG.getConstant(SignBit, DL, VT);  in combineAVG()
53174     N0 = DAG.getNode(ISD::XOR, DL, VT, N0, SignMask);  in combineAVG()
53175     N1 = DAG.getNode(ISD::XOR, DL, VT, N1, SignMask);  in combineAVG()
53176     return DAG.getNode(ISD::XOR, DL, VT,  in combineAVG()
53177                        DAG.getNode(ISD::AVGCEILU, DL, VT, N0, N1), SignMask);  in combineAVG()
53186   EVT VT = N->getValueType(0);  in combineBEXTR()  local
53187   unsigned NumBits = VT.getSizeInBits();  in combineBEXTR()
53189   // TODO - Constant Folding.  in combineBEXTR()
53208 /// to be used as a replacement operand with operations (eg, bitwise-and) where
53223   SDValue N0 = N->getOperand(0);  in combineFAndFNotToFAndn()
53224   SDValue N1 = N->getOperand(1);  in combineFAndFNotToFAndn()
53225   EVT VT = N->getValueType(0);  in combineFAndFNotToFAndn()  local
53229   if (!((VT == MVT::f32 && Subtarget.hasSSE1()) ||  in combineFAndFNotToFAndn()
53230         (VT == MVT::f64 && Subtarget.hasSSE2()) ||  in combineFAndFNotToFAndn()
53231         (VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2())))  in combineFAndFNotToFAndn()
53238     return C && C->getConstantFPValue()->isAllOnesValue();  in combineFAndFNotToFAndn()
53241   // fand (fxor X, -1), Y --> fandn X, Y  in combineFAndFNotToFAndn()
53243     return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1);  in combineFAndFNotToFAndn()
53245   // fand X, (fxor Y, -1) --> fandn Y, X  in combineFAndFNotToFAndn()
53247     return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0);  in combineFAndFNotToFAndn()
53252 /// Do target-specific dag combines on X86ISD::FAND nodes.
53255   // FAND(0.0, x) -> 0.0  in combineFAnd()
53256   if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget))  in combineFAnd()
53259   // FAND(x, 0.0) -> 0.0  in combineFAnd()
53260   if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))  in combineFAnd()
53269 /// Do target-specific dag combines on X86ISD::FANDN nodes.
53272   // FANDN(0.0, x) -> x  in combineFAndn()
53273   if (isNullFPScalarOrVectorConst(N->getOperand(0)))  in combineFAndn()
53274     return N->getOperand(1);  in combineFAndn()
53276   // FANDN(x, 0.0) -> 0.0  in combineFAndn()
53277   if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))  in combineFAndn()
53283 /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
53287   assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);  in combineFOr()
53289   // F[X]OR(0.0, x) -> x  in combineFOr()
53290   if (isNullFPScalarOrVectorConst(N->getOperand(0)))  in combineFOr()
53291     return N->getOperand(1);  in combineFOr()
53293   // F[X]OR(x, 0.0) -> x  in combineFOr()
53294   if (isNullFPScalarOrVectorConst(N->getOperand(1)))  in combineFOr()
53295     return N->getOperand(0);  in combineFOr()
53303 /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
53305   assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);  in combineFMinFMax()
53312   // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes  in combineFMinFMax()
53315   switch (N->getOpcode()) {  in combineFMinFMax()
53321   return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0),  in combineFMinFMax()
53322                      N->getOperand(0), N->getOperand(1));  in combineFMinFMax()
53327   EVT VT = N->getValueType(0);  in combineFMinNumFMaxNum()  local
53328   if (Subtarget.useSoftFloat() || isSoftF16(VT, Subtarget))  in combineFMinNumFMaxNum()
53333   if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||  in combineFMinNumFMaxNum()
53334         (Subtarget.hasSSE2() && VT == MVT::f64) ||  in combineFMinNumFMaxNum()
53335         (Subtarget.hasFP16() && VT == MVT::f16) ||  in combineFMinNumFMaxNum()
53336         (VT.isVector() && TLI.isTypeLegal(VT))))  in combineFMinNumFMaxNum()
53339   SDValue Op0 = N->getOperand(0);  in combineFMinNumFMaxNum()
53340   SDValue Op1 = N->getOperand(1);  in combineFMinNumFMaxNum()
53342   auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN;  in combineFMinNumFMaxNum()
53346   if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs())  in combineFMinNumFMaxNum()
53347     return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());  in combineFMinNumFMaxNum()
53349   // If one of the operands is known non-NaN use the native min/max instructions  in combineFMinNumFMaxNum()
53350   // with the non-NaN input as second operand.  in combineFMinNumFMaxNum()
53352     return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());  in combineFMinNumFMaxNum()
53354     return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());  in combineFMinNumFMaxNum()
53358   if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize())  in combineFMinNumFMaxNum()
53362                                          VT);  in combineFMinNumFMaxNum()
53368   //            ----------------  in combineFMinNumFMaxNum()
53370   // Op0        ----------------  in combineFMinNumFMaxNum()
53372   //            ----------------  in combineFMinNumFMaxNum()
53383   SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0);  in combineFMinNumFMaxNum()
53388   return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);  in combineFMinNumFMaxNum()
53393   EVT VT = N->getValueType(0);  in combineX86INT_TO_FP()  local
53396   APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());  in combineX86INT_TO_FP()
53401   SDValue In = N->getOperand(0);  in combineX86INT_TO_FP()
53403   if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&  in combineX86INT_TO_FP()
53405     assert(InVT.is128BitVector() && "Expected 128-bit input vector");  in combineX86INT_TO_FP()
53406     LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));  in combineX86INT_TO_FP()
53407     unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();  in combineX86INT_TO_FP()
53412       SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,  in combineX86INT_TO_FP()
53426   bool IsStrict = N->isTargetStrictFPOpcode();  in combineCVTP2I_CVTTP2I()
53427   EVT VT = N->getValueType(0);  in combineCVTP2I_CVTTP2I()  local
53430   SDValue In = N->getOperand(IsStrict ? 1 : 0);  in combineCVTP2I_CVTTP2I()
53432   if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&  in combineCVTP2I_CVTTP2I()
53434     assert(InVT.is128BitVector() && "Expected 128-bit input vector");  in combineCVTP2I_CVTTP2I()
53436     unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();  in combineCVTP2I_CVTTP2I()
53443             DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other},  in combineCVTP2I_CVTTP2I()
53444                         {N->getOperand(0), DAG.getBitcast(InVT, VZLoad)});  in combineCVTP2I_CVTTP2I()
53448             DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad));  in combineCVTP2I_CVTTP2I()
53460 /// Do target-specific dag combines on X86ISD::ANDNP nodes.
53464   SDValue N0 = N->getOperand(0);  in combineAndnp()
53465   SDValue N1 = N->getOperand(1);  in combineAndnp()
53466   MVT VT = N->getSimpleValueType(0);  in combineAndnp()  local
53467   int NumElts = VT.getVectorNumElements();  in combineAndnp()
53468   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in combineAndnp()
53471   // ANDNP(undef, x) -> 0  in combineAndnp()
53472   // ANDNP(x, undef) -> 0  in combineAndnp()
53474     return DAG.getConstant(0, DL, VT);  in combineAndnp()
53476   // ANDNP(0, x) -> x  in combineAndnp()
53480   // ANDNP(x, 0) -> 0  in combineAndnp()
53482     return DAG.getConstant(0, DL, VT);  in combineAndnp()
53484   // ANDNP(x, -1) -> NOT(x) -> XOR(x, -1)  in combineAndnp()
53486     return DAG.getNOT(DL, N0, VT);  in combineAndnp()
53490     return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);  in combineAndnp()
53493   // ANDNP(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).  in combineAndnp()
53494   if (N1->hasOneUse())  in combineAndnp()
53497           DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);  in combineAndnp()
53511       return getConstVector(ResultBits, VT, DAG, DL);  in combineAndnp()
53517     if (N0->hasOneUse()) {  in combineAndnp()
53522         SDValue Not = getConstVector(EltBits0, VT, DAG, DL);  in combineAndnp()
53523         return DAG.getNode(ISD::AND, DL, VT, Not, N1);  in combineAndnp()
53529   if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {  in combineAndnp()
53547             // We can't assume an undef src element gives an undef dst - the  in combineAndnp()
53570       if (N->getOpcode() != ISD::DELETED_NODE)  in combineAndnp()
53581   SDValue N1 = N->getOperand(1);  in combineBT()
53583   // BT ignores high bits in the bit index operand.  in combineBT()
53587     if (N->getOpcode() != ISD::DELETED_NODE)  in combineBT()
53597   bool IsStrict = N->getOpcode() == X86ISD::STRICT_CVTPH2PS;  in combineCVTPH2PS()
53598   SDValue Src = N->getOperand(IsStrict ? 1 : 0);  in combineCVTPH2PS()
53600   if (N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) {  in combineCVTPH2PS()
53604       if (N->getOpcode() != ISD::DELETED_NODE)  in combineCVTPH2PS()
53611       LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(IsStrict ? 1 : 0));  in combineCVTPH2PS()
53616               N->getOpcode(), dl, {MVT::v4f32, MVT::Other},  in combineCVTPH2PS()
53617               {N->getOperand(0), DAG.getBitcast(MVT::v8i16, VZLoad)});  in combineCVTPH2PS()
53620           SDValue Convert = DAG.getNode(N->getOpcode(), dl, MVT::v4f32,  in combineCVTPH2PS()
53637   assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);  in combineSextInRegCmov()
53639   EVT DstVT = N->getValueType(0);  in combineSextInRegCmov()
53641   SDValue N0 = N->getOperand(0);  in combineSextInRegCmov()
53642   SDValue N1 = N->getOperand(1);  in combineSextInRegCmov()
53643   EVT ExtraVT = cast<VTSDNode>(N1)->getVT();  in combineSextInRegCmov()
53699   assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);  in combineSignExtendInReg()
53704   EVT VT = N->getValueType(0);  in combineSignExtendInReg()  local
53705   SDValue N0 = N->getOperand(0);  in combineSignExtendInReg()
53706   SDValue N1 = N->getOperand(1);  in combineSignExtendInReg()
53707   EVT ExtraVT = cast<VTSDNode>(N1)->getVT();  in combineSignExtendInReg()
53711   // both SSE and AVX2 since there is no sign-extended shift right  in combineSignExtendInReg()
53712   // operation on a vector with 64-bit elements.  in combineSignExtendInReg()
53713   //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) ->  in combineSignExtendInReg()
53715   if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||  in combineSignExtendInReg()
53728       return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Promote, N1);  in combineSignExtendInReg()
53739 /// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
53740 /// zext(add_nuw(x, C)) --> add(zext(x), C_zext)
53746   if (Ext->getOpcode() != ISD::SIGN_EXTEND &&  in promoteExtBeforeAdd()
53747       Ext->getOpcode() != ISD::ZERO_EXTEND)  in promoteExtBeforeAdd()
53751   EVT VT = Ext->getValueType(0);  in promoteExtBeforeAdd()  local
53752   if (VT != MVT::i64)  in promoteExtBeforeAdd()
53755   SDValue Add = Ext->getOperand(0);  in promoteExtBeforeAdd()
53761   bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND;  in promoteExtBeforeAdd()
53762   bool NSW = Add->getFlags().hasNoSignedWrap();  in promoteExtBeforeAdd()
53763   bool NUW = Add->getFlags().hasNoUnsignedWrap();  in promoteExtBeforeAdd()
53783   // currently has a high threshold.  in promoteExtBeforeAdd()
53785   for (auto *User : Ext->uses()) {  in promoteExtBeforeAdd()
53786     if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) {  in promoteExtBeforeAdd()
53795   int64_t AddC = Sext ? AddOp1C->getSExtValue() : AddOp1C->getZExtValue();  in promoteExtBeforeAdd()
53796   SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0);  in promoteExtBeforeAdd()
53797   SDValue NewConstant = DAG.getConstant(AddC, SDLoc(Add), VT);  in promoteExtBeforeAdd()
53800   // sign-extended.  in promoteExtBeforeAdd()
53804   return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags);  in promoteExtBeforeAdd()
53808 // operands and the result of CMOV is not used anywhere else - promote CMOV
53811 //        (or more) pseudo-CMOVs only when they go one-after-another and
53815 //     3) 16-bit CMOV encoding is 4 bytes, 32-bit CMOV is 3-byte, so this
53816 //        promotion is also good in terms of code-size.
53817 //        (64-bit CMOV is 4-bytes, that's why we don't do 32-bit => 64-bit
53820   SDValue CMovN = Extend->getOperand(0);  in combineToExtendCMOV()
53824   EVT TargetVT = Extend->getValueType(0);  in combineToExtendCMOV()
53825   unsigned ExtendOpcode = Extend->getOpcode();  in combineToExtendCMOV()
53828   EVT VT = CMovN.getValueType();  in combineToExtendCMOV()  local
53842   if (VT != MVT::i16 && !(ExtendOpcode == ISD::SIGN_EXTEND && VT == MVT::i32))  in combineToExtendCMOV()
53868   SDValue N0 = N->getOperand(0);  in combineExtSetcc()
53869   EVT VT = N->getValueType(0);  in combineExtSetcc()  local
53873   if (!Subtarget.hasAVX512() || !VT.isVector() || N0.getOpcode() != ISD::SETCC)  in combineExtSetcc()
53877   EVT SVT = VT.getVectorElementType();  in combineExtSetcc()
53886   unsigned Size = VT.getSizeInBits();  in combineExtSetcc()
53892   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();  in combineExtSetcc()
53902   SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);  in combineExtSetcc()
53904   if (N->getOpcode() == ISD::ZERO_EXTEND)  in combineExtSetcc()
53913   SDValue N0 = N->getOperand(0);  in combineSext()
53914   EVT VT = N->getValueType(0);  in combineSext()  local
53917   // (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))  in combineSext()
53920     SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),  in combineSext()
53921                                  N0->getOperand(1));  in combineSext()
53943   if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), DL, VT, N0,  in combineSext()
53947   if (VT.isVector()) {  in combineSext()
53952       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));  in combineSext()
53974     return Use->getOpcode() != ISD::FMA && Use->getOpcode() != ISD::STRICT_FMA;  in getInvertedVectorForFMA()
53976   if (llvm::any_of(V->uses(), IsNotFMA))  in getInvertedVectorForFMA()
53980   EVT VT = V.getValueType();  in getInvertedVectorForFMA()  local
53981   EVT EltVT = VT.getVectorElementType();  in getInvertedVectorForFMA()
53982   for (const SDValue &Op : V->op_values()) {  in getInvertedVectorForFMA()
53984       Ops.push_back(DAG.getConstantFP(-Cst->getValueAPF(), SDLoc(Op), EltVT));  in getInvertedVectorForFMA()
53991   SDNode *NV = DAG.getNodeIfExists(ISD::BUILD_VECTOR, DAG.getVTList(VT), Ops);  in getInvertedVectorForFMA()
53997   if (llvm::any_of(NV->uses(), IsNotFMA))  in getInvertedVectorForFMA()
54004   for (const SDValue &Op : V->op_values()) {  in getInvertedVectorForFMA()
54006       if (Cst->isNegative())  in getInvertedVectorForFMA()
54018   EVT VT = N->getValueType(0);  in combineFMA()  local
54019   bool IsStrict = N->isStrictFPOpcode() || N->isTargetStrictFPOpcode();  in combineFMA()
54023   if (!TLI.isTypeLegal(VT))  in combineFMA()
54026   SDValue A = N->getOperand(IsStrict ? 1 : 0);  in combineFMA()
54027   SDValue B = N->getOperand(IsStrict ? 2 : 1);  in combineFMA()
54028   SDValue C = N->getOperand(IsStrict ? 3 : 2);  in combineFMA()
54030   // If the operation allows fast-math and the target does not support FMA,  in combineFMA()
54032   SDNodeFlags Flags = N->getFlags();  in combineFMA()
54034       TLI.isOperationExpand(ISD::FMA, VT)) {  in combineFMA()
54035     SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags);  in combineFMA()
54036     return DAG.getNode(ISD::FADD, dl, VT, Fmul, C, Flags);  in combineFMA()
54039   EVT ScalarVT = VT.getScalarType();  in combineFMA()
54085       negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC, false);  in combineFMA()
54087   // Propagate fast-math-flags to new FMA node.  in combineFMA()
54090     assert(N->getNumOperands() == 4 && "Shouldn't be greater than 4");  in combineFMA()
54091     return DAG.getNode(NewOpcode, dl, {VT, MVT::Other},  in combineFMA()
54092                        {N->getOperand(0), A, B, C});  in combineFMA()
54094     if (N->getNumOperands() == 4)  in combineFMA()
54095       return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));  in combineFMA()
54096     return DAG.getNode(NewOpcode, dl, VT, A, B, C);  in combineFMA()
54100 // Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C)
54101 // Combine FMSUBADD(A, B, FNEG(C)) -> FMADDSUB(A, B, C)
54105   EVT VT = N->getValueType(0);  in combineFMADDSUB()  local
54110   SDValue N2 = N->getOperand(2);  in combineFMADDSUB()
54116   unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false);  in combineFMADDSUB()
54118   if (N->getNumOperands() == 4)  in combineFMADDSUB()
54119     return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1),  in combineFMADDSUB()
54120                        NegN2, N->getOperand(3));  in combineFMADDSUB()
54121   return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1),  in combineFMADDSUB()
54129   SDValue N0 = N->getOperand(0);  in combineZext()
54130   EVT VT = N->getValueType(0);  in combineZext()  local
54132   // (i32 (aext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry))  in combineZext()
54134   if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND &&  in combineZext()
54136     SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0),  in combineZext()
54137                                  N0->getOperand(1));  in combineZext()
54157   if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), dl, VT, N0,  in combineZext()
54161   if (VT.isVector())  in combineZext()
54173       VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) {  in combineZext()
54188 /// pre-promote its result type since vXi1 vectors don't get promoted
54190 static SDValue truncateAVX512SetCCNoBWI(EVT VT, EVT OpVT, SDValue LHS,  in truncateAVX512SetCCNoBWI()  argument
54194   if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&  in truncateAVX512SetCCNoBWI()
54195       VT.getVectorElementType() == MVT::i1 &&  in truncateAVX512SetCCNoBWI()
54199     return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);  in truncateAVX512SetCCNoBWI()
54207   const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();  in combineSetCC()
54208   const SDValue LHS = N->getOperand(0);  in combineSetCC()
54209   const SDValue RHS = N->getOperand(1);  in combineSetCC()
54210   EVT VT = N->getValueType(0);  in combineSetCC()  local
54215     if (SDValue V = combineVectorSizedSetCCEquality(VT, LHS, RHS, CC, DL, DAG,  in combineSetCC()
54219     if (VT == MVT::i1) {  in combineSetCC()
54223         return DAG.getNode(ISD::TRUNCATE, DL, VT, getSETCC(X86CC, V, DL, DAG));  in combineSetCC()
54227       // cmpeq(or(X,Y),X) --> cmpeq(and(~X,Y),0)  in combineSetCC()
54228       // cmpne(or(X,Y),X) --> cmpne(and(~X,Y),0)  in combineSetCC()
54230         if (N0.getOpcode() == ISD::OR && N0->hasOneUse()) {  in combineSetCC()
54241         return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);  in combineSetCC()
54243         return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);  in combineSetCC()
54245       // cmpeq(and(X,Y),Y) --> cmpeq(and(~X,Y),0)  in combineSetCC()
54246       // cmpne(and(X,Y),Y) --> cmpne(and(~X,Y),0)  in combineSetCC()
54248         if (N0.getOpcode() == ISD::AND && N0->hasOneUse()) {  in combineSetCC()
54259         return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);  in combineSetCC()
54261         return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);  in combineSetCC()
54263       // cmpeq(trunc(x),C) --> cmpeq(x,C)  in combineSetCC()
54264       // cmpne(trunc(x),C) --> cmpne(x,C)  in combineSetCC()
54275           return DAG.getSetCC(DL, VT, LHS.getOperand(0),  in combineSetCC()
54280       //    icmp eq Abs(X) C ->  in combineSetCC()
54281       //        (icmp eq A, C) | (icmp eq A, -C)  in combineSetCC()
54282       //    icmp ne Abs(X) C ->  in combineSetCC()
54283       //        (icmp ne A, C) & (icmp ne A, -C)  in combineSetCC()
54289           const APInt &CInt = C->getAPIntValue();  in combineSetCC()
54293             SDValue SETCC0 = DAG.getSetCC(DL, VT, BaseOp, RHS, CC);  in combineSetCC()
54295                 DL, VT, BaseOp, DAG.getConstant(-CInt, DL, OpVT), CC);  in combineSetCC()
54296             return DAG.getNode(CC == ISD::SETEQ ? ISD::OR : ISD::AND, DL, VT,  in combineSetCC()
54304   if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&  in combineSetCC()
54323       assert(VT == Op0.getOperand(0).getValueType() &&  in combineSetCC()
54326         return DAG.getConstant(0, DL, VT);  in combineSetCC()
54328         return DAG.getConstant(1, DL, VT);  in combineSetCC()
54330         return DAG.getNOT(DL, Op0.getOperand(0), VT);  in combineSetCC()
54342   if (VT.isVector() && OpVT.isVector() && OpVT.isInteger()) {  in combineSetCC()
54399         if (SDValue R = truncateAVX512SetCCNoBWI(VT, OpVT, LHSOut, RHSOut,  in combineSetCC()
54402         return DAG.getSetCC(DL, VT, LHSOut, RHSOut, NewCC);  in combineSetCC()
54408           truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget))  in combineSetCC()
54413   //        -> `(icmp ult (add x, -C), 2)`  in combineSetCC()
54417   // in worse codegen. So, undo the middle-end transform and go back to `(or  in combineSetCC()
54425   if (VT.isVector() && OpVT.isVector() && OpVT.isInteger() &&  in combineSetCC()
54440       // If we had `(add x, -1)` and can lower with `umin`, don't transform as  in combineSetCC()
54457       else if ((CC == ISD::SETUGT && (-CmpC) == 3) ||  in combineSetCC()
54458                (CC == ISD::SETUGE && (-CmpC) == 2)) {  in combineSetCC()
54466             DAG.getSetCC(DL, VT, LHS.getOperand(0), C0, ISD::SETEQ);  in combineSetCC()
54468             DAG.getSetCC(DL, VT, LHS.getOperand(0), C1, ISD::SETEQ);  in combineSetCC()
54469         return DAG.getNode(ISD::OR, DL, VT, NewLHS, NewRHS);  in combineSetCC()
54474   // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early  in combineSetCC()
54476   if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&  in combineSetCC()
54480   // X pred 0.0 --> X pred -X  in combineSetCC()
54487       return DAG.getSetCC(DL, VT, LHS, SDValue(FNeg, 0), CC);  in combineSetCC()
54496   SDValue Src = N->getOperand(0);  in combineMOVMSK()
54498   MVT VT = N->getSimpleValueType(0);  in combineMOVMSK()  local
54499   unsigned NumBits = VT.getScalarSizeInBits();  in combineMOVMSK()
54502   assert(VT == MVT::i32 && NumElts <= NumBits && "Unexpected MOVMSK types");  in combineMOVMSK()
54515     return DAG.getConstant(Imm, SDLoc(N), VT);  in combineMOVMSK()
54518   // Look through int->fp bitcasts that don't change the element width.  in combineMOVMSK()
54522     return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0));  in combineMOVMSK()
54524   // Fold movmsk(not(x)) -> not(movmsk(x)) to improve folding of movmsk results  in combineMOVMSK()
54530     return DAG.getNode(ISD::XOR, DL, VT,  in combineMOVMSK()
54531                        DAG.getNode(X86ISD::MOVMSK, DL, VT, NotSrc),  in combineMOVMSK()
54532                        DAG.getConstant(NotMask, DL, VT));  in combineMOVMSK()
54535   // Fold movmsk(icmp_sgt(x,-1)) -> not(movmsk(x)) to improve folding of movmsk  in combineMOVMSK()
54541     return DAG.getNode(ISD::XOR, DL, VT,  in combineMOVMSK()
54542                        DAG.getNode(X86ISD::MOVMSK, DL, VT, Src.getOperand(0)),  in combineMOVMSK()
54543                        DAG.getConstant(NotMask, DL, VT));  in combineMOVMSK()
54546   // Fold movmsk(icmp_eq(and(x,c1),c1)) -> movmsk(shl(x,c2))  in combineMOVMSK()
54547   // Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2)))  in combineMOVMSK()
54549   // Use KnownBits to determine if only a single bit is non-zero  in combineMOVMSK()
54563         // vXi8 shifts - we only care about the signbit so can use PSLLW.  in combineMOVMSK()
54575       return DAG.getNode(X86ISD::MOVMSK, DL, VT, DAG.getNOT(DL, Res, SrcVT));  in combineMOVMSK()
54579   // Fold movmsk(logic(X,C)) -> logic(movmsk(X),C)  in combineMOVMSK()
54580   if (N->isOnlyUserOf(Src.getNode())) {  in combineMOVMSK()
54594         SDValue NewMovMsk = DAG.getNode(X86ISD::MOVMSK, DL, VT, NewSrc);  in combineMOVMSK()
54595         return DAG.getNode(SrcBC.getOpcode(), DL, VT, NewMovMsk,  in combineMOVMSK()
54596                            DAG.getConstant(Mask, DL, VT));  in combineMOVMSK()
54613   MVT VT = N->getSimpleValueType(0);  in combineTESTP()  local
54614   unsigned NumBits = VT.getScalarSizeInBits();  in combineTESTP()
54628   SDValue Mask = MemOp->getMask();  in combineX86GatherScatter()
54635       if (N->getOpcode() != ISD::DELETED_NODE)  in combineX86GatherScatter()
54650     SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(),  in rebuildGatherScatter()
54651                       Gather->getMask(), Base, Index, Scale } ;  in rebuildGatherScatter()
54652     return DAG.getMaskedGather(Gather->getVTList(),  in rebuildGatherScatter()
54653                                Gather->getMemoryVT(), DL, Ops,  in rebuildGatherScatter()
54654                                Gather->getMemOperand(),  in rebuildGatherScatter()
54655                                Gather->getIndexType(),  in rebuildGatherScatter()
54656                                Gather->getExtensionType());  in rebuildGatherScatter()
54659   SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(),  in rebuildGatherScatter()
54660                     Scatter->getMask(), Base, Index, Scale };  in rebuildGatherScatter()
54661   return DAG.getMaskedScatter(Scatter->getVTList(),  in rebuildGatherScatter()
54662                               Scatter->getMemoryVT(), DL,  in rebuildGatherScatter()
54663                               Ops, Scatter->getMemOperand(),  in rebuildGatherScatter()
54664                               Scatter->getIndexType(),  in rebuildGatherScatter()
54665                               Scatter->isTruncatingStore());  in rebuildGatherScatter()
54672   SDValue Index = GorS->getIndex();  in combineGatherScatter()
54673   SDValue Base = GorS->getBasePtr();  in combineGatherScatter()
54674   SDValue Scale = GorS->getScale();  in combineGatherScatter()
54680     // Shrink constant indices if they are larger than 32-bits.  in combineGatherScatter()
54688       if (BV->isConstant() && IndexWidth > 32 &&  in combineGatherScatter()
54689           DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) {  in combineGatherScatter()
54703         DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) {  in combineGatherScatter()
54718     uint64_t ScaleAmt = Scale->getAsZExtVal();  in combineGatherScatter()
54721       if (ConstantSDNode *C = BV->getConstantSplatNode(&UndefElts)) {  in combineGatherScatter()
54722         // FIXME: Allow non-constant?  in combineGatherScatter()
54725           APInt Adder = C->getAPIntValue() * ScaleAmt;  in combineGatherScatter()
54736       if (BV->isConstant() && isa<ConstantSDNode>(Base) &&  in combineGatherScatter()
54764   SDValue Mask = GorS->getMask();  in combineGatherScatter()
54768       if (N->getOpcode() != ISD::DELETED_NODE)  in combineGatherScatter()
54781   X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));  in combineX86SetCC()
54782   SDValue EFLAGS = N->getOperand(1);  in combineX86SetCC()
54795   SDValue EFLAGS = N->getOperand(3);  in combineBrCond()
54796   X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));  in combineBrCond()
54803     return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),  in combineBrCond()
54804                        N->getOperand(1), Cond, Flags);  in combineBrCond()
54813   // Take advantage of vector comparisons (etc.) producing 0 or -1 in each lane  in combineVectorCompareAndMaskUnaryOp()
54817   //    UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->  in combineVectorCompareAndMaskUnaryOp()
54824   EVT VT = N->getValueType(0);  in combineVectorCompareAndMaskUnaryOp()  local
54825   bool IsStrict = N->isStrictFPOpcode();  in combineVectorCompareAndMaskUnaryOp()
54826   unsigned NumEltBits = VT.getScalarSizeInBits();  in combineVectorCompareAndMaskUnaryOp()
54827   SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);  in combineVectorCompareAndMaskUnaryOp()
54828   if (!VT.isVector() || Op0.getOpcode() != ISD::AND ||  in combineVectorCompareAndMaskUnaryOp()
54830       VT.getSizeInBits() != Op0.getValueSizeInBits())  in combineVectorCompareAndMaskUnaryOp()
54834   // make the transformation for non-constant splats as well, but it's unclear  in combineVectorCompareAndMaskUnaryOp()
54839     if (!BV->isConstant())  in combineVectorCompareAndMaskUnaryOp()
54844     EVT IntVT = BV->getValueType(0);  in combineVectorCompareAndMaskUnaryOp()
54849       SourceConst = DAG.getNode(N->getOpcode(), DL, {VT, MVT::Other},  in combineVectorCompareAndMaskUnaryOp()
54850                                 {N->getOperand(0), SDValue(BV, 0)});  in combineVectorCompareAndMaskUnaryOp()
54852       SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));  in combineVectorCompareAndMaskUnaryOp()
54855     SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0),  in combineVectorCompareAndMaskUnaryOp()
54857     SDValue Res = DAG.getBitcast(VT, NewAnd);  in combineVectorCompareAndMaskUnaryOp()
54866 /// If we are converting a value to floating-point, try to replace scalar
54873   SDValue Trunc = N->getOperand(0);  in combineToFPTruncExtElt()
54889   // inttofp (trunc (extelt X, 0)) --> inttofp (extelt (bitcast X), 0)  in combineToFPTruncExtElt()
54898   return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), NewExtElt);  in combineToFPTruncExtElt()
54903   bool IsStrict = N->isStrictFPOpcode();  in combineUIntToFP()
54904   SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);  in combineUIntToFP()
54905   EVT VT = N->getValueType(0);  in combineUIntToFP()  local
54911   //   UINT_TO_FP(vXi1~15)  -> SINT_TO_FP(ZEXT(vXi1~15  to vXi16))  in combineUIntToFP()
54912   //   UINT_TO_FP(vXi17~31) -> SINT_TO_FP(ZEXT(vXi17~31 to vXi32))  in combineUIntToFP()
54914   //   UINT_TO_FP(vXi1~31) -> SINT_TO_FP(ZEXT(vXi1~31 to vXi32))  in combineUIntToFP()
54915   // UINT_TO_FP(vXi33~63) -> SINT_TO_FP(ZEXT(vXi33~63 to vXi64))  in combineUIntToFP()
54916   if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {  in combineUIntToFP()
54930       return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},  in combineUIntToFP()
54931                          {N->getOperand(0), P});  in combineUIntToFP()
54932     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);  in combineUIntToFP()
54935   // UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))  in combineUIntToFP()
54936   // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))  in combineUIntToFP()
54937   // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))  in combineUIntToFP()
54939       VT.getScalarType() != MVT::f16) {  in combineUIntToFP()
54946       return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},  in combineUIntToFP()
54947                          {N->getOperand(0), P});  in combineUIntToFP()
54948     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);  in combineUIntToFP()
54954   SDNodeFlags Flags = N->getFlags();  in combineUIntToFP()
54957       return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other},  in combineUIntToFP()
54958                          {N->getOperand(0), Op0});  in combineUIntToFP()
54959     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);  in combineUIntToFP()
54970   bool IsStrict = N->isStrictFPOpcode();  in combineSIntToFP()
54975   SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);  in combineSIntToFP()
54976   EVT VT = N->getValueType(0);  in combineSIntToFP()  local
54982   //   SINT_TO_FP(vXi1~15)  -> SINT_TO_FP(SEXT(vXi1~15  to vXi16))  in combineSIntToFP()
54983   //   SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))  in combineSIntToFP()
54985   //   SINT_TO_FP(vXi1~31) -> SINT_TO_FP(SEXT(vXi1~31 to vXi32))  in combineSIntToFP()
54986   // SINT_TO_FP(vXi33~63) -> SINT_TO_FP(SEXT(vXi33~63 to vXi64))  in combineSIntToFP()
54987   if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {  in combineSIntToFP()
55001       return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},  in combineSIntToFP()
55002                          {N->getOperand(0), P});  in combineSIntToFP()
55003     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);  in combineSIntToFP()
55006   // SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))  in combineSIntToFP()
55007   // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))  in combineSIntToFP()
55008   // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))  in combineSIntToFP()
55010       VT.getScalarType() != MVT::f16) {  in combineSIntToFP()
55015       return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},  in combineSIntToFP()
55016                          {N->getOperand(0), P});  in combineSIntToFP()
55017     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);  in combineSIntToFP()
55026     if (NumSignBits >= (BitWidth - 31)) {  in combineSIntToFP()
55034           return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},  in combineSIntToFP()
55035                              {N->getOperand(0), Trunc});  in combineSIntToFP()
55036         return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);  in combineSIntToFP()
55040       assert(InVT == MVT::v2i64 && "Unexpected VT!");  in combineSIntToFP()
55043                                           { 0, 2, -1, -1 });  in combineSIntToFP()
55045         return DAG.getNode(X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},  in combineSIntToFP()
55046                            {N->getOperand(0), Shuf});  in combineSIntToFP()
55047       return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf);  in combineSIntToFP()
55052   // a 32-bit target where SSE doesn't support i64->FP operations.  in combineSIntToFP()
55058     if (VT == MVT::f16 || VT == MVT::f128)  in combineSIntToFP()
55062     // the VT is f80.  in combineSIntToFP()
55063     if (Subtarget.hasDQI() && VT != MVT::f80)  in combineSIntToFP()
55066     if (Ld->isSimple() && !VT.isVector() && ISD::isNormalLoad(Op0.getNode()) &&  in combineSIntToFP()
55069           Subtarget.getTargetLowering()->BuildFILD(  in combineSIntToFP()
55070               VT, InVT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(),  in combineSIntToFP()
55071               Ld->getPointerInfo(), Ld->getOriginalAlign(), DAG);  in combineSIntToFP()
55087   assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");  in needCarryOrOverflowFlag()
55089   for (const SDNode *User : Flags->uses()) {  in needCarryOrOverflowFlag()
55091     switch (User->getOpcode()) {  in needCarryOrOverflowFlag()
55097       CC = (X86::CondCode)User->getConstantOperandVal(0);  in needCarryOrOverflowFlag()
55101       CC = (X86::CondCode)User->getConstantOperandVal(2);  in needCarryOrOverflowFlag()
55106     // clang-format off  in needCarryOrOverflowFlag()
55114     // clang-format on  in needCarryOrOverflowFlag()
55122   assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");  in onlyZeroFlagUsed()
55124   for (const SDNode *User : Flags->uses()) {  in onlyZeroFlagUsed()
55126     switch (User->getOpcode()) {  in onlyZeroFlagUsed()
55140     X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo);  in onlyZeroFlagUsed()
55152   if (!isNullConstant(N->getOperand(1)))  in combineCMP()
55160   SDValue Op = N->getOperand(0);  in combineCMP()
55161   EVT VT = Op.getValueType();  in combineCMP()  local
55174     unsigned BitWidth = VT.getSizeInBits();  in combineCMP()
55177       unsigned MaskBits = BitWidth - ShAmt.getZExtValue();  in combineCMP()
55182         Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),  in combineCMP()
55183                          DAG.getConstant(Mask, dl, VT));  in combineCMP()
55185                            DAG.getConstant(0, dl, VT));  in combineCMP()
55192   // (and (extract_elt (kshiftr vXi1, C), 0), 1) -> (and (bc vXi1), 1<<C)  in combineCMP()
55220   // Peek through any zero-extend if we're only testing for a zero result.  in combineCMP()
55238   // i32 truncated op to prevent partial-reg compares of promoted ops.  in combineCMP()
55241       APInt::getBitsSetFrom(OpVT.getSizeInBits(), VT.getSizeInBits());  in combineCMP()
55279   SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0));  in combineCMP()
55280   SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1));  in combineCMP()
55283   SDVTList VTs = DAG.getVTList(VT, MVT::i32);  in combineCMP()
55289                        DAG.getConstant(0, dl, VT));  in combineCMP()
55298   assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&  in combineX86AddSub()
55302   SDValue LHS = N->getOperand(0);  in combineX86AddSub()
55303   SDValue RHS = N->getOperand(1);  in combineX86AddSub()
55304   MVT VT = LHS.getSimpleValueType();  in combineX86AddSub()  local
55305   bool IsSub = X86ISD::SUB == N->getOpcode();  in combineX86AddSub()
55308   if (IsSub && isOneConstant(N->getOperand(1)) && !N->hasAnyUseOfValue(0))  in combineX86AddSub()
55313   if (!N->hasAnyUseOfValue(1)) {  in combineX86AddSub()
55314     SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);  in combineX86AddSub()
55321     SDVTList VTs = DAG.getVTList(N->getValueType(0));  in combineX86AddSub()
55325         Op = DAG.getNegative(Op, DL, VT);  in combineX86AddSub()
55330   MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());  in combineX86AddSub()
55334   return combineAddOrSubToADCOrSBB(IsSub, DL, VT, LHS, RHS, DAG,  in combineX86AddSub()
55339   SDValue LHS = N->getOperand(0);  in combineSBB()
55340   SDValue RHS = N->getOperand(1);  in combineSBB()
55341   SDValue BorrowIn = N->getOperand(2);  in combineSBB()
55344     MVT VT = N->getSimpleValueType(0);  in combineSBB()  local
55345     SDVTList VTs = DAG.getVTList(VT, MVT::i32);  in combineSBB()
55349   // Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry)  in combineSBB()
55352       !N->hasAnyUseOfValue(1))  in combineSBB()
55353     return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), LHS.getOperand(0),  in combineSBB()
55362   SDValue LHS = N->getOperand(0);  in combineADC()
55363   SDValue RHS = N->getOperand(1);  in combineADC()
55364   SDValue CarryIn = N->getOperand(2);  in combineADC()
55370     return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), RHS, LHS,  in combineADC()
55376   if (LHSC && RHSC && LHSC->isZero() && RHSC->isZero() &&  in combineADC()
55381     EVT VT = N->getValueType(0);  in combineADC()  local
55382     SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));  in combineADC()
55384         ISD::AND, DL, VT,  in combineADC()
55385         DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,  in combineADC()
55387         DAG.getConstant(1, DL, VT));  in combineADC()
55391   // Fold ADC(C1,C2,Carry) -> ADC(0,C1+C2,Carry)  in combineADC()
55394   if (LHSC && RHSC && !LHSC->isZero() && !N->hasAnyUseOfValue(1)) {  in combineADC()
55396     APInt Sum = LHSC->getAPIntValue() + RHSC->getAPIntValue();  in combineADC()
55397     return DAG.getNode(X86ISD::ADC, DL, N->getVTList(),  in combineADC()
55403     MVT VT = N->getSimpleValueType(0);  in combineADC()  local
55404     SDVTList VTs = DAG.getVTList(VT, MVT::i32);  in combineADC()
55408   // Fold ADC(ADD(X,Y),0,Carry) -> ADC(X,Y,Carry)  in combineADC()
55410   if (LHS.getOpcode() == ISD::ADD && RHSC && RHSC->isZero() &&  in combineADC()
55411       !N->hasAnyUseOfValue(1))  in combineADC()
55412     return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), LHS.getOperand(0),  in combineADC()
55419                             const SDLoc &DL, EVT VT,  in matchPMADDWD()  argument
55439   if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 ||  in matchPMADDWD()
55440       VT.getVectorNumElements() < 4 ||  in matchPMADDWD()
55441       !isPowerOf2_32(VT.getVectorNumElements()))  in matchPMADDWD()
55456   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; i += 2) {  in matchPMADDWD()
55457     SDValue Op0L = Op0->getOperand(i), Op1L = Op1->getOperand(i),  in matchPMADDWD()
55458             Op0H = Op0->getOperand(i + 1), Op1H = Op1->getOperand(i + 1);  in matchPMADDWD()
55465     auto *Const0L = dyn_cast<ConstantSDNode>(Op0L->getOperand(1));  in matchPMADDWD()
55466     auto *Const1L = dyn_cast<ConstantSDNode>(Op1L->getOperand(1));  in matchPMADDWD()
55467     auto *Const0H = dyn_cast<ConstantSDNode>(Op0H->getOperand(1));  in matchPMADDWD()
55468     auto *Const1H = dyn_cast<ConstantSDNode>(Op1H->getOperand(1));  in matchPMADDWD()
55471     unsigned Idx0L = Const0L->getZExtValue(), Idx1L = Const1L->getZExtValue(),  in matchPMADDWD()
55472              Idx0H = Const0H->getZExtValue(), Idx1H = Const1H->getZExtValue();  in matchPMADDWD()
55490       Mul = Op0L->getOperand(0);  in matchPMADDWD()
55491       if (Mul->getOpcode() != ISD::MUL ||  in matchPMADDWD()
55496     if (Mul != Op0L->getOperand(0) || Mul != Op1L->getOperand(0) ||  in matchPMADDWD()
55497         Mul != Op0H->getOperand(0) || Mul != Op1H->getOperand(0))  in matchPMADDWD()
55508                                  VT.getVectorNumElements() * 2);  in matchPMADDWD()
55520   return SplitOpsAndApply(DAG, Subtarget, DL, VT, { N0, N1 }, PMADDBuilder);  in matchPMADDWD()
55527                               const SDLoc &DL, EVT VT,  in matchPMADDWD_2()  argument
55535   if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 ||  in matchPMADDWD_2()
55536       VT.getVectorNumElements() < 4 ||  in matchPMADDWD_2()
55537       !isPowerOf2_32(VT.getVectorNumElements()))  in matchPMADDWD_2()
55596     unsigned IdxN00 = ConstN00Elt->getZExtValue();  in matchPMADDWD_2()
55597     unsigned IdxN01 = ConstN01Elt->getZExtValue();  in matchPMADDWD_2()
55598     unsigned IdxN10 = ConstN10Elt->getZExtValue();  in matchPMADDWD_2()
55599     unsigned IdxN11 = ConstN11Elt->getZExtValue();  in matchPMADDWD_2()
55621       if (In0.getValueSizeInBits() < VT.getSizeInBits() ||  in matchPMADDWD_2()
55622           In1.getValueSizeInBits() < VT.getSizeInBits())  in matchPMADDWD_2()
55649                                VT.getVectorNumElements() * 2);  in matchPMADDWD_2()
55658   return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 },  in matchPMADDWD_2()
55662 // ADD(VPMADDWD(X,Y),VPMADDWD(Z,W)) -> VPMADDWD(SHUFFLE(X,Z), SHUFFLE(Y,W))
55667                                    const SDLoc &DL, EVT VT) {  in combineAddOfPMADDWD()  argument
55671   // TODO: Add 256/512-bit support once VPMADDWD combines with shuffles.  in combineAddOfPMADDWD()
55672   if (VT.getSizeInBits() > 128)  in combineAddOfPMADDWD()
55675   unsigned NumElts = VT.getVectorNumElements();  in combineAddOfPMADDWD()
55703   return DAG.getNode(X86ISD::VPMADDWD, DL, VT, LHS, RHS);  in combineAddOfPMADDWD()
55709 /// earlier folds that may be used to turn select-of-constants into logic hacks.
55713   // If an operand is zero, add-of-0 gets simplified away, so that's clearly  in pushAddIntoCmovOfConsts()
55714   // better because we eliminate 1-2 instructions. This transform is still  in pushAddIntoCmovOfConsts()
55717   // immediate asm operands (fit in 32-bits).  in pushAddIntoCmovOfConsts()
55730   SDValue Cmov = N->getOperand(0);  in pushAddIntoCmovOfConsts()
55731   SDValue OtherOp = N->getOperand(1);  in pushAddIntoCmovOfConsts()
55742   EVT VT = N->getValueType(0);  in pushAddIntoCmovOfConsts()  local
55749   // a 3-operand LEA which is likely slower than a 2-operand LEA.  in pushAddIntoCmovOfConsts()
55753       all_of(N->uses(), [&](SDNode *Use) {  in pushAddIntoCmovOfConsts()
55755         return MemNode && MemNode->getBasePtr().getNode() == N;  in pushAddIntoCmovOfConsts()
55757     // add (cmov C1, C2), add (X, Y) --> add (cmov (add X, C1), (add X, C2)), Y  in pushAddIntoCmovOfConsts()
55761     FalseOp = DAG.getNode(ISD::ADD, DL, VT, X, FalseOp);  in pushAddIntoCmovOfConsts()
55762     TrueOp = DAG.getNode(ISD::ADD, DL, VT, X, TrueOp);  in pushAddIntoCmovOfConsts()
55763     Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp,  in pushAddIntoCmovOfConsts()
55765     return DAG.getNode(ISD::ADD, DL, VT, Cmov, Y);  in pushAddIntoCmovOfConsts()
55768   // add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2)  in pushAddIntoCmovOfConsts()
55769   FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp);  in pushAddIntoCmovOfConsts()
55770   TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp);  in pushAddIntoCmovOfConsts()
55771   return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2),  in pushAddIntoCmovOfConsts()
55778   EVT VT = N->getValueType(0);  in combineAdd()  local
55779   SDValue Op0 = N->getOperand(0);  in combineAdd()
55780   SDValue Op1 = N->getOperand(1);  in combineAdd()
55786   if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget))  in combineAdd()
55788   if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, DL, VT, Subtarget))  in combineAdd()
55790   if (SDValue MAdd = combineAddOfPMADDWD(DAG, Op0, Op1, DL, VT))  in combineAdd()
55797   // add(psadbw(X,0),psadbw(Y,0)) -> psadbw(add(X,Y),0)  in combineAdd()
55806       return DAG.getNode(X86ISD::PSADBW, DL, VT, Sum,  in combineAdd()
55813   // FIXME: We have the (sub Y, (zext (vXi1 X))) -> (add (sext (vXi1 X)), Y) in  in combineAdd()
55816   if (VT.isVector()) {  in combineAdd()
55821       SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0));  in combineAdd()
55822       return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt);  in combineAdd()
55828       SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0));  in combineAdd()
55829       return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt);  in combineAdd()
55833   // Fold ADD(ADC(Y,0,W),X) -> ADC(X,Y,W)  in combineAdd()
55834   if (Op0.getOpcode() == X86ISD::ADC && Op0->hasOneUse() &&  in combineAdd()
55836     assert(!Op0->hasAnyUseOfValue(1) && "Overflow bit in use");  in combineAdd()
55837     return DAG.getNode(X86ISD::ADC, SDLoc(Op0), Op0->getVTList(), Op1,  in combineAdd()
55844 // Try to fold (sub Y, cmovns X, -X) -> (add Y, cmovns -X, X) if the cmov
55845 // condition comes from the subtract node that produced -X. This matches the
55849   SDValue N0 = N->getOperand(0);  in combineSubABS()
55850   SDValue N1 = N->getOperand(1);  in combineSubABS()
55865   // Get the X and -X from the negate.  in combineSubABS()
55878   MVT VT = N->getSimpleValueType(0);  in combineSubABS()  local
55879   SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, TrueOp, FalseOp,  in combineSubABS()
55882   return DAG.getNode(ISD::ADD, DL, VT, N0, Cmov);  in combineSubABS()
55886   SDValue Op0 = N->getOperand(0);  in combineSubSetcc()
55887   SDValue Op1 = N->getOperand(1);  in combineSubSetcc()
55891   // (add (zero_extend (setcc inverted) C-1))   if C is a nonzero immediate  in combineSubSetcc()
55893   EVT VT = N->getValueType(0);  in combineSubSetcc()  local
55896       !Op0C->isZero() && Op1.getOperand(0).getOpcode() == X86ISD::SETCC &&  in combineSubSetcc()
55901     APInt NewImm = Op0C->getAPIntValue() - 1;  in combineSubSetcc()
55904     NewSetCC = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NewSetCC);  in combineSubSetcc()
55905     return DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(VT, VT), NewSetCC,  in combineSubSetcc()
55906                        DAG.getConstant(NewImm, DL, VT));  in combineSubSetcc()
55915   // ->  in combineX86CloadCstore()
55917   if (N->getConstantOperandVal(3) != X86::COND_NE)  in combineX86CloadCstore()
55920   SDValue Sub = N->getOperand(4);  in combineX86CloadCstore()
55929   SmallVector<SDValue, 5> Ops(N->op_values());  in combineX86CloadCstore()
55933   return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops,  in combineX86CloadCstore()
55934                                  cast<MemSDNode>(N)->getMemoryVT(),  in combineX86CloadCstore()
55935                                  cast<MemSDNode>(N)->getMemOperand());  in combineX86CloadCstore()
55941   SDValue Op0 = N->getOperand(0);  in combineSub()
55942   SDValue Op1 = N->getOperand(1);  in combineSub()
55949         return !Cst->isOpaque();  in combineSub()
55959   // sub(C1, xor(X, C2)) -> add(xor(X, ~C2), C1+1)  in combineSub()
55962       Op1->hasOneUse()) {  in combineSub()
55963     EVT VT = Op0.getValueType();  in combineSub()  local
55964     SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0),  in combineSub()
55965                                  DAG.getNOT(SDLoc(Op1), Op1.getOperand(1), VT));  in combineSub()
55967         DAG.getNode(ISD::ADD, DL, VT, Op0, DAG.getConstant(1, DL, VT));  in combineSub()
55968     return DAG.getNode(ISD::ADD, DL, VT, NewXor, NewAdd);  in combineSub()
55978   // Fold SUB(X,ADC(Y,0,W)) -> SBB(X,Y,W)  in combineSub()
55979   if (Op1.getOpcode() == X86ISD::ADC && Op1->hasOneUse() &&  in combineSub()
55981     assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use");  in combineSub()
55982     return DAG.getNode(X86ISD::SBB, SDLoc(Op1), Op1->getVTList(), Op0,  in combineSub()
55986   // Fold SUB(X,SBB(Y,Z,W)) -> SUB(ADC(X,Z,W),Y)  in combineSub()
55988   if (Op1.getOpcode() == X86ISD::SBB && Op1->hasOneUse() &&  in combineSub()
55990     assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use");  in combineSub()
55991     SDValue ADC = DAG.getNode(X86ISD::ADC, SDLoc(Op1), Op1->getVTList(), Op0,  in combineSub()
56008   unsigned Opcode = N->getOpcode();  in combineVectorCompare()
56012   SDValue LHS = N->getOperand(0);  in combineVectorCompare()
56013   SDValue RHS = N->getOperand(1);  in combineVectorCompare()
56014   MVT VT = N->getSimpleValueType(0);  in combineVectorCompare()  local
56015   unsigned EltBits = VT.getScalarSizeInBits();  in combineVectorCompare()
56016   unsigned NumElts = VT.getVectorNumElements();  in combineVectorCompare()
56020     return (Opcode == X86ISD::PCMPEQ) ? DAG.getAllOnesConstant(DL, VT)  in combineVectorCompare()
56021                                       : DAG.getConstant(0, DL, VT);  in combineVectorCompare()
56024   // PCMPEQ(X,UNDEF) -> UNDEF  in combineVectorCompare()
56025   // PCMPGT(X,UNDEF) -> 0  in combineVectorCompare()
56026   // PCMPGT(UNDEF,X) -> 0  in combineVectorCompare()
56043       return getConstVector(Results, LHSUndefs | RHSUndefs, VT, DAG, DL);  in combineVectorCompare()
56044     return getConstVector(Results, VT, DAG, DL);  in combineVectorCompare()
56053 CastIntSETCCtoFP(MVT VT, ISD::CondCode CC, unsigned NumSignificantBitsLHS,  in CastIntSETCCtoFP()  argument
56055   MVT SVT = VT.getScalarType();  in CastIntSETCCtoFP()
56075 static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,  in combineConcatVectorOps()  argument
56080   unsigned EltSizeInBits = VT.getScalarSizeInBits();  in combineConcatVectorOps()
56083     return DAG.getUNDEF(VT);  in combineConcatVectorOps()
56088     return getZeroVector(VT, Subtarget, DAG, DL);  in combineConcatVectorOps()
56098       (VT.is256BitVector() || (VT.is512BitVector() && Subtarget.hasAVX512()))) {  in combineConcatVectorOps()
56101       return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));  in combineConcatVectorOps()
56103     // concat_vectors(movddup(x),movddup(x)) -> broadcast(x)  in combineConcatVectorOps()
56104     if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&  in combineConcatVectorOps()
56107                                               VT.getScalarType(), Subtarget)))  in combineConcatVectorOps()
56108       return DAG.getNode(X86ISD::VBROADCAST, DL, VT,  in combineConcatVectorOps()
56113     // concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)  in combineConcatVectorOps()
56118         Op0.getOperand(0).getValueType() == VT.getScalarType())  in combineConcatVectorOps()
56119       return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));  in combineConcatVectorOps()
56122     //                extract_subvector(broadcast(x))) -> broadcast(x)  in combineConcatVectorOps()
56124     //                extract_subvector(subv_broadcast(x))) -> subv_broadcast(x)  in combineConcatVectorOps()
56126         Op0.getOperand(0).getValueType() == VT) {  in combineConcatVectorOps()
56132           Op0.getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT())  in combineConcatVectorOps()
56136     // concat_vectors(permq(x),permq(x)) -> permq(concat_vectors(x,x))  in combineConcatVectorOps()
56139       return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56140                          DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,  in combineConcatVectorOps()
56145   // concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128.  in combineConcatVectorOps()
56146   // Only concat of subvector high halves which vperm2x128 is best at.  in combineConcatVectorOps()
56148   if (VT.is256BitVector() && NumOps == 2) {  in combineConcatVectorOps()
56160         return DAG.getNode(X86ISD::VPERM2X128, DL, VT,  in combineConcatVectorOps()
56161                            DAG.getBitcast(VT, Src0.getOperand(0)),  in combineConcatVectorOps()
56162                            DAG.getBitcast(VT, Src1.getOperand(0)),  in combineConcatVectorOps()
56169   // TODO - combineX86ShufflesRecursively should handle shuffle concatenation  in combineConcatVectorOps()
56174     auto ConcatSubOperand = [&](EVT VT, ArrayRef<SDValue> SubOps, unsigned I) {  in combineConcatVectorOps()  argument
56187             VT, DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Subs));  in combineConcatVectorOps()
56189       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);  in combineConcatVectorOps()
56191     auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {  in combineConcatVectorOps()  argument
56201                          Sub.getOperand(0).getValueType() == VT &&  in combineConcatVectorOps()
56212         if (VT == MVT::v4f64 || VT == MVT::v4i64)  in combineConcatVectorOps()
56213           return DAG.getNode(X86ISD::UNPCKL, DL, VT,  in combineConcatVectorOps()
56214                              ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56215                              ConcatSubOperand(VT, Ops, 0));  in combineConcatVectorOps()
56217         if (VT == MVT::v8f32 || (VT == MVT::v8i32 && Subtarget.hasInt256()))  in combineConcatVectorOps()
56218           return DAG.getNode(VT == MVT::v8f32 ? X86ISD::VPERMILPI  in combineConcatVectorOps()
56220                              DL, VT, ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56229         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56230                            ConcatSubOperand(VT, Ops, 0));  in combineConcatVectorOps()
56235       if (!IsSplat && VT.getScalarType() == MVT::f32 &&  in combineConcatVectorOps()
56239         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56240                            ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56241                            ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));  in combineConcatVectorOps()
56249           ((VT.is256BitVector() && Subtarget.hasInt256()) ||  in combineConcatVectorOps()
56250            (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&  in combineConcatVectorOps()
56257         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56258                            ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56259                            ConcatSubOperand(VT, Ops, 1));  in combineConcatVectorOps()
56266       if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&  in combineConcatVectorOps()
56268         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56269                            ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));  in combineConcatVectorOps()
56274           (VT.is256BitVector() ||  in combineConcatVectorOps()
56275            (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&  in combineConcatVectorOps()
56279         MVT FloatVT = VT.changeVectorElementType(MVT::f32);  in combineConcatVectorOps()
56280         SDValue Res = DAG.getBitcast(FloatVT, ConcatSubOperand(VT, Ops, 0));  in combineConcatVectorOps()
56283         return DAG.getBitcast(VT, Res);  in combineConcatVectorOps()
56285       if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {  in combineConcatVectorOps()
56289         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56290                            ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56298       if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||  in combineConcatVectorOps()
56299                        (VT.is512BitVector() && Subtarget.useBWIRegs()))) {  in combineConcatVectorOps()
56303         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56310           (VT.is512BitVector() && Subtarget.useAVX512Regs())) {  in combineConcatVectorOps()
56331           return DAG.getNode(X86ISD::VPERMV, DL, VT, Mask, Src);  in combineConcatVectorOps()
56336       if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {  in combineConcatVectorOps()
56361           return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);  in combineConcatVectorOps()
56366       if (!IsSplat && VT.is512BitVector() && Subtarget.useAVX512Regs()) {  in combineConcatVectorOps()
56374           MVT ShuffleVT = VT.isFloatingPoint() ? MVT::v8f64 : MVT::v8i64;  in combineConcatVectorOps()
56383           return DAG.getBitcast(VT, Res);  in combineConcatVectorOps()
56389       if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {  in combineConcatVectorOps()
56398         return DAG.getNode(X86ISD::SHUF128, DL, VT, LHS, RHS,  in combineConcatVectorOps()
56404       if (!IsSplat && NumOps == 2 && VT.is256BitVector()) {  in combineConcatVectorOps()
56412           return DAG.getNode(ISD::TRUNCATE, DL, VT,  in combineConcatVectorOps()
56419       // Special case: SHL/SRL AVX1 V4i64 by 32-bits can lower as a shuffle.  in combineConcatVectorOps()
56421       if (VT == MVT::v4i64 && !Subtarget.hasInt256() &&  in combineConcatVectorOps()
56425         SDValue Res = DAG.getBitcast(MVT::v8i32, ConcatSubOperand(VT, Ops, 0));  in combineConcatVectorOps()
56434         return DAG.getBitcast(VT, Res);  in combineConcatVectorOps()
56441       if (((VT.is256BitVector() && Subtarget.hasInt256()) ||  in combineConcatVectorOps()
56442            (VT.is512BitVector() && Subtarget.useAVX512Regs() &&  in combineConcatVectorOps()
56447         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56448                            ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));  in combineConcatVectorOps()
56454       if (VT.is512BitVector() && Subtarget.useAVX512Regs() &&  in combineConcatVectorOps()
56458         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56459                            ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));  in combineConcatVectorOps()
56466       if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||  in combineConcatVectorOps()
56467                        (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {  in combineConcatVectorOps()
56468         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56469                            ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56470                            ConcatSubOperand(VT, Ops, 1));  in combineConcatVectorOps()
56475       if (!IsSplat && VT.is256BitVector() &&  in combineConcatVectorOps()
56476           (Subtarget.hasInt256() || VT == MVT::v8i32) &&  in combineConcatVectorOps()
56477           (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1))) {  in combineConcatVectorOps()
56479           return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56480                              ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56481                              ConcatSubOperand(VT, Ops, 1));  in combineConcatVectorOps()
56503         MVT FpVT = VT.changeVectorElementType(FpSVT);  in combineConcatVectorOps()
56507           SDValue LHS = ConcatSubOperand(VT, Ops, 0);  in combineConcatVectorOps()
56508           SDValue RHS = ConcatSubOperand(VT, Ops, 1);  in combineConcatVectorOps()
56516               VT, DAG.getNode(X86ISD::CMPP, DL, FpVT, LHS, RHS,  in combineConcatVectorOps()
56526       if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||  in combineConcatVectorOps()
56527                        (VT.is512BitVector() && Subtarget.useBWIRegs()))) {  in combineConcatVectorOps()
56528         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56529                            ConcatSubOperand(VT, Ops, 0));  in combineConcatVectorOps()
56534           (VT.is256BitVector() ||  in combineConcatVectorOps()
56535            (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&  in combineConcatVectorOps()
56539         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56540                            ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56541                            ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));  in combineConcatVectorOps()
56547       if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||  in combineConcatVectorOps()
56548                        (VT.is512BitVector() && Subtarget.useAVX512Regs() &&  in combineConcatVectorOps()
56550         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56551                            ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56552                            ConcatSubOperand(VT, Ops, 1));  in combineConcatVectorOps()
56561       if (!IsSplat && (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1)) &&  in combineConcatVectorOps()
56562           (VT.is256BitVector() ||  in combineConcatVectorOps()
56563            (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {  in combineConcatVectorOps()
56564         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56565                            ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56566                            ConcatSubOperand(VT, Ops, 1));  in combineConcatVectorOps()
56570       if (!IsSplat && (VT.is256BitVector() ||  in combineConcatVectorOps()
56571                        (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {  in combineConcatVectorOps()
56572         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56573                            ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56574                            ConcatSubOperand(VT, Ops, 1));  in combineConcatVectorOps()
56581       if (!IsSplat && VT.is256BitVector() &&  in combineConcatVectorOps()
56582           (VT.isFloatingPoint() || Subtarget.hasInt256())) {  in combineConcatVectorOps()
56583         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56584                            ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56585                            ConcatSubOperand(VT, Ops, 1));  in combineConcatVectorOps()
56590       if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||  in combineConcatVectorOps()
56591                        (VT.is512BitVector() && Subtarget.useBWIRegs()))) {  in combineConcatVectorOps()
56595         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56602           ((VT.is256BitVector() && Subtarget.hasInt256()) ||  in combineConcatVectorOps()
56603            (VT.is512BitVector() && Subtarget.useBWIRegs())) &&  in combineConcatVectorOps()
56607         return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56608                            ConcatSubOperand(VT, Ops, 0),  in combineConcatVectorOps()
56609                            ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));  in combineConcatVectorOps()
56613       if (NumOps == 2 && VT.is512BitVector() && Subtarget.useBWIRegs()) {  in combineConcatVectorOps()
56621         uint64_t Mask = (Mask1 << (VT.getVectorNumElements() / 2)) | Mask0;  in combineConcatVectorOps()
56622         MVT MaskSVT = MVT::getIntegerVT(VT.getVectorNumElements());  in combineConcatVectorOps()
56623         MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());  in combineConcatVectorOps()
56626         return DAG.getSelect(DL, VT, Sel, ConcatSubOperand(VT, Ops, 1),  in combineConcatVectorOps()
56627                              ConcatSubOperand(VT, Ops, 0));  in combineConcatVectorOps()
56632           (VT.is256BitVector() ||  in combineConcatVectorOps()
56633            (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&  in combineConcatVectorOps()
56640             return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56642                                ConcatSubOperand(VT, Ops, 1),  in combineConcatVectorOps()
56643                                ConcatSubOperand(VT, Ops, 2));  in combineConcatVectorOps()
56648       if (!IsSplat && VT.is256BitVector() && NumOps == 2 &&  in combineConcatVectorOps()
56650           IsConcatFree(VT, Ops, 1) && IsConcatFree(VT, Ops, 2)) {  in combineConcatVectorOps()
56654           return DAG.getNode(Op0.getOpcode(), DL, VT,  in combineConcatVectorOps()
56656                              ConcatSubOperand(VT, Ops, 1),  in combineConcatVectorOps()
56657                              ConcatSubOperand(VT, Ops, 2));  in combineConcatVectorOps()
56668     if (TLI->allowsMemoryAccess(Ctx, DAG.getDataLayout(), VT,  in combineConcatVectorOps()
56669                                 *FirstLd->getMemOperand(), &Fast) &&  in combineConcatVectorOps()
56672               EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))  in combineConcatVectorOps()
56680     APInt UndefElts = APInt::getZero(VT.getVectorNumElements());  in combineConcatVectorOps()
56691     if (EltBits.size() == VT.getVectorNumElements()) {  in combineConcatVectorOps()
56692       Constant *C = getConstantVector(VT, EltBits, UndefElts, Ctx);  in combineConcatVectorOps()
56697       SDValue Ld = DAG.getLoad(VT, DL, DAG.getEntryNode(), CV, MPI);  in combineConcatVectorOps()
56708       (VT.is256BitVector() || (VT.is512BitVector() && Subtarget.hasAVX512()))) {  in combineConcatVectorOps()
56717               getBROADCAST_LOAD(Opc, DL, VT, Mem->getMemoryVT(), Mem, 0, DAG)) {  in combineConcatVectorOps()
56726   // If we're splatting a 128-bit subvector to 512-bits, use SHUF128 directly.  in combineConcatVectorOps()
56727   if (IsSplat && NumOps == 4 && VT.is512BitVector() &&  in combineConcatVectorOps()
56729     MVT ShuffleVT = VT.isFloatingPoint() ? MVT::v8f64 : MVT::v8i64;  in combineConcatVectorOps()
56734     return DAG.getBitcast(VT, Res);  in combineConcatVectorOps()
56743   EVT VT = N->getValueType(0);  in combineCONCAT_VECTORS()  local
56744   EVT SrcVT = N->getOperand(0).getValueType();  in combineCONCAT_VECTORS()
56746   SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());  in combineCONCAT_VECTORS()
56748   if (VT.getVectorElementType() == MVT::i1) {  in combineCONCAT_VECTORS()
56751     APInt Constant = APInt::getZero(VT.getSizeInBits());  in combineCONCAT_VECTORS()
56755       Constant.insertBits(C->getAPIntValue(), I * SubSizeInBits);  in combineCONCAT_VECTORS()
56756       if (I == (E - 1)) {  in combineCONCAT_VECTORS()
56757         EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());  in combineCONCAT_VECTORS()
56759           return DAG.getBitcast(VT, DAG.getConstant(Constant, SDLoc(N), IntVT));  in combineCONCAT_VECTORS()
56767   if (Subtarget.hasAVX() && TLI.isTypeLegal(VT) && TLI.isTypeLegal(SrcVT)) {  in combineCONCAT_VECTORS()
56768     if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG,  in combineCONCAT_VECTORS()
56782   MVT OpVT = N->getSimpleValueType(0);  in combineINSERT_SUBVECTOR()
56787   SDValue Vec = N->getOperand(0);  in combineINSERT_SUBVECTOR()
56788   SDValue SubVec = N->getOperand(1);  in combineINSERT_SUBVECTOR()
56790   uint64_t IdxVal = N->getConstantOperandVal(2);  in combineINSERT_SUBVECTOR()
56827                              Ins.getOperand(1), N->getOperand(2));  in combineINSERT_SUBVECTOR()
56836   // insert_subvector X, (insert_subvector undef, Y, 0), Idx -->  in combineINSERT_SUBVECTOR()
56843                        SubVec.getOperand(1), N->getOperand(2));  in combineINSERT_SUBVECTOR()
56905     SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };  in combineINSERT_SUBVECTOR()
56908                                 MemIntr->getMemoryVT(),  in combineINSERT_SUBVECTOR()
56909                                 MemIntr->getMemOperand());  in combineINSERT_SUBVECTOR()
56932 /// is a common pattern for AVX1 integer code because 256-bit selects may be
56933 /// legal, but there is almost no integer math/logic available for 256-bit.
56938   SDValue Sel = Ext->getOperand(0);  in narrowExtractedVectorSelect()
56945   // TODO: This can be extended to handle extraction to 256-bits.  in narrowExtractedVectorSelect()
56946   MVT VT = Ext->getSimpleValueType(0);  in narrowExtractedVectorSelect()  local
56947   if (!VT.is128BitVector())  in narrowExtractedVectorSelect()
56954   MVT WideVT = Ext->getOperand(0).getSimpleValueType();  in narrowExtractedVectorSelect()
56961   unsigned ExtIdx = Ext->getConstantOperandVal(1);  in narrowExtractedVectorSelect()
56977   unsigned NarrowingFactor = WideVT.getSizeInBits() / VT.getSizeInBits();  in narrowExtractedVectorSelect()
56984   return DAG.getBitcast(VT, NarrowSel);  in narrowExtractedVectorSelect()
56990   // For AVX1 only, if we are extracting from a 256-bit and+not (which will  in combineEXTRACT_SUBVECTOR()
56992   // split the 'and' into 128-bit ops to avoid the concatenate and extract.  in combineEXTRACT_SUBVECTOR()
57000   if (!N->getValueType(0).isSimple())  in combineEXTRACT_SUBVECTOR()
57003   MVT VT = N->getSimpleValueType(0);  in combineEXTRACT_SUBVECTOR()  local
57004   SDValue InVec = N->getOperand(0);  in combineEXTRACT_SUBVECTOR()
57005   unsigned IdxVal = N->getConstantOperandVal(1);  in combineEXTRACT_SUBVECTOR()
57008   unsigned SizeInBits = VT.getSizeInBits();  in combineEXTRACT_SUBVECTOR()
57010   unsigned NumSubElts = VT.getVectorNumElements();  in combineEXTRACT_SUBVECTOR()
57021       SDValue NotOp = V->getOperand(0);  in combineEXTRACT_SUBVECTOR()
57026       // extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1  in combineEXTRACT_SUBVECTOR()
57028       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT,  in combineEXTRACT_SUBVECTOR()
57029                          DAG.getBitcast(InVecVT, Concat), N->getOperand(1));  in combineEXTRACT_SUBVECTOR()
57040     return getZeroVector(VT, Subtarget, DAG, DL);  in combineEXTRACT_SUBVECTOR()
57043     if (VT.getScalarType() == MVT::i1)  in combineEXTRACT_SUBVECTOR()
57044       return DAG.getConstant(1, DL, VT);  in combineEXTRACT_SUBVECTOR()
57045     return getOnesVector(VT, DAG, DL);  in combineEXTRACT_SUBVECTOR()
57049     return DAG.getBuildVector(VT, DL, InVec->ops().slice(IdxVal, NumSubElts));  in combineEXTRACT_SUBVECTOR()
57055   if (VT.getVectorElementType() != MVT::i1 &&  in combineEXTRACT_SUBVECTOR()
57059     SDValue NewExt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT,  in combineEXTRACT_SUBVECTOR()
57060                                  InVec.getOperand(0), N->getOperand(1));  in combineEXTRACT_SUBVECTOR()
57061     unsigned NewIdxVal = InVec.getConstantOperandVal(2) - IdxVal;  in combineEXTRACT_SUBVECTOR()
57062     return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, NewExt,  in combineEXTRACT_SUBVECTOR()
57077       cast<MemIntrinsicSDNode>(InVec)->getMemoryVT() == VT)  in combineEXTRACT_SUBVECTOR()
57091         return DAG.getUNDEF(VT);  in combineEXTRACT_SUBVECTOR()
57093         return getZeroVector(VT, Subtarget, DAG, DL);  in combineEXTRACT_SUBVECTOR()
57117     if (IdxVal == 0 && VT == MVT::v2f64 && InVecVT == MVT::v4f64) {  in combineEXTRACT_SUBVECTOR()
57121         return DAG.getNode(X86ISD::CVTSI2P, DL, VT, InVec.getOperand(0));  in combineEXTRACT_SUBVECTOR()
57126         return DAG.getNode(X86ISD::CVTUI2P, DL, VT, InVec.getOperand(0));  in combineEXTRACT_SUBVECTOR()
57131         return DAG.getNode(X86ISD::VFPEXT, DL, VT, InVec.getOperand(0));  in combineEXTRACT_SUBVECTOR()
57135     if (InOpcode == ISD::FP_TO_SINT && VT == MVT::v4i32) {  in combineEXTRACT_SUBVECTOR()
57138         return DAG.getNode(InOpcode, DL, VT,  in combineEXTRACT_SUBVECTOR()
57149       return DAG.getNode(ExtOp, DL, VT, Ext);  in combineEXTRACT_SUBVECTOR()
57158       return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2);  in combineEXTRACT_SUBVECTOR()
57165       return DAG.getNode(InOpcode, DL, VT, Ext);  in combineEXTRACT_SUBVECTOR()
57177         return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, InVec.getOperand(2));  in combineEXTRACT_SUBVECTOR()
57178       return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1);  in combineEXTRACT_SUBVECTOR()
57184       return DAG.getNode(InOpcode, DL, VT, Ext0);  in combineEXTRACT_SUBVECTOR()
57188   // Always split vXi64 logical shifts where we're extracting the upper 32-bits  in combineEXTRACT_SUBVECTOR()
57195     return DAG.getNode(InOpcode, DL, VT, Ext, InVec.getOperand(1));  in combineEXTRACT_SUBVECTOR()
57202   EVT VT = N->getValueType(0);  in combineScalarToVector()  local
57203   SDValue Src = N->getOperand(0);  in combineScalarToVector()
57210   if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse() &&  in combineScalarToVector()
57215   if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&  in combineScalarToVector()
57219     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),  in combineScalarToVector()
57224   if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Src.hasOneUse()) {  in combineScalarToVector()
57234         if (Ld->getExtensionType() == Ext &&  in combineScalarToVector()
57235             Ld->getMemoryVT().getScalarSizeInBits() <= 32)  in combineScalarToVector()
57247           VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,  in combineScalarToVector()
57252           VT,  in combineScalarToVector()
57259   if (VT == MVT::v2i64 && Src.getOpcode() == ISD::BITCAST &&  in combineScalarToVector()
57261     return DAG.getNode(X86ISD::MOVQ2DQ, DL, VT, Src.getOperand(0));  in combineScalarToVector()
57265   if (VT.getScalarType() == Src.getValueType())  in combineScalarToVector()
57266     for (SDNode *User : Src->uses())  in combineScalarToVector()
57267       if (User->getOpcode() == X86ISD::VBROADCAST &&  in combineScalarToVector()
57268           Src == User->getOperand(0)) {  in combineScalarToVector()
57269         unsigned SizeInBits = VT.getFixedSizeInBits();  in combineScalarToVector()
57271             User->getValueSizeInBits(0).getFixedValue();  in combineScalarToVector()
57287   SDValue LHS = N->getOperand(0);  in combinePMULDQ()
57288   SDValue RHS = N->getOperand(1);  in combinePMULDQ()
57293     return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS);  in combinePMULDQ()
57298     return DAG.getConstant(0, SDLoc(N), N->getValueType(0));  in combinePMULDQ()
57312   if (N->getValueType(0) == MVT::v2i64 && LHS.hasOneUse() &&  in combinePMULDQ()
57318                                LHS.getOperand(0), { 0, -1, 1, -1 });  in combinePMULDQ()
57320     return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);  in combinePMULDQ()
57322   if (N->getValueType(0) == MVT::v2i64 && RHS.hasOneUse() &&  in combinePMULDQ()
57328                                RHS.getOperand(0), { 0, -1, 1, -1 });  in combinePMULDQ()
57330     return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);  in combinePMULDQ()
57339   MVT VT = N->getSimpleValueType(0);  in combineVPMADD()  local
57340   SDValue LHS = N->getOperand(0);  in combineVPMADD()
57341   SDValue RHS = N->getOperand(1);  in combineVPMADD()
57342   unsigned Opc = N->getOpcode();  in combineVPMADD()
57351     return DAG.getConstant(0, SDLoc(N), VT);  in combineVPMADD()
57357   unsigned DstEltBits = VT.getScalarSizeInBits();  in combineVPMADD()
57371     return getConstVector(Result, VT, DAG, SDLoc(N));  in combineVPMADD()
57375   APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());  in combineVPMADD()
57385   EVT VT = N->getValueType(0);  in combineEXTEND_VECTOR_INREG()  local
57386   SDValue In = N->getOperand(0);  in combineEXTEND_VECTOR_INREG()
57387   unsigned Opcode = N->getOpcode();  in combineEXTEND_VECTOR_INREG()
57396     if (Ld->isSimple()) {  in combineEXTEND_VECTOR_INREG()
57401       EVT MemVT = VT.changeVectorElementType(SVT);  in combineEXTEND_VECTOR_INREG()
57402       if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {  in combineEXTEND_VECTOR_INREG()
57404             Ext, DL, VT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),  in combineEXTEND_VECTOR_INREG()
57405             MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags());  in combineEXTEND_VECTOR_INREG()
57412   // Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X).  in combineEXTEND_VECTOR_INREG()
57414     return DAG.getNode(Opcode, DL, VT, In.getOperand(0));  in combineEXTEND_VECTOR_INREG()
57417   // -> EXTEND_VECTOR_INREG(X).  in combineEXTEND_VECTOR_INREG()
57418   // TODO: Handle non-zero subvector indices.  in combineEXTEND_VECTOR_INREG()
57423     return DAG.getNode(Opcode, DL, VT, In.getOperand(0).getOperand(0));  in combineEXTEND_VECTOR_INREG()
57425   // Fold EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?)) -> BUILD_VECTOR(X,0,Y,0).  in combineEXTEND_VECTOR_INREG()
57429       In.getValueSizeInBits() == VT.getSizeInBits()) {  in combineEXTEND_VECTOR_INREG()
57430     unsigned NumElts = VT.getVectorNumElements();  in combineEXTEND_VECTOR_INREG()
57431     unsigned Scale = VT.getScalarSizeInBits() / In.getScalarValueSizeInBits();  in combineEXTEND_VECTOR_INREG()
57436     return DAG.getBitcast(VT, DAG.getBuildVector(In.getValueType(), DL, Elts));  in combineEXTEND_VECTOR_INREG()
57442     if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))  in combineEXTEND_VECTOR_INREG()
57452   EVT VT = N->getValueType(0);  in combineKSHIFT()  local
57454   if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))  in combineKSHIFT()
57455     return DAG.getConstant(0, SDLoc(N), VT);  in combineKSHIFT()
57458   APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());  in combineKSHIFT()
57473   if (N->getOperand(0).getOpcode() != ISD::FP_TO_FP16)  in combineFP16_TO_FP()
57476   if (N->getValueType(0) != MVT::f32 ||  in combineFP16_TO_FP()
57477       N->getOperand(0).getOperand(0).getValueType() != MVT::f32)  in combineFP16_TO_FP()
57482                             N->getOperand(0).getOperand(0));  in combineFP16_TO_FP()
57493   EVT VT = N->getValueType(0);  in combineFP_EXTEND()  local
57494   bool IsStrict = N->isStrictFPOpcode();  in combineFP_EXTEND()
57495   SDValue Src = N->getOperand(IsStrict ? 1 : 0);  in combineFP_EXTEND()
57501         !IsStrict && Src.getOperand(0).getValueType() == VT)  in combineFP_EXTEND()
57508     if (VT.getVectorElementType() == MVT::f64) {  in combineFP_EXTEND()
57509       EVT TmpVT = VT.changeVectorElementType(MVT::f32);  in combineFP_EXTEND()
57510       return DAG.getNode(ISD::FP_EXTEND, dl, VT,  in combineFP_EXTEND()
57513     assert(VT.getVectorElementType() == MVT::f32 && "Unexpected fpext");  in combineFP_EXTEND()
57518     return DAG.getBitcast(VT, Src);  in combineFP_EXTEND()
57530   if (VT.getVectorElementType() != MVT::f32 &&  in combineFP_EXTEND()
57531       VT.getVectorElementType() != MVT::f64)  in combineFP_EXTEND()
57534   unsigned NumElts = VT.getVectorNumElements();  in combineFP_EXTEND()
57558                       {N->getOperand(0), Src});  in combineFP_EXTEND()
57571     // Extend to the original VT if necessary.  in combineFP_EXTEND()
57572     if (Cvt.getValueType() != VT) {  in combineFP_EXTEND()
57573       Cvt = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {VT, MVT::Other},  in combineFP_EXTEND()
57580   // Extend to the original VT if necessary.  in combineFP_EXTEND()
57581   return DAG.getNode(ISD::FP_EXTEND, dl, VT, Cvt);  in combineFP_EXTEND()
57589   assert((N->getOpcode() == X86ISD::VBROADCAST_LOAD ||  in combineBROADCAST_LOAD()
57590           N->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) &&  in combineBROADCAST_LOAD()
57594   if (N->hasAnyUseOfValue(1))  in combineBROADCAST_LOAD()
57599   SDValue Ptr = MemIntrin->getBasePtr();  in combineBROADCAST_LOAD()
57600   SDValue Chain = MemIntrin->getChain();  in combineBROADCAST_LOAD()
57601   EVT VT = N->getSimpleValueType(0);  in combineBROADCAST_LOAD()  local
57602   EVT MemVT = MemIntrin->getMemoryVT();  in combineBROADCAST_LOAD()
57605   // The input chain and the size of the memory VT must match.  in combineBROADCAST_LOAD()
57606   for (SDNode *User : Ptr->uses())  in combineBROADCAST_LOAD()
57607     if (User != N && User->getOpcode() == N->getOpcode() &&  in combineBROADCAST_LOAD()
57608         cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&  in combineBROADCAST_LOAD()
57609         cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&  in combineBROADCAST_LOAD()
57610         cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==  in combineBROADCAST_LOAD()
57612         !User->hasAnyUseOfValue(1) &&  in combineBROADCAST_LOAD()
57613         User->getValueSizeInBits(0).getFixedValue() > VT.getFixedSizeInBits()) {  in combineBROADCAST_LOAD()
57615                                          VT.getSizeInBits());  in combineBROADCAST_LOAD()
57616       Extract = DAG.getBitcast(VT, Extract);  in combineBROADCAST_LOAD()
57628   bool IsStrict = N->isStrictFPOpcode();  in combineFP_ROUND()
57629   EVT VT = N->getValueType(0);  in combineFP_ROUND()  local
57630   SDValue Src = N->getOperand(IsStrict ? 1 : 0);  in combineFP_ROUND()
57633   if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||  in combineFP_ROUND()
57640   unsigned NumElts = VT.getVectorNumElements();  in combineFP_ROUND()
57651       bool IsOp0Strict = Op0->isStrictFPOpcode();  in combineFP_ROUND()
57693                       {N->getOperand(0), Src, Rnd});  in combineFP_ROUND()
57701     EVT IntVT = VT.changeVectorElementTypeToInteger();  in combineFP_ROUND()
57706   Cvt = DAG.getBitcast(VT, Cvt);  in combineFP_ROUND()
57715   SDValue Src = N->getOperand(0);  in combineMOVDQ2Q()
57721     if (LN->isSimple()) {  in combineMOVDQ2Q()
57722       SDValue NewLd = DAG.getLoad(MVT::x86mmx, SDLoc(N), LN->getChain(),  in combineMOVDQ2Q()
57723                                   LN->getBasePtr(),  in combineMOVDQ2Q()
57724                                   LN->getPointerInfo(),  in combineMOVDQ2Q()
57725                                   LN->getOriginalAlign(),  in combineMOVDQ2Q()
57726                                   LN->getMemOperand()->getFlags());  in combineMOVDQ2Q()
57737   unsigned NumBits = N->getSimpleValueType(0).getSizeInBits();  in combinePDEP()
57748   switch (N->getOpcode()) {  in PerformDAGCombine()
57749   // clang-format off  in PerformDAGCombine()
57935   // clang-format on  in PerformDAGCombine()
57941 bool X86TargetLowering::preferABDSToABSWithNSW(EVT VT) const {  in preferABDSToABSWithNSW()
57945 // Prefer (non-AVX512) vector TRUNCATE(SIGN_EXTEND_INREG(X)) to use of PACKSS.
57946 bool X86TargetLowering::preferSextInRegOfTruncate(EVT TruncVT, EVT VT,  in preferSextInRegOfTruncate()  argument
57948   return Subtarget.hasAVX512() || !VT.isVector();  in preferSextInRegOfTruncate()
57951 bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {  in isTypeDesirableForOp()
57952   if (!isTypeLegal(VT))  in isTypeDesirableForOp()
57956   if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)  in isTypeDesirableForOp()
57959   // TODO: Almost no 8-bit ops are desirable because they have no actual  in isTypeDesirableForOp()
57960   //       size/speed advantages vs. 32-bit ops, but they do have a major  in isTypeDesirableForOp()
57963   // 8-bit multiply/shl is probably not cheaper than 32-bit multiply/shl, and  in isTypeDesirableForOp()
57964   // we have specializations to turn 32-bit multiply/shl into LEA or other ops.  in isTypeDesirableForOp()
57965   // Also, see the comment in "IsDesirableToPromoteOp" - where we additionally  in isTypeDesirableForOp()
57967   if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8)  in isTypeDesirableForOp()
57972   if (VT == MVT::i16) {  in isTypeDesirableForOp()
58006   Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");  in expandIndirectJTBranch()
58008     // In case control-flow branch protection is enabled, we need to add  in expandIndirectJTBranch()
58023   EVT VT = LogicOp->getValueType(0);  in isDesirableToCombineLogicOpOfSETCC()  local
58024   EVT OpVT = SETCC0->getOperand(0).getValueType();  in isDesirableToCombineLogicOpOfSETCC()
58025   if (!VT.isInteger())  in isDesirableToCombineLogicOpOfSETCC()
58028   if (VT.isVector())  in isDesirableToCombineLogicOpOfSETCC()
58037   // TODO: Currently we lower (icmp eq/ne (and ~X, Y), 0) -> `test (not X), Y`,  in isDesirableToCombineLogicOpOfSETCC()
58043   EVT VT = Op.getValueType();  in IsDesirableToPromoteOp()  local
58044   bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&  in IsDesirableToPromoteOp()
58049   // 8-bit multiply-by-constant can usually be expanded to something cheaper  in IsDesirableToPromoteOp()
58051   if (VT != MVT::i16 && !Is8BitMulByConstant)  in IsDesirableToPromoteOp()
58057     SDNode *User = *Op->use_begin();  in IsDesirableToPromoteOp()
58062     return Ld->getBasePtr() == St->getBasePtr();  in IsDesirableToPromoteOp()
58070     SDNode *User = *Op->use_begin();  in IsDesirableToPromoteOp()
58071     if (User->getOpcode() != ISD::ATOMIC_STORE)  in IsDesirableToPromoteOp()
58075     return Ld->getBasePtr() == St->getBasePtr();  in IsDesirableToPromoteOp()
58123 //===----------------------------------------------------------------------===//
58125 //===----------------------------------------------------------------------===//
58163   InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());  in ExpandInlineAsm()
58165   const std::string &AsmStr = IA->getAsmString();  in ExpandInlineAsm()
58167   IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());  in ExpandInlineAsm()
58168   if (!Ty || Ty->getBitWidth() % 16 != 0)  in ExpandInlineAsm()
58171   // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"  in ExpandInlineAsm()
58194     // rorw $$8, ${0:w}  -->  llvm.bswap.i16  in ExpandInlineAsm()
58195     if (CI->getType()->isIntegerTy(16) &&  in ExpandInlineAsm()
58196         IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&  in ExpandInlineAsm()
58200       StringRef ConstraintsStr = IA->getConstraintString();  in ExpandInlineAsm()
58208     if (CI->getType()->isIntegerTy(32) &&  in ExpandInlineAsm()
58209         IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&  in ExpandInlineAsm()
58214       StringRef ConstraintsStr = IA->getConstraintString();  in ExpandInlineAsm()
58221     if (CI->getType()->isIntegerTy(64)) {  in ExpandInlineAsm()
58222       InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();  in ExpandInlineAsm()
58226         // bswap %eax / bswap %edx / xchgl %eax, %edx  -> llvm.bswap.i64  in ExpandInlineAsm()
58361   Type *Ty = CallOperandVal->getType();  in getSingleConstraintMatchWeight()
58377     if (CallOperandVal->getType()->isIntegerTy())  in getSingleConstraintMatchWeight()
58383     if (Ty->isFloatingPointTy())  in getSingleConstraintMatchWeight()
58387     if (Ty->isX86_MMXTy() && Subtarget.hasMMX())  in getSingleConstraintMatchWeight()
58398       if (((Ty->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||  in getSingleConstraintMatchWeight()
58399           ((Ty->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) ||  in getSingleConstraintMatchWeight()
58400           ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()))  in getSingleConstraintMatchWeight()
58405       if ((Ty->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())  in getSingleConstraintMatchWeight()
58410       if (Ty->isX86_MMXTy() && Subtarget.hasMMX())  in getSingleConstraintMatchWeight()
58430       if (CallOperandVal->getType()->isIntegerTy())  in getSingleConstraintMatchWeight()
58436     if ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())  in getSingleConstraintMatchWeight()
58440     if (((Ty->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||  in getSingleConstraintMatchWeight()
58441         ((Ty->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()))  in getSingleConstraintMatchWeight()
58446     if ((Ty->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())  in getSingleConstraintMatchWeight()
58451       if (C->getZExtValue() <= 31)  in getSingleConstraintMatchWeight()
58456       if (C->getZExtValue() <= 63)  in getSingleConstraintMatchWeight()
58461       if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))  in getSingleConstraintMatchWeight()
58466       if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))  in getSingleConstraintMatchWeight()
58471       if (C->getZExtValue() <= 3)  in getSingleConstraintMatchWeight()
58476       if (C->getZExtValue() <= 0xff)  in getSingleConstraintMatchWeight()
58486       if ((C->getSExtValue() >= -0x80000000LL) &&  in getSingleConstraintMatchWeight()
58487           (C->getSExtValue() <= 0x7fffffffLL))  in getSingleConstraintMatchWeight()
58492       if (C->getZExtValue() <= 0xffffffff)  in getSingleConstraintMatchWeight()
58534   // Extend to 32-bits  in LowerAsmOutputForConstraint()
58552       if (C->getZExtValue() <= 31) {  in LowerAsmOperandForConstraint()
58553         Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),  in LowerAsmOperandForConstraint()
58561       if (C->getZExtValue() <= 63) {  in LowerAsmOperandForConstraint()
58562         Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),  in LowerAsmOperandForConstraint()
58570       if (isInt<8>(C->getSExtValue())) {  in LowerAsmOperandForConstraint()
58571         Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),  in LowerAsmOperandForConstraint()
58579       if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||  in LowerAsmOperandForConstraint()
58580           (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {  in LowerAsmOperandForConstraint()
58581         Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),  in LowerAsmOperandForConstraint()
58589       if (C->getZExtValue() <= 3) {  in LowerAsmOperandForConstraint()
58590         Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),  in LowerAsmOperandForConstraint()
58598       if (C->getZExtValue() <= 255) {  in LowerAsmOperandForConstraint()
58599         Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),  in LowerAsmOperandForConstraint()
58607       if (C->getZExtValue() <= 127) {  in LowerAsmOperandForConstraint()
58608         Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),  in LowerAsmOperandForConstraint()
58615     // 32-bit signed value  in LowerAsmOperandForConstraint()
58618                                            C->getSExtValue())) {  in LowerAsmOperandForConstraint()
58620         Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);  in LowerAsmOperandForConstraint()
58633       Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),  in LowerAsmOperandForConstraint()
58634                                               BA->getValueType(0)));  in LowerAsmOperandForConstraint()
58637       if (Op->getOpcode() == ISD::ADD &&  in LowerAsmOperandForConstraint()
58638           isa<ConstantSDNode>(Op->getOperand(1))) {  in LowerAsmOperandForConstraint()
58639         Offset = cast<ConstantSDNode>(Op->getOperand(1))->getSExtValue();  in LowerAsmOperandForConstraint()
58640         Op = Op->getOperand(0);  in LowerAsmOperandForConstraint()
58643         Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),  in LowerAsmOperandForConstraint()
58644                                                  GA->getValueType(0), Offset));  in LowerAsmOperandForConstraint()
58649     // 32-bit unsigned value  in LowerAsmOperandForConstraint()
58652                                            C->getZExtValue())) {  in LowerAsmOperandForConstraint()
58653         Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),  in LowerAsmOperandForConstraint()
58665       bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;  in LowerAsmOperandForConstraint()
58669       int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue()  in LowerAsmOperandForConstraint()
58670                                                   : CST->getSExtValue();  in LowerAsmOperandForConstraint()
58682     // If we are in non-pic codegen mode, we allow the address of a global (with  in LowerAsmOperandForConstraint()
58688               Subtarget.classifyGlobalReference(GA->getGlobal())))  in LowerAsmOperandForConstraint()
58741                                                 MVT VT) const {  in getRegForInlineAsmConstraint()
58761         if (VT == MVT::v1i1 || VT == MVT::i1)  in getRegForInlineAsmConstraint()
58763         if (VT == MVT::v8i1 || VT == MVT::i8)  in getRegForInlineAsmConstraint()
58765         if (VT == MVT::v16i1 || VT == MVT::i16)  in getRegForInlineAsmConstraint()
58769         if (VT == MVT::v32i1 || VT == MVT::i32)  in getRegForInlineAsmConstraint()
58771         if (VT == MVT::v64i1 || VT == MVT::i64)  in getRegForInlineAsmConstraint()
58775     case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.  in getRegForInlineAsmConstraint()
58777         if (VT == MVT::i8 || VT == MVT::i1)  in getRegForInlineAsmConstraint()
58781         if (VT == MVT::i16)  in getRegForInlineAsmConstraint()
58785         if (VT == MVT::i32 || VT == MVT::f32)  in getRegForInlineAsmConstraint()
58789         if (VT != MVT::f80 && !VT.isVector())  in getRegForInlineAsmConstraint()
58796       // 32-bit fallthrough  in getRegForInlineAsmConstraint()
58798       if (VT == MVT::i8 || VT == MVT::i1)  in getRegForInlineAsmConstraint()
58800       if (VT == MVT::i16)  in getRegForInlineAsmConstraint()
58802       if (VT == MVT::i32 || VT == MVT::f32 ||  in getRegForInlineAsmConstraint()
58803           (!VT.isVector() && !Subtarget.is64Bit()))  in getRegForInlineAsmConstraint()
58805       if (VT != MVT::f80 && !VT.isVector())  in getRegForInlineAsmConstraint()
58810       if (VT == MVT::i8 || VT == MVT::i1)  in getRegForInlineAsmConstraint()
58814       if (VT == MVT::i16)  in getRegForInlineAsmConstraint()
58818       if (VT == MVT::i32 || VT == MVT::f32 ||  in getRegForInlineAsmConstraint()
58819           (!VT.isVector() && !Subtarget.is64Bit()))  in getRegForInlineAsmConstraint()
58823       if (VT != MVT::f80 && !VT.isVector())  in getRegForInlineAsmConstraint()
58829       if (VT == MVT::i8 || VT == MVT::i1)  in getRegForInlineAsmConstraint()
58831       if (VT == MVT::i16)  in getRegForInlineAsmConstraint()
58833       if (VT == MVT::i32 || VT == MVT::f32 ||  in getRegForInlineAsmConstraint()
58834           (!VT.isVector() && !Subtarget.is64Bit()))  in getRegForInlineAsmConstraint()
58836       if (VT != MVT::f80 && !VT.isVector())  in getRegForInlineAsmConstraint()
58840       // If SSE is enabled for this VT, use f80 to ensure the isel moves the  in getRegForInlineAsmConstraint()
58842       if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))  in getRegForInlineAsmConstraint()
58844       if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))  in getRegForInlineAsmConstraint()
58846       if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80)  in getRegForInlineAsmConstraint()
58857       switch (VT.SimpleTy) {  in getRegForInlineAsmConstraint()
58960       return getRegForInlineAsmConstraint(TRI, "x", VT);  in getRegForInlineAsmConstraint()
58966       switch (VT.SimpleTy) {  in getRegForInlineAsmConstraint()
59035         if (VT == MVT::v1i1 || VT == MVT::i1)  in getRegForInlineAsmConstraint()
59037         if (VT == MVT::v8i1 || VT == MVT::i8)  in getRegForInlineAsmConstraint()
59039         if (VT == MVT::v16i1 || VT == MVT::i16)  in getRegForInlineAsmConstraint()
59043         if (VT == MVT::v32i1 || VT == MVT::i32)  in getRegForInlineAsmConstraint()
59045         if (VT == MVT::v64i1 || VT == MVT::i64)  in getRegForInlineAsmConstraint()
59055       if (VT == MVT::i8 || VT == MVT::i1)  in getRegForInlineAsmConstraint()
59057       if (VT == MVT::i16)  in getRegForInlineAsmConstraint()
59059       if (VT == MVT::i32 || VT == MVT::f32)  in getRegForInlineAsmConstraint()
59061       if (VT != MVT::f80 && !VT.isVector())  in getRegForInlineAsmConstraint()
59065       if (VT == MVT::i8 || VT == MVT::i1)  in getRegForInlineAsmConstraint()
59067       if (VT == MVT::i16)  in getRegForInlineAsmConstraint()
59069       if (VT == MVT::i32 || VT == MVT::f32)  in getRegForInlineAsmConstraint()
59071       if (VT != MVT::f80 && !VT.isVector())  in getRegForInlineAsmConstraint()
59083   Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);  in getRegForInlineAsmConstraint()
59087     // Only match x87 registers if the VT is one SelectionDAGBuilder can convert  in getRegForInlineAsmConstraint()
59089     if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) {  in getRegForInlineAsmConstraint()
59090       // Map st(0) -> st(7) -> ST0  in getRegForInlineAsmConstraint()
59100         return std::make_pair(X86::FP0 + Constraint[4] - '0',  in getRegForInlineAsmConstraint()
59109     // flags -> EFLAGS  in getRegForInlineAsmConstraint()
59113     // dirflag -> DF  in getRegForInlineAsmConstraint()
59116         VT == MVT::Other)  in getRegForInlineAsmConstraint()
59119     // fpsr -> FPSW  in getRegForInlineAsmConstraint()
59121     if (StringRef("{fpsr}").equals_insensitive(Constraint) && VT == MVT::Other)  in getRegForInlineAsmConstraint()
59127   // Make sure it isn't a register that requires 64-bit mode.  in getRegForInlineAsmConstraint()
59130       TRI->getEncodingValue(Res.first) >= 8) {  in getRegForInlineAsmConstraint()
59131     // Register requires REX prefix, but we're in 32-bit mode.  in getRegForInlineAsmConstraint()
59137       TRI->getEncodingValue(Res.first) & 0x10) {  in getRegForInlineAsmConstraint()
59143   // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to  in getRegForInlineAsmConstraint()
59146   if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)  in getRegForInlineAsmConstraint()
59158     unsigned Size = VT.getSizeInBits();  in getRegForInlineAsmConstraint()
59171         // Model GCC's behavior here and select a fixed pair of 32-bit  in getRegForInlineAsmConstraint()
59192       if (RC && RC->contains(DestReg))  in getRegForInlineAsmConstraint()
59205     if (VT == MVT::f16)  in getRegForInlineAsmConstraint()
59207     else if (VT == MVT::f32 || VT == MVT::i32)  in getRegForInlineAsmConstraint()
59209     else if (VT == MVT::f64 || VT == MVT::i64)  in getRegForInlineAsmConstraint()
59211     else if (TRI->isTypeLegalForClass(X86::VR128XRegClass, VT))  in getRegForInlineAsmConstraint()
59213     else if (TRI->isTypeLegalForClass(X86::VR256XRegClass, VT))  in getRegForInlineAsmConstraint()
59215     else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))  in getRegForInlineAsmConstraint()
59223     if (VT == MVT::v1i1 || VT == MVT::i1)  in getRegForInlineAsmConstraint()
59225     else if (VT == MVT::v8i1 || VT == MVT::i8)  in getRegForInlineAsmConstraint()
59227     else if (VT == MVT::v16i1 || VT == MVT::i16)  in getRegForInlineAsmConstraint()
59229     else if (VT == MVT::v32i1 || VT == MVT::i32)  in getRegForInlineAsmConstraint()
59231     else if (VT == MVT::v64i1 || VT == MVT::i64)  in getRegForInlineAsmConstraint()
59243 bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {  in isIntDivCheap()  argument
59248   // integer division, leaving the division as-is is a loss even in terms of  in isIntDivCheap()
59252   return OptSize && !VT.isVector();  in isIntDivCheap()
59261       Entry->getParent()->getInfo<X86MachineFunctionInfo>();  in initializeSplitCSR()
59262   AFI->setIsSplitCSR(true);  in initializeSplitCSR()
59269   const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());  in insertCopiesSplitCSR()
59274   MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();  in insertCopiesSplitCSR()
59275   MachineBasicBlock::iterator MBBI = Entry->begin();  in insertCopiesSplitCSR()
59283     Register NewVR = MRI->createVirtualRegister(RC);  in insertCopiesSplitCSR()
59285     // FIXME: this currently does not emit CFI pseudo-instructions, it works  in insertCopiesSplitCSR()
59286     // fine for CXX_FAST_TLS since the C++-style TLS access functions should be  in insertCopiesSplitCSR()
59288     // CFI pseudo-instructions.  in insertCopiesSplitCSR()
59290         Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&  in insertCopiesSplitCSR()
59292     Entry->addLiveIn(*I);  in insertCopiesSplitCSR()
59293     BuildMI(*Entry, MBBI, MIMetadata(), TII->get(TargetOpcode::COPY), NewVR)  in insertCopiesSplitCSR()
59296     // Insert the copy-back instructions right before the terminator.  in insertCopiesSplitCSR()
59298       BuildMI(*Exit, Exit->getFirstTerminator(), MIMetadata(),  in insertCopiesSplitCSR()
59299               TII->get(TargetOpcode::COPY), *I)  in insertCopiesSplitCSR()
59312   assert(MBBI->isCall() && MBBI->getCFIType() &&  in EmitKCFICheck()
59318   switch (MBBI->getOpcode()) {  in EmitKCFICheck()
59325     if (!TII->unfoldMemoryOperand(MF, *OrigCall, X86::R11, /*UnfoldLoad=*/true,  in EmitKCFICheck()
59330     assert(MBBI->isCall() &&  in EmitKCFICheck()
59332     if (OrigCall->shouldUpdateCallSiteInfo())  in EmitKCFICheck()
59334     MBBI->setCFIType(MF, OrigCall->getCFIType());  in EmitKCFICheck()
59335     OrigCall->eraseFromParent();  in EmitKCFICheck()
59342   MachineOperand &Target = MBBI->getOperand(0);  in EmitKCFICheck()
59344   switch (MBBI->getOpcode()) {  in EmitKCFICheck()
59357     // 64-bit indirect thunk calls.  in EmitKCFICheck()
59367   return BuildMI(MBB, MBBI, MIMetadata(*MBBI), TII->get(X86::KCFI_CHECK))  in EmitKCFICheck()
59369       .addImm(MBBI->getCFIType())  in EmitKCFICheck()
59383       MF.getFunction().hasFnAttribute("no-stack-arg-probe"))  in hasInlineStackProbe()
59387   if (MF.getFunction().hasFnAttribute("probe-stack"))  in hasInlineStackProbe()
59388     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==  in hasInlineStackProbe()
59389            "inline-asm";  in hasInlineStackProbe()
59403   if (MF.getFunction().hasFnAttribute("probe-stack"))  in getStackProbeSymbolName()
59404     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString();  in getStackProbeSymbolName()
59409       MF.getFunction().hasFnAttribute("no-stack-arg-probe"))  in getStackProbeSymbolName()
59423   return MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size",  in getStackProbeSize()
59428   if (ML && ML->isInnermost() &&  in getPrefLoopAlignment()