Lines Matching +full:low +full:- +full:vt
1 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
71 #define DEBUG_TYPE "x86-isel"
74 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
78 "alignment set by x86-experimental-pref-loop-alignment."),
82 "x86-br-merging-base-cost", cl::init(2),
88 "will be merged, and above which conditionals will be split. Set to -1 "
93 "x86-br-merging-ccmp-bias", cl::init(6),
94 cl::desc("Increases 'x86-br-merging-base-cost' in cases that the target "
99 "x86-br-merging-likely-bias", cl::init(0),
100 cl::desc("Increases 'x86-br-merging-base-cost' in cases that it is likely "
105 "the instruction cost threshold. Set to -1 to never merge likely "
110 "x86-br-merging-unlikely-bias", cl::init(-1),
112 "Decreases 'x86-br-merging-base-cost' in cases that it is unlikely "
117 "the instruction cost threshold. Set to -1 to never merge unlikely "
122 "mul-constant-optimization", cl::init(true),
137 // X86-SSE is even stranger. It uses -1 or 0 for vector masks. in X86TargetLowering()
141 // default expansion to a no-op. in X86TargetLowering()
144 // For 64-bit, since we have so many registers, use the ILP scheduler. in X86TargetLowering()
145 // For 32-bit, use the register pressure specific scheduling. in X86TargetLowering()
154 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); in X86TargetLowering()
202 for (MVT VT : MVT::integer_valuetypes()) in X86TargetLowering() local
203 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); in X86TargetLowering()
216 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) { in X86TargetLowering()
217 setCondCodeAction(ISD::SETOEQ, VT, Expand); in X86TargetLowering()
218 setCondCodeAction(ISD::SETUNE, VT, Expand); in X86TargetLowering()
264 // We have an algorithm for SSE2, and we turn this into a 64-bit in X86TargetLowering()
268 // We have an algorithm for SSE2->double, and we turn this into a in X86TargetLowering()
269 // 64-bit FILD followed by conditional FADD for other targets. in X86TargetLowering()
284 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 in X86TargetLowering()
298 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 in X86TargetLowering()
330 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) { in X86TargetLowering()
331 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); in X86TargetLowering()
332 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); in X86TargetLowering()
350 // Without SSE, i64->f64 goes through memory. in X86TargetLowering()
358 // the two-result form to trivial CSE, which is able to combine x/y and x%y in X86TargetLowering()
361 // Scalar integer multiply-high is also lowered to use two-result in X86TargetLowering()
363 // (low) operations are left as Legal, as there are single-result in X86TargetLowering()
364 // instructions for this in x86. Using the two-result multiply instructions in X86TargetLowering()
365 // when both high and low results are needed must be arranged by dagcombine. in X86TargetLowering()
366 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { in X86TargetLowering()
367 setOperationAction(ISD::MULHS, VT, Expand); in X86TargetLowering()
368 setOperationAction(ISD::MULHU, VT, Expand); in X86TargetLowering()
369 setOperationAction(ISD::SDIV, VT, Expand); in X86TargetLowering()
370 setOperationAction(ISD::UDIV, VT, Expand); in X86TargetLowering()
371 setOperationAction(ISD::SREM, VT, Expand); in X86TargetLowering()
372 setOperationAction(ISD::UREM, VT, Expand); in X86TargetLowering()
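A brief editorial sketch (not code from this file) of why the comment above expands SDIV/SREM to the two-result SDIVREM form: once both operations become the same SDIVREM node, CSE leaves a single node, so one hardware divide yields both results.

// Hedged illustration: both expressions below expand to ISD::SDIVREM and are
// CSE'd into one node, so codegen emits a single idiv (quotient in EAX,
// remainder in EDX on 32-bit x86).
void quotrem(int x, int y, int &q, int &r) {
  q = x / y; // SDIV, expanded to SDIVREM
  r = x % y; // SREM, expanded to the same SDIVREM node
}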
377 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128, in X86TargetLowering()
379 setOperationAction(ISD::BR_CC, VT, Expand); in X86TargetLowering()
380 setOperationAction(ISD::SELECT_CC, VT, Expand); in X86TargetLowering()
426 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { in X86TargetLowering()
427 if (VT == MVT::i64 && !Subtarget.is64Bit()) in X86TargetLowering()
429 setOperationAction(ISD::CTLZ , VT, Custom); in X86TargetLowering()
430 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); in X86TargetLowering()
436 // Special handling for half-precision floating point conversions. in X86TargetLowering()
448 for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) { in X86TargetLowering()
449 setOperationAction(ISD::STRICT_FP_TO_BF16, VT, Expand); in X86TargetLowering()
450 setOperationAction(ISD::STRICT_BF16_TO_FP, VT, Expand); in X86TargetLowering()
453 for (MVT VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) { in X86TargetLowering()
454 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); in X86TargetLowering()
455 setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand); in X86TargetLowering()
456 setTruncStoreAction(VT, MVT::f16, Expand); in X86TargetLowering()
457 setTruncStoreAction(VT, MVT::bf16, Expand); in X86TargetLowering()
459 setOperationAction(ISD::BF16_TO_FP, VT, Expand); in X86TargetLowering()
460 setOperationAction(ISD::FP_TO_BF16, VT, Custom); in X86TargetLowering()
486 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) { in X86TargetLowering()
487 setOperationAction(ISD::SELECT, VT, Custom); in X86TargetLowering()
488 setOperationAction(ISD::SETCC, VT, Custom); in X86TargetLowering()
489 setOperationAction(ISD::STRICT_FSETCC, VT, Custom); in X86TargetLowering()
490 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); in X86TargetLowering()
492 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { in X86TargetLowering()
493 if (VT == MVT::i64 && !Subtarget.is64Bit()) in X86TargetLowering()
495 setOperationAction(ISD::SELECT, VT, Custom); in X86TargetLowering()
496 setOperationAction(ISD::SETCC, VT, Custom); in X86TargetLowering()
505 // LLVM/Clang supports zero-cost DWARF and SEH exception handling. in X86TargetLowering()
513 for (auto VT : { MVT::i32, MVT::i64 }) { in X86TargetLowering()
514 if (VT == MVT::i64 && !Subtarget.is64Bit()) in X86TargetLowering()
516 setOperationAction(ISD::ConstantPool , VT, Custom); in X86TargetLowering()
517 setOperationAction(ISD::JumpTable , VT, Custom); in X86TargetLowering()
518 setOperationAction(ISD::GlobalAddress , VT, Custom); in X86TargetLowering()
519 setOperationAction(ISD::GlobalTLSAddress, VT, Custom); in X86TargetLowering()
520 setOperationAction(ISD::ExternalSymbol , VT, Custom); in X86TargetLowering()
521 setOperationAction(ISD::BlockAddress , VT, Custom); in X86TargetLowering()
524 // 64-bit shl, sra, srl (iff 32-bit x86) in X86TargetLowering()
525 for (auto VT : { MVT::i32, MVT::i64 }) { in X86TargetLowering()
526 if (VT == MVT::i64 && !Subtarget.is64Bit()) in X86TargetLowering()
528 setOperationAction(ISD::SHL_PARTS, VT, Custom); in X86TargetLowering()
529 setOperationAction(ISD::SRA_PARTS, VT, Custom); in X86TargetLowering()
530 setOperationAction(ISD::SRL_PARTS, VT, Custom); in X86TargetLowering()
539 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { in X86TargetLowering()
540 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); in X86TargetLowering()
541 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); in X86TargetLowering()
542 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom); in X86TargetLowering()
543 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom); in X86TargetLowering()
544 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom); in X86TargetLowering()
545 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom); in X86TargetLowering()
546 setOperationAction(ISD::ATOMIC_STORE, VT, Custom); in X86TargetLowering()
553 // All CPUs supporting AVX will atomically load/store aligned 128-bit in X86TargetLowering()
562 // FIXME - use subtarget debug flags in X86TargetLowering()
600 auto setF16Action = [&] (MVT VT, LegalizeAction Action) { in X86TargetLowering() argument
601 setOperationAction(ISD::FABS, VT, Action); in X86TargetLowering()
602 setOperationAction(ISD::FNEG, VT, Action); in X86TargetLowering()
603 setOperationAction(ISD::FCOPYSIGN, VT, Expand); in X86TargetLowering()
604 setOperationAction(ISD::FREM, VT, Action); in X86TargetLowering()
605 setOperationAction(ISD::FMA, VT, Action); in X86TargetLowering()
606 setOperationAction(ISD::FMINNUM, VT, Action); in X86TargetLowering()
607 setOperationAction(ISD::FMAXNUM, VT, Action); in X86TargetLowering()
608 setOperationAction(ISD::FMINIMUM, VT, Action); in X86TargetLowering()
609 setOperationAction(ISD::FMAXIMUM, VT, Action); in X86TargetLowering()
610 setOperationAction(ISD::FSIN, VT, Action); in X86TargetLowering()
611 setOperationAction(ISD::FCOS, VT, Action); in X86TargetLowering()
612 setOperationAction(ISD::FSINCOS, VT, Action); in X86TargetLowering()
613 setOperationAction(ISD::FTAN, VT, Action); in X86TargetLowering()
614 setOperationAction(ISD::FSQRT, VT, Action); in X86TargetLowering()
615 setOperationAction(ISD::FPOW, VT, Action); in X86TargetLowering()
616 setOperationAction(ISD::FLOG, VT, Action); in X86TargetLowering()
617 setOperationAction(ISD::FLOG2, VT, Action); in X86TargetLowering()
618 setOperationAction(ISD::FLOG10, VT, Action); in X86TargetLowering()
619 setOperationAction(ISD::FEXP, VT, Action); in X86TargetLowering()
620 setOperationAction(ISD::FEXP2, VT, Action); in X86TargetLowering()
621 setOperationAction(ISD::FEXP10, VT, Action); in X86TargetLowering()
622 setOperationAction(ISD::FCEIL, VT, Action); in X86TargetLowering()
623 setOperationAction(ISD::FFLOOR, VT, Action); in X86TargetLowering()
624 setOperationAction(ISD::FNEARBYINT, VT, Action); in X86TargetLowering()
625 setOperationAction(ISD::FRINT, VT, Action); in X86TargetLowering()
626 setOperationAction(ISD::BR_CC, VT, Action); in X86TargetLowering()
627 setOperationAction(ISD::SETCC, VT, Action); in X86TargetLowering()
628 setOperationAction(ISD::SELECT, VT, Custom); in X86TargetLowering()
629 setOperationAction(ISD::SELECT_CC, VT, Action); in X86TargetLowering()
630 setOperationAction(ISD::FROUND, VT, Action); in X86TargetLowering()
631 setOperationAction(ISD::FROUNDEVEN, VT, Action); in X86TargetLowering()
632 setOperationAction(ISD::FTRUNC, VT, Action); in X86TargetLowering()
633 setOperationAction(ISD::FLDEXP, VT, Action); in X86TargetLowering()
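The setF16Action lambda above centralizes the legalize action for FP16/BF16 operations; it is invoked once per half-precision type, as in the setF16Action(VT, Expand) call shown later for the v8bf16/v16bf16 block.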
646 // Disable f32->f64 extload as we can only generate this in one instruction in X86TargetLowering()
649 // non-optsize case. in X86TargetLowering()
652 for (auto VT : { MVT::f32, MVT::f64 }) { in X86TargetLowering()
654 setOperationAction(ISD::FABS, VT, Custom); in X86TargetLowering()
657 setOperationAction(ISD::FNEG, VT, Custom); in X86TargetLowering()
660 setOperationAction(ISD::FCOPYSIGN, VT, Custom); in X86TargetLowering()
663 setOperationAction(ISD::FADD, VT, Custom); in X86TargetLowering()
664 setOperationAction(ISD::FSUB, VT, Custom); in X86TargetLowering()
667 setOperationAction(ISD::FSIN , VT, Expand); in X86TargetLowering()
668 setOperationAction(ISD::FCOS , VT, Expand); in X86TargetLowering()
669 setOperationAction(ISD::FSINCOS, VT, Expand); in X86TargetLowering()
758 for (auto VT : { MVT::f32, MVT::f64 }) { in X86TargetLowering()
759 setOperationAction(ISD::UNDEF, VT, Expand); in X86TargetLowering()
760 setOperationAction(ISD::FCOPYSIGN, VT, Expand); in X86TargetLowering()
763 setOperationAction(ISD::FSIN , VT, Expand); in X86TargetLowering()
764 setOperationAction(ISD::FCOS , VT, Expand); in X86TargetLowering()
765 setOperationAction(ISD::FSINCOS, VT, Expand); in X86TargetLowering()
774 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS in X86TargetLowering()
775 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS in X86TargetLowering()
784 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS in X86TargetLowering()
785 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS in X86TargetLowering()
793 // Handle constrained floating-point operations of scalar. in X86TargetLowering()
832 // clang-format off in X86TargetLowering()
843 // clang-format on in X86TargetLowering()
857 // Handle constrained floating-point operations of scalar. in X86TargetLowering()
869 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten in X86TargetLowering()
896 // clang-format off in X86TargetLowering()
904 // clang-format on in X86TargetLowering()
953 for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16, in X86TargetLowering()
956 // clang-format off in X86TargetLowering()
957 setOperationAction(ISD::FSIN, VT, Expand); in X86TargetLowering()
958 setOperationAction(ISD::FSINCOS, VT, Expand); in X86TargetLowering()
959 setOperationAction(ISD::FCOS, VT, Expand); in X86TargetLowering()
960 setOperationAction(ISD::FTAN, VT, Expand); in X86TargetLowering()
961 setOperationAction(ISD::FREM, VT, Expand); in X86TargetLowering()
962 setOperationAction(ISD::FCOPYSIGN, VT, Expand); in X86TargetLowering()
963 setOperationAction(ISD::FPOW, VT, Expand); in X86TargetLowering()
964 setOperationAction(ISD::FLOG, VT, Expand); in X86TargetLowering()
965 setOperationAction(ISD::FLOG2, VT, Expand); in X86TargetLowering()
966 setOperationAction(ISD::FLOG10, VT, Expand); in X86TargetLowering()
967 setOperationAction(ISD::FEXP, VT, Expand); in X86TargetLowering()
968 setOperationAction(ISD::FEXP2, VT, Expand); in X86TargetLowering()
969 setOperationAction(ISD::FEXP10, VT, Expand); in X86TargetLowering()
970 // clang-format on in X86TargetLowering()
976 for (MVT VT : MVT::fixedlen_vector_valuetypes()) { in X86TargetLowering() local
977 setOperationAction(ISD::SDIV, VT, Expand); in X86TargetLowering()
978 setOperationAction(ISD::UDIV, VT, Expand); in X86TargetLowering()
979 setOperationAction(ISD::SREM, VT, Expand); in X86TargetLowering()
980 setOperationAction(ISD::UREM, VT, Expand); in X86TargetLowering()
981 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand); in X86TargetLowering()
982 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); in X86TargetLowering()
983 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand); in X86TargetLowering()
984 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); in X86TargetLowering()
985 setOperationAction(ISD::FMA, VT, Expand); in X86TargetLowering()
986 setOperationAction(ISD::FFLOOR, VT, Expand); in X86TargetLowering()
987 setOperationAction(ISD::FCEIL, VT, Expand); in X86TargetLowering()
988 setOperationAction(ISD::FTRUNC, VT, Expand); in X86TargetLowering()
989 setOperationAction(ISD::FRINT, VT, Expand); in X86TargetLowering()
990 setOperationAction(ISD::FNEARBYINT, VT, Expand); in X86TargetLowering()
991 setOperationAction(ISD::FROUNDEVEN, VT, Expand); in X86TargetLowering()
992 setOperationAction(ISD::SMUL_LOHI, VT, Expand); in X86TargetLowering()
993 setOperationAction(ISD::MULHS, VT, Expand); in X86TargetLowering()
994 setOperationAction(ISD::UMUL_LOHI, VT, Expand); in X86TargetLowering()
995 setOperationAction(ISD::MULHU, VT, Expand); in X86TargetLowering()
996 setOperationAction(ISD::SDIVREM, VT, Expand); in X86TargetLowering()
997 setOperationAction(ISD::UDIVREM, VT, Expand); in X86TargetLowering()
998 setOperationAction(ISD::CTPOP, VT, Expand); in X86TargetLowering()
999 setOperationAction(ISD::CTTZ, VT, Expand); in X86TargetLowering()
1000 setOperationAction(ISD::CTLZ, VT, Expand); in X86TargetLowering()
1001 setOperationAction(ISD::ROTL, VT, Expand); in X86TargetLowering()
1002 setOperationAction(ISD::ROTR, VT, Expand); in X86TargetLowering()
1003 setOperationAction(ISD::BSWAP, VT, Expand); in X86TargetLowering()
1004 setOperationAction(ISD::SETCC, VT, Expand); in X86TargetLowering()
1005 setOperationAction(ISD::FP_TO_UINT, VT, Expand); in X86TargetLowering()
1006 setOperationAction(ISD::FP_TO_SINT, VT, Expand); in X86TargetLowering()
1007 setOperationAction(ISD::UINT_TO_FP, VT, Expand); in X86TargetLowering()
1008 setOperationAction(ISD::SINT_TO_FP, VT, Expand); in X86TargetLowering()
1009 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand); in X86TargetLowering()
1010 setOperationAction(ISD::TRUNCATE, VT, Expand); in X86TargetLowering()
1011 setOperationAction(ISD::SIGN_EXTEND, VT, Expand); in X86TargetLowering()
1012 setOperationAction(ISD::ZERO_EXTEND, VT, Expand); in X86TargetLowering()
1013 setOperationAction(ISD::ANY_EXTEND, VT, Expand); in X86TargetLowering()
1014 setOperationAction(ISD::SELECT_CC, VT, Expand); in X86TargetLowering()
1016 setTruncStoreAction(InnerVT, VT, Expand); in X86TargetLowering()
1018 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand); in X86TargetLowering()
1019 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand); in X86TargetLowering()
1021 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like in X86TargetLowering()
1025 if (VT.getVectorElementType() == MVT::i1) in X86TargetLowering()
1026 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); in X86TargetLowering()
1030 if (VT.getVectorElementType() == MVT::f16 || in X86TargetLowering()
1031 VT.getVectorElementType() == MVT::bf16) in X86TargetLowering()
1032 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); in X86TargetLowering()
1037 // with -msoft-float, disable use of MMX as well. in X86TargetLowering()
1073 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM in X86TargetLowering()
1086 for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) { in X86TargetLowering()
1087 setOperationAction(ISD::FMAXIMUM, VT, Custom); in X86TargetLowering()
1088 setOperationAction(ISD::FMINIMUM, VT, Custom); in X86TargetLowering()
1091 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8, in X86TargetLowering()
1093 setOperationAction(ISD::SDIV, VT, Custom); in X86TargetLowering()
1094 setOperationAction(ISD::SREM, VT, Custom); in X86TargetLowering()
1095 setOperationAction(ISD::UDIV, VT, Custom); in X86TargetLowering()
1096 setOperationAction(ISD::UREM, VT, Custom); in X86TargetLowering()
1126 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { in X86TargetLowering()
1127 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom); in X86TargetLowering()
1128 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom); in X86TargetLowering()
1129 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom); in X86TargetLowering()
1130 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom); in X86TargetLowering()
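A hedged editorial note: the two Legal cases above match the min/max forms SSE2 actually provides (pmaxsw/pminsw for signed v8i16 and pmaxub/pminub for unsigned v16i8); the remaining element-type/signedness combinations stay Custom until SSE4.1 supplies the full set.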
1149 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { in X86TargetLowering()
1150 setOperationAction(ISD::SETCC, VT, Custom); in X86TargetLowering()
1151 setOperationAction(ISD::CTPOP, VT, Custom); in X86TargetLowering()
1152 setOperationAction(ISD::ABS, VT, Custom); in X86TargetLowering()
1153 setOperationAction(ISD::ABDS, VT, Custom); in X86TargetLowering()
1154 setOperationAction(ISD::ABDU, VT, Custom); in X86TargetLowering()
1158 setCondCodeAction(ISD::SETLT, VT, Custom); in X86TargetLowering()
1159 setCondCodeAction(ISD::SETLE, VT, Custom); in X86TargetLowering()
1169 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { in X86TargetLowering()
1170 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); in X86TargetLowering()
1171 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); in X86TargetLowering()
1172 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); in X86TargetLowering()
1173 setOperationAction(ISD::VSELECT, VT, Custom); in X86TargetLowering()
1174 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
1177 for (auto VT : { MVT::v8f16, MVT::v2f64, MVT::v2i64 }) { in X86TargetLowering()
1178 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); in X86TargetLowering()
1179 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); in X86TargetLowering()
1180 setOperationAction(ISD::VSELECT, VT, Custom); in X86TargetLowering()
1182 if (VT == MVT::v2i64 && !Subtarget.is64Bit()) in X86TargetLowering()
1185 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
1186 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
1213 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) { in X86TargetLowering()
1214 setOperationAction(ISD::FP_TO_SINT, VT, Custom); in X86TargetLowering()
1215 setOperationAction(ISD::FP_TO_UINT, VT, Custom); in X86TargetLowering()
1216 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom); in X86TargetLowering()
1217 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom); in X86TargetLowering()
1243 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for in X86TargetLowering()
1252 // Add 32-bit vector stores to help vectorization opportunities. in X86TargetLowering()
1287 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { in X86TargetLowering()
1288 setOperationAction(ISD::SRL, VT, Custom); in X86TargetLowering()
1289 setOperationAction(ISD::SHL, VT, Custom); in X86TargetLowering()
1290 setOperationAction(ISD::SRA, VT, Custom); in X86TargetLowering()
1291 if (VT == MVT::v2i64) continue; in X86TargetLowering()
1292 setOperationAction(ISD::ROTL, VT, Custom); in X86TargetLowering()
1293 setOperationAction(ISD::ROTR, VT, Custom); in X86TargetLowering()
1294 setOperationAction(ISD::FSHL, VT, Custom); in X86TargetLowering()
1295 setOperationAction(ISD::FSHR, VT, Custom); in X86TargetLowering()
1317 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) { in X86TargetLowering()
1318 setOperationAction(ISD::BITREVERSE, VT, Custom); in X86TargetLowering()
1319 setOperationAction(ISD::CTLZ, VT, Custom); in X86TargetLowering()
1360 // FIXME: Do we need to handle scalar-to-vector here? in X86TargetLowering()
1370 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { in X86TargetLowering()
1371 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal); in X86TargetLowering()
1372 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); in X86TargetLowering()
1386 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can in X86TargetLowering()
1402 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, in X86TargetLowering()
1404 setOperationAction(ISD::ROTL, VT, Custom); in X86TargetLowering()
1405 setOperationAction(ISD::ROTR, VT, Custom); in X86TargetLowering()
1409 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) in X86TargetLowering()
1410 setOperationAction(ISD::BITREVERSE, VT, Custom); in X86TargetLowering()
1431 for (auto VT : { MVT::v8f32, MVT::v4f64 }) { in X86TargetLowering()
1432 setOperationAction(ISD::FFLOOR, VT, Legal); in X86TargetLowering()
1433 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); in X86TargetLowering()
1434 setOperationAction(ISD::FCEIL, VT, Legal); in X86TargetLowering()
1435 setOperationAction(ISD::STRICT_FCEIL, VT, Legal); in X86TargetLowering()
1436 setOperationAction(ISD::FTRUNC, VT, Legal); in X86TargetLowering()
1437 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); in X86TargetLowering()
1438 setOperationAction(ISD::FRINT, VT, Legal); in X86TargetLowering()
1439 setOperationAction(ISD::STRICT_FRINT, VT, Legal); in X86TargetLowering()
1440 setOperationAction(ISD::FNEARBYINT, VT, Legal); in X86TargetLowering()
1441 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); in X86TargetLowering()
1442 setOperationAction(ISD::FROUNDEVEN, VT, Legal); in X86TargetLowering()
1443 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); in X86TargetLowering()
1445 setOperationAction(ISD::FROUND, VT, Custom); in X86TargetLowering()
1447 setOperationAction(ISD::FNEG, VT, Custom); in X86TargetLowering()
1448 setOperationAction(ISD::FABS, VT, Custom); in X86TargetLowering()
1449 setOperationAction(ISD::FCOPYSIGN, VT, Custom); in X86TargetLowering()
1451 setOperationAction(ISD::FMAXIMUM, VT, Custom); in X86TargetLowering()
1452 setOperationAction(ISD::FMINIMUM, VT, Custom); in X86TargetLowering()
1492 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { in X86TargetLowering()
1493 setOperationAction(ISD::SRL, VT, Custom); in X86TargetLowering()
1494 setOperationAction(ISD::SHL, VT, Custom); in X86TargetLowering()
1495 setOperationAction(ISD::SRA, VT, Custom); in X86TargetLowering()
1496 setOperationAction(ISD::ABDS, VT, Custom); in X86TargetLowering()
1497 setOperationAction(ISD::ABDU, VT, Custom); in X86TargetLowering()
1498 if (VT == MVT::v4i64) continue; in X86TargetLowering()
1499 setOperationAction(ISD::ROTL, VT, Custom); in X86TargetLowering()
1500 setOperationAction(ISD::ROTR, VT, Custom); in X86TargetLowering()
1501 setOperationAction(ISD::FSHL, VT, Custom); in X86TargetLowering()
1502 setOperationAction(ISD::FSHR, VT, Custom); in X86TargetLowering()
1505 // These types need custom splitting if their input is a 128-bit vector. in X86TargetLowering()
1519 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { in X86TargetLowering()
1520 setOperationAction(ISD::SIGN_EXTEND, VT, Custom); in X86TargetLowering()
1521 setOperationAction(ISD::ZERO_EXTEND, VT, Custom); in X86TargetLowering()
1522 setOperationAction(ISD::ANY_EXTEND, VT, Custom); in X86TargetLowering()
1530 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { in X86TargetLowering()
1531 setOperationAction(ISD::SETCC, VT, Custom); in X86TargetLowering()
1532 setOperationAction(ISD::CTPOP, VT, Custom); in X86TargetLowering()
1533 setOperationAction(ISD::CTLZ, VT, Custom); in X86TargetLowering()
1534 setOperationAction(ISD::BITREVERSE, VT, Custom); in X86TargetLowering()
1538 setCondCodeAction(ISD::SETLT, VT, Custom); in X86TargetLowering()
1539 setCondCodeAction(ISD::SETLE, VT, Custom); in X86TargetLowering()
1550 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, in X86TargetLowering()
1552 setOperationAction(ISD::FMA, VT, Legal); in X86TargetLowering()
1553 setOperationAction(ISD::STRICT_FMA, VT, Legal); in X86TargetLowering()
1557 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { in X86TargetLowering()
1558 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom); in X86TargetLowering()
1559 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom); in X86TargetLowering()
1598 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { in X86TargetLowering()
1599 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); in X86TargetLowering()
1600 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); in X86TargetLowering()
1601 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); in X86TargetLowering()
1602 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom); in X86TargetLowering()
1603 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom); in X86TargetLowering()
1606 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) { in X86TargetLowering()
1607 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); in X86TargetLowering()
1608 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); in X86TargetLowering()
1613 // when we have a 256-bit wide blend with immediate. in X86TargetLowering()
1628 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, in X86TargetLowering()
1630 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); in X86TargetLowering()
1631 setOperationAction(ISD::MSTORE, VT, Legal); in X86TargetLowering()
1635 // (result) is 128-bit but the source is 256-bit wide. in X86TargetLowering()
1636 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, in X86TargetLowering()
1638 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); in X86TargetLowering()
1641 // Custom lower several nodes for 256-bit types. in X86TargetLowering()
1642 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, in X86TargetLowering()
1644 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); in X86TargetLowering()
1645 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); in X86TargetLowering()
1646 setOperationAction(ISD::VSELECT, VT, Custom); in X86TargetLowering()
1647 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
1648 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
1649 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); in X86TargetLowering()
1650 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); in X86TargetLowering()
1651 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); in X86TargetLowering()
1652 setOperationAction(ISD::STORE, VT, Custom); in X86TargetLowering()
1670 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, in X86TargetLowering()
1672 setOperationAction(ISD::MGATHER, VT, Custom); in X86TargetLowering()
1678 for (MVT VT : { MVT::f16, MVT::v2f16, MVT::v4f16, MVT::v8f16 }) { in X86TargetLowering()
1679 setOperationAction(ISD::FP_ROUND, VT, Custom); in X86TargetLowering()
1680 setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); in X86TargetLowering()
1682 for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) { in X86TargetLowering()
1683 setOperationAction(ISD::FP_EXTEND, VT, Custom); in X86TargetLowering()
1684 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom); in X86TargetLowering()
1693 // available with AVX512. 512-bit vectors are in a separate block controlled in X86TargetLowering()
1719 // There is no byte sized k-register load or store without AVX512DQ. in X86TargetLowering()
1732 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors. in X86TargetLowering()
1733 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { in X86TargetLowering()
1734 setOperationAction(ISD::SIGN_EXTEND, VT, Custom); in X86TargetLowering()
1735 setOperationAction(ISD::ZERO_EXTEND, VT, Custom); in X86TargetLowering()
1736 setOperationAction(ISD::ANY_EXTEND, VT, Custom); in X86TargetLowering()
1739 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) in X86TargetLowering()
1740 setOperationAction(ISD::VSELECT, VT, Expand); in X86TargetLowering()
1742 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { in X86TargetLowering()
1743 setOperationAction(ISD::SETCC, VT, Custom); in X86TargetLowering()
1744 setOperationAction(ISD::SELECT, VT, Custom); in X86TargetLowering()
1745 setOperationAction(ISD::TRUNCATE, VT, Custom); in X86TargetLowering()
1747 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); in X86TargetLowering()
1748 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); in X86TargetLowering()
1749 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
1750 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); in X86TargetLowering()
1751 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
1752 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); in X86TargetLowering()
1755 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) in X86TargetLowering()
1756 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); in X86TargetLowering()
1759 for (MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { in X86TargetLowering()
1760 setOperationAction(ISD::LRINT, VT, Legal); in X86TargetLowering()
1761 setOperationAction(ISD::LLRINT, VT, Legal); in X86TargetLowering()
1765 // This block controls legalization for 512-bit operations with 8/16/32/64 bit in X86TargetLowering()
1766 // elements. 512-bit support can be disabled based on prefer-vector-width and in X86TargetLowering()
1767 // required-vector-width function attributes. in X86TargetLowering()
1789 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) { in X86TargetLowering()
1790 setOperationAction(ISD::FMAXIMUM, VT, Custom); in X86TargetLowering()
1791 setOperationAction(ISD::FMINIMUM, VT, Custom); in X86TargetLowering()
1792 setOperationAction(ISD::FNEG, VT, Custom); in X86TargetLowering()
1793 setOperationAction(ISD::FABS, VT, Custom); in X86TargetLowering()
1794 setOperationAction(ISD::FMA, VT, Legal); in X86TargetLowering()
1795 setOperationAction(ISD::STRICT_FMA, VT, Legal); in X86TargetLowering()
1796 setOperationAction(ISD::FCOPYSIGN, VT, Custom); in X86TargetLowering()
1805 for (MVT VT : { MVT::v16i1, MVT::v16i8 }) { in X86TargetLowering()
1806 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32); in X86TargetLowering()
1807 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32); in X86TargetLowering()
1808 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32); in X86TargetLowering()
1809 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32); in X86TargetLowering()
1812 for (MVT VT : { MVT::v16i16, MVT::v16i32 }) { in X86TargetLowering()
1813 setOperationAction(ISD::FP_TO_SINT, VT, Custom); in X86TargetLowering()
1814 setOperationAction(ISD::FP_TO_UINT, VT, Custom); in X86TargetLowering()
1815 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom); in X86TargetLowering()
1816 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom); in X86TargetLowering()
1846 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE in X86TargetLowering()
1847 // to 512-bit rather than use the AVX2 instructions so that we can use in X86TargetLowering()
1848 // k-masks. in X86TargetLowering()
1850 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, in X86TargetLowering()
1852 setOperationAction(ISD::MLOAD, VT, Custom); in X86TargetLowering()
1853 setOperationAction(ISD::MSTORE, VT, Custom); in X86TargetLowering()
1871 // Extends from v64i1 masks to 512-bit vectors. in X86TargetLowering()
1877 for (auto VT : { MVT::v16f32, MVT::v8f64 }) { in X86TargetLowering()
1878 setOperationAction(ISD::FFLOOR, VT, Legal); in X86TargetLowering()
1879 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); in X86TargetLowering()
1880 setOperationAction(ISD::FCEIL, VT, Legal); in X86TargetLowering()
1881 setOperationAction(ISD::STRICT_FCEIL, VT, Legal); in X86TargetLowering()
1882 setOperationAction(ISD::FTRUNC, VT, Legal); in X86TargetLowering()
1883 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); in X86TargetLowering()
1884 setOperationAction(ISD::FRINT, VT, Legal); in X86TargetLowering()
1885 setOperationAction(ISD::STRICT_FRINT, VT, Legal); in X86TargetLowering()
1886 setOperationAction(ISD::FNEARBYINT, VT, Legal); in X86TargetLowering()
1887 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); in X86TargetLowering()
1888 setOperationAction(ISD::FROUNDEVEN, VT, Legal); in X86TargetLowering()
1889 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); in X86TargetLowering()
1891 setOperationAction(ISD::FROUND, VT, Custom); in X86TargetLowering()
1894 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) { in X86TargetLowering()
1895 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); in X86TargetLowering()
1896 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); in X86TargetLowering()
1921 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) { in X86TargetLowering()
1922 setOperationAction(ISD::SRL, VT, Custom); in X86TargetLowering()
1923 setOperationAction(ISD::SHL, VT, Custom); in X86TargetLowering()
1924 setOperationAction(ISD::SRA, VT, Custom); in X86TargetLowering()
1925 setOperationAction(ISD::ROTL, VT, Custom); in X86TargetLowering()
1926 setOperationAction(ISD::ROTR, VT, Custom); in X86TargetLowering()
1927 setOperationAction(ISD::SETCC, VT, Custom); in X86TargetLowering()
1928 setOperationAction(ISD::ABDS, VT, Custom); in X86TargetLowering()
1929 setOperationAction(ISD::ABDU, VT, Custom); in X86TargetLowering()
1930 setOperationAction(ISD::BITREVERSE, VT, Custom); in X86TargetLowering()
1934 setCondCodeAction(ISD::SETLT, VT, Custom); in X86TargetLowering()
1935 setCondCodeAction(ISD::SETLE, VT, Custom); in X86TargetLowering()
1945 for (auto VT : { MVT::v16i32, MVT::v8i64 }) { in X86TargetLowering()
1946 setOperationAction(ISD::SMAX, VT, Legal); in X86TargetLowering()
1947 setOperationAction(ISD::UMAX, VT, Legal); in X86TargetLowering()
1948 setOperationAction(ISD::SMIN, VT, Legal); in X86TargetLowering()
1949 setOperationAction(ISD::UMIN, VT, Legal); in X86TargetLowering()
1950 setOperationAction(ISD::ABS, VT, Legal); in X86TargetLowering()
1951 setOperationAction(ISD::CTPOP, VT, Custom); in X86TargetLowering()
1954 for (auto VT : { MVT::v64i8, MVT::v32i16 }) { in X86TargetLowering()
1955 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom); in X86TargetLowering()
1956 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom); in X86TargetLowering()
1957 setOperationAction(ISD::CTLZ, VT, Custom); in X86TargetLowering()
1958 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom); in X86TargetLowering()
1959 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom); in X86TargetLowering()
1960 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom); in X86TargetLowering()
1961 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom); in X86TargetLowering()
1962 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom); in X86TargetLowering()
1963 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom); in X86TargetLowering()
1964 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom); in X86TargetLowering()
1965 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom); in X86TargetLowering()
1984 // Non-VLX subtargets extend 128/256 vectors to use the 512-bit version. in X86TargetLowering()
1985 for (auto VT : { MVT::v16i32, MVT::v8i64} ) { in X86TargetLowering()
1986 setOperationAction(ISD::CTLZ, VT, Legal); in X86TargetLowering()
1991 for (auto VT : { MVT::v16i32, MVT::v8i64 }) in X86TargetLowering()
1992 setOperationAction(ISD::CTPOP, VT, Legal); in X86TargetLowering()
1996 // (result) is 256-bit but the source is 512-bit wide. in X86TargetLowering()
1997 // 128-bit was made Legal under AVX1. in X86TargetLowering()
1998 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, in X86TargetLowering()
2000 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); in X86TargetLowering()
2002 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64, in X86TargetLowering()
2004 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); in X86TargetLowering()
2005 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); in X86TargetLowering()
2006 setOperationAction(ISD::SELECT, VT, Custom); in X86TargetLowering()
2007 setOperationAction(ISD::VSELECT, VT, Custom); in X86TargetLowering()
2008 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); in X86TargetLowering()
2009 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
2010 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); in X86TargetLowering()
2011 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); in X86TargetLowering()
2012 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
2022 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { in X86TargetLowering()
2023 setOperationAction(ISD::MLOAD, VT, Legal); in X86TargetLowering()
2024 setOperationAction(ISD::MSTORE, VT, Legal); in X86TargetLowering()
2025 setOperationAction(ISD::MGATHER, VT, Custom); in X86TargetLowering()
2026 setOperationAction(ISD::MSCATTER, VT, Custom); in X86TargetLowering()
2029 for (auto VT : { MVT::v64i8, MVT::v32i16 }) { in X86TargetLowering()
2030 setOperationAction(ISD::MLOAD, VT, Legal); in X86TargetLowering()
2031 setOperationAction(ISD::MSTORE, VT, Legal); in X86TargetLowering()
2039 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) { in X86TargetLowering()
2040 setOperationAction(ISD::FSHL, VT, Custom); in X86TargetLowering()
2041 setOperationAction(ISD::FSHR, VT, Custom); in X86TargetLowering()
2054 for (auto VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v16i16, MVT::v8i32, in X86TargetLowering()
2056 setOperationAction(ISD::FSHL, VT, Custom); in X86TargetLowering()
2057 setOperationAction(ISD::FSHR, VT, Custom); in X86TargetLowering()
2062 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for in X86TargetLowering()
2065 // These operations are handled on non-VLX by artificially widening in in X86TargetLowering()
2085 for (auto VT : { MVT::v2i64, MVT::v4i64 }) { in X86TargetLowering()
2086 setOperationAction(ISD::SMAX, VT, Legal); in X86TargetLowering()
2087 setOperationAction(ISD::UMAX, VT, Legal); in X86TargetLowering()
2088 setOperationAction(ISD::SMIN, VT, Legal); in X86TargetLowering()
2089 setOperationAction(ISD::UMIN, VT, Legal); in X86TargetLowering()
2090 setOperationAction(ISD::ABS, VT, Legal); in X86TargetLowering()
2093 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { in X86TargetLowering()
2094 setOperationAction(ISD::ROTL, VT, Custom); in X86TargetLowering()
2095 setOperationAction(ISD::ROTR, VT, Custom); in X86TargetLowering()
2102 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, in X86TargetLowering()
2104 setOperationAction(ISD::MSCATTER, VT, Custom); in X86TargetLowering()
2118 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { in X86TargetLowering()
2119 setOperationAction(ISD::CTLZ, VT, Legal); in X86TargetLowering()
2124 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) in X86TargetLowering()
2125 setOperationAction(ISD::CTPOP, VT, Legal); in X86TargetLowering()
2135 for (auto VT : { MVT::v32i1, MVT::v64i1 }) { in X86TargetLowering()
2136 setOperationAction(ISD::VSELECT, VT, Expand); in X86TargetLowering()
2137 setOperationAction(ISD::TRUNCATE, VT, Custom); in X86TargetLowering()
2138 setOperationAction(ISD::SETCC, VT, Custom); in X86TargetLowering()
2139 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
2140 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
2141 setOperationAction(ISD::SELECT, VT, Custom); in X86TargetLowering()
2142 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); in X86TargetLowering()
2143 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); in X86TargetLowering()
2144 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); in X86TargetLowering()
2145 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); in X86TargetLowering()
2148 for (auto VT : { MVT::v16i1, MVT::v32i1 }) in X86TargetLowering()
2149 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); in X86TargetLowering()
2151 // Extends from v32i1 masks to 256-bit vectors. in X86TargetLowering()
2156 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { in X86TargetLowering()
2157 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); in X86TargetLowering()
2158 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom); in X86TargetLowering()
2161 // These operations are handled on non-VLX by artificially widening in in X86TargetLowering()
2163 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns? in X86TargetLowering()
2166 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 }) in X86TargetLowering()
2167 setOperationAction(ISD::CTPOP, VT, Legal); in X86TargetLowering()
2172 auto setGroup = [&] (MVT VT) { in X86TargetLowering() argument
2173 setOperationAction(ISD::FADD, VT, Legal); in X86TargetLowering()
2174 setOperationAction(ISD::STRICT_FADD, VT, Legal); in X86TargetLowering()
2175 setOperationAction(ISD::FSUB, VT, Legal); in X86TargetLowering()
2176 setOperationAction(ISD::STRICT_FSUB, VT, Legal); in X86TargetLowering()
2177 setOperationAction(ISD::FMUL, VT, Legal); in X86TargetLowering()
2178 setOperationAction(ISD::STRICT_FMUL, VT, Legal); in X86TargetLowering()
2179 setOperationAction(ISD::FDIV, VT, Legal); in X86TargetLowering()
2180 setOperationAction(ISD::STRICT_FDIV, VT, Legal); in X86TargetLowering()
2181 setOperationAction(ISD::FSQRT, VT, Legal); in X86TargetLowering()
2182 setOperationAction(ISD::STRICT_FSQRT, VT, Legal); in X86TargetLowering()
2184 setOperationAction(ISD::FFLOOR, VT, Legal); in X86TargetLowering()
2185 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); in X86TargetLowering()
2186 setOperationAction(ISD::FCEIL, VT, Legal); in X86TargetLowering()
2187 setOperationAction(ISD::STRICT_FCEIL, VT, Legal); in X86TargetLowering()
2188 setOperationAction(ISD::FTRUNC, VT, Legal); in X86TargetLowering()
2189 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); in X86TargetLowering()
2190 setOperationAction(ISD::FRINT, VT, Legal); in X86TargetLowering()
2191 setOperationAction(ISD::STRICT_FRINT, VT, Legal); in X86TargetLowering()
2192 setOperationAction(ISD::FNEARBYINT, VT, Legal); in X86TargetLowering()
2193 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); in X86TargetLowering()
2194 setOperationAction(ISD::FROUNDEVEN, VT, Legal); in X86TargetLowering()
2195 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); in X86TargetLowering()
2197 setOperationAction(ISD::FROUND, VT, Custom); in X86TargetLowering()
2199 setOperationAction(ISD::LOAD, VT, Legal); in X86TargetLowering()
2200 setOperationAction(ISD::STORE, VT, Legal); in X86TargetLowering()
2202 setOperationAction(ISD::FMA, VT, Legal); in X86TargetLowering()
2203 setOperationAction(ISD::STRICT_FMA, VT, Legal); in X86TargetLowering()
2204 setOperationAction(ISD::VSELECT, VT, Legal); in X86TargetLowering()
2205 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); in X86TargetLowering()
2206 setOperationAction(ISD::SELECT, VT, Custom); in X86TargetLowering()
2208 setOperationAction(ISD::FNEG, VT, Custom); in X86TargetLowering()
2209 setOperationAction(ISD::FABS, VT, Custom); in X86TargetLowering()
2210 setOperationAction(ISD::FCOPYSIGN, VT, Custom); in X86TargetLowering()
2211 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); in X86TargetLowering()
2212 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); in X86TargetLowering()
2214 setOperationAction(ISD::SETCC, VT, Custom); in X86TargetLowering()
2215 setOperationAction(ISD::STRICT_FSETCC, VT, Custom); in X86TargetLowering()
2216 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); in X86TargetLowering()
2334 for (auto VT : {MVT::v8bf16, MVT::v16bf16}) { in X86TargetLowering()
2335 setF16Action(VT, Expand); in X86TargetLowering()
2336 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); in X86TargetLowering()
2337 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); in X86TargetLowering()
2338 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); in X86TargetLowering()
2339 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); in X86TargetLowering()
2380 // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64 in X86TargetLowering()
2389 // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16 in X86TargetLowering()
2398 // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16 in X86TargetLowering()
2403 // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32 in X86TargetLowering()
2423 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't in X86TargetLowering()
2427 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better in X86TargetLowering()
2428 // than generic legalization for 64-bit multiplication-with-overflow, though. in X86TargetLowering()
2429 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { in X86TargetLowering()
2430 if (VT == MVT::i64 && !Subtarget.is64Bit()) in X86TargetLowering()
2433 setOperationAction(ISD::SADDO, VT, Custom); in X86TargetLowering()
2434 setOperationAction(ISD::UADDO, VT, Custom); in X86TargetLowering()
2435 setOperationAction(ISD::SSUBO, VT, Custom); in X86TargetLowering()
2436 setOperationAction(ISD::USUBO, VT, Custom); in X86TargetLowering()
2437 setOperationAction(ISD::SMULO, VT, Custom); in X86TargetLowering()
2438 setOperationAction(ISD::UMULO, VT, Custom); in X86TargetLowering()
2441 setOperationAction(ISD::UADDO_CARRY, VT, Custom); in X86TargetLowering()
2442 setOperationAction(ISD::USUBO_CARRY, VT, Custom); in X86TargetLowering()
2443 setOperationAction(ISD::SETCCCARRY, VT, Custom); in X86TargetLowering()
2444 setOperationAction(ISD::SADDO_CARRY, VT, Custom); in X86TargetLowering()
2445 setOperationAction(ISD::SSUBO_CARRY, VT, Custom); in X86TargetLowering()
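A hedged sketch of the source-level pattern that reaches these nodes (an assumption about typical frontend output, not code from this file): checked-arithmetic builtins become the *.with.overflow intrinsics, which arrive in the backend as the [SU]ADDO/[SU]SUBO/[SU]MULO nodes configured above.

// Illustration: __builtin_add_overflow on int maps to llvm.sadd.with.overflow.i32,
// i.e. ISD::SADDO here, and is custom-lowered to an ADD that sets EFLAGS plus a
// read of the overflow flag (seto/jo).
bool add_checked(int a, int b, int &out) {
  return __builtin_add_overflow(a, b, &out);
}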
2470 // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)` in X86TargetLowering()
2471 // is. We should promote the value to 64-bits to solve this. in X86TargetLowering()
2472 // This is what the CRT headers do - `fmodf` is an inline header in X86TargetLowering()
2476 // clang-format off in X86TargetLowering()
2496 // clang-format on in X86TargetLowering()
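Roughly, the f32 FREM promotion described above amounts to the following source-level rewrite (an editorial illustration, not the file's code): the operands are widened so the libcall is the f64 fmod that 32-bit MSVC actually provides, and the result is truncated back.

#include <cmath>
// Hedged sketch of the effect of promoting FREM f32 to f64 on 32-bit MSVC.
float frem32(float x, float y) {
  return (float)std::fmod((double)x, (double)y);
}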
2498 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has in X86TargetLowering()
2506 // We have target-specific dag combine patterns for the following nodes: in X86TargetLowering()
2567 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores in X86TargetLowering()
2569 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores in X86TargetLowering()
2571 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores in X86TargetLowering()
2580 // Default loop alignment, which can be overridden by -align-loops. in X86TargetLowering()
2583 // An out-of-order CPU can speculatively execute past a predictable branch, in X86TargetLowering()
2591 // Default to having -disable-strictnode-mutation on in X86TargetLowering()
2595 // This has so far only been implemented for 64-bit MachO.
2614 X86TargetLowering::getPreferredVectorAction(MVT VT) const { in getPreferredVectorAction()
2615 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() && in getPreferredVectorAction()
2619 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && in getPreferredVectorAction()
2620 !Subtarget.hasF16C() && VT.getVectorElementType() == MVT::f16) in getPreferredVectorAction()
2623 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && in getPreferredVectorAction()
2624 VT.getVectorElementType() != MVT::i1) in getPreferredVectorAction()
2627 return TargetLoweringBase::getPreferredVectorAction(VT); in getPreferredVectorAction()
2636 //===----------------------------------------------------------------------===//
2638 //===----------------------------------------------------------------------===//
2650 Ld->getValueSizeInBits(0) == 128 && Ld->getAlign() < Align(16)) in mayFoldLoad()
2653 // TODO: If this is a non-temporal load and the target has an instruction in mayFoldLoad()
2666 // We cannot replace a wide volatile load with a broadcast-from-memory, in mayFoldLoadIntoBroadcastFromMem()
2669 return !Ld->isVolatile() || in mayFoldLoadIntoBroadcastFromMem()
2670 Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits(); in mayFoldLoadIntoBroadcastFromMem()
2674 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); in mayFoldIntoStore()
2679 unsigned Opcode = Op.getNode()->use_begin()->getOpcode(); in mayFoldIntoZeroExtend()
2754 int ReturnAddrIndex = FuncInfo->getRAIndex(); in getReturnAddressFrameIndex()
2758 unsigned SlotSize = RegInfo->getSlotSize(); in getReturnAddressFrameIndex()
2760 -(int64_t)SlotSize, in getReturnAddressFrameIndex()
2762 FuncInfo->setRAIndex(ReturnAddrIndex); in getReturnAddressFrameIndex()
2774 // If we don't have a symbolic displacement - we don't have any extra in isOffsetSuitableForCodeModel()
2780 // 64-bit offsets. in isOffsetSuitableForCodeModel()
2790 // For other non-large code models we assume that latest small object is 16MB in isOffsetSuitableForCodeModel()
2819 // clang-format off in TranslateIntegerX86CC()
2831 // clang-format on in TranslateIntegerX86CC()
2835 /// Do a one-to-one translation of an ISD::CondCode to the X86-specific
2843 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) { in TranslateX86CC()
2844 // X > -1 -> X == 0, jump !sign. in TranslateX86CC()
2848 if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) { in TranslateX86CC()
2849 // X < 0 -> X == 0, jump on sign. in TranslateX86CC()
2852 if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) { in TranslateX86CC()
2853 // X >= 0 -> X == 0, jump on !sign. in TranslateX86CC()
2856 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) { in TranslateX86CC()
2857 // X < 1 -> X <= 0 in TranslateX86CC()
2892 // clang-format off in TranslateX86CC()
2893 default: llvm_unreachable("Condcode should be pre-legalized away"); in TranslateX86CC()
2914 // clang-format on in TranslateX86CC()
2937 static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) { in useVPTERNLOG() argument
2939 VT.is512BitVector(); in useVPTERNLOG()
2956 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48); in getTgtMemIntrinsic()
2964 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64); in getTgtMemIntrinsic()
2972 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48); in getTgtMemIntrinsic()
2980 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64); in getTgtMemIntrinsic()
2991 unsigned Size = I.getType()->getScalarSizeInBits(); in getTgtMemIntrinsic()
2992 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size); in getTgtMemIntrinsic()
3003 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits(); in getTgtMemIntrinsic()
3004 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size); in getTgtMemIntrinsic()
3025 unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits(); in getTgtMemIntrinsic()
3026 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), Size); in getTgtMemIntrinsic()
3036 switch (IntrData->Type) { in getTgtMemIntrinsic()
3042 MVT VT = MVT::getVT(I.getArgOperand(1)->getType()); in getTgtMemIntrinsic() local
3044 if (IntrData->Type == TRUNCATE_TO_MEM_VI8) in getTgtMemIntrinsic()
3046 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16) in getTgtMemIntrinsic()
3048 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32) in getTgtMemIntrinsic()
3051 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements()); in getTgtMemIntrinsic()
3061 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); in getTgtMemIntrinsic()
3072 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType()); in getTgtMemIntrinsic()
3073 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); in getTgtMemIntrinsic()
3091 bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, in isFPImmLegal() argument
3102 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow"); in shouldReduceLoadWidth()
3104 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF in shouldReduceLoadWidth()
3106 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr(); in shouldReduceLoadWidth()
3109 return GA->getTargetFlags() != X86II::MO_GOTTPOFF; in shouldReduceLoadWidth()
3113 // can be store-folded. Therefore, it's probably not worth splitting the load. in shouldReduceLoadWidth()
3114 EVT VT = Load->getValueType(0); in shouldReduceLoadWidth() local
3115 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) { in shouldReduceLoadWidth()
3116 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) { in shouldReduceLoadWidth()
3122 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() || in shouldReduceLoadWidth()
3123 UI->use_begin()->getOpcode() != ISD::STORE) in shouldReduceLoadWidth()
3126 // All non-chain uses are extract + store. in shouldReduceLoadWidth()
3137 assert(Ty->isIntegerTy()); in shouldConvertConstantLoadToIntImm()
3139 unsigned BitSize = Ty->getPrimitiveSizeInBits(); in shouldConvertConstantLoadToIntImm()
3147 // a floating-point compare and we have blendv or conditional move, then it is in reduceSelectOfFPConstantLoads()
3148 // cheaper to select instead of doing a cross-register move and creating a in reduceSelectOfFPConstantLoads()
3154 bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const { in convertSelectOfConstantsToMath()
3157 if (VT.isVector() && Subtarget.hasAVX512()) in convertSelectOfConstantsToMath()
3163 bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, in decomposeMulByConstant() argument
3175 // through type legalization on 32-bit targets so we would need to special in decomposeMulByConstant()
3177 while (getTypeAction(Context, VT) != TypeLegal) in decomposeMulByConstant()
3178 VT = getTypeToTransformTo(Context, VT); in decomposeMulByConstant()
3182 // most implementations, sub-vXi32 vector multiplies are always fast, in decomposeMulByConstant()
3185 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in decomposeMulByConstant()
3186 if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 && in decomposeMulByConstant()
3191 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() || in decomposeMulByConstant()
3192 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2(); in decomposeMulByConstant()
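A worked editorial example of the power-of-two checks above: MulC = 5 has MulC - 1 = 4, a power of two, so x*5 decomposes into (x << 2) + x (a single lea on x86); MulC = 7 has MulC + 1 = 8, so x*7 becomes (x << 3) - x.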
3213 // TODO - do we have any exceptions? in shouldScalarizeBinop()
3228 bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT, in shouldFormOverflowOp() argument
3231 if (VT.isVector()) in shouldFormOverflowOp()
3233 return VT.isSimple() || !isOperationExpand(Opcode, VT); in shouldFormOverflowOp()
3239 (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32); in isCheapToSpeculateCttz()
3247 bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const { in ShouldShrinkFPConstant()
3251 return !Subtarget.hasSSE2() || VT == MVT::f80; in ShouldShrinkFPConstant()
3254 bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const { in isScalarFPTypeInSSEReg()
3255 return (VT == MVT::f64 && Subtarget.hasSSE2()) || in isScalarFPTypeInSSEReg()
3256 (VT == MVT::f32 && Subtarget.hasSSE1()) || VT == MVT::f16; in isScalarFPTypeInSSEReg()
3305 EVT VT = Y.getValueType(); in hasAndNotCompare() local
3307 if (VT.isVector()) in hasAndNotCompare()
3313 // There are only 32-bit and 64-bit forms for 'andn'. in hasAndNotCompare()
3314 if (VT != MVT::i32 && VT != MVT::i64) in hasAndNotCompare()
3317 return !isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque(); in hasAndNotCompare()
3321 EVT VT = Y.getValueType(); in hasAndNot() local
3323 if (!VT.isVector()) in hasAndNot()
3328 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128) in hasAndNot()
3331 if (VT == MVT::v4i32) in hasAndNot()
3360 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'. in shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd()
3365 EVT VT, unsigned ShiftOpc, bool MayTransformRotate, in preferedOpcodeForCmpEqPiecesOfOperand() argument
3367 if (!VT.isInteger()) in preferedOpcodeForCmpEqPiecesOfOperand()
3371 if (VT.isVector()) { in preferedOpcodeForCmpEqPiecesOfOperand()
3374 PreferRotate = Subtarget.hasAVX512() && (VT.getScalarType() == MVT::i32 || in preferedOpcodeForCmpEqPiecesOfOperand()
3375 VT.getScalarType() == MVT::i64); in preferedOpcodeForCmpEqPiecesOfOperand()
3382 VT.getScalarSizeInBits() - ShiftOrRotateAmt.getZExtValue(); in preferedOpcodeForCmpEqPiecesOfOperand()
3396 if (VT.isVector()) in preferedOpcodeForCmpEqPiecesOfOperand()
3402 // at least imm32 mask (or be zext i32 -> i64). in preferedOpcodeForCmpEqPiecesOfOperand()
3403 if (VT == MVT::i64) in preferedOpcodeForCmpEqPiecesOfOperand()
3404 return AndMask->getSignificantBits() > 32 ? (unsigned)ISD::SRL in preferedOpcodeForCmpEqPiecesOfOperand()
3407 // We can only benefit if the mask requires at least 7 bits. We in preferedOpcodeForCmpEqPiecesOfOperand()
3413 if (VT == MVT::i64) in preferedOpcodeForCmpEqPiecesOfOperand()
3414 // Keep exactly 32-bit imm64, this is zext i32 -> i64 which is in preferedOpcodeForCmpEqPiecesOfOperand()
3416 return AndMask->getSignificantBits() > 33 ? (unsigned)ISD::SHL : ShiftOpc; in preferedOpcodeForCmpEqPiecesOfOperand()
3424 if (PreferRotate || !MayTransformRotate || VT.isVector()) in preferedOpcodeForCmpEqPiecesOfOperand()
3427 // Non-vector type and we have a zext mask with SRL. in preferedOpcodeForCmpEqPiecesOfOperand()
3453 return N->getOpcode() != ISD::FP_EXTEND; in preferScalarizeSplat()
3458 assert(((N->getOpcode() == ISD::SHL && in shouldFoldConstantShiftPairToMask()
3459 N->getOperand(0).getOpcode() == ISD::SRL) || in shouldFoldConstantShiftPairToMask()
3460 (N->getOpcode() == ISD::SRL && in shouldFoldConstantShiftPairToMask()
3461 N->getOperand(0).getOpcode() == ISD::SHL)) && in shouldFoldConstantShiftPairToMask()
3462 "Expected shift-shift mask"); in shouldFoldConstantShiftPairToMask()
3464 EVT VT = N->getValueType(0); in shouldFoldConstantShiftPairToMask() local
3465 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) || in shouldFoldConstantShiftPairToMask()
3466 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) { in shouldFoldConstantShiftPairToMask()
3467 // Only fold if the shift values are equal - so it folds to AND. in shouldFoldConstantShiftPairToMask()
3468 // TODO - we should fold if either is a non-uniform vector but we don't do in shouldFoldConstantShiftPairToMask()
3469 // the fold for non-splats yet. in shouldFoldConstantShiftPairToMask()
3470 return N->getOperand(1) == N->getOperand(0).getOperand(1); in shouldFoldConstantShiftPairToMask()
3476 EVT VT = Y.getValueType(); in shouldFoldMaskToVariableShiftPair() local
3479 if (VT.isVector()) in shouldFoldMaskToVariableShiftPair()
3482 // 64-bit shifts on 32-bit targets produce really bad bloated code. in shouldFoldMaskToVariableShiftPair()
3483 if (VT == MVT::i64 && !Subtarget.is64Bit()) in shouldFoldMaskToVariableShiftPair()
3499 bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const { in shouldSplatInsEltVarIndex()
3502 return isTypeLegal(VT); in shouldSplatInsEltVarIndex()
3506 MVT VT = MVT::getIntegerVT(NumBits); in hasFastEqualityCompare() local
3507 if (isTypeLegal(VT)) in hasFastEqualityCompare()
3508 return VT; in hasFastEqualityCompare()
3518 // TODO: Allow 64-bit type for 32-bit target. in hasFastEqualityCompare()
3519 // TODO: 512-bit types should be allowed, but make sure that those in hasFastEqualityCompare()
3571 static bool isInRange(int Val, int Low, int Hi) { in isInRange() argument
3572 return (Val >= Low && Val < Hi); in isInRange()
3577 static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) { in isAnyInRange() argument
3578 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); }); in isAnyInRange()
3588 static bool isUndefOrInRange(int Val, int Low, int Hi) { in isUndefOrInRange() argument
3589 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi); in isUndefOrInRange()
3594 static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) { in isUndefOrInRange() argument
3596 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); }); in isUndefOrInRange()
3601 static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) { in isUndefOrZeroOrInRange() argument
3602 return isUndefOrZero(Val) || isInRange(Val, Low, Hi); in isUndefOrZeroOrInRange()
3607 static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) { in isUndefOrZeroOrInRange() argument
3609 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); }); in isUndefOrZeroOrInRange()
3612 /// Return true if every element in Mask is an in-place blend/select mask or is
3624 /// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
3626 unsigned Size, int Low, int Step = 1) { in isSequentialOrUndefInRange() argument
3627 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) in isSequentialOrUndefInRange()
3628 if (!isUndefOrEqual(Mask[i], Low)) in isSequentialOrUndefInRange()
3635 /// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step), or is undef or is zero.
3637 unsigned Size, int Low, in isSequentialOrUndefOrZeroInRange() argument
3639 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) in isSequentialOrUndefOrZeroInRange()
3640 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low) in isSequentialOrUndefOrZeroInRange()
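// A minimal standalone sketch (not LLVM code) of the sequential-or-undef test
// above, assuming the usual shuffle-mask sentinels: -1 for an undef lane and
// -2 for a zeroed lane (see the comment that follows).
#include <cassert>
#include <vector>

static bool isSeqOrUndef(const std::vector<int> &Mask, unsigned Pos,
                         unsigned Size, int Low, int Step = 1) {
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
    if (Mask[i] != -1 && Mask[i] != Low)
      return false;
  return true;
}

int main() {
  std::vector<int> A = {0, -1, 2, 3}; // <0, undef, 2, 3> is sequential from 0
  std::vector<int> B = {0, 2, 1, 3};  // <0, 2, 1, 3> is not
  assert(isSeqOrUndef(A, 0, 4, 0));
  assert(!isSeqOrUndef(B, 0, 4, 0));
  return 0;
}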
3670 /// shuffle masks. The latter have the special property of a '-2' representing
3671 /// a zeroed lane of a vector.
3686 // a pair of values. If we find such a case, use the non-undef mask's value. in canWidenShuffleElements()
3730 assert(!Zeroable.isZero() && "V2's non-undef elements are used?!"); in canWidenShuffleElements()
3784 // Use an UNDEF node if MaskElt == -1.
3785 // Split 64-bit constants in the 32-bit mode.
3786 static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG, in getConstVector() argument
3792 MVT ConstVecVT = VT; in getConstVector()
3793 unsigned NumElts = VT.getVectorNumElements(); in getConstVector()
3795 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) { in getConstVector()
3812 ConstsNode = DAG.getBitcast(VT, ConstsNode); in getConstVector()
3817 MVT VT, SelectionDAG &DAG, const SDLoc &dl) { in getConstVector() argument
3823 MVT ConstVecVT = VT; in getConstVector()
3824 unsigned NumElts = VT.getVectorNumElements(); in getConstVector()
3826 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) { in getConstVector()
3838 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"); in getConstVector()
3854 return DAG.getBitcast(VT, ConstsNode); in getConstVector()
3857 static SDValue getConstVector(ArrayRef<APInt> Bits, MVT VT, in getConstVector() argument
3860 return getConstVector(Bits, Undefs, VT, DAG, dl); in getConstVector()
3864 static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, in getZeroVector() argument
3866 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || in getZeroVector()
3867 VT.getVectorElementType() == MVT::i1) && in getZeroVector()
3872 // available, use a floating-point +0.0 instead. in getZeroVector()
3875 if (!Subtarget.hasSSE2() && VT.is128BitVector()) { in getZeroVector()
3877 } else if (VT.isFloatingPoint() && in getZeroVector()
3878 TLI.isTypeLegal(VT.getVectorElementType())) { in getZeroVector()
3879 Vec = DAG.getConstantFP(+0.0, dl, VT); in getZeroVector()
3880 } else if (VT.getVectorElementType() == MVT::i1) { in getZeroVector()
3881 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && in getZeroVector()
3883 Vec = DAG.getConstant(0, dl, VT); in getZeroVector()
3885 unsigned Num32BitElts = VT.getSizeInBits() / 32; in getZeroVector()
3888 return DAG.getBitcast(VT, Vec); in getZeroVector()
3916 EVT VT = Vec.getValueType(); in extractSubVector() local
3917 EVT ElVT = VT.getVectorElementType(); in extractSubVector()
3918 unsigned Factor = VT.getSizeInBits() / vectorWidth; in extractSubVector()
3920 VT.getVectorNumElements() / Factor); in extractSubVector()
3926 // This is the index of the first element of the vectorWidth-bit chunk in extractSubVector()
3928 IdxVal &= ~(ElemsPerChunk - 1); in extractSubVector()
3933 Vec->ops().slice(IdxVal, ElemsPerChunk)); in extractSubVector()
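// A minimal standalone sketch (not LLVM code) of the index math above: the
// element index is rounded down to a chunk boundary before slicing. The values
// used here (a v16i32 source and 128-bit chunks) are illustrative only.
#include <cassert>

static unsigned alignToChunk(unsigned IdxVal, unsigned EltBits,
                             unsigned ChunkBits) {
  unsigned ElemsPerChunk = ChunkBits / EltBits; // e.g. 128 / 32 == 4
  return IdxVal & ~(ElemsPerChunk - 1);         // assumes a power-of-2 count
}

int main() {
  assert(alignToChunk(6, 32, 128) == 4);  // element 6 lives in chunk [4,8)
  assert(alignToChunk(11, 32, 128) == 8); // element 11 lives in chunk [8,12)
  return 0;
}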
3945 /// Generate a DAG to grab 128-bits from a vector > 128 bits. This
3947 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
3949 /// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
3958 /// Generate a DAG to grab 256-bits from a 512-bit vector.
3973 EVT VT = Vec.getValueType(); in insertSubVector() local
3974 EVT ElVT = VT.getVectorElementType(); in insertSubVector()
3981 // This is the index of the first element of the vectorWidth-bit chunk in insertSubVector()
3983 IdxVal &= ~(ElemsPerChunk - 1); in insertSubVector()
3989 /// Generate a DAG to put 128-bits into a vector > 128 bits. This
3991 /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
3993 /// we want. It need not be aligned to a 128-bit boundary. That makes
4003 static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements, in widenSubVector() argument
4006 assert(Vec.getValueSizeInBits().getFixedValue() <= VT.getFixedSizeInBits() && in widenSubVector()
4007 Vec.getValueType().getScalarType() == VT.getScalarType() && in widenSubVector()
4009 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl) in widenSubVector()
4010 : DAG.getUNDEF(VT); in widenSubVector()
4011 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec, in widenSubVector()
4025 MVT VT = MVT::getVectorVT(SVT, WideNumElts); in widenSubVector() local
4026 return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl); in widenSubVector()
4031 static MVT widenMaskVectorType(MVT VT, const X86Subtarget &Subtarget) { in widenMaskVectorType() argument
4032 assert(VT.getVectorElementType() == MVT::i1 && "Expected bool vector"); in widenMaskVectorType()
4033 unsigned NumElts = VT.getVectorNumElements(); in widenMaskVectorType()
4036 return VT; in widenMaskVectorType()
4044 MVT VT = widenMaskVectorType(Vec.getSimpleValueType(), Subtarget); in widenMaskVector() local
4045 return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl); in widenMaskVector()
4055 if (N->getOpcode() == ISD::CONCAT_VECTORS) { in collectConcatOps()
4056 Ops.append(N->op_begin(), N->op_end()); in collectConcatOps()
4060 if (N->getOpcode() == ISD::INSERT_SUBVECTOR) { in collectConcatOps()
4061 SDValue Src = N->getOperand(0); in collectConcatOps()
4062 SDValue Sub = N->getOperand(1); in collectConcatOps()
4063 const APInt &Idx = N->getConstantOperandAPInt(2); in collectConcatOps()
4064 EVT VT = Src.getValueType(); in collectConcatOps() local
4067 if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2)) { in collectConcatOps()
4074 if (Idx == (VT.getVectorNumElements() / 2)) { in collectConcatOps()
4143 EVT VT = Op.getValueType(); in splitVector() local
4144 unsigned NumElems = VT.getVectorNumElements(); in splitVector()
4145 unsigned SizeInBits = VT.getSizeInBits(); in splitVector()
4149 // If this is a splat value (with no undefs) then use the lower subvector, in splitVector()
4162 EVT VT = Op.getValueType(); in splitVectorOp() local
4177 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); in splitVectorOp()
4178 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, in splitVectorOp()
4187 // Make sure we only try to split 256/512-bit types to avoid creating in splitVectorIntUnary()
4189 [[maybe_unused]] EVT VT = Op.getValueType(); in splitVectorIntUnary() local
4192 (VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!"); in splitVectorIntUnary()
4194 VT.getVectorNumElements() && in splitVectorIntUnary()
4204 [[maybe_unused]] EVT VT = Op.getValueType(); in splitVectorIntBinary() local
4205 assert(Op.getOperand(0).getValueType() == VT && in splitVectorIntBinary()
4206 Op.getOperand(1).getValueType() == VT && "Unexpected VTs!"); in splitVectorIntBinary()
4207 assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!"); in splitVectorIntBinary()
4213 // Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
4214 // 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
4215 // deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
4220 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, in SplitOpsAndApply() argument
4226 if (VT.getSizeInBits() > 512) { in SplitOpsAndApply()
4227 NumSubs = VT.getSizeInBits() / 512; in SplitOpsAndApply()
4228 assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size"); in SplitOpsAndApply()
4231 if (VT.getSizeInBits() > 256) { in SplitOpsAndApply()
4232 NumSubs = VT.getSizeInBits() / 256; in SplitOpsAndApply()
4233 assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size"); in SplitOpsAndApply()
4236 if (VT.getSizeInBits() > 128) { in SplitOpsAndApply()
4237 NumSubs = VT.getSizeInBits() / 128; in SplitOpsAndApply()
4238 assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size"); in SplitOpsAndApply()
4256 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs); in SplitOpsAndApply()
4259 // Helper function that extends a non-512-bit vector op to 512-bits on non-VLX
4261 static SDValue getAVX512Node(unsigned Opcode, const SDLoc &DL, MVT VT, in getAVX512Node() argument
4265 MVT SVT = VT.getScalarType(); in getAVX512Node()
4271 // AVX512 broadcasts 32/64-bit operands. in getAVX512Node()
4272 // TODO: Support float once getAVX512Node is used by fp-ops. in getAVX512Node()
4283 if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, in getAVX512Node()
4291 bool Widen = !(Subtarget.hasVLX() || VT.is512BitVector()); in getAVX512Node()
4293 MVT DstVT = VT; in getAVX512Node()
4304 assert(OpVT == VT && "Vector type mismatch"); in getAVX512Node()
4318 // Perform the 512-bit op then extract the bottom subvector. in getAVX512Node()
4320 Res = extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits()); in getAVX512Node()
4324 /// Insert i1-subvector to i1-vector.
4394 if (llvm::all_of(Vec->ops().slice(IdxVal + SubVecNumElems), in insert1BitVector()
4400 unsigned ShiftLeft = NumElems - SubVecNumElems; in insert1BitVector()
4401 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; in insert1BitVector()
4427 SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8); in insert1BitVector()
4442 unsigned ShiftLeft = NumElems - SubVecNumElems; in insert1BitVector()
4443 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; in insert1BitVector()
4469 unsigned LowShift = NumElems - IdxVal; in insert1BitVector()
4470 SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, in insert1BitVector() local
4472 Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low, in insert1BitVector()
4483 Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High); in insert1BitVector()
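// A scalar-bitmask model (not LLVM code) of the KSHIFTL/KSHIFTR sequence
// above: to splice a SubN-bit mask into a wider mask at bit IdxVal, the low
// bits are isolated with a shift-up/shift-down pair, the (widened) subvector
// is shifted up to clear stale bits and back down into position, and the
// pieces are ORed together. A uint64_t stands in for the k-register; the
// sketch assumes 0 < IdxVal and IdxVal + SubN < 64 so no shift is by >= 64.
#include <cassert>
#include <cstdint>

static uint64_t insertBitField(uint64_t Vec, uint64_t Sub, unsigned SubN,
                               unsigned IdxVal) {
  uint64_t Low = (Vec << (64 - IdxVal)) >> (64 - IdxVal);      // bits [0, IdxVal)
  uint64_t Mid = (Sub << (64 - SubN)) >> (64 - SubN - IdxVal); // Sub at IdxVal
  uint64_t High = (Vec >> (IdxVal + SubN)) << (IdxVal + SubN); // bits above Sub
  return Low | Mid | High;
}

int main() {
  // Insert the 4-bit mask 0b1010 at bit 4 of the 16-bit mask 0xF00F.
  assert(insertBitField(0xF00F, 0xA, 4, 4) == 0xF0AF);
  return 0;
}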
4497 EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts); in concatSubVectors() local
4498 SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth); in concatSubVectors()
4505 static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { in getOnesVector() argument
4506 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && in getOnesVector()
4507 "Expected a 128/256/512-bit vector type"); in getOnesVector()
4508 unsigned NumElts = VT.getSizeInBits() / 32; in getOnesVector()
4510 return DAG.getBitcast(VT, Vec); in getOnesVector()
4513 static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT, in getEXTEND_VECTOR_INREG() argument
4516 assert(VT.isVector() && InVT.isVector() && "Expected vector VTs."); in getEXTEND_VECTOR_INREG()
4521 // For 256-bit vectors, we only need the lower (128-bit) input half. in getEXTEND_VECTOR_INREG()
4522 // For 512-bit vectors, we only need the lower input half or quarter. in getEXTEND_VECTOR_INREG()
4524 assert(VT.getSizeInBits() == InVT.getSizeInBits() && in getEXTEND_VECTOR_INREG()
4526 unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits(); in getEXTEND_VECTOR_INREG()
4528 std::max(128U, (unsigned)VT.getSizeInBits() / Scale)); in getEXTEND_VECTOR_INREG()
4532 if (VT.getVectorNumElements() != InVT.getVectorNumElements()) in getEXTEND_VECTOR_INREG()
4535 return DAG.getNode(Opcode, DL, VT, In); in getEXTEND_VECTOR_INREG()
4539 static SDValue getBitSelect(const SDLoc &DL, MVT VT, SDValue LHS, SDValue RHS, in getBitSelect() argument
4541 LHS = DAG.getNode(ISD::AND, DL, VT, LHS, Mask); in getBitSelect()
4542 RHS = DAG.getNode(X86ISD::ANDNP, DL, VT, Mask, RHS); in getBitSelect()
4543 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); in getBitSelect()
4546 void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, in createUnpackShuffleMask() argument
4548 assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 && in createUnpackShuffleMask()
4551 int NumElts = VT.getVectorNumElements(); in createUnpackShuffleMask()
4552 int NumEltsInLane = 128 / VT.getScalarSizeInBits(); in createUnpackShuffleMask()
4562 /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
4564 /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
4565 /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
4566 void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, in createSplat2ShuffleMask() argument
4569 int NumElts = VT.getVectorNumElements(); in createSplat2ShuffleMask()
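// A minimal standalone sketch (not LLVM code) reproducing the v8iX example in
// the comment above: the splat2 mask repeats each source element twice, taking
// elements [0,4) for the Lo mask and [4,8) for the Hi mask.
#include <cassert>
#include <vector>

static std::vector<int> splat2Mask(int NumElts, bool Lo) {
  std::vector<int> Mask;
  for (int i = 0; i < NumElts / 2; ++i) {
    int Src = i + (Lo ? 0 : NumElts / 2);
    Mask.push_back(Src);
    Mask.push_back(Src);
  }
  return Mask;
}

int main() {
  const std::vector<int> Lo = {0, 0, 1, 1, 2, 2, 3, 3};
  const std::vector<int> Hi = {4, 4, 5, 5, 6, 6, 7, 7};
  assert(splat2Mask(8, true) == Lo);
  assert(splat2Mask(8, false) == Hi);
  return 0;
}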
4578 static SDValue getVectorShuffle(SelectionDAG &DAG, EVT VT, const SDLoc &dl, in getVectorShuffle() argument
4582 SmallVector<SDValue> Ops(Mask.size(), DAG.getUNDEF(VT.getScalarType())); in getVectorShuffle()
4592 return DAG.getBuildVector(VT, dl, Ops); in getVectorShuffle()
4595 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); in getVectorShuffle()
4599 static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT, in getUnpackl() argument
4602 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false); in getUnpackl()
4603 return getVectorShuffle(DAG, VT, dl, V1, V2, Mask); in getUnpackl()
4607 static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT, in getUnpackh() argument
4610 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false); in getUnpackh()
4611 return getVectorShuffle(DAG, VT, dl, V1, V2, Mask); in getUnpackh()
4618 const SDLoc &dl, MVT VT, SDValue LHS, SDValue RHS, in getPack() argument
4621 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in getPack()
4624 VT.getSizeInBits() == OpVT.getSizeInBits() && in getPack()
4630 // Rely on vector shuffles for vXi64 -> vXi32 packing. in getPack()
4634 int NumElts = VT.getVectorNumElements(); in getPack()
4641 return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, LHS), in getPack()
4642 DAG.getBitcast(VT, RHS), PackMask); in getPack()
4650 return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS); in getPack()
4654 return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS); in getPack()
4664 SDValue Mask = DAG.getConstant((1ULL << EltSizeInBits) - 1, dl, OpVT); in getPack()
4668 return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS); in getPack()
4677 return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS); in getPack()
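// A minimal standalone sketch (not LLVM code) of the per-element saturation
// behind the PACKUS/PACKSS choices above, shown for i16 -> i8: PACKSS clamps
// each signed source element to the signed range of the narrower type, while
// PACKUS clamps it to the unsigned range.
#include <algorithm>
#include <cassert>
#include <cstdint>

static int8_t packss16(int16_t V) {
  return (int8_t)std::clamp<int16_t>(V, -128, 127);
}
static uint8_t packus16(int16_t V) {
  return (uint8_t)std::clamp<int16_t>(V, 0, 255);
}

int main() {
  assert(packss16(300) == 127 && packss16(-300) == -128);
  assert(packus16(300) == 255 && packus16(-5) == 0);
  return 0;
}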
4681 /// This produces a shuffle where the low element of V2 is swizzled into the
4688 MVT VT = V2.getSimpleValueType(); in getShuffleVectorZeroOrUndef() local
4690 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); in getShuffleVectorZeroOrUndef()
4691 int NumElems = VT.getVectorNumElements(); in getShuffleVectorZeroOrUndef()
4694 // If this is the insertion idx, put the low elt of V2 here. in getShuffleVectorZeroOrUndef()
4696 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec); in getShuffleVectorZeroOrUndef()
4706 // TODO: Add support for non-zero offsets.
4709 if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0) in getTargetConstantFromBasePtr()
4711 return CNode->getConstVal(); in getTargetConstantFromBasePtr()
4717 return getTargetConstantFromBasePtr(Load->getBasePtr()); in getTargetConstantFromNode()
4741 EVT VT = Op.getValueType(); in getTargetConstantBitsFromNode() local
4742 unsigned SizeInBits = VT.getSizeInBits(); in getTargetConstantBitsFromNode()
4812 Mask = CInt->getValue(); in getTargetConstantBitsFromNode()
4816 Mask = CFP->getValueAPF().bitcastToAPInt(); in getTargetConstantBitsFromNode()
4820 Type *Ty = CDS->getType(); in getTargetConstantBitsFromNode()
4821 Mask = APInt::getZero(Ty->getPrimitiveSizeInBits()); in getTargetConstantBitsFromNode()
4822 Type *EltTy = CDS->getElementType(); in getTargetConstantBitsFromNode()
4823 bool IsInteger = EltTy->isIntegerTy(); in getTargetConstantBitsFromNode()
4825 EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy(); in getTargetConstantBitsFromNode()
4828 unsigned EltBits = EltTy->getPrimitiveSizeInBits(); in getTargetConstantBitsFromNode()
4829 for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) in getTargetConstantBitsFromNode()
4831 Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits); in getTargetConstantBitsFromNode()
4833 Mask.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(), in getTargetConstantBitsFromNode()
4850 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue()); in getTargetConstantBitsFromNode()
4855 APInt RawBits = Cst->getValueAPF().bitcastToAPInt(); in getTargetConstantBitsFromNode()
4864 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); in getTargetConstantBitsFromNode()
4865 if (BV->getConstantRawBits(true, SrcEltSizeInBits, SrcEltBits, Undefs)) { in getTargetConstantBitsFromNode()
4876 Type *CstTy = Cst->getType(); in getTargetConstantBitsFromNode()
4877 unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); in getTargetConstantBitsFromNode()
4878 if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0) in getTargetConstantBitsFromNode()
4881 unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits(); in getTargetConstantBitsFromNode()
4889 if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i], in getTargetConstantBitsFromNode()
4898 EltSizeInBits <= VT.getScalarSizeInBits()) { in getTargetConstantBitsFromNode()
4900 if (MemIntr->getMemoryVT().getStoreSizeInBits() != VT.getScalarSizeInBits()) in getTargetConstantBitsFromNode()
4903 SDValue Ptr = MemIntr->getBasePtr(); in getTargetConstantBitsFromNode()
4905 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); in getTargetConstantBitsFromNode()
4915 SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]); in getTargetConstantBitsFromNode()
4924 SDValue Ptr = MemIntr->getBasePtr(); in getTargetConstantBitsFromNode()
4928 Type *CstTy = Cst->getType(); in getTargetConstantBitsFromNode()
4929 unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); in getTargetConstantBitsFromNode()
4930 unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits(); in getTargetConstantBitsFromNode()
4931 if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 || in getTargetConstantBitsFromNode()
4934 unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); in getTargetConstantBitsFromNode()
4941 if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i], in getTargetConstantBitsFromNode()
4957 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); in getTargetConstantBitsFromNode()
4964 SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0)); in getTargetConstantBitsFromNode()
4970 // If bitcasting to larger elements we might lose track of undefs - don't in getTargetConstantBitsFromNode()
4972 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); in getTargetConstantBitsFromNode()
4995 // TODO - support extract_subvector through bitcasts. in getTargetConstantBitsFromNode()
4996 if (EltSizeInBits != VT.getScalarSizeInBits()) in getTargetConstantBitsFromNode()
5004 unsigned NumSubElts = VT.getVectorNumElements(); in getTargetConstantBitsFromNode()
5017 // TODO - support shuffle through bitcasts. in getTargetConstantBitsFromNode()
5018 if (EltSizeInBits != VT.getScalarSizeInBits()) in getTargetConstantBitsFromNode()
5021 ArrayRef<int> Mask = SVN->getMask(); in getTargetConstantBitsFromNode()
5050 if (UndefElts1[M - NumElts]) in getTargetConstantBitsFromNode()
5052 EltBits.push_back(EltBits1[M - NumElts]); in getTargetConstantBitsFromNode()
5069 int SplatIndex = -1; in isConstantSplat()
5074 SplatIndex = -1; in isConstantSplat()
5108 // Match not(xor X, -1) -> X.
5109 // Match not(pcmpgt(C, X)) -> pcmpgt(X, C - 1).
5110 // Match not(extract_subvector(xor X, -1)) -> extract_subvector(X).
5111 // Match not(concat_vectors(xor X, -1, xor Y, -1)) -> concat_vectors(X, Y).
5135 // Don't fold min_signed_value -> (min_signed_value - 1) in IsNOT()
5139 Elt -= 1; in IsNOT()
5143 MVT VT = V.getSimpleValueType(); in IsNOT() local
5144 return DAG.getNode(X86ISD::PCMPGT, DL, VT, V.getOperand(1), in IsNOT()
5145 getConstVector(EltBits, UndefElts, VT, DAG, DL)); in IsNOT()
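// A minimal standalone sketch (not LLVM code) of the identity used by the
// pcmpgt fold above: for signed integers, NOT(C > X) == (X > C - 1). It is
// only safe when C is not the minimum signed value, since C - 1 would wrap,
// which is exactly what the guard above rules out.
#include <cassert>
#include <initializer_list>

static bool notGt(int C, int X) { return !(C > X); }      // not(pcmpgt(C, X))
static bool foldedGt(int C, int X) { return X > C - 1; }  // pcmpgt(X, C - 1)

int main() {
  for (int C : {-3, 0, 7})
    for (int X : {-5, -3, 0, 6, 7, 8})
      assert(notGt(C, X) == foldedGt(C, X));
  return 0;
}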
5162 /// A multi-stage pack shuffle mask is created by specifying NumStages > 1.
5164 static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, in createPackShuffleMask() argument
5167 unsigned NumElts = VT.getVectorNumElements(); in createPackShuffleMask()
5168 unsigned NumLanes = VT.getSizeInBits() / 128; in createPackShuffleMask()
5169 unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits(); in createPackShuffleMask()
5171 unsigned Repetitions = 1u << (NumStages - 1); in createPackShuffleMask()
5186 static void getPackDemandedElts(EVT VT, const APInt &DemandedElts, in getPackDemandedElts() argument
5188 int NumLanes = VT.getSizeInBits() / 128; in getPackDemandedElts()
5211 static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts, in getHorizDemandedElts() argument
5213 getHorizDemandedEltsForFirstOperand(VT.getSizeInBits(), DemandedElts, in getHorizDemandedElts()
5219 /// Calculates the shuffle mask corresponding to the target-specific opcode.
5225 /// It is an error to call this with non-empty Mask/Ops vectors.
5232 MVT VT = N.getSimpleValueType(); in getTargetShuffleMask() local
5233 unsigned NumElems = VT.getVectorNumElements(); in getTargetShuffleMask()
5234 unsigned MaskEltSize = VT.getScalarSizeInBits(); in getTargetShuffleMask()
5246 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5247 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5248 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5253 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5254 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5255 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5260 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5261 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5262 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5267 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5277 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5278 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5288 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5289 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5294 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5295 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5300 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5301 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5306 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5307 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5312 assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) && in getTargetShuffleMask()
5313 "Only 32-bit and 64-bit elements are supported!"); in getTargetShuffleMask()
5314 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5315 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5316 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5323 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); in getTargetShuffleMask()
5324 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5325 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5326 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5333 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); in getTargetShuffleMask()
5334 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5335 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5340 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); in getTargetShuffleMask()
5341 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5342 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5348 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5349 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5354 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5355 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5360 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5361 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5366 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5371 // We only decode broadcasts of same-sized vectors, peeking through to in getTargetShuffleMask()
5374 if (N.getOperand(0).getValueType() == VT) { in getTargetShuffleMask()
5381 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5392 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); in getTargetShuffleMask()
5393 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5394 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5404 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5405 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5412 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5413 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5417 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5418 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5419 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5424 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5425 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5426 ImmN = N.getConstantOperandVal(N.getNumOperands() - 1); in getTargetShuffleMask()
5431 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5436 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5441 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5446 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5447 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5452 unsigned CtrlImm = CtrlOp->getZExtValue(); in getTargetShuffleMask()
5463 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5464 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5474 assert(N.getOperand(1).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5487 assert(N.getOperand(0).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5488 assert(N.getOperand(2).getValueType() == VT && "Unexpected value type"); in getTargetShuffleMask()
5514 // inputs that are actually the same node. Re-map the mask to always point in getTargetShuffleMask()
5519 M -= Mask.size(); in getTargetShuffleMask()
5521 // If we didn't already add operands in the opcode-specific code, default to in getTargetShuffleMask()
5587 int Scale = Size / V->getNumOperands(); in computeZeroableShuffleElements()
5594 APInt Val = Cst->getAPIntValue(); in computeZeroableShuffleElements()
5599 APInt Val = Cst->getValueAPF().bitcastToAPInt(); in computeZeroableShuffleElements()
5610 int Scale = V->getNumOperands() / Size; in computeZeroableShuffleElements()
5638 MVT VT = N.getSimpleValueType(); in getTargetShuffleAndZeroables() local
5650 assert((VT.getSizeInBits() % Size) == 0 && in getTargetShuffleAndZeroables()
5652 unsigned EltSizeInBits = VT.getSizeInBits() / Size; in getTargetShuffleAndZeroables()
5689 // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF. in getTargetShuffleAndZeroables()
5690 // TODO: We currently only set UNDEF for integer types - floats use the same in getTargetShuffleAndZeroables()
5697 if (Idx != 0 && !VT.isFloatingPoint()) in getTargetShuffleAndZeroables()
5704 // INSERT_SUBVECTOR - to widen vectors we often insert them into UNDEF in getTargetShuffleAndZeroables()
5727 assert(VT.getVectorNumElements() == (unsigned)Size && in getTargetShuffleAndZeroables()
5813 MVT VT = N.getSimpleValueType(); in getFauxShuffleMask() local
5814 unsigned NumElts = VT.getVectorNumElements(); in getFauxShuffleMask()
5815 unsigned NumSizeInBits = VT.getSizeInBits(); in getFauxShuffleMask()
5816 unsigned NumBitsPerElt = VT.getScalarSizeInBits(); in getFauxShuffleMask()
5827 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(N)->getMask(); in getFauxShuffleMask()
5838 // Attempt to decode as a per-byte mask. in getFauxShuffleMask()
5849 // We can't assume an undef src element gives an undef dst - the other src in getFauxShuffleMask()
5906 if (!N->isOnlyUserOf(Sub.getNode())) in getFauxShuffleMask()
5933 // Limit this to vXi64 512-bit vector cases to make the most of AVX512 in getFauxShuffleMask()
6011 // Check we have an in-range constant insertion index. in getFauxShuffleMask()
6092 getPackDemandedElts(VT, DemandedElts, EltsLHS, EltsRHS); in getFauxShuffleMask()
6104 // PACKSS then it was likely being used for sign-extension for a in getFauxShuffleMask()
6106 if (N0.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N0.getNode()) && in getFauxShuffleMask()
6111 if (N1.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N1.getNode()) && in getFauxShuffleMask()
6131 createPackShuffleMask(VT, Mask, IsUnary); in getFauxShuffleMask()
6164 Mask.append(NumElts - NumSrcElts, SM_SentinelZero); in getFauxShuffleMask()
6190 Mask[i + j] = i + j - ByteShift; in getFauxShuffleMask()
6194 Mask[i + j - ByteShift] = i + j; in getFauxShuffleMask()
6206 Offset = (X86ISD::VROTLI == Opcode ? NumBytesPerElt - Offset : Offset); in getFauxShuffleMask()
6221 VT.getScalarType()) in getFauxShuffleMask()
6239 // We can only handle all-signbits extensions. in getFauxShuffleMask()
6295 M -= MaskWidth; in resolveTargetShuffleInputsAndMask()
6306 M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth); in resolveTargetShuffleInputsAndMask()
6330 EVT VT = Op.getValueType(); in getTargetShuffleInputs() local
6331 if (!VT.isSimple() || !VT.isVector()) in getTargetShuffleInputs()
6361 EVT VT = Op.getValueType(); in getTargetShuffleInputs() local
6362 if (!VT.isSimple() || !VT.isVector()) in getTargetShuffleInputs()
6372 static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT, in getBROADCAST_LOAD() argument
6379 // Ensure this is a simple (non-atomic, non-volatile), temporal read memop. in getBROADCAST_LOAD()
6380 if (!Mem || !Mem->readMem() || !Mem->isSimple() || Mem->isNonTemporal()) in getBROADCAST_LOAD()
6383 SDValue Ptr = DAG.getMemBasePlusOffset(Mem->getBasePtr(), in getBROADCAST_LOAD()
6385 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in getBROADCAST_LOAD()
6386 SDValue Ops[] = {Mem->getChain(), Ptr}; in getBROADCAST_LOAD()
6390 Mem->getMemOperand(), Offset, MemVT.getStoreSize())); in getBROADCAST_LOAD()
6402 EVT VT = Op.getValueType(); in getShuffleScalarElt() local
6404 unsigned NumElems = VT.getVectorNumElements(); in getShuffleScalarElt()
6408 int Elt = SV->getMaskElt(Index); in getShuffleScalarElt()
6411 return DAG.getUNDEF(VT.getVectorElementType()); in getShuffleScalarElt()
6413 SDValue Src = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1); in getShuffleScalarElt()
6419 MVT ShufVT = VT.getSimpleVT(); in getShuffleScalarElt()
6447 return getShuffleScalarElt(Sub, Index - SubIdx, DAG, Depth + 1); in getShuffleScalarElt()
6478 // For insert_vector_elt - either return the index matching scalar or recurse in getShuffleScalarElt()
6489 : DAG.getUNDEF(VT.getVectorElementType()); in getShuffleScalarElt()
6503 MVT VT = Op.getSimpleValueType(); in LowerBuildVectorAsInsert() local
6504 unsigned NumElts = VT.getVectorNumElements(); in LowerBuildVectorAsInsert()
6505 assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) || in LowerBuildVectorAsInsert()
6506 ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && in LowerBuildVectorAsInsert()
6523 V = getZeroVector(VT, Subtarget, DAG, DL); in LowerBuildVectorAsInsert()
6525 assert(0 == i && "Expected insertion into zero-index"); in LowerBuildVectorAsInsert()
6528 V = DAG.getBitcast(VT, V); in LowerBuildVectorAsInsert()
6532 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, V, Op.getOperand(i), in LowerBuildVectorAsInsert()
6548 // SSE4.1 - use PINSRB to insert each byte directly. in LowerBuildVectorv16i8()
6555 // Pre-SSE4.1 - merge byte pairs and insert with PINSRW. in LowerBuildVectorv16i8()
6556 // If both the lowest 16-bits are non-zero, then convert to MOVD. in LowerBuildVectorv16i8()
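// A minimal standalone sketch (not LLVM code) of the byte-pair merge described
// above: on a little-endian target, byte 2*i becomes the low half and byte
// 2*i+1 the high half of the 16-bit value handed to PINSRW.
#include <cassert>
#include <cstdint>

static uint16_t mergeBytePair(uint8_t LoByte, uint8_t HiByte) {
  return (uint16_t)(LoByte | ((uint16_t)HiByte << 8));
}

int main() {
  assert(mergeBytePair(0x34, 0x12) == 0x1234);
  return 0;
}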
6647 MVT VT = Op.getSimpleValueType(); in LowerBuildVectorv4x32() local
6648 MVT EltVT = VT.getVectorElementType(); in LowerBuildVectorv4x32()
6653 SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops)); in LowerBuildVectorv4x32()
6655 return DAG.getBitcast(VT, Dup); in LowerBuildVectorv4x32()
6665 assert(Zeroable.size() - Zeroable.count() > 1 && in LowerBuildVectorv4x32()
6666 "We expect at least two non-zero elements!"); in LowerBuildVectorv4x32()
6679 // Make sure that this node is extracting from a 128-bit vector. in LowerBuildVectorv4x32()
6680 MVT VT = Elt.getOperand(0).getSimpleValueType(); in LowerBuildVectorv4x32() local
6681 if (!VT.is128BitVector()) in LowerBuildVectorv4x32()
6691 MVT VT = V1.getSimpleValueType(); in LowerBuildVectorv4x32() local
6704 Elt = Op->getOperand(EltIdx); in LowerBuildVectorv4x32()
6715 ? DAG.getUNDEF(VT) in LowerBuildVectorv4x32()
6716 : getZeroVector(VT, Subtarget, DAG, DL); in LowerBuildVectorv4x32()
6717 if (V1.getSimpleValueType() != VT) in LowerBuildVectorv4x32()
6718 V1 = DAG.getBitcast(VT, V1); in LowerBuildVectorv4x32()
6719 return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZeroOrUndef, Mask); in LowerBuildVectorv4x32()
6735 SDValue Current = Op->getOperand(i); in LowerBuildVectorv4x32()
6736 SDValue SrcVector = Current->getOperand(0); in LowerBuildVectorv4x32()
6745 assert(V1.getNode() && "Expected at least two non-zero elements!"); in LowerBuildVectorv4x32()
6758 return DAG.getBitcast(VT, Result); in LowerBuildVectorv4x32()
6762 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, in getVShift() argument
6765 assert(VT.is128BitVector() && "Unknown type for VShift"); in getVShift()
6771 return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); in getVShift()
6774 static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, in LowerAsSplatVectorLoad() argument
6781 SDValue Ptr = LD->getBasePtr(); in LowerAsSplatVectorLoad()
6782 if (!ISD::isNormalLoad(LD) || !LD->isSimple()) in LowerAsSplatVectorLoad()
6784 EVT PVT = LD->getValueType(0); in LowerAsSplatVectorLoad()
6788 int FI = -1; in LowerAsSplatVectorLoad()
6791 FI = FINode->getIndex(); in LowerAsSplatVectorLoad()
6795 FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); in LowerAsSplatVectorLoad()
6802 // FIXME: 256-bit vector instructions don't require a strict alignment, in LowerAsSplatVectorLoad()
6804 Align RequiredAlign(VT.getSizeInBits() / 8); in LowerAsSplatVectorLoad()
6805 SDValue Chain = LD->getChain(); in LowerAsSplatVectorLoad()
6826 int64_t StartOffset = Offset & ~int64_t(RequiredAlign.value() - 1); in LowerAsSplatVectorLoad()
6833 int EltNo = (Offset - StartOffset) >> 2; in LowerAsSplatVectorLoad()
6834 unsigned NumElems = VT.getVectorNumElements(); in LowerAsSplatVectorLoad()
6838 LD->getPointerInfo().getWithOffset(StartOffset)); in LowerAsSplatVectorLoad()
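// A minimal standalone sketch (not LLVM code) of the offset math above: the
// load offset is rounded down to the required vector alignment, and the
// element to splat is the distance from that aligned base in 32-bit units
// (hence the >> 2). A power-of-2 alignment is assumed.
#include <cassert>
#include <cstdint>

static void splitOffset(int64_t Offset, int64_t Align, int64_t &StartOffset,
                        int &EltNo) {
  StartOffset = Offset & ~(Align - 1);
  EltNo = (int)((Offset - StartOffset) >> 2);
}

int main() {
  int64_t Start;
  int Elt;
  splitOffset(/*Offset=*/20, /*Align=*/16, Start, Elt);
  assert(Start == 16 && Elt == 1); // byte 20 is element 1 of the chunk at 16
  return 0;
}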
6852 if (!BaseLd->isSimple()) in findEltLoadSrc()
6866 uint64_t Amt = AmtC->getZExtValue(); in findEltLoadSrc()
6880 uint64_t Idx = IdxC->getZExtValue(); in findEltLoadSrc()
6891 /// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
6895 /// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
6896 static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, in EltsFromConsecutiveLoads() argument
6900 if ((VT.getScalarSizeInBits() % 8) != 0) in EltsFromConsecutiveLoads()
6905 int LastLoadedElt = -1; in EltsFromConsecutiveLoads()
6931 if ((NumElems * EltSizeInBits) != VT.getSizeInBits()) in EltsFromConsecutiveLoads()
6936 unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0); in EltsFromConsecutiveLoads()
6947 // Handle Special Cases - all undef or undef/zero. in EltsFromConsecutiveLoads()
6949 return DAG.getUNDEF(VT); in EltsFromConsecutiveLoads()
6951 return VT.isInteger() ? DAG.getConstant(0, DL, VT) in EltsFromConsecutiveLoads()
6952 : DAG.getConstantFP(0.0, DL, VT); in EltsFromConsecutiveLoads()
6964 int NumLoadedElts = (1 + LastLoadedElt - FirstLoadedElt); in EltsFromConsecutiveLoads()
6966 assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected"); in EltsFromConsecutiveLoads()
6978 int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes); in EltsFromConsecutiveLoads()
6983 EltIdx - FirstLoadedElt); in EltsFromConsecutiveLoads()
7003 auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) { in EltsFromConsecutiveLoads() argument
7004 auto MMOFlags = LDBase->getMemOperand()->getFlags(); in EltsFromConsecutiveLoads()
7005 assert(LDBase->isSimple() && in EltsFromConsecutiveLoads()
7008 DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), in EltsFromConsecutiveLoads()
7009 LDBase->getPointerInfo(), LDBase->getOriginalAlign(), in EltsFromConsecutiveLoads()
7018 bool IsDereferenceable = LDBase->getPointerInfo().isDereferenceable( in EltsFromConsecutiveLoads()
7019 VT.getSizeInBits() / 8, *DAG.getContext(), DAG.getDataLayout()); in EltsFromConsecutiveLoads()
7021 // LOAD - all consecutive load/undefs (must start/end with a load or be in EltsFromConsecutiveLoads()
7029 if (IsAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT)) in EltsFromConsecutiveLoads()
7032 // Don't create 256-bit non-temporal aligned loads without AVX2 as these in EltsFromConsecutiveLoads()
7034 if (LDBase->isNonTemporal() && LDBase->getAlign() >= Align(32) && in EltsFromConsecutiveLoads()
7035 VT.is256BitVector() && !Subtarget.hasInt256()) in EltsFromConsecutiveLoads()
7039 return DAG.getBitcast(VT, Elts[FirstLoadedElt]); in EltsFromConsecutiveLoads()
7042 return CreateLoad(VT, LDBase); in EltsFromConsecutiveLoads()
7044 // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded in EltsFromConsecutiveLoads()
7046 if (!IsAfterLegalize && VT.isVector()) { in EltsFromConsecutiveLoads()
7047 unsigned NumMaskElts = VT.getVectorNumElements(); in EltsFromConsecutiveLoads()
7050 SmallVector<int, 4> ClearMask(NumMaskElts, -1); in EltsFromConsecutiveLoads()
7058 SDValue V = CreateLoad(VT, LDBase); in EltsFromConsecutiveLoads()
7059 SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT) in EltsFromConsecutiveLoads()
7060 : DAG.getConstantFP(0.0, DL, VT); in EltsFromConsecutiveLoads()
7061 return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask); in EltsFromConsecutiveLoads()
7067 if (VT.is256BitVector() || VT.is512BitVector()) { in EltsFromConsecutiveLoads()
7071 EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems); in EltsFromConsecutiveLoads()
7076 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), in EltsFromConsecutiveLoads()
7081 // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs. in EltsFromConsecutiveLoads()
7085 ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) { in EltsFromConsecutiveLoads()
7086 MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits) in EltsFromConsecutiveLoads()
7088 MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSizeInBits); in EltsFromConsecutiveLoads()
7090 // FIXME: Add more isel patterns so we can just use VT directly. in EltsFromConsecutiveLoads()
7091 if (!Subtarget.hasSSE2() && VT == MVT::v4f32) in EltsFromConsecutiveLoads()
7095 SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; in EltsFromConsecutiveLoads()
7097 X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(), in EltsFromConsecutiveLoads()
7098 LDBase->getOriginalAlign(), MachineMemOperand::MOLoad); in EltsFromConsecutiveLoads()
7102 return DAG.getBitcast(VT, ResNode); in EltsFromConsecutiveLoads()
7106 // BROADCAST - match the smallest possible repetition pattern, load that in EltsFromConsecutiveLoads()
7109 (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) { in EltsFromConsecutiveLoads()
7116 // Don't attempt a 1:N subvector broadcast - it should be caught by in EltsFromConsecutiveLoads()
7140 VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64)) in EltsFromConsecutiveLoads()
7148 VT.getSizeInBits() / ScalarSize); in EltsFromConsecutiveLoads()
7154 while (Broadcast.getValueSizeInBits() < VT.getSizeInBits()) in EltsFromConsecutiveLoads()
7166 return DAG.getBitcast(VT, Broadcast); in EltsFromConsecutiveLoads()
7177 // are consecutive, non-overlapping, and in the right order.
7178 static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL, in combineToConsecutiveLoads() argument
7183 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { in combineToConsecutiveLoads()
7190 assert(Elts.size() == VT.getVectorNumElements()); in combineToConsecutiveLoads()
7191 return EltsFromConsecutiveLoads(VT, Elts, DL, DAG, Subtarget, in combineToConsecutiveLoads()
7195 static Constant *getConstantVector(MVT VT, ArrayRef<APInt> Bits, in getConstantVector() argument
7197 unsigned ScalarSize = VT.getScalarSizeInBits(); in getConstantVector()
7198 Type *Ty = EVT(VT.getScalarType()).getTypeForEVT(C); in getConstantVector()
7200 auto getConstantScalar = [&](const APInt &Val) -> Constant * { in getConstantVector()
7201 if (VT.isFloatingPoint()) { in getConstantVector()
7220 static Constant *getConstantVector(MVT VT, const APInt &SplatValue, in getConstantVector() argument
7222 unsigned ScalarSize = VT.getScalarSizeInBits(); in getConstantVector()
7224 auto getConstantScalar = [&](const APInt &Val) -> Constant * { in getConstantVector()
7225 if (VT.isFloatingPoint()) { in getConstantVector()
7249 for (auto *U : N->uses()) { in isFoldableUseOfShuffle()
7250 unsigned Opc = U->getOpcode(); in isFoldableUseOfShuffle()
7252 if (Opc == X86ISD::VPERMV && U->getOperand(0).getNode() == N) in isFoldableUseOfShuffle()
7254 if (Opc == X86ISD::VPERMV3 && U->getOperand(1).getNode() == N) in isFoldableUseOfShuffle()
7260 if (N->hasOneUse()) { in isFoldableUseOfShuffle()
7263 if (Opc == X86ISD::VPDPBUSD && U->getOperand(2).getNode() != N) in isFoldableUseOfShuffle()
7283 // TODO: Splats could be generated for non-AVX CPUs using SSE in lowerBuildVectorAsBroadcast()
7284 // instructions, but there's less potential gain for only 128-bit vectors. in lowerBuildVectorAsBroadcast()
7288 MVT VT = BVOp->getSimpleValueType(0); in lowerBuildVectorAsBroadcast() local
7289 unsigned NumElts = VT.getVectorNumElements(); in lowerBuildVectorAsBroadcast()
7291 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && in lowerBuildVectorAsBroadcast()
7298 if (BVOp->getRepeatedSequence(Sequence, &UndefElements)) { in lowerBuildVectorAsBroadcast()
7325 MVT EltType = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen); in lowerBuildVectorAsBroadcast()
7329 if (!VT.is512BitVector() && !Subtarget.hasVLX()) { in lowerBuildVectorAsBroadcast()
7330 unsigned Scale = 512 / VT.getSizeInBits(); in lowerBuildVectorAsBroadcast()
7334 if (BcstVT.getSizeInBits() != VT.getSizeInBits()) in lowerBuildVectorAsBroadcast()
7335 Bcst = extractSubVector(Bcst, 0, DAG, dl, VT.getSizeInBits()); in lowerBuildVectorAsBroadcast()
7336 return DAG.getBitcast(VT, Bcst); in lowerBuildVectorAsBroadcast()
7342 if (!Ld || (NumElts - NumUndefElts) <= 1) { in lowerBuildVectorAsBroadcast()
7347 if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) && in lowerBuildVectorAsBroadcast()
7348 SplatBitSize > VT.getScalarSizeInBits() && in lowerBuildVectorAsBroadcast()
7349 SplatBitSize < VT.getSizeInBits()) { in lowerBuildVectorAsBroadcast()
7361 Constant *C = getConstantVector(VT, SplatValue, SplatBitSize, *Ctx); in lowerBuildVectorAsBroadcast()
7363 unsigned Repeat = VT.getSizeInBits() / SplatBitSize; in lowerBuildVectorAsBroadcast()
7365 Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign(); in lowerBuildVectorAsBroadcast()
7373 return DAG.getBitcast(VT, Brdcst); in lowerBuildVectorAsBroadcast()
7377 Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize, *Ctx); in lowerBuildVectorAsBroadcast()
7379 unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits(); in lowerBuildVectorAsBroadcast()
7380 MVT VVT = MVT::getVectorVT(VT.getScalarType(), NumElm); in lowerBuildVectorAsBroadcast()
7381 Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign(); in lowerBuildVectorAsBroadcast()
7382 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in lowerBuildVectorAsBroadcast()
7397 if (!Ld || NumElts - NumUndefElts != 1) in lowerBuildVectorAsBroadcast()
7408 // TODO: Handle broadcasts of non-constant sequences. in lowerBuildVectorAsBroadcast()
7410 // Make sure that all of the users of a non-constant load are from the in lowerBuildVectorAsBroadcast()
7412 // FIXME: Is the use count needed for non-constant, non-load case? in lowerBuildVectorAsBroadcast()
7413 if (!ConstSplatVal && !IsLoad && !BVOp->isOnlyUserOf(Ld.getNode())) in lowerBuildVectorAsBroadcast()
7417 bool IsGE256 = (VT.getSizeInBits() >= 256); in lowerBuildVectorAsBroadcast()
7431 // TODO: Check if splatting is recommended for other AVX-capable CPUs. in lowerBuildVectorAsBroadcast()
7446 C = CI->getConstantIntValue(); in lowerBuildVectorAsBroadcast()
7448 C = CF->getConstantFPValue(); in lowerBuildVectorAsBroadcast()
7454 Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign(); in lowerBuildVectorAsBroadcast()
7456 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in lowerBuildVectorAsBroadcast()
7465 // Handle AVX2 in-register broadcasts. in lowerBuildVectorAsBroadcast()
7468 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); in lowerBuildVectorAsBroadcast()
7474 // Make sure the non-chain result is only used by this build vector. in lowerBuildVectorAsBroadcast()
7475 if (!Ld->hasNUsesOfValue(NumElts - NumUndefElts, 0)) in lowerBuildVectorAsBroadcast()
7481 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in lowerBuildVectorAsBroadcast()
7482 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; in lowerBuildVectorAsBroadcast()
7485 LN->getMemoryVT(), LN->getMemOperand()); in lowerBuildVectorAsBroadcast()
7490 // The integer check is needed for the 64-bit-into-128-bit case so it doesn't match in lowerBuildVectorAsBroadcast()
7495 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in lowerBuildVectorAsBroadcast()
7496 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; in lowerBuildVectorAsBroadcast()
7499 LN->getMemoryVT(), LN->getMemOperand()); in lowerBuildVectorAsBroadcast()
7505 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); in lowerBuildVectorAsBroadcast()
7518 int Idx = ExtIdx->getAsZExtVal(); in getUnderlyingExtractedFromVec()
7522 // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already in getUnderlyingExtractedFromVec()
7533 SDValue ShuffleVec = SVOp->getOperand(0); in getUnderlyingExtractedFromVec()
7538 int ShuffleIdx = SVOp->getMaskElt(Idx); in getUnderlyingExtractedFromVec()
7548 MVT VT = Op.getSimpleValueType(); in buildFromShuffleMostly() local
7552 if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT)) in buildFromShuffleMostly()
7559 SmallVector<int, 8> Mask(NumElems, -1); in buildFromShuffleMostly()
7579 // Quit if non-constant index. in buildFromShuffleMostly()
7585 if (ExtractedFromVec.getValueType() != VT) in buildFromShuffleMostly()
7607 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); in buildFromShuffleMostly()
7608 SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask); in buildFromShuffleMostly()
7611 NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx), in buildFromShuffleMostly()
7620 MVT VT = Op.getSimpleValueType(); in LowerBUILD_VECTORvXbf16() local
7622 VT.changeVectorElementType(Subtarget.hasFP16() ? MVT::f16 : MVT::i16); in LowerBUILD_VECTORvXbf16()
7628 return DAG.getBitcast(VT, Res); in LowerBUILD_VECTORvXbf16()
7636 MVT VT = Op.getSimpleValueType(); in LowerBUILD_VECTORvXi1() local
7637 assert((VT.getVectorElementType() == MVT::i1) && in LowerBUILD_VECTORvXi1()
7647 int SplatIdx = -1; in LowerBUILD_VECTORvXi1()
7653 Immediate |= (InC->getZExtValue() & 0x1) << idx; in LowerBUILD_VECTORvXi1()
7664 // for splat use " (select i1 splat_elt, all-ones, all-zeroes)" in LowerBUILD_VECTORvXi1()
7671 assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!"); in LowerBUILD_VECTORvXi1()
7677 if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { in LowerBUILD_VECTORvXi1()
7684 MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U)); in LowerBUILD_VECTORvXi1()
7688 MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1; in LowerBUILD_VECTORvXi1()
7690 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Select, in LowerBUILD_VECTORvXi1()
7698 if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { in LowerBUILD_VECTORvXi1()
7705 MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U)); in LowerBUILD_VECTORvXi1()
7707 MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1; in LowerBUILD_VECTORvXi1()
7709 DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec, in LowerBUILD_VECTORvXi1()
7713 DstVec = DAG.getUNDEF(VT); in LowerBUILD_VECTORvXi1()
7716 DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, in LowerBUILD_VECTORvXi1()
7738 /// 128-bit partial horizontal operation on a 256-bit vector, but that operation
7739 /// may not match the layout of an x86 256-bit horizontal instruction.
7753 /// horizontal operations, but the index-matching logic is incorrect for that.
7755 /// code because it is only used for partial h-op matching now?
7760 EVT VT = N->getValueType(0); in isHorizontalBinOpPart() local
7761 assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops"); in isHorizontalBinOpPart()
7763 assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx && in isHorizontalBinOpPart()
7769 unsigned NumElts = LastIdx - BaseIdx; in isHorizontalBinOpPart()
7770 V0 = DAG.getUNDEF(VT); in isHorizontalBinOpPart()
7771 V1 = DAG.getUNDEF(VT); in isHorizontalBinOpPart()
7775 SDValue Op = N->getOperand(i + BaseIdx); in isHorizontalBinOpPart()
7778 if (Op->isUndef()) { in isHorizontalBinOpPart()
7786 CanFold = Op->getOpcode() == Opcode && Op->hasOneUse(); in isHorizontalBinOpPart()
7810 if (V0.getValueType() != VT) in isHorizontalBinOpPart()
7816 if (V1.getValueType() != VT) in isHorizontalBinOpPart()
7839 /// Emit a sequence of two 128-bit horizontal add/sub followed by
7843 /// This function expects two 256-bit vectors called V0 and V1.
7844 /// At first, each vector is split into two separate 128-bit vectors.
7845 /// Then, the resulting 128-bit vectors are used to implement two
7850 /// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to
7853 /// the lower 128-bit of V0 and the upper 128-bit of V0. The second
7854 /// horizontal binop dag node would take as input the lower 128-bit of V1
7855 /// and the upper 128-bit of V1.
7861 /// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
7862 /// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
7868 /// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
7869 /// the upper 128-bits of the result.
7874 MVT VT = V0.getSimpleValueType(); in ExpandHorizontalBinOp() local
7875 assert(VT.is256BitVector() && VT == V1.getSimpleValueType() && in ExpandHorizontalBinOp()
7878 unsigned NumElts = VT.getVectorNumElements(); in ExpandHorizontalBinOp()
7890 if (!isUndefLO && !V0->isUndef()) in ExpandHorizontalBinOp()
7892 if (!isUndefHI && !V1->isUndef()) in ExpandHorizontalBinOp()
7896 if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef())) in ExpandHorizontalBinOp()
7899 if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef())) in ExpandHorizontalBinOp()
7903 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); in ExpandHorizontalBinOp()
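
// A minimal scalar model of the expansion above for a v8f32 horizontal add:
// each 256-bit input is split into two 128-bit halves, two 128-bit hadds are
// formed, and the results are concatenated. This models the arrangement where
// the first hop takes the two low halves and the second takes the two high
// halves; which Mode value selects that arrangement is not asserted here, and
// the isUndefLO/isUndefHI shortcuts are not modeled.
#include <array>

using Vec4 = std::array<float, 4>;
using Vec8 = std::array<float, 8>;

// 128-bit HADDPS semantics: { a0+a1, a2+a3, b0+b1, b2+b3 }.
Vec4 hadd128(const Vec4 &A, const Vec4 &B) {
  return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
}

Vec8 expandHorizontalAdd256(const Vec8 &V0, const Vec8 &V1) {
  Vec4 V0Lo{V0[0], V0[1], V0[2], V0[3]}, V0Hi{V0[4], V0[5], V0[6], V0[7]};
  Vec4 V1Lo{V1[0], V1[1], V1[2], V1[3]}, V1Hi{V1[4], V1[5], V1[6], V1[7]};
  Vec4 Lo = hadd128(V0Lo, V1Lo);  // lower 128 bits of the result
  Vec4 Hi = hadd128(V0Hi, V1Hi);  // upper 128 bits of the result
  Vec8 R;                         // CONCAT_VECTORS(Lo, Hi)
  for (int I = 0; I != 4; ++I) {
    R[I] = Lo[I];
    R[I + 4] = Hi[I];
  }
  return R;
}
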
7908 /// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1
7909 /// (SUBADD = Opnd0 -+ Opnd1) operation are written to the parameters
7917 MVT VT = BV->getSimpleValueType(0); in isAddSubOrSubAdd() local
7918 if (!Subtarget.hasSSE3() || !VT.isFloatingPoint()) in isAddSubOrSubAdd()
7921 unsigned NumElts = VT.getVectorNumElements(); in isAddSubOrSubAdd()
7922 SDValue InVec0 = DAG.getUNDEF(VT); in isAddSubOrSubAdd()
7923 SDValue InVec1 = DAG.getUNDEF(VT); in isAddSubOrSubAdd()
7927 // Odd-numbered elements in the input build vector are obtained from in isAddSubOrSubAdd()
7929 // Even-numbered elements in the input build vector are obtained from in isAddSubOrSubAdd()
7933 SDValue Op = BV->getOperand(i); in isAddSubOrSubAdd()
7969 if (InVec0.getSimpleValueType() != VT) in isAddSubOrSubAdd()
7974 if (InVec1.getSimpleValueType() != VT) in isAddSubOrSubAdd()
8036 /// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
8043 !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA()) in isFMAddSubOrFMSubAdd()
8077 MVT VT = BV->getSimpleValueType(0); in lowerToAddSubOrFMAddSub() local
8083 return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2); in lowerToAddSubOrFMAddSub()
8090 // There are no known X86 targets with 512-bit ADDSUB instructions! in lowerToAddSubOrFMAddSub()
8092 if (VT.is512BitVector()) { in lowerToAddSubOrFMAddSub()
8094 for (int I = 0, E = VT.getVectorNumElements(); I != E; I += 2) { in lowerToAddSubOrFMAddSub()
8098 SDValue Sub = DAG.getNode(ISD::FSUB, DL, VT, Opnd0, Opnd1); in lowerToAddSubOrFMAddSub()
8099 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, Opnd0, Opnd1); in lowerToAddSubOrFMAddSub()
8100 return DAG.getVectorShuffle(VT, DL, Sub, Add, Mask); in lowerToAddSubOrFMAddSub()
8103 return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); in lowerToAddSubOrFMAddSub()
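
// A minimal scalar model of the ADDSUB/FADDSUB pattern produced above: even
// lanes take Opnd0 - Opnd1 and odd lanes take Opnd0 + Opnd1, matching the
// interleaving shuffle of the separate FSUB/FADD nodes built for the 512-bit
// case. Names are illustrative only.
#include <vector>

std::vector<double> addsub(const std::vector<double> &Opnd0,
                           const std::vector<double> &Opnd1) {
  std::vector<double> R(Opnd0.size());
  for (size_t I = 0; I != R.size(); ++I)
    R[I] = (I % 2 == 0) ? Opnd0[I] - Opnd1[I] : Opnd0[I] + Opnd1[I];
  return R;
}
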
8109 MVT VT = BV->getSimpleValueType(0); in isHopBuildVector() local
8111 V0 = DAG.getUNDEF(VT); in isHopBuildVector()
8112 V1 = DAG.getUNDEF(VT); in isHopBuildVector()
8114 // x86 256-bit horizontal ops are defined in a non-obvious way. Each 128-bit in isHopBuildVector()
8115 // half of the result is calculated independently from the 128-bit halves of in isHopBuildVector()
8116 // the inputs, so that makes the index-checking logic below more complicated. in isHopBuildVector()
8117 unsigned NumElts = VT.getVectorNumElements(); in isHopBuildVector()
8119 unsigned Num128BitChunks = VT.is256BitVector() ? 2 : 1; in isHopBuildVector()
8125 SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j); in isHopBuildVector()
8137 // clang-format off in isHopBuildVector()
8143 // clang-format on in isHopBuildVector()
8156 // The source vector is chosen based on which 64-bit half of the in isHopBuildVector()
8199 // extract/insert the low bits to the correct size. in getHopForBuildVector()
8200 // This is free (examples: zmm --> xmm, xmm --> ymm). in getHopForBuildVector()
8201 MVT VT = BV->getSimpleValueType(0); in getHopForBuildVector() local
8202 unsigned Width = VT.getSizeInBits(); in getHopForBuildVector()
8206 V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, DL, Width); in getHopForBuildVector()
8211 V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, DL, Width); in getHopForBuildVector()
8213 unsigned NumElts = VT.getVectorNumElements(); in getHopForBuildVector()
8216 if (BV->getOperand(i).isUndef()) in getHopForBuildVector()
8221 if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) { in getHopForBuildVector()
8222 MVT HalfVT = VT.getHalfNumVectorElementsVT(); in getHopForBuildVector()
8226 return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, DL, 256); in getHopForBuildVector()
8229 return DAG.getNode(HOpcode, DL, VT, V0, V1); in getHopForBuildVector()
8236 // We need at least 2 non-undef elements to make this worthwhile by default. in LowerToHorizontalOp()
8238 count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); }); in LowerToHorizontalOp()
8243 // int/FP at 128-bit/256-bit. Each type was introduced with a different in LowerToHorizontalOp()
8245 MVT VT = BV->getSimpleValueType(0); in LowerToHorizontalOp() local
8246 if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) || in LowerToHorizontalOp()
8247 ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) || in LowerToHorizontalOp()
8248 ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) || in LowerToHorizontalOp()
8249 ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) { in LowerToHorizontalOp()
8256 // Try harder to match 256-bit ops by using extract/concat. in LowerToHorizontalOp()
8257 if (!Subtarget.hasAVX() || !VT.is256BitVector()) in LowerToHorizontalOp()
8261 unsigned NumElts = VT.getVectorNumElements(); in LowerToHorizontalOp()
8266 if (BV->getOperand(i)->isUndef()) in LowerToHorizontalOp()
8270 if (BV->getOperand(i)->isUndef()) in LowerToHorizontalOp()
8274 if (VT == MVT::v8i32 || VT == MVT::v16i16) { in LowerToHorizontalOp()
8306 assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?"); in LowerToHorizontalOp()
8314 if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || in LowerToHorizontalOp()
8315 VT == MVT::v16i16) { in LowerToHorizontalOp()
8360 MVT VT = Op->getSimpleValueType(0); in lowerBuildVectorToBitOp() local
8361 unsigned NumElems = VT.getVectorNumElements(); in lowerBuildVectorToBitOp()
8366 unsigned Opcode = Op->getOperand(0).getOpcode(); in lowerBuildVectorToBitOp()
8368 if (Opcode != Op->getOperand(i).getOpcode()) in lowerBuildVectorToBitOp()
8384 // Don't do this if the buildvector is a splat - we'd replace one in lowerBuildVectorToBitOp()
8386 if (Op->getSplatValue()) in lowerBuildVectorToBitOp()
8388 if (!TLI.isOperationLegalOrPromote(Opcode, VT)) in lowerBuildVectorToBitOp()
8394 for (SDValue Elt : Op->ops()) { in lowerBuildVectorToBitOp()
8403 if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) { in lowerBuildVectorToBitOp()
8406 RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType()); in lowerBuildVectorToBitOp()
8415 // TODO: Permit non-uniform XOP/AVX2/MULLO cases? in lowerBuildVectorToBitOp()
8419 SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts); in lowerBuildVectorToBitOp()
8420 SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts); in lowerBuildVectorToBitOp()
8421 SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS); in lowerBuildVectorToBitOp()
8437 MVT VT = Op.getSimpleValueType(); in materializeVectorConstant() local
8443 // Vectors containing all ones can be matched by pcmpeqd on 128-bit width in materializeVectorConstant()
8444 // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use in materializeVectorConstant()
8445 // vpcmpeqd on 256-bit vectors. in materializeVectorConstant()
8447 if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) in materializeVectorConstant()
8450 return getOnesVector(VT, DAG, DL); in materializeVectorConstant()
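
// A minimal scalar model of the all-ones idiom above: comparing a lane with
// itself is always true, and x86 vector compares produce all bits set for
// true, so PCMPEQD of a register against itself materializes an all-ones
// vector without any load. Shown for one 32-bit lane; illustrative only.
#include <cstdint>

uint32_t allOnesLane(uint32_t X) {
  return X == X ? 0xFFFFFFFFu : 0u; // always 0xFFFFFFFF
}
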
8459 static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, in createVariablePermute() argument
8462 MVT ShuffleVT = VT; in createVariablePermute()
8463 EVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger(); in createVariablePermute()
8464 unsigned NumElts = VT.getVectorNumElements(); in createVariablePermute()
8465 unsigned SizeInBits = VT.getSizeInBits(); in createVariablePermute()
8467 // Adjust IndicesVec to match VT size. in createVariablePermute()
8474 NumElts * VT.getScalarSizeInBits()); in createVariablePermute()
8478 // Zero-extend the index elements within the vector. in createVariablePermute()
8485 // Handle a SrcVec that doesn't match the VT type. in createVariablePermute()

8490 VT = MVT::getVectorVT(VT.getScalarType(), Scale * NumElts); in createVariablePermute()
8491 IndicesVT = EVT(VT).changeVectorElementTypeToInteger(); in createVariablePermute()
8495 createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); in createVariablePermute()
8500 // Widen smaller SrcVec to match VT. in createVariablePermute()
8501 SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec)); in createVariablePermute()
8515 // e.g. v4i32 -> v16i8 (Scale = 4) in createVariablePermute()
8531 switch (VT.SimpleTy) { in createVariablePermute()
8564 // SSE41 can compare v2i64 - select between indices 0 and 1. in createVariablePermute()
8568 DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {0, 0}), in createVariablePermute()
8569 DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {1, 1}), in createVariablePermute()
8582 ISD::CONCAT_VECTORS, DL, VT, in createVariablePermute()
8588 SDValue LoLo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Lo); in createVariablePermute()
8589 SDValue HiHi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Hi, Hi); in createVariablePermute()
8596 EVT VT = Idx.getValueType(); in createVariablePermute() local
8597 return DAG.getSelectCC(DL, Idx, DAG.getConstant(15, DL, VT), in createVariablePermute()
8598 DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[1], Idx), in createVariablePermute()
8599 DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[0], Idx), in createVariablePermute()
8614 VT, createVariablePermute( in createVariablePermute()
8631 VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi, in createVariablePermute()
8640 return DAG.getBitcast(VT, Res); in createVariablePermute()
8647 MVT WidenSrcVT = MVT::getVectorVT(VT.getScalarType(), 8); in createVariablePermute()
8667 VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi, in createVariablePermute()
8676 return DAG.getBitcast(VT, Res); in createVariablePermute()
8698 assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) && in createVariablePermute()
8699 (VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 && in createVariablePermute()
8702 uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits(); in createVariablePermute()
8713 return DAG.getBitcast(VT, Res); in createVariablePermute()
8716 // Tries to lower a BUILD_VECTOR composed of extract-extract chains that can be
8717 // reasoned to be a permutation of a vector by indices in a non-constant vector.
8721 // ->
8726 // construction of vectors with constant-0 elements.
8733 // This is done by checking that the i-th build_vector operand is of the form: in LowerBUILD_VECTORAsVariablePermute()
8747 SDValue ExtractedIndex = Op->getOperand(1); in LowerBUILD_VECTORAsVariablePermute()
8764 if (!PermIdx || PermIdx->getAPIntValue() != Idx) in LowerBUILD_VECTORAsVariablePermute()
8768 MVT VT = V.getSimpleValueType(); in LowerBUILD_VECTORAsVariablePermute() local
8769 return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); in LowerBUILD_VECTORAsVariablePermute()
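
// A minimal scalar model of the transform above: a build_vector whose i-th
// element is extractelt(SrcVec, IndicesVec[i]) is just a gather of SrcVec by
// a non-constant index vector, i.e. Result[i] = SrcVec[IndicesVec[i]]. The
// out-of-range behaviour of the real permute instructions is not modeled.
#include <vector>

std::vector<int> variablePermute(const std::vector<int> &SrcVec,
                                 const std::vector<unsigned> &IndicesVec) {
  std::vector<int> Result(IndicesVec.size());
  for (size_t I = 0; I != IndicesVec.size(); ++I)
    Result[I] = SrcVec[IndicesVec[I] % SrcVec.size()]; // modulo only to stay in bounds
  return Result;
}
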
8776 MVT VT = Op.getSimpleValueType(); in LowerBUILD_VECTOR() local
8777 MVT EltVT = VT.getVectorElementType(); in LowerBUILD_VECTOR()
8782 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) in LowerBUILD_VECTOR()
8785 if (VT.getVectorElementType() == MVT::bf16 && in LowerBUILD_VECTOR()
8808 OneUseFrozenUndefs = OneUseFrozenUndefs && Elt->hasOneUse(); in LowerBUILD_VECTOR()
8815 NumConstants--; in LowerBUILD_VECTOR()
8826 return DAG.getUNDEF(VT); in LowerBUILD_VECTOR()
8830 return DAG.getFreeze(DAG.getUNDEF(VT)); in LowerBUILD_VECTOR()
8834 return getZeroVector(VT, Subtarget, DAG, dl); in LowerBUILD_VECTOR()
8836 // If we have multiple FREEZE-UNDEF operands, we are likely going to end up in LowerBUILD_VECTOR()
8838 // our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR, in LowerBUILD_VECTOR()
8839 // and blend the FREEZE-UNDEF operands back in. in LowerBUILD_VECTOR()
8840 // FIXME: is this worthwhile even for a single FREEZE-UNDEF operand? in LowerBUILD_VECTOR()
8843 SmallVector<int, 16> BlendMask(NumElems, -1); in LowerBUILD_VECTOR()
8847 BlendMask[i] = -1; in LowerBUILD_VECTOR()
8856 SDValue EltsBV = DAG.getBuildVector(VT, dl, Elts); in LowerBUILD_VECTOR()
8858 SDValue FrozenUndefBV = DAG.getSplatBuildVector(VT, dl, FrozenUndefElt); in LowerBUILD_VECTOR()
8859 return DAG.getVectorShuffle(VT, dl, EltsBV, FrozenUndefBV, BlendMask); in LowerBUILD_VECTOR()
8867 if ((VT.is256BitVector() || VT.is512BitVector()) && in LowerBUILD_VECTOR()
8873 if (VT.is512BitVector() && in LowerBUILD_VECTOR()
8874 NumUpperUndefsOrZeros >= (NumElems - (NumElems / 4))) in LowerBUILD_VECTOR()
8875 UpperElems = NumElems - (NumElems / 4); in LowerBUILD_VECTOR()
8878 MVT LowerVT = MVT::getVectorVT(EltVT, NumElems - UpperElems); in LowerBUILD_VECTOR()
8880 DAG.getBuildVector(LowerVT, dl, Op->ops().drop_back(UpperElems)); in LowerBUILD_VECTOR()
8881 return widenSubVector(VT, NewBV, !UndefUpper, Subtarget, DAG, dl); in LowerBUILD_VECTOR()
8897 // If we are inserting one variable into a vector of non-zero constants, try in LowerBUILD_VECTOR()
8901 // constants. Insertion into a zero vector is handled as a special-case in LowerBUILD_VECTOR()
8903 if (NumConstants == NumElems - 1 && NumNonZero != 1 && in LowerBUILD_VECTOR()
8905 (isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) || in LowerBUILD_VECTOR()
8906 isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) { in LowerBUILD_VECTOR()
8907 // Create an all-constant vector. The variable element in the old in LowerBUILD_VECTOR()
8918 ConstVecOps[i] = ConstantInt::get(Context, C->getAPIntValue()); in LowerBUILD_VECTOR()
8920 ConstVecOps[i] = ConstantFP::get(Context, C->getValueAPF()); in LowerBUILD_VECTOR()
8929 SDValue DAGConstVec = DAG.getConstantPool(CV, VT); in LowerBUILD_VECTOR()
8940 SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI); in LowerBUILD_VECTOR()
8941 unsigned InsertC = InsIndex->getAsZExtVal(); in LowerBUILD_VECTOR()
8942 unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits(); in LowerBUILD_VECTOR()
8944 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex); in LowerBUILD_VECTOR()
8946 // There's no good way to insert into the high elements of a >128-bit in LowerBUILD_VECTOR()
8948 assert(VT.getSizeInBits() > 128 && "Invalid insertion index?"); in LowerBUILD_VECTOR()
8949 assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector"); in LowerBUILD_VECTOR()
8951 unsigned NumElts = VT.getVectorNumElements(); in LowerBUILD_VECTOR()
8954 SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt); in LowerBUILD_VECTOR()
8955 return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask); in LowerBUILD_VECTOR()
8958 // Special case for a single non-zero, non-undef element. in LowerBUILD_VECTOR()
8963 // If we have a constant or non-constant insertion into the low element of in LowerBUILD_VECTOR()
8969 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); in LowerBUILD_VECTOR()
8974 assert((VT.is128BitVector() || VT.is256BitVector() || in LowerBUILD_VECTOR()
8975 VT.is512BitVector()) && in LowerBUILD_VECTOR()
8977 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); in LowerBUILD_VECTOR()
8987 MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32); in LowerBUILD_VECTOR()
8990 return DAG.getBitcast(VT, Item); in LowerBUILD_VECTOR()
8998 unsigned NumBits = VT.getSizeInBits(); in LowerBUILD_VECTOR()
8999 return getVShift(true, VT, in LowerBUILD_VECTOR()
9001 VT, Op.getOperand(1)), in LowerBUILD_VECTOR()
9009 // is a non-constant being inserted into an element other than the low one, in LowerBUILD_VECTOR()
9011 // movd/movss) to move this into the low element, then shuffle it into in LowerBUILD_VECTOR()
9014 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); in LowerBUILD_VECTOR()
9028 if (Op.getNode()->isOnlyUserOf(Item.getNode())) in LowerBUILD_VECTOR()
9029 return LowerAsSplatVectorLoad(Item, VT, dl, DAG); in LowerBUILD_VECTOR()
9035 // handled, so this is best done with a single constant-pool load. in LowerBUILD_VECTOR()
9044 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems); in LowerBUILD_VECTOR()
9046 EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) in LowerBUILD_VECTOR()
9050 // If this is a splat of pairs of 32-bit elements, we can use a narrower in LowerBUILD_VECTOR()
9064 MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64; in LowerBUILD_VECTOR()
9069 // Broadcast from v2i64/v2f64 and cast to final VT. in LowerBUILD_VECTOR()
9071 return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, in LowerBUILD_VECTOR()
9076 // For AVX-length vectors, build the individual 128-bit pieces and use in LowerBUILD_VECTOR()
9078 if (VT.getSizeInBits() > 128) { in LowerBUILD_VECTOR()
9083 DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2)); in LowerBUILD_VECTOR()
9085 HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2)); in LowerBUILD_VECTOR()
9091 // Let legalizer expand 2-wide build_vectors. in LowerBUILD_VECTOR()
9096 SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, in LowerBUILD_VECTOR()
9103 // If element VT is < 32 bits, convert it to inserts into a zero vector. in LowerBUILD_VECTOR()
9114 // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS in LowerBUILD_VECTOR()
9119 // If element VT is == 32 bits, turn it into a number of shuffles. in LowerBUILD_VECTOR()
9125 Ops[i] = getZeroVector(VT, Subtarget, DAG, dl); in LowerBUILD_VECTOR()
9127 Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); in LowerBUILD_VECTOR()
9137 Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]); in LowerBUILD_VECTOR()
9140 Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]); in LowerBUILD_VECTOR()
9143 Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]); in LowerBUILD_VECTOR()
9156 return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec); in LowerBUILD_VECTOR()
9159 assert(Values.size() > 1 && "Expected non-undef and non-splat vector"); in LowerBUILD_VECTOR()
9165 // For SSE 4.1, use insertps to put the high elements into the low element. in LowerBUILD_VECTOR()
9169 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); in LowerBUILD_VECTOR()
9171 Result = DAG.getUNDEF(VT); in LowerBUILD_VECTOR()
9175 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result, in LowerBUILD_VECTOR()
9182 // our (non-undef) elements to the full vector width with the element in the in LowerBUILD_VECTOR()
9187 Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); in LowerBUILD_VECTOR()
9189 Ops[i] = DAG.getUNDEF(VT); in LowerBUILD_VECTOR()
9203 Mask.append(NumElems - Mask.size(), SM_SentinelUndef); in LowerBUILD_VECTOR()
9206 Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask); in LowerBUILD_VECTOR()
9211 // 256-bit AVX can use the vinsertf128 instruction
9212 // to create 256-bit vectors from two other 128-bit ones.
9220 ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide"); in LowerAVXCONCAT_VECTORS()
9247 // If we have more than 2 non-zeros, build each half separately. in LowerAVXCONCAT_VECTORS()
9250 ArrayRef<SDUse> Ops = Op->ops(); in LowerAVXCONCAT_VECTORS()
9279 // k-register.
9306 // If we are inserting a non-zero vector and there are zeros in LSBs and undef in LowerCONCAT_VECTORSvXi1()
9310 Log2_64(NonZeros) != NumOperands - 1) { in LowerCONCAT_VECTORSvXi1()
9322 // If there are zero or one non-zeros we can handle this very simply. in LowerCONCAT_VECTORSvXi1()
9336 ArrayRef<SDUse> Ops = Op->ops(); in LowerCONCAT_VECTORSvXi1()
9359 MVT VT = Op.getSimpleValueType(); in LowerCONCAT_VECTORS() local
9360 if (VT.getVectorElementType() == MVT::i1) in LowerCONCAT_VECTORS()
9363 assert((VT.is256BitVector() && Op.getNumOperands() == 2) || in LowerCONCAT_VECTORS()
9364 (VT.is512BitVector() && (Op.getNumOperands() == 2 || in LowerCONCAT_VECTORS()
9367 // AVX can use the vinsertf128 instruction to create 256-bit vectors in LowerCONCAT_VECTORS()
9368 // from two other 128-bit ones. in LowerCONCAT_VECTORS()
9370 // 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors in LowerCONCAT_VECTORS()
9374 //===----------------------------------------------------------------------===//
9383 //===----------------------------------------------------------------------===//
9385 /// Tiny helper function to identify a no-op mask.
9388 /// array input, which is assumed to be a single-input shuffle mask of the kind
9391 /// in-place shuffle are 'no-op's.
9394 assert(Mask[i] >= -1 && "Out of bound mask element!"); in isNoopShuffleMask()
9404 /// X86 divides up its shuffles into in-lane and cross-lane shuffle operations
9420 /// Test whether there are elements crossing 128-bit lanes in this
9422 static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) { in is128BitLaneCrossingShuffleMask() argument
9423 return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask); in is128BitLaneCrossingShuffleMask()
9427 /// from multiple lanes - this is different from isLaneCrossingShuffleMask to
9440 int SrcLane = -1; in isMultiLaneShuffleMask()
9455 /// Test whether a shuffle mask is equivalent within each sub-lane.
9458 /// lane-relative shuffle in each sub-lane. This trivially implies
9459 /// that it is also not lane-crossing. It may however involve a blend from the
9463 /// non-trivial to compute in the face of undef lanes. The representation is
9464 /// suitable for use with existing 128-bit shuffles as entries from the second
9466 static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, in isRepeatedShuffleMask() argument
9469 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); in isRepeatedShuffleMask()
9470 RepeatedMask.assign(LaneSize, -1); in isRepeatedShuffleMask()
9480 // Ok, handle the in-lane shuffles by detecting if and when they repeat. in isRepeatedShuffleMask()
9485 // This is the first non-undef entry in this slot of a 128-bit lane. in isRepeatedShuffleMask()
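
// A minimal standalone model of the check above for 128-bit lanes on an
// integer mask: every lane must perform the same lane-relative shuffle, with
// second-operand elements remapped into [LaneSize, 2*LaneSize) so that the
// repeated mask can drive an existing 128-bit shuffle. Illustrative only.
#include <vector>

bool isRepeated128BitMask(int EltBits, const std::vector<int> &Mask,
                          std::vector<int> &Repeated) {
  const int Size = (int)Mask.size();
  const int LaneSize = 128 / EltBits;
  Repeated.assign(LaneSize, -1);
  for (int I = 0; I != Size; ++I) {
    if (Mask[I] < 0)
      continue;                       // undef matches anything
    if ((Mask[I] % Size) / LaneSize != I / LaneSize)
      return false;                   // element comes from a different lane
    // Lane-relative index; second-vector elements start at LaneSize.
    int Local = Mask[I] % LaneSize + (Mask[I] < Size ? 0 : LaneSize);
    if (Repeated[I % LaneSize] < 0)
      Repeated[I % LaneSize] = Local; // first non-undef entry for this slot
    else if (Repeated[I % LaneSize] != Local)
      return false;                   // lanes disagree
  }
  return true;
}
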
9494 /// Test whether a shuffle mask is equivalent within each 128-bit lane.
9496 is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, in is128BitLaneRepeatedShuffleMask() argument
9498 return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask); in is128BitLaneRepeatedShuffleMask()
9502 is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) { in is128BitLaneRepeatedShuffleMask() argument
9504 return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask); in is128BitLaneRepeatedShuffleMask()
9507 /// Test whether a shuffle mask is equivalent within each 256-bit lane.
9509 is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, in is256BitLaneRepeatedShuffleMask() argument
9511 return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask); in is256BitLaneRepeatedShuffleMask()
9514 /// Test whether a target shuffle mask is equivalent within each sub-lane.
9537 // Handle the in-lane shuffles by detecting if and when they repeat. Adjust in isRepeatedTargetShuffleMask()
9542 // This is the first non-undef entry in this slot of a 128-bit lane. in isRepeatedTargetShuffleMask()
9551 /// Test whether a target shuffle mask is equivalent within each sub-lane.
9553 static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT, in isRepeatedTargetShuffleMask() argument
9556 return isRepeatedTargetShuffleMask(LaneSizeInBits, VT.getScalarSizeInBits(), in isRepeatedTargetShuffleMask()
9593 MVT VT = Op.getSimpleValueType(); in IsElementEquivalent() local
9594 int NumElts = VT.getVectorNumElements(); in IsElementEquivalent()
9596 int NumLanes = VT.getSizeInBits() / 128; in IsElementEquivalent()
9620 /// each element of the mask is either -1 (signifying undef) or the value given
9630 assert(Mask[i] >= -1 && "Out of bound mask element!"); in isShuffleEquivalent()
9636 MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size); in isShuffleEquivalent()
9637 ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size); in isShuffleEquivalent()
9649 /// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding
9654 static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask, in isTargetShuffleEquivalent() argument
9666 // Check for out-of-range target shuffle mask indices. in isTargetShuffleEquivalent()
9671 if (V1 && (V1.getValueSizeInBits() != VT.getSizeInBits() || in isTargetShuffleEquivalent()
9674 if (V2 && (V2.getValueSizeInBits() != VT.getSizeInBits() || in isTargetShuffleEquivalent()
9693 int BitIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size); in isTargetShuffleEquivalent()
9702 MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size); in isTargetShuffleEquivalent()
9703 ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size); in isTargetShuffleEquivalent()
9715 static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT, in isUnpackWdShuffleMask() argument
9717 if (VT != MVT::v8i32 && VT != MVT::v8f32) in isUnpackWdShuffleMask()
9726 bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd, DAG) || in isUnpackWdShuffleMask()
9727 isTargetShuffleEquivalent(VT, Mask, Unpckhwd, DAG)); in isUnpackWdShuffleMask()
9733 // Create 128-bit vector type based on mask size. in is128BitUnpackShuffleMask()
9735 MVT VT = MVT::getVectorVT(EltVT, Mask.size()); in is128BitUnpackShuffleMask() local
9741 // Match any of unary/binary or low/high. in is128BitUnpackShuffleMask()
9744 createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2); in is128BitUnpackShuffleMask()
9745 if (isTargetShuffleEquivalent(VT, Mask, UnpackMask, DAG) || in is128BitUnpackShuffleMask()
9746 isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask, DAG)) in is128BitUnpackShuffleMask()
9766 /// Get a 4-lane 8-bit shuffle immediate for a mask.
9768 /// This helper function produces an 8-bit shuffle immediate corresponding to
9775 assert(Mask.size() == 4 && "Only 4-lane shuffle masks"); in getV4X86ShuffleImm()
9776 assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!"); in getV4X86ShuffleImm()
9777 assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!"); in getV4X86ShuffleImm()
9778 assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!"); in getV4X86ShuffleImm()
9779 assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!"); in getV4X86ShuffleImm()
9781 // If the mask only uses one non-undef element, then fully 'splat' it to in getV4X86ShuffleImm()
9783 int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin(); in getV4X86ShuffleImm()
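
// A minimal standalone model of the immediate encoding above: two bits per
// lane, lane 0 in the lowest bits. The handling of undef lanes here (keep the
// identity index, except when only one defined index exists, which is then
// splatted as the comment describes) is an assumption made for this sketch.
#include <array>
#include <cstdint>

uint8_t v4ShuffleImm(const std::array<int, 4> &Mask) {
  int FirstElt = 0;
  for (int M : Mask)
    if (M >= 0) {
      FirstElt = M;
      break;
    }
  bool OneDistinct = true;
  for (int M : Mask)
    if (M >= 0 && M != FirstElt)
      OneDistinct = false;
  uint8_t Imm = 0;
  for (int I = 0; I != 4; ++I) {
    int M = Mask[I] >= 0 ? Mask[I] : (OneDistinct ? FirstElt : I);
    Imm |= uint8_t(M) << (2 * I);
  }
  return Imm; // e.g. {3, 2, 1, 0} -> 0x1B, the classic reverse shuffle
}
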
9808 // The function looks for a sub-mask in which the nonzero elements are in
9809 // increasing order. If such a sub-mask exists, the function returns true.
9813 int NextElement = -1; in isNonZeroElementsInOrder()
9817 assert(Mask[i] >= -1 && "Out of bound mask element!"); in isNonZeroElementsInOrder()
9836 static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT, in lowerShuffleWithPSHUFB() argument
9842 int LaneSize = 128 / VT.getScalarSizeInBits(); in lowerShuffleWithPSHUFB()
9843 const int NumBytes = VT.getSizeInBits() / 8; in lowerShuffleWithPSHUFB()
9844 const int NumEltBytes = VT.getScalarSizeInBits() / 8; in lowerShuffleWithPSHUFB()
9846 assert((Subtarget.hasSSSE3() && VT.is128BitVector()) || in lowerShuffleWithPSHUFB()
9847 (Subtarget.hasAVX2() && VT.is256BitVector()) || in lowerShuffleWithPSHUFB()
9848 (Subtarget.hasBWI() && VT.is512BitVector())); in lowerShuffleWithPSHUFB()
9885 VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V), in lowerShuffleWithPSHUFB()
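
// A minimal model of the PSHUFB semantics used above: within each 16-byte
// lane, result byte i is zero if the control byte's high bit is set, and
// otherwise the source byte selected by the low four bits of the control
// byte. Shown for a single 16-byte lane; illustrative only.
#include <array>
#include <cstdint>

std::array<uint8_t, 16> pshufbLane(const std::array<uint8_t, 16> &Src,
                                   const std::array<uint8_t, 16> &Ctl) {
  std::array<uint8_t, 16> R{};
  for (int I = 0; I != 16; ++I)
    R[I] = (Ctl[I] & 0x80) ? 0 : Src[Ctl[I] & 0x0F];
  return R;
}
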
9894 static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT, in lowerShuffleToEXPAND() argument
9905 MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); in lowerShuffleToEXPAND()
9907 unsigned NumElts = VT.getVectorNumElements(); in lowerShuffleToEXPAND()
9912 SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL); in lowerShuffleToEXPAND()
9914 return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask); in lowerShuffleToEXPAND()
9917 static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, in matchShuffleWithUNPCK() argument
9922 int NumElts = VT.getVectorNumElements(); in matchShuffleWithUNPCK()
9938 createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary); in matchShuffleWithUNPCK()
9939 if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG, V1, in matchShuffleWithUNPCK()
9942 V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); in matchShuffleWithUNPCK()
9943 V1 = (Undef1 ? DAG.getUNDEF(VT) : V1); in matchShuffleWithUNPCK()
9947 createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary); in matchShuffleWithUNPCK()
9948 if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG, V1, in matchShuffleWithUNPCK()
9951 V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); in matchShuffleWithUNPCK()
9952 V1 = (Undef1 ? DAG.getUNDEF(VT) : V1); in matchShuffleWithUNPCK()
9959 if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) && in matchShuffleWithUNPCK()
9978 V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1; in matchShuffleWithUNPCK()
9979 V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1; in matchShuffleWithUNPCK()
9987 if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, DAG)) { in matchShuffleWithUNPCK()
9994 if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, DAG)) { in matchShuffleWithUNPCK()
10006 static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT, in lowerShuffleWithUNPCK() argument
10010 createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false); in lowerShuffleWithUNPCK()
10012 return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); in lowerShuffleWithUNPCK()
10015 createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, /* Unary = */ false); in lowerShuffleWithUNPCK()
10017 return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); in lowerShuffleWithUNPCK()
10022 return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1); in lowerShuffleWithUNPCK()
10026 return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1); in lowerShuffleWithUNPCK()
10031 /// Check if the mask can be mapped to a preliminary shuffle (vperm 64-bit)
10032 /// followed by a 256-bit unpack.
10033 static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT, in lowerShuffleWithUNPCK256() argument
10037 createSplat2ShuffleMask(VT, Unpckl, /* Lo */ true); in lowerShuffleWithUNPCK256()
10038 createSplat2ShuffleMask(VT, Unpckh, /* Lo */ false); in lowerShuffleWithUNPCK256()
10048 // This is a "natural" unpack operation (rather than the 128-bit sectored in lowerShuffleWithUNPCK256()
10049 // operation implemented by AVX). We need to rearrange 64-bit chunks of the in lowerShuffleWithUNPCK256()
10053 V1 = DAG.getBitcast(VT, V1); in lowerShuffleWithUNPCK256()
10054 return DAG.getNode(UnpackOpcode, DL, VT, V1, V1); in lowerShuffleWithUNPCK256()
10059 static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT, in matchShuffleAsVTRUNC() argument
10062 if (!VT.is512BitVector() && !Subtarget.hasVLX()) in matchShuffleAsVTRUNC()
10066 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in matchShuffleAsVTRUNC()
10076 unsigned UpperElts = NumElts - NumSrcElts; in matchShuffleAsVTRUNC()
10126 // Non-VLX targets must truncate from a 512-bit type, so we need to in getAVX512TruncNode()
10158 static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleWithVPMOV() argument
10163 assert((VT == MVT::v16i8 || VT == MVT::v8i16) && "Unexpected VTRUNC type"); in lowerShuffleWithVPMOV()
10167 unsigned NumElts = VT.getVectorNumElements(); in lowerShuffleWithVPMOV()
10168 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in lowerShuffleWithVPMOV()
10173 unsigned UpperElts = NumElts - NumSrcElts; in lowerShuffleWithVPMOV()
10201 return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers); in lowerShuffleWithVPMOV()
10208 static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsVTRUNC() argument
10213 assert((VT.is128BitVector() || VT.is256BitVector()) && in lowerShuffleAsVTRUNC()
10218 unsigned NumElts = VT.getVectorNumElements(); in lowerShuffleAsVTRUNC()
10219 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in lowerShuffleAsVTRUNC()
10222 // TODO: Support non-BWI VPMOVWB truncations? in lowerShuffleAsVTRUNC()
10237 unsigned UpperElts = NumElts - NumSrcElts; in lowerShuffleAsVTRUNC()
10264 // and truncate from the double-sized src. in lowerShuffleAsVTRUNC()
10265 MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2); in lowerShuffleAsVTRUNC()
10279 return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers); in lowerShuffleAsVTRUNC()
10307 /// FIXME: Evaluate whether either AVX or AVX-512 has any opportunities here
10318 "We should only be called with masks with a power-of-2 size!"); in canLowerByDroppingElements()
10320 uint64_t ModMask = (uint64_t)ShuffleModulus - 1; in canLowerByDroppingElements()
10323 // We track whether the input is viable for all power-of-2 strides 2^1, 2^2, in canLowerByDroppingElements()
10340 if ((uint64_t)(Mask[i] - Offset) == (((uint64_t)i << N) & ModMask)) in canLowerByDroppingElements()
10362 static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, in matchShuffleWithPACK() argument
10367 unsigned NumElts = VT.getVectorNumElements(); in matchShuffleWithPACK()
10368 unsigned BitSize = VT.getScalarSizeInBits(); in matchShuffleWithPACK()
10374 unsigned NumPackedBits = NumSrcBits - BitSize; in matchShuffleWithPACK()
10417 createPackShuffleMask(VT, BinaryMask, false, NumStages); in matchShuffleWithPACK()
10418 if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, DAG, V1, V2)) in matchShuffleWithPACK()
10424 createPackShuffleMask(VT, UnaryMask, true, NumStages); in matchShuffleWithPACK()
10425 if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, DAG, V1)) in matchShuffleWithPACK()
10433 static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask, in lowerShuffleWithPACK() argument
10438 unsigned SizeBits = VT.getSizeInBits(); in lowerShuffleWithPACK()
10439 unsigned EltBits = VT.getScalarSizeInBits(); in lowerShuffleWithPACK()
10441 if (!matchShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG, in lowerShuffleWithPACK()
10448 // Don't lower multi-stage packs on AVX512, truncation is better. in lowerShuffleWithPACK()
10453 // vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB. in lowerShuffleWithPACK()
10473 assert(Res && Res.getValueType() == VT && in lowerShuffleWithPACK()
10482 static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsBitMask() argument
10487 MVT MaskVT = VT; in lowerShuffleAsBitMask()
10488 MVT EltVT = VT.getVectorElementType(); in lowerShuffleAsBitMask()
10496 MVT LogicVT = VT; in lowerShuffleAsBitMask()
10524 return SDValue(); // No non-zeroable elements! in lowerShuffleAsBitMask()
10530 return DAG.getBitcast(VT, And); in lowerShuffleAsBitMask()
10538 static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsBitBlend() argument
10541 assert(VT.isInteger() && "Only supports integer vector types!"); in lowerShuffleAsBitBlend()
10542 MVT EltVT = VT.getVectorElementType(); in lowerShuffleAsBitBlend()
10552 SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps); in lowerShuffleAsBitBlend()
10553 return getBitSelect(DL, VT, V1, V2, V1Mask, DAG); in lowerShuffleAsBitBlend()
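
// A minimal scalar model of the bit blend above: with a per-element mask that
// is either all-ones or all-zeros, the blend is (V1 & M) | (V2 & ~M), so each
// element is taken wholesale from V1 where the mask is set and from V2 where
// it is clear. The operand/mask polarity is an assumption of this sketch.
// Shown for one 32-bit lane.
#include <cstdint>

uint32_t bitBlendLane(uint32_t V1, uint32_t V2, uint32_t M) {
  return (V1 & M) | (V2 & ~M);
}
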
10561 static bool matchShuffleAsBlend(MVT VT, SDValue V1, SDValue V2, in matchShuffleAsBlend() argument
10575 int NumLanes = VT.getSizeInBits() / 128; in matchShuffleAsBlend()
10579 // For 32/64-bit elements, if we only reference one input (plus any undefs), in matchShuffleAsBlend()
10582 VT.is256BitVector() && VT.getScalarSizeInBits() >= 32; in matchShuffleAsBlend()
10604 IsElementEquivalent(NumElts, V2, V2, M - NumElts, Elt))) { in matchShuffleAsBlend()
10632 LaneBlendMask = (1ull << NumEltsPerLane) - 1; in matchShuffleAsBlend()
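
// A minimal scalar model of an immediate blend such as BLENDI/PBLENDW: bit i
// of the blend mask selects the source for element i, with a set bit taking
// the second source. Zero-forced inputs are modeled simply by passing a zero
// vector for that operand; names are illustrative only.
#include <cstdint>
#include <vector>

std::vector<float> blendByImmediate(const std::vector<float> &V1,
                                    const std::vector<float> &V2,
                                    uint64_t BlendMask) {
  std::vector<float> R(V1.size());
  for (size_t I = 0; I != R.size(); ++I)
    R[I] = ((BlendMask >> I) & 1) ? V2[I] : V1[I];
  return R;
}
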
10645 static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsBlend() argument
10653 if (!matchShuffleAsBlend(VT, V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero, in lowerShuffleAsBlend()
10657 // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs. in lowerShuffleAsBlend()
10659 V1 = getZeroVector(VT, Subtarget, DAG, DL); in lowerShuffleAsBlend()
10661 V2 = getZeroVector(VT, Subtarget, DAG, DL); in lowerShuffleAsBlend()
10663 unsigned NumElts = VT.getVectorNumElements(); in lowerShuffleAsBlend()
10665 switch (VT.SimpleTy) { in lowerShuffleAsBlend()
10668 assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!"); in lowerShuffleAsBlend()
10672 assert(Subtarget.hasAVX() && "256-bit float blends require AVX!"); in lowerShuffleAsBlend()
10679 assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!"); in lowerShuffleAsBlend()
10680 return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, in lowerShuffleAsBlend()
10686 // We can lower these with PBLENDW which is mirrored across 128-bit lanes. in lowerShuffleAsBlend()
10696 // TODO - we should allow 2 PBLENDW here and leave shuffle combine to in lowerShuffleAsBlend()
10712 assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!"); in lowerShuffleAsBlend()
10715 assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!"); in lowerShuffleAsBlend()
10718 if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, in lowerShuffleAsBlend()
10731 lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) in lowerShuffleAsBlend()
10735 int Scale = VT.getScalarSizeInBits() / 8; in lowerShuffleAsBlend()
10739 MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); in lowerShuffleAsBlend()
10743 // If V2 can be load-folded and V1 cannot be load-folded, then commute to in lowerShuffleAsBlend()
10744 // allow that load-folding possibility. in lowerShuffleAsBlend()
10753 // are -1 for true and 0 for false. We then use the LLVM semantics of 'true' in lowerShuffleAsBlend()
10755 // reality in x86 is that vector masks (pre-AVX-512) use only the high bit in lowerShuffleAsBlend()
10766 : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, in lowerShuffleAsBlend()
10772 VT, in lowerShuffleAsBlend()
10785 if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, in lowerShuffleAsBlend()
10790 // Otherwise load an immediate into a GPR, cast to k-register, and use a in lowerShuffleAsBlend()
10802 /// a single-input permutation.
10805 /// then reduce the shuffle to a single-input permutation.
10806 static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT, in lowerShuffleAsBlendAndPermute() argument
10813 SmallVector<int, 32> BlendMask(Mask.size(), -1); in lowerShuffleAsBlendAndPermute()
10814 SmallVector<int, 32> PermuteMask(Mask.size(), -1); in lowerShuffleAsBlendAndPermute()
10832 unsigned EltSize = VT.getScalarSizeInBits(); in lowerShuffleAsBlendAndPermute()
10836 SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask); in lowerShuffleAsBlendAndPermute()
10837 return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask); in lowerShuffleAsBlendAndPermute()
10841 /// a single-input permutation.
10844 /// then reduce the shuffle to a single-input (wider) permutation.
10845 static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT, in lowerShuffleAsUNPCKAndPermute() argument
10850 int NumLanes = VT.getSizeInBits() / 128; in lowerShuffleAsUNPCKAndPermute()
10855 SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)}; in lowerShuffleAsUNPCKAndPermute()
10870 NormM -= NumElts; in lowerShuffleAsUNPCKAndPermute()
10895 SmallVector<int, 32> PermuteMask(NumElts, -1); in lowerShuffleAsUNPCKAndPermute()
10902 NormM -= NumElts; in lowerShuffleAsUNPCKAndPermute()
10910 assert(PermuteMask[Elt] != -1 && in lowerShuffleAsUNPCKAndPermute()
10915 SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops); in lowerShuffleAsUNPCKAndPermute()
10916 return DAG.getVectorShuffle(VT, DL, Unpck, DAG.getUNDEF(VT), PermuteMask); in lowerShuffleAsUNPCKAndPermute()
10928 static SDValue lowerShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT, in lowerShuffleAsPermuteAndUnpack() argument
10936 // This routine only supports 128-bit integer dual input vectors. in lowerShuffleAsPermuteAndUnpack()
10937 if (VT.isFloatingPoint() || !VT.is128BitVector() || V2.isUndef()) in lowerShuffleAsPermuteAndUnpack()
10948 SmallVector<int, 16> V1Mask((unsigned)Size, -1); in lowerShuffleAsPermuteAndUnpack()
10949 SmallVector<int, 16> V2Mask((unsigned)Size, -1); in lowerShuffleAsPermuteAndUnpack()
10977 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask); in lowerShuffleAsPermuteAndUnpack()
10978 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); in lowerShuffleAsPermuteAndUnpack()
10988 VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, in lowerShuffleAsPermuteAndUnpack()
10994 int OrigScalarSize = VT.getScalarSizeInBits(); in lowerShuffleAsPermuteAndUnpack()
11005 // If none of the unpack-rooted lowerings worked (or were profitable) try an in lowerShuffleAsPermuteAndUnpack()
11014 // half-crossings are created. in lowerShuffleAsPermuteAndUnpack()
11017 SmallVector<int, 32> PermMask((unsigned)Size, -1); in lowerShuffleAsPermuteAndUnpack()
11025 2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1); in lowerShuffleAsPermuteAndUnpack()
11028 VT, DL, in lowerShuffleAsPermuteAndUnpack()
11029 DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL, DL, VT, in lowerShuffleAsPermuteAndUnpack()
11031 DAG.getUNDEF(VT), PermMask); in lowerShuffleAsPermuteAndUnpack()
11037 /// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then
11040 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, in lowerShuffleAsByteRotateAndPermute() argument
11042 if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) || in lowerShuffleAsByteRotateAndPermute()
11043 (VT.is256BitVector() && !Subtarget.hasAVX2()) || in lowerShuffleAsByteRotateAndPermute()
11044 (VT.is512BitVector() && !Subtarget.hasBWI())) in lowerShuffleAsByteRotateAndPermute()
11048 if (is128BitLaneCrossingShuffleMask(VT, Mask)) in lowerShuffleAsByteRotateAndPermute()
11051 int Scale = VT.getScalarSizeInBits() / 8; in lowerShuffleAsByteRotateAndPermute()
11052 int NumLanes = VT.getSizeInBits() / 128; in lowerShuffleAsByteRotateAndPermute()
11053 int NumElts = VT.getVectorNumElements(); in lowerShuffleAsByteRotateAndPermute()
11073 M -= NumElts; in lowerShuffleAsByteRotateAndPermute()
11084 // TODO - it might be worth doing this for unary shuffles if the permute in lowerShuffleAsByteRotateAndPermute()
11090 if (VT.getSizeInBits() > 128 && (Blend1 || Blend2)) in lowerShuffleAsByteRotateAndPermute()
11095 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); in lowerShuffleAsByteRotateAndPermute()
11097 VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi), in lowerShuffleAsByteRotateAndPermute()
11107 PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane); in lowerShuffleAsByteRotateAndPermute()
11109 PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane); in lowerShuffleAsByteRotateAndPermute()
11112 return DAG.getVectorShuffle(VT, DL, Rotate, DAG.getUNDEF(VT), PermMask); in lowerShuffleAsByteRotateAndPermute()
11157 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, in lowerShuffleAsDecomposedShuffleMerge() argument
11160 int NumLanes = VT.getSizeInBits() / 128; in lowerShuffleAsDecomposedShuffleMerge()
11166 SmallVector<int, 32> V1Mask(NumElts, -1); in lowerShuffleAsDecomposedShuffleMerge()
11167 SmallVector<int, 32> V2Mask(NumElts, -1); in lowerShuffleAsDecomposedShuffleMerge()
11168 SmallVector<int, 32> FinalMask(NumElts, -1); in lowerShuffleAsDecomposedShuffleMerge()
11176 V2Mask[i] = M - NumElts; in lowerShuffleAsDecomposedShuffleMerge()
11184 // and change \p InputMask to be a no-op (identity) mask. in lowerShuffleAsDecomposedShuffleMerge()
11185 auto canonicalizeBroadcastableInput = [DL, VT, &Subtarget, in lowerShuffleAsDecomposedShuffleMerge()
11196 Input = DAG.getNode(X86ISD::VBROADCAST, DL, VT, Input); in lowerShuffleAsDecomposedShuffleMerge()
11205 // It is possible that the shuffle for one of the inputs is already a no-op. in lowerShuffleAsDecomposedShuffleMerge()
11206 // See if we can simplify non-no-op shuffles into broadcasts, in lowerShuffleAsDecomposedShuffleMerge()
11215 // one of the input shuffles would be a no-op. We prefer to shuffle inputs as in lowerShuffleAsDecomposedShuffleMerge()
11217 // we'll have to do 2x as many shuffles in order to achieve this, a 2-input in lowerShuffleAsDecomposedShuffleMerge()
11218 // pre-shuffle first is a better strategy. in lowerShuffleAsDecomposedShuffleMerge()
11221 if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, in lowerShuffleAsDecomposedShuffleMerge()
11233 lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG)) in lowerShuffleAsDecomposedShuffleMerge()
11236 DL, VT, V1, V2, Mask, Subtarget, DAG)) in lowerShuffleAsDecomposedShuffleMerge()
11238 // Unpack/rotate failed - try again with variable blends. in lowerShuffleAsDecomposedShuffleMerge()
11239 if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, in lowerShuffleAsDecomposedShuffleMerge()
11242 if (VT.getScalarSizeInBits() >= 32) in lowerShuffleAsDecomposedShuffleMerge()
11244 DL, VT, V1, V2, Mask, Subtarget, DAG)) in lowerShuffleAsDecomposedShuffleMerge()
11250 // TODO: It doesn't have to be alternating - but each lane mustn't have more in lowerShuffleAsDecomposedShuffleMerge()
11252 if (IsAlternating && VT.getScalarSizeInBits() < 32) { in lowerShuffleAsDecomposedShuffleMerge()
11253 V1Mask.assign(NumElts, -1); in lowerShuffleAsDecomposedShuffleMerge()
11254 V2Mask.assign(NumElts, -1); in lowerShuffleAsDecomposedShuffleMerge()
11255 FinalMask.assign(NumElts, -1); in lowerShuffleAsDecomposedShuffleMerge()
11263 V2Mask[i + (j / 2)] = M - NumElts; in lowerShuffleAsDecomposedShuffleMerge()
11269 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask); in lowerShuffleAsDecomposedShuffleMerge()
11270 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); in lowerShuffleAsDecomposedShuffleMerge()
11271 return DAG.getVectorShuffle(VT, DL, V1, V2, FinalMask); in lowerShuffleAsDecomposedShuffleMerge()
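
// A minimal scalar model of the decomposition above: shuffle each input on
// its own so every referenced element lands in its final position, then blend
// the two results positionally. This is the generic two-input fallback when
// no single instruction handles the mask. Names are illustrative only.
#include <vector>

std::vector<int> decomposedShuffleMerge(const std::vector<int> &V1,
                                        const std::vector<int> &V2,
                                        const std::vector<int> &Mask) {
  const int N = (int)Mask.size();
  std::vector<int> V1Shuf(N, 0), V2Shuf(N, 0), R(N, 0);
  std::vector<bool> FromV2(N, false);
  for (int I = 0; I != N; ++I) {
    int M = Mask[I];
    if (M < 0)
      continue;               // undef: any value is acceptable
    if (M < N) {
      V1Shuf[I] = V1[M];      // single-input shuffle of V1
    } else {
      V2Shuf[I] = V2[M - N];  // single-input shuffle of V2
      FromV2[I] = true;
    }
  }
  for (int I = 0; I != N; ++I)
    R[I] = FromV2[I] ? V2Shuf[I] : V1Shuf[I]; // positional blend
  return R;
}
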
11277 assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); in matchShuffleAsBitRotate()
11278 assert(EltSizeInBits < 64 && "Can't rotate 64-bit integers"); in matchShuffleAsBitRotate()
11286 return -1; in matchShuffleAsBitRotate()
11294 static SDValue lowerShuffleAsBitRotate(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsBitRotate() argument
11301 (VT.is128BitVector() && Subtarget.hasXOP()) || Subtarget.hasAVX512(); in lowerShuffleAsBitRotate()
11306 int RotateAmt = matchShuffleAsBitRotate(RotateVT, VT.getScalarSizeInBits(), in lowerShuffleAsBitRotate()
11311 // For pre-SSSE3 targets, if we are shuffling vXi8 elts then ISD::ROTL, in lowerShuffleAsBitRotate()
11319 unsigned SrlAmt = RotateVT.getScalarSizeInBits() - RotateAmt; in lowerShuffleAsBitRotate()
11326 return DAG.getBitcast(VT, Rot); in lowerShuffleAsBitRotate()
11332 return DAG.getBitcast(VT, Rot); in lowerShuffleAsBitRotate()
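
// A minimal model of the shift-based fallback mentioned above: a rotate of
// each element by B bits can be emulated with a left shift, a right shift by
// (EltBits - B), and an OR. Shown here for a single 16-bit element;
// illustrative only.
#include <cstdint>

uint16_t rotateLeft16(uint16_t V, unsigned B) {
  B %= 16;
  if (B == 0)
    return V;
  return uint16_t((V << B) | (V >> (16 - B)));
}
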
11344 // [-1, 12, 13, 14, -1, -1, 1, -1] in matchShuffleAsElementRotate()
11345 // [-1, -1, -1, -1, -1, -1, 1, 2] in matchShuffleAsElementRotate()
11347 // [-1, 4, 5, 6, -1, -1, 9, -1] in matchShuffleAsElementRotate()
11348 // [-1, 4, 5, 6, -1, -1, -1, -1] in matchShuffleAsElementRotate()
11359 int StartIdx = i - (M % NumElts); in matchShuffleAsElementRotate()
11362 return -1; in matchShuffleAsElementRotate()
11367 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx; in matchShuffleAsElementRotate()
11373 return -1; in matchShuffleAsElementRotate()
11379 // to. This reflects whether the high elements are remaining or the low in matchShuffleAsElementRotate()
11390 return -1; in matchShuffleAsElementRotate()
11410 /// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
11420 /// elements, and takes the low elements as the result. Note that while this is
11421 /// specified as a *right shift* because x86 is little-endian, it is a *left
11423 static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, in matchShuffleAsByteRotate() argument
11427 return -1; in matchShuffleAsByteRotate()
11429 // PALIGNR works on 128-bit lanes. in matchShuffleAsByteRotate()
11431 if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) in matchShuffleAsByteRotate()
11432 return -1; in matchShuffleAsByteRotate()
11436 return -1; in matchShuffleAsByteRotate()
11445 static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsByteRotate() argument
11449 assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); in lowerShuffleAsByteRotate()
11452 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask); in lowerShuffleAsByteRotate()
11458 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); in lowerShuffleAsByteRotate()
11464 assert((!VT.is512BitVector() || Subtarget.hasBWI()) && in lowerShuffleAsByteRotate()
11465 "512-bit PALIGNR requires BWI instructions"); in lowerShuffleAsByteRotate()
11467 VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi, in lowerShuffleAsByteRotate()
11471 assert(VT.is128BitVector() && in lowerShuffleAsByteRotate()
11472 "Rotate-based lowering only supports 128-bit lowering!"); in lowerShuffleAsByteRotate()
11474 "Can shuffle at most 16 bytes in a 128-bit vector!"); in lowerShuffleAsByteRotate()
11479 int LoByteShift = 16 - ByteRotation; in lowerShuffleAsByteRotate()
11488 return DAG.getBitcast(VT, in lowerShuffleAsByteRotate()
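
// A minimal element-level model of the rotation being matched above: the two
// inputs are conceptually concatenated (one in the low half, the other in the
// high half), the concatenation is shifted right by R elements, and the low N
// elements are kept. Which original operand becomes the low or high half is
// decided by the matcher and is not asserted here; assumes 0 <= R <= N.
#include <vector>

std::vector<int> rotateConcat(const std::vector<int> &Lo,
                              const std::vector<int> &Hi, int R) {
  const int N = (int)Lo.size();
  std::vector<int> Result(N);
  for (int I = 0; I != N; ++I) {
    int Src = I + R;                        // index into the 2N-element concatenation
    Result[I] = Src < N ? Lo[Src] : Hi[Src - N];
  }
  return Result;                            // e.g. N=4, R=1: { Lo1, Lo2, Lo3, Hi0 }
}
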
11499 /// elements, and takes the low elements as the result. Note that while this is
11500 /// specified as a *right shift* because x86 is little-endian, it is a *left
11502 static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsVALIGN() argument
11507 assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) && in lowerShuffleAsVALIGN()
11508 "Only 32-bit and 64-bit elements are supported!"); in lowerShuffleAsVALIGN()
11510 // 128/256-bit vectors are only supported with VLX. in lowerShuffleAsVALIGN()
11511 assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) in lowerShuffleAsVALIGN()
11512 && "VLX required for 128/256-bit vectors"); in lowerShuffleAsVALIGN()
11517 return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi, in lowerShuffleAsVALIGN()
11520 // See if we can use VALIGN as a cross-lane version of VSHLDQ/VSRLDQ. in lowerShuffleAsVALIGN()
11522 // TODO: We can probably make this more aggressive and use shift-pairs like in lowerShuffleAsVALIGN()
11533 int Low = Mask[ZeroLo] < (int)NumElts ? 0 : NumElts; in lowerShuffleAsVALIGN() local
11534 if (isSequentialOrUndefInRange(Mask, ZeroLo, NumElts - ZeroLo, Low)) in lowerShuffleAsVALIGN()
11535 return DAG.getNode(X86ISD::VALIGN, DL, VT, Src, in lowerShuffleAsVALIGN()
11536 getZeroVector(VT, Subtarget, DAG, DL), in lowerShuffleAsVALIGN()
11537 DAG.getTargetConstant(NumElts - ZeroLo, DL, MVT::i8)); in lowerShuffleAsVALIGN()
11542 int Low = Mask[0] < (int)NumElts ? 0 : NumElts; in lowerShuffleAsVALIGN() local
11543 if (isSequentialOrUndefInRange(Mask, 0, NumElts - ZeroHi, Low + ZeroHi)) in lowerShuffleAsVALIGN()
11544 return DAG.getNode(X86ISD::VALIGN, DL, VT, in lowerShuffleAsVALIGN()
11545 getZeroVector(VT, Subtarget, DAG, DL), Src, in lowerShuffleAsVALIGN()
11553 static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsByteShiftMask() argument
11558 assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); in lowerShuffleAsByteShiftMask()
11559 assert(VT.is128BitVector() && "Only 128-bit vectors supported"); in lowerShuffleAsByteShiftMask()
11569 unsigned Len = NumElts - (ZeroLo + ZeroHi); in lowerShuffleAsByteShiftMask()
11573 unsigned Scale = VT.getScalarSizeInBits() / 8; in lowerShuffleAsByteShiftMask()
11584 // 01234567 --> zzzzzz01 --> 1zzzzzzz in lowerShuffleAsByteShiftMask()
11585 // 01234567 --> 4567zzzz --> zzzzz456 in lowerShuffleAsByteShiftMask()
11586 // 01234567 --> z0123456 --> 3456zzzz --> zz3456zz in lowerShuffleAsByteShiftMask()
11588 unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); in lowerShuffleAsByteShiftMask()
11603 unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); in lowerShuffleAsByteShiftMask()
11614 return DAG.getBitcast(VT, Res); in lowerShuffleAsByteShiftMask()
11620 /// PSRL(W/D/Q/DQ) SSE2 and AVX2 logical bit-shift instructions. The function
11623 /// bit-wise element shifts and the byte shift across an entire 128-bit double
11626 /// PSHL : (little-endian) left bit shift.
11628 /// [ -1, 4, zz, -1 ]
11629 /// PSRL : (little-endian) right bit shift.
11631 /// [ -1, -1, 7, zz]
11632 /// PSLLDQ : (little-endian) left byte shift
11634 /// [ zz, zz, -1, -1, 2, 3, 4, -1]
11635 /// [ zz, zz, zz, zz, zz, zz, -1, 1]
11636 /// PSRLDQ : (little-endian) right byte shift
11638 /// [ -1, 5, 6, 7, zz, zz, zz, zz]
11639 /// [ 1, 2, -1, -1, -1, -1, zz, zz]
11650 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))]) in matchShuffleAsShift()
11659 unsigned Low = Left ? i : i + Shift; in matchShuffleAsShift() local
11660 unsigned Len = Scale - Shift; in matchShuffleAsShift()
11661 if (!isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset)) in matchShuffleAsShift()
11662 return -1; in matchShuffleAsShift()
11682 // SSE/AVX supports logical shifts up to 64-bit integers - so we can just in matchShuffleAsShift()
11699 return -1; in matchShuffleAsShift()
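
// A minimal element-level model of the logical shifts being matched above: a
// right shift by S elements pulls elements toward index 0 and fills the top
// with zeros, while a left shift pushes them up and fills the bottom with
// zeros. A shuffle mask whose remaining slots are provably zero can therefore
// be lowered to one of these shifts. Assumes S >= 0; illustrative only.
#include <vector>

std::vector<int> shiftRightElts(const std::vector<int> &V, int S) {
  const int N = (int)V.size();
  std::vector<int> R(N, 0);
  for (int I = 0; I + S < N; ++I)
    R[I] = V[I + S];            // PSRL*/PSRLDQ-style: zeros enter at the top
  return R;
}

std::vector<int> shiftLeftElts(const std::vector<int> &V, int S) {
  const int N = (int)V.size();
  std::vector<int> R(N, 0);
  for (int I = S; I < N; ++I)
    R[I] = V[I - S];            // PSLL*/PSLLDQ-style: zeros enter at the bottom
  return R;
}
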
11702 static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsShift() argument
11708 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); in lowerShuffleAsShift()
11715 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(), in lowerShuffleAsShift()
11720 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(), in lowerShuffleAsShift()
11736 return DAG.getBitcast(VT, V); in lowerShuffleAsShift()
11741 static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, in matchShuffleAsEXTRQ() argument
11746 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); in matchShuffleAsEXTRQ()
11756 for (; Len > 0; --Len) in matchShuffleAsEXTRQ()
11757 if (!Zeroable[Len - 1]) in matchShuffleAsEXTRQ()
11763 int Idx = -1; in matchShuffleAsEXTRQ()
11776 if (Idx < 0 || (Src == V && Idx == (M - i))) { in matchShuffleAsEXTRQ()
11778 Idx = M - i; in matchShuffleAsEXTRQ()
11788 BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; in matchShuffleAsEXTRQ()
11789 BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; in matchShuffleAsEXTRQ()
11796 // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
11797 static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, in matchShuffleAsINSERTQ() argument
11802 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); in matchShuffleAsINSERTQ()
11826 int Len = Hi - Idx; in matchShuffleAsINSERTQ()
11838 if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { in matchShuffleAsINSERTQ()
11841 isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { in matchShuffleAsINSERTQ()
11844 isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, in matchShuffleAsINSERTQ()
11851 BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; in matchShuffleAsINSERTQ()
11852 BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; in matchShuffleAsINSERTQ()
11863 static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleWithSSE4A() argument
11867 if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) in lowerShuffleWithSSE4A()
11868 return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, in lowerShuffleWithSSE4A()
11872 if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) in lowerShuffleWithSSE4A()
11873 return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), in lowerShuffleWithSSE4A()
11874 V2 ? V2 : DAG.getUNDEF(VT), in lowerShuffleWithSSE4A()
11891 const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV, in lowerShuffleAsSpecificZeroOrAnyExtend() argument
11894 int EltBits = VT.getScalarSizeInBits(); in lowerShuffleAsSpecificZeroOrAnyExtend()
11895 int NumElements = VT.getVectorNumElements(); in lowerShuffleAsSpecificZeroOrAnyExtend()
11915 SmallVector<int, 8> ShMask((unsigned)NumElements, -1); in lowerShuffleAsSpecificZeroOrAnyExtend()
11918 ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1; in lowerShuffleAsSpecificZeroOrAnyExtend()
11920 return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask); in lowerShuffleAsSpecificZeroOrAnyExtend()
11926 // Not worth offsetting 128-bit vectors if scale == 2, a pattern using in lowerShuffleAsSpecificZeroOrAnyExtend()
11928 if (Offset && Scale == 2 && VT.is128BitVector()) in lowerShuffleAsSpecificZeroOrAnyExtend()
11932 InputV = DAG.getBitcast(VT, InputV); in lowerShuffleAsSpecificZeroOrAnyExtend()
11936 return DAG.getBitcast(VT, InputV); in lowerShuffleAsSpecificZeroOrAnyExtend()
11939 assert(VT.is128BitVector() && "Only 128-bit vectors can be extended."); in lowerShuffleAsSpecificZeroOrAnyExtend()
11940 InputV = DAG.getBitcast(VT, InputV); in lowerShuffleAsSpecificZeroOrAnyExtend()
11945 int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1, in lowerShuffleAsSpecificZeroOrAnyExtend()
11946 -1}; in lowerShuffleAsSpecificZeroOrAnyExtend()
11948 VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, in lowerShuffleAsSpecificZeroOrAnyExtend()
11953 int PSHUFDMask[4] = {Offset / 2, -1, in lowerShuffleAsSpecificZeroOrAnyExtend()
11954 SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1}; in lowerShuffleAsSpecificZeroOrAnyExtend()
11958 int PSHUFWMask[4] = {1, -1, -1, -1}; in lowerShuffleAsSpecificZeroOrAnyExtend()
11961 VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16, in lowerShuffleAsSpecificZeroOrAnyExtend()
11967 // to 64-bits. in lowerShuffleAsSpecificZeroOrAnyExtend()
11970 assert(VT.is128BitVector() && "Unexpected vector width!"); in lowerShuffleAsSpecificZeroOrAnyExtend()
11974 MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, in lowerShuffleAsSpecificZeroOrAnyExtend()
11979 return DAG.getBitcast(VT, Lo); in lowerShuffleAsSpecificZeroOrAnyExtend()
11983 MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, in lowerShuffleAsSpecificZeroOrAnyExtend()
11986 return DAG.getBitcast(VT, in lowerShuffleAsSpecificZeroOrAnyExtend()
12007 VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, in lowerShuffleAsSpecificZeroOrAnyExtend()
12015 SmallVector<int, 8> ShMask((unsigned)NumElements, -1); in lowerShuffleAsSpecificZeroOrAnyExtend()
12017 ShMask[i - AlignToUnpack] = i; in lowerShuffleAsSpecificZeroOrAnyExtend()
12018 InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask); in lowerShuffleAsSpecificZeroOrAnyExtend()
12019 Offset -= AlignToUnpack; in lowerShuffleAsSpecificZeroOrAnyExtend()
12027 Offset -= (NumElements / 2); in lowerShuffleAsSpecificZeroOrAnyExtend()
12039 return DAG.getBitcast(VT, InputV); in lowerShuffleAsSpecificZeroOrAnyExtend()
12047 /// match this pattern. It will use all of the micro-architectural details it
12048 /// can to emit an efficient lowering. It handles both blends with all-zero
12049 /// inputs to explicitly zero-extend and undef-lanes (sometimes undef due to
12052 /// The reason we have dedicated lowering for zext-style shuffles is that they
12055 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, in lowerShuffleAsZeroOrAnyExtend() argument
12058 int Bits = VT.getSizeInBits(); in lowerShuffleAsZeroOrAnyExtend()
12060 int NumElements = VT.getVectorNumElements(); in lowerShuffleAsZeroOrAnyExtend()
12062 assert(VT.getScalarSizeInBits() <= 32 && in lowerShuffleAsZeroOrAnyExtend()
12063 "Exceeds 32-bit integer zero extension limit"); in lowerShuffleAsZeroOrAnyExtend()
12066 // Define a helper function to check a particular ext-scale and lower to it if in lowerShuffleAsZeroOrAnyExtend()
12068 auto Lower = [&](int Scale) -> SDValue { in lowerShuffleAsZeroOrAnyExtend()
12093 Offset = M - (i / Scale); in lowerShuffleAsZeroOrAnyExtend()
12095 return SDValue(); // Flip-flopping inputs. in lowerShuffleAsZeroOrAnyExtend()
12097 // Offset must start in the lowest 128-bit lane or at the start of an in lowerShuffleAsZeroOrAnyExtend()
12110 return SDValue(); // Non-consecutive strided elements. in lowerShuffleAsZeroOrAnyExtend()
12114 // If we fail to find an input, we have a zero-shuffle which should always in lowerShuffleAsZeroOrAnyExtend()
12125 return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt, in lowerShuffleAsZeroOrAnyExtend()
12129 // The widest scale possible for extending is to a 64-bit integer. in lowerShuffleAsZeroOrAnyExtend()
12143 // General extends failed, but 128-bit vectors may be able to use MOVQ. in lowerShuffleAsZeroOrAnyExtend()
12148 // MOVQ, copying the lower 64-bits and zero-extending to the upper 64-bits. in lowerShuffleAsZeroOrAnyExtend()
12163 return DAG.getBitcast(VT, V); in lowerShuffleAsZeroOrAnyExtend()
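// For example (illustrative): the v8i16 mask [ 0, zz, 1, zz, 2, zz, 3, zz ]
// (zz = known-zero element) is a Scale == 2 zero extension of the low four
// i16 elements into i32 lanes; with SSE4.1 this becomes a single PMOVZXWD,
// while plain SSE2 uses a PUNPCKLWD against a zero vector instead.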
12175 MVT VT = V.getSimpleValueType(); in getScalarValueForVectorElement() local
12176 MVT EltVT = VT.getVectorElementType(); in getScalarValueForVectorElement()
12182 if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) in getScalarValueForVectorElement()
12202 return V->hasOneUse() && in isShuffleFoldableLoad()
12207 static bool isSoftF16(T VT, const X86Subtarget &Subtarget) { in isSoftF16() argument
12208 T EltVT = VT.getScalarType(); in isSoftF16()
12217 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, in lowerShuffleAsElementInsertion() argument
12220 MVT ExtVT = VT; in lowerShuffleAsElementInsertion()
12221 MVT EltVT = VT.getVectorElementType(); in lowerShuffleAsElementInsertion()
12222 unsigned NumElts = VT.getVectorNumElements(); in lowerShuffleAsElementInsertion()
12223 unsigned EltBits = VT.getScalarSizeInBits(); in lowerShuffleAsElementInsertion()
12229 find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) - in lowerShuffleAsElementInsertion()
12239 // Bail if a non-zero V1 isn't used in place. in lowerShuffleAsElementInsertion()
12242 V1Mask[V2Index] = -1; in lowerShuffleAsElementInsertion()
12252 SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), in lowerShuffleAsElementInsertion()
12258 // Using zext to expand a narrow element won't work for non-zero in lowerShuffleAsElementInsertion()
12264 // Zero-extend directly to i32. in lowerShuffleAsElementInsertion()
12269 // and OR with the zero-extended scalar. in lowerShuffleAsElementInsertion()
12273 SDValue BitMask = getConstVector(Bits, VT, DAG, DL); in lowerShuffleAsElementInsertion()
12274 V1 = DAG.getNode(ISD::AND, DL, VT, V1, BitMask); in lowerShuffleAsElementInsertion()
12276 V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2)); in lowerShuffleAsElementInsertion()
12277 return DAG.getNode(ISD::OR, DL, VT, V1, V2); in lowerShuffleAsElementInsertion()
12283 // Either not inserting from the low element of the input or the input in lowerShuffleAsElementInsertion()
12290 // this. We can't support integer vectors or non-zero targets cheaply. in lowerShuffleAsElementInsertion()
12291 assert(VT == ExtVT && "Cannot change extended type when non-zeroable!"); in lowerShuffleAsElementInsertion()
12292 if (!VT.isFloatingPoint() || V2Index != 0) in lowerShuffleAsElementInsertion()
12294 if (!VT.is128BitVector()) in lowerShuffleAsElementInsertion()
12310 // This lowering only works for the low element with floating point vectors. in lowerShuffleAsElementInsertion()
12311 if (VT.isFloatingPoint() && V2Index != 0) in lowerShuffleAsElementInsertion()
12315 if (ExtVT != VT) in lowerShuffleAsElementInsertion()
12316 V2 = DAG.getBitcast(VT, V2); in lowerShuffleAsElementInsertion()
12323 if (VT.isFloatingPoint() || NumElts <= 4) { in lowerShuffleAsElementInsertion()
12326 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle); in lowerShuffleAsElementInsertion()
12332 V2 = DAG.getBitcast(VT, V2); in lowerShuffleAsElementInsertion()
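// For example (illustrative): the v4f32 mask [ 4, 1, 2, 3 ] keeps V1 in place
// and inserts only the low element of V2, so with a non-zeroable V1 this is
// the register form of MOVSS (and [ 2, 1 ] on v2f64 is likewise MOVSD).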
12338 /// Try to lower broadcast of a single - truncated - integer element,
12342 static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0, in lowerShuffleAsTruncBroadcast() argument
12349 MVT EltVT = VT.getVectorElementType(); in lowerShuffleAsTruncBroadcast()
12352 assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!"); in lowerShuffleAsTruncBroadcast()
12353 assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!"); in lowerShuffleAsTruncBroadcast()
12379 // If we're extracting non-least-significant bits, shift so we can truncate. in lowerShuffleAsTruncBroadcast()
12387 return DAG.getNode(X86ISD::VBROADCAST, DL, VT, in lowerShuffleAsTruncBroadcast()
12396 // This routine only handles 128-bit shufps. in isSingleSHUFPSMask()
12398 assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!"); in isSingleSHUFPSMask()
12399 assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!"); in isSingleSHUFPSMask()
12400 assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!"); in isSingleSHUFPSMask()
12401 assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!"); in isSingleSHUFPSMask()
12403 // To lower with a single SHUFPS we need to have the low half and high half in isSingleSHUFPSMask()
12413 /// Test whether the specified input (0 or 1) is in-place blended by the
12428 /// If we are extracting two 128-bit halves of a vector and shuffling the
12429 /// result, match that to a 256-bit AVX2 vperm* instruction to avoid a
12430 /// multi-shuffle lowering.
12434 MVT VT = N0.getSimpleValueType(); in lowerShuffleOfExtractsAsVperm() local
12435 assert((VT.is128BitVector() && in lowerShuffleOfExtractsAsVperm()
12436 (VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64)) && in lowerShuffleOfExtractsAsVperm()
12437 "VPERM* family of shuffles requires 32-bit or 64-bit elements"); in lowerShuffleOfExtractsAsVperm()
12452 // if the extract of the low half is N1. in lowerShuffleOfExtractsAsVperm()
12453 unsigned NumElts = VT.getVectorNumElements(); in lowerShuffleOfExtractsAsVperm()
12470 NewMask.append(NumElts, -1); in lowerShuffleOfExtractsAsVperm()
12472 // shuf (extract X, 0), (extract X, 4), M --> extract (shuf X, undef, M'), 0 in lowerShuffleOfExtractsAsVperm()
12475 // This is free: ymm -> xmm. in lowerShuffleOfExtractsAsVperm()
12476 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf, in lowerShuffleOfExtractsAsVperm()
12483 /// filtering. While a little annoying to re-dispatch on type here, there isn't
12485 static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsBroadcast() argument
12489 MVT EltVT = VT.getVectorElementType(); in lowerShuffleAsBroadcast()
12490 if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) || in lowerShuffleAsBroadcast()
12492 (Subtarget.hasAVX2() && (VT.isInteger() || EltVT == MVT::f16)))) in lowerShuffleAsBroadcast()
12497 unsigned NumEltBits = VT.getScalarSizeInBits(); in lowerShuffleAsBroadcast()
12498 unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2()) in lowerShuffleAsBroadcast()
12547 BitOffset -= BeginOffset; in lowerShuffleAsBroadcast()
12557 assert((BitOffset % NumEltBits) == 0 && "Illegal bit-offset"); in lowerShuffleAsBroadcast()
12568 if (BitCastSrc && VT.isInteger()) in lowerShuffleAsBroadcast()
12570 DL, VT, V, BroadcastIdx, Subtarget, DAG)) in lowerShuffleAsBroadcast()
12583 cast<LoadSDNode>(V)->isSimple()) { in lowerShuffleAsBroadcast()
12584 // We do not check for one-use of the vector load because a broadcast load in lowerShuffleAsBroadcast()
12590 SDValue BaseAddr = Ld->getOperand(1); in lowerShuffleAsBroadcast()
12591 MVT SVT = VT.getScalarType(); in lowerShuffleAsBroadcast()
12593 assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset"); in lowerShuffleAsBroadcast()
12599 // FIXME: Should we add VBROADCAST_LOAD isel patterns for pre-AVX? in lowerShuffleAsBroadcast()
12601 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in lowerShuffleAsBroadcast()
12602 SDValue Ops[] = {Ld->getChain(), NewAddr}; in lowerShuffleAsBroadcast()
12606 Ld->getMemOperand(), Offset, SVT.getStoreSize())); in lowerShuffleAsBroadcast()
12608 return DAG.getBitcast(VT, V); in lowerShuffleAsBroadcast()
12610 assert(SVT == MVT::f64 && "Unexpected VT!"); in lowerShuffleAsBroadcast()
12611 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, in lowerShuffleAsBroadcast()
12613 Ld->getMemOperand(), Offset, SVT.getStoreSize())); in lowerShuffleAsBroadcast()
12619 // We can only broadcast from the zero-element of a vector register, in lowerShuffleAsBroadcast()
12620 // but it can be advantageous to broadcast from the zero-element of a in lowerShuffleAsBroadcast()
12622 if (!VT.is256BitVector() && !VT.is512BitVector()) in lowerShuffleAsBroadcast()
12625 // VPERMQ/VPERMPD can perform the cross-lane shuffle directly. in lowerShuffleAsBroadcast()
12626 if (VT == MVT::v4f64 || VT == MVT::v4i64) in lowerShuffleAsBroadcast()
12629 // Only broadcast the zero-element of a 128-bit subvector. in lowerShuffleAsBroadcast()
12634 "Unexpected bit-offset"); in lowerShuffleAsBroadcast()
12646 return DAG.getBitcast(VT, V); in lowerShuffleAsBroadcast()
12656 VT.getVectorNumElements()); in lowerShuffleAsBroadcast()
12657 return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V)); in lowerShuffleAsBroadcast()
12660 // We only support broadcasting from 128-bit vectors to minimize the in lowerShuffleAsBroadcast()
12662 // 128-bits, removing as many bitcasts as possible. in lowerShuffleAsBroadcast()
12666 // Otherwise cast V to a vector with the same element type as VT, but in lowerShuffleAsBroadcast()
12667 // possibly narrower than VT. Then perform the broadcast. in lowerShuffleAsBroadcast()
12669 MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), NumSrcElts); in lowerShuffleAsBroadcast()
12670 return DAG.getNode(Opcode, DL, VT, DAG.getBitcast(CastVT, V)); in lowerShuffleAsBroadcast()
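// For example (illustrative): with AVX2 a v8f32 splat mask
// [ 0, 0, 0, 0, 0, 0, 0, 0 ] of a register input becomes a single
// VBROADCASTSS; when V1 is a simple load, the load is instead folded into a
// broadcast-from-memory node so no separate scalar load + shuffle is emitted.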
12693 int VADstIndex = -1; in matchShuffleAsInsertPS()
12694 int VBDstIndex = -1; in matchShuffleAsInsertPS()
12710 // We can only insert a single non-zeroable element. in matchShuffleAsInsertPS()
12723 // Don't bother if we have no (non-zeroable) element for insertion. in matchShuffleAsInsertPS()
12737 VBSrcIndex = CandidateMask[VBDstIndex] - 4; in matchShuffleAsInsertPS()
12741 // the zero mask and the V2 insertion - so remove V1 dependency. in matchShuffleAsInsertPS()
12783 /// Handle lowering of 2-lane 64-bit floating point shuffles.
12785 /// This is the basis function for the 2-lane 64-bit shuffles as we have full
12821 assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!"); in lowerV2F64Shuffle()
12822 assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!"); in lowerV2F64Shuffle()
12837 int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2), in lowerV2F64Shuffle()
12838 Mask[1] < 0 ? -1 : (Mask[1] ^ 2)}; in lowerV2F64Shuffle()
12844 // blend patterns if a zero-blend above didn't work. in lowerV2F64Shuffle()
12848 // We can either use a special instruction to load over the low double or in lowerV2F64Shuffle()
12849 // to move just the low double. in lowerV2F64Shuffle()
12863 unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); in lowerV2F64Shuffle()
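// Worked example (illustrative sketch, using the DL/DAG/V1/V2 values above):
// the mask [ 1, 3 ] takes the high double of each input, so SHUFPDMask == 3
// and the node built is equivalent to
//   DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
//               DAG.getTargetConstant(3, DL, MVT::i8));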
12868 /// Handle lowering of 2-lane 64-bit integer shuffles.
12870 /// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by
12892 int WidenedMask[4] = {Mask[0] < 0 ? -1 : (Mask[0] * 2), in lowerV2I64Shuffle()
12893 Mask[0] < 0 ? -1 : ((Mask[0] * 2) + 1), in lowerV2I64Shuffle()
12894 Mask[1] < 0 ? -1 : (Mask[1] * 2), in lowerV2I64Shuffle()
12895 Mask[1] < 0 ? -1 : ((Mask[1] * 2) + 1)}; in lowerV2I64Shuffle()
12901 assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!"); in lowerV2I64Shuffle()
12902 assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!"); in lowerV2I64Shuffle()
12941 // It's more profitable for pre-SSSE3 to use shuffles/unpacks. in lowerV2I64Shuffle()
12974 static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT, in lowerShuffleWithSHUFPS() argument
12982 int V2Index = find_if(Mask, [](int M) { return M >= 4; }) - Mask.begin(); in lowerShuffleWithSHUFPS()
12985 // the low bit. in lowerShuffleWithSHUFPS()
12994 NewMask[V2Index] -= 4; in lowerShuffleWithSHUFPS()
12999 int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0}; in lowerShuffleWithSHUFPS()
13000 V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1, in lowerShuffleWithSHUFPS()
13004 // high or low half formed. in lowerShuffleWithSHUFPS()
13016 // Handle the easy case where we have V1 in the low lanes and V2 in the in lowerShuffleWithSHUFPS()
13018 NewMask[2] -= 4; in lowerShuffleWithSHUFPS()
13019 NewMask[3] -= 4; in lowerShuffleWithSHUFPS()
13024 NewMask[0] -= 4; in lowerShuffleWithSHUFPS()
13025 NewMask[1] -= 4; in lowerShuffleWithSHUFPS()
13029 // We have a mixture of V1 and V2 in both low and high lanes. Rather than in lowerShuffleWithSHUFPS()
13037 (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4, in lowerShuffleWithSHUFPS()
13038 (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4}; in lowerShuffleWithSHUFPS()
13039 V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, in lowerShuffleWithSHUFPS()
13055 return lowerShuffleWithSHUFPS(DL, VT, NewMask, V2, V1, DAG); in lowerShuffleWithSHUFPS()
13057 return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV, in lowerShuffleWithSHUFPS()
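// For example (illustrative): the v4f32 mask [ 0, 2, 4, 6 ] takes its low two
// elements from V1 and its high two from V2, so it needs no fix-up blend and
// lowers to a single SHUFPS with immediate 0x88 (element 0 and element 2 of
// each source).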
13061 /// Lower 4-lane 32-bit floating point shuffles.
13128 // There are special ways we can lower some single-element blends. However, we in lowerV4F32Shuffle()
13129 // have custom ways we can lower more complex single-element blends below that in lowerV4F32Shuffle()
13131 // when the V2 input is targeting element 0 of the mask -- that is the fast in lowerV4F32Shuffle()
13149 // Use low/high mov instructions. These are only valid in SSE1 because in lowerV4F32Shuffle()
13166 /// Lower 4-lane i32 vector shuffles.
13168 /// We try to handle these with integer-domain shuffles where we can, but for
13200 // Try to use broadcast unless the mask only has one non-undef element. in lowerV4I32Shuffle()
13233 // There are special ways we can lower some single-element blends. in lowerV4I32Shuffle()
13256 // It's more profitable for pre-SSSE3 to use shuffles/unpacks. in lowerV4I32Shuffle()
13295 /// Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
13307 /// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each
13309 /// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
13310 /// vector, form the analogous 128-bit 8-element Mask.
13312 const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask, in lowerV8I16GeneralSingleInputShuffle() argument
13314 assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!"); in lowerV8I16GeneralSingleInputShuffle()
13315 MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); in lowerV8I16GeneralSingleInputShuffle()
13324 return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, in lowerV8I16GeneralSingleInputShuffle()
13330 HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4)); in lowerV8I16GeneralSingleInputShuffle()
13331 return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, in lowerV8I16GeneralSingleInputShuffle()
13343 int NumLToL = llvm::lower_bound(LoInputs, 4) - LoInputs.begin(); in lowerV8I16GeneralSingleInputShuffle()
13344 int NumHToL = LoInputs.size() - NumLToL; in lowerV8I16GeneralSingleInputShuffle()
13345 int NumLToH = llvm::lower_bound(HiInputs, 4) - HiInputs.begin(); in lowerV8I16GeneralSingleInputShuffle()
13346 int NumHToH = HiInputs.size() - NumLToH; in lowerV8I16GeneralSingleInputShuffle()
13352 // If we are shuffling values from one half - check how many different DWORD in lowerV8I16GeneralSingleInputShuffle()
13357 V = DAG.getNode(ShufWOp, DL, VT, V, in lowerV8I16GeneralSingleInputShuffle()
13362 return DAG.getBitcast(VT, V); in lowerV8I16GeneralSingleInputShuffle()
13366 int PSHUFDMask[4] = { -1, -1, -1, -1 }; in lowerV8I16GeneralSingleInputShuffle()
13398 DWordPairs.resize(2, std::make_pair(-1, -1)); in lowerV8I16GeneralSingleInputShuffle()
13408 // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all in lowerV8I16GeneralSingleInputShuffle()
13413 // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h] in lowerV8I16GeneralSingleInputShuffle()
13414 // Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5] in lowerV8I16GeneralSingleInputShuffle()
13416 // However in some very rare cases we have a 1-into-3 or 3-into-1 on one half in lowerV8I16GeneralSingleInputShuffle()
13417 // and an existing 2-into-2 on the other half. In this case we may have to in lowerV8I16GeneralSingleInputShuffle()
13418 // pre-shuffle the 2-into-2 half to avoid turning it into a 3-into-1 or in lowerV8I16GeneralSingleInputShuffle()
13419 // 1-into-3 which could cause us to cycle endlessly fixing each side in turn. in lowerV8I16GeneralSingleInputShuffle()
13420 // Fortunately, we don't have to handle anything but a 2-into-2 pattern in lowerV8I16GeneralSingleInputShuffle()
13421 // because any other situation (including a 3-into-1 or 1-into-3 in the other in lowerV8I16GeneralSingleInputShuffle()
13422 // half than the one we target for fixing) will be fixed when we re-enter this in lowerV8I16GeneralSingleInputShuffle()
13426 // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h] in lowerV8I16GeneralSingleInputShuffle()
13427 // Mask: [3, 7, 1, 0, 2, 7, 3, 5] -THIS-IS-BAD!!!!-> [5, 7, 1, 0, 4, 7, 5, 3] in lowerV8I16GeneralSingleInputShuffle()
13429 // This now has a 1-into-3 in the high half! Instead, we do two shuffles: in lowerV8I16GeneralSingleInputShuffle()
13431 // Input: [a, b, c, d, e, f, g, h] PSHUFHW[0,2,1,3]-> [a, b, c, d, e, g, f, h] in lowerV8I16GeneralSingleInputShuffle()
13432 // Mask: [3, 7, 1, 0, 2, 7, 3, 5] -----------------> [3, 7, 1, 0, 2, 7, 3, 6] in lowerV8I16GeneralSingleInputShuffle()
13434 // Input: [a, b, c, d, e, g, f, h] -PSHUFD[0,2,1,3]-> [a, b, e, g, c, d, f, h] in lowerV8I16GeneralSingleInputShuffle()
13435 // Mask: [3, 7, 1, 0, 2, 7, 3, 6] -----------------> [5, 7, 1, 0, 4, 7, 5, 6] in lowerV8I16GeneralSingleInputShuffle()
13462 TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0); in lowerV8I16GeneralSingleInputShuffle()
13469 // Check for one tricky case: We're fixing a 3<-1 or a 1<-3 shuffle for AToA in lowerV8I16GeneralSingleInputShuffle()
13471 // inputs, we don't try to fix it necessarily -- we'll recurse and see it in in lowerV8I16GeneralSingleInputShuffle()
13472 // the next pass. However, if we have a 2<-2 in the BToB and AToB inputs, it in lowerV8I16GeneralSingleInputShuffle()
13473 // is essential that we don't *create* a 3<-1 as then we might oscillate. in lowerV8I16GeneralSingleInputShuffle()
13477 // to balance this to ensure we don't form a 3-1 shuffle in the other in lowerV8I16GeneralSingleInputShuffle()
13534 VT, in lowerV8I16GeneralSingleInputShuffle()
13545 // Recurse back into this routine to re-compute state now that this isn't in lowerV8I16GeneralSingleInputShuffle()
13547 return lowerV8I16GeneralSingleInputShuffle(DL, VT, V, Mask, Subtarget, DAG); in lowerV8I16GeneralSingleInputShuffle()
13554 // At this point there are at most two inputs to the low and high halves from in lowerV8I16GeneralSingleInputShuffle()
13557 // We use at most one low and one high word shuffle to collect these paired in lowerV8I16GeneralSingleInputShuffle()
13559 int PSHUFLMask[4] = {-1, -1, -1, -1}; in lowerV8I16GeneralSingleInputShuffle()
13560 int PSHUFHMask[4] = {-1, -1, -1, -1}; in lowerV8I16GeneralSingleInputShuffle()
13561 int PSHUFDMask[4] = {-1, -1, -1, -1}; in lowerV8I16GeneralSingleInputShuffle()
13564 // original halves. This will then dictate the targets of the cross-half in lowerV8I16GeneralSingleInputShuffle()
13573 SourceHalfMask[InPlaceInputs[0] - HalfOffset] = in lowerV8I16GeneralSingleInputShuffle()
13574 InPlaceInputs[0] - HalfOffset; in lowerV8I16GeneralSingleInputShuffle()
13581 SourceHalfMask[Input - HalfOffset] = Input - HalfOffset; in lowerV8I16GeneralSingleInputShuffle()
13588 SourceHalfMask[InPlaceInputs[0] - HalfOffset] = in lowerV8I16GeneralSingleInputShuffle()
13589 InPlaceInputs[0] - HalfOffset; in lowerV8I16GeneralSingleInputShuffle()
13591 // a dword. We find the adjacent index by toggling the low bit. in lowerV8I16GeneralSingleInputShuffle()
13593 SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset; in lowerV8I16GeneralSingleInputShuffle()
13600 // Now gather the cross-half inputs and place them into a free dword of in lowerV8I16GeneralSingleInputShuffle()
13603 // look more like the 3-1 fixing operation. in lowerV8I16GeneralSingleInputShuffle()
13628 if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) { in lowerV8I16GeneralSingleInputShuffle()
13629 if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) { in lowerV8I16GeneralSingleInputShuffle()
13630 SourceHalfMask[SourceHalfMask[Input - SourceOffset]] = in lowerV8I16GeneralSingleInputShuffle()
13631 Input - SourceOffset; in lowerV8I16GeneralSingleInputShuffle()
13634 if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset) in lowerV8I16GeneralSingleInputShuffle()
13637 M = SourceHalfMask[Input - SourceOffset] + SourceOffset; in lowerV8I16GeneralSingleInputShuffle()
13639 assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == in lowerV8I16GeneralSingleInputShuffle()
13640 Input - SourceOffset && in lowerV8I16GeneralSingleInputShuffle()
13643 // Note that this correctly re-maps both when we do a swap and when in lowerV8I16GeneralSingleInputShuffle()
13646 Input = SourceHalfMask[Input - SourceOffset] + SourceOffset; in lowerV8I16GeneralSingleInputShuffle()
13650 if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0) in lowerV8I16GeneralSingleInputShuffle()
13651 PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2; in lowerV8I16GeneralSingleInputShuffle()
13653 assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == in lowerV8I16GeneralSingleInputShuffle()
13658 // And just directly shift any other-half mask elements to be same-half in lowerV8I16GeneralSingleInputShuffle()
13663 M = M - SourceOffset + DestOffset; in lowerV8I16GeneralSingleInputShuffle()
13673 if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) { in lowerV8I16GeneralSingleInputShuffle()
13674 int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) + in lowerV8I16GeneralSingleInputShuffle()
13676 SourceHalfMask[InputFixed - SourceOffset] = in lowerV8I16GeneralSingleInputShuffle()
13677 IncomingInputs[0] - SourceOffset; in lowerV8I16GeneralSingleInputShuffle()
13684 isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) { in lowerV8I16GeneralSingleInputShuffle()
13685 // We have two non-adjacent or clobbered inputs we need to extract from in lowerV8I16GeneralSingleInputShuffle()
13688 int InputsFixed[2] = {IncomingInputs[0] - SourceOffset, in lowerV8I16GeneralSingleInputShuffle()
13689 IncomingInputs[1] - SourceOffset}; in lowerV8I16GeneralSingleInputShuffle()
13715 // (because there are no off-half inputs to this half) and there is no in lowerV8I16GeneralSingleInputShuffle()
13717 // swap an input with a non-input. in lowerV8I16GeneralSingleInputShuffle()
13769 V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, in lowerV8I16GeneralSingleInputShuffle()
13772 V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, in lowerV8I16GeneralSingleInputShuffle()
13776 VT, in lowerV8I16GeneralSingleInputShuffle()
13783 "Failed to lift all the high half inputs to the low mask!"); in lowerV8I16GeneralSingleInputShuffle()
13785 "Failed to lift all the low half inputs to the high mask!"); in lowerV8I16GeneralSingleInputShuffle()
13787 // Do a half shuffle for the low mask. in lowerV8I16GeneralSingleInputShuffle()
13789 V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, in lowerV8I16GeneralSingleInputShuffle()
13795 M -= 4; in lowerV8I16GeneralSingleInputShuffle()
13797 V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, in lowerV8I16GeneralSingleInputShuffle()
13803 /// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the
13806 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, in lowerShuffleAsBlendOfPSHUFBs() argument
13808 assert(!is128BitLaneCrossingShuffleMask(VT, Mask) && in lowerShuffleAsBlendOfPSHUFBs()
13811 int NumBytes = VT.getSizeInBits() / 8; in lowerShuffleAsBlendOfPSHUFBs()
13827 int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale; in lowerShuffleAsBlendOfPSHUFBs()
13853 return DAG.getBitcast(VT, V); in lowerShuffleAsBlendOfPSHUFBs()
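// For example (illustrative): in a v16i8 blend of two byte shuffles, every
// PSHUFB control byte whose top bit is set (the ZeroMask encoding above)
// writes a zero into its lane, so each PSHUFB result has the other input's
// lanes zeroed and the two results are simply OR'd together.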
13856 /// Generic lowering of 8-lane i16 shuffles.
13858 /// This handles both single-input shuffles and combined shuffle/blends with
13863 /// enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle
13865 /// the two inputs, try to interleave them. Otherwise, blend the low and high
13927 "All single-input shuffles should be canonicalized to be V1-input " in lowerV8I16Shuffle()
13942 // There are special ways we can lower some single-element blends. in lowerV8I16Shuffle()
13992 // Check if this is part of a 256-bit vector truncation. in lowerV8I16Shuffle()
14008 for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1)) in lowerV8I16Shuffle()
14038 // When compacting odd (upper) elements, use PACKSS pre-SSE41. in lowerV8I16Shuffle()
14065 // We can always bit-blend if we have to so the fallback strategy is to in lowerV8I16Shuffle()
14066 // decompose into single-input permutes and blends/unpacks. in lowerV8I16Shuffle()
14071 /// Lower 8-lane 16-bit floating point shuffles.
14100 // Lowers unary/binary shuffle as VPERMV/VPERMV3; for non-VLX targets, in lowerShuffleWithPERMV()
14101 // sub-512-bit shuffles are padded to 512-bits for the shuffle and then
14103 static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, in lowerShuffleWithPERMV() argument
14107 MVT MaskVT = VT.changeTypeToInteger(); in lowerShuffleWithPERMV()
14109 MVT ShuffleVT = VT; in lowerShuffleWithPERMV()
14110 if (!VT.is512BitVector() && !Subtarget.hasVLX()) { in lowerShuffleWithPERMV()
14116 int NumElts = VT.getVectorNumElements(); in lowerShuffleWithPERMV()
14117 unsigned Scale = 512 / VT.getSizeInBits(); in lowerShuffleWithPERMV()
14121 M += (Scale - 1) * NumElts; in lowerShuffleWithPERMV()
14134 if (VT != ShuffleVT) in lowerShuffleWithPERMV()
14135 Result = extractSubVector(Result, 0, DAG, DL, VT.getSizeInBits()); in lowerShuffleWithPERMV()
14144 /// UNPCK to spread the i8 elements across two i16-element vectors, and uses
14145 /// the existing lowering for v8i16 blends on each half, finally PACK-ing them
14193 // For single-input shuffles, there are some nicer lowering tricks we can use. in lowerV16I8Shuffle()
14209 // Notably, this handles splat and partial-splat shuffles more efficiently. in lowerV16I8Shuffle()
14210 // However, it only makes sense if the pre-duplication shuffle simplifies in lowerV16I8Shuffle()
14212 // express the pre-duplication shuffle as an i16 shuffle. in lowerV16I8Shuffle()
14223 auto tryToWidenViaDuplication = [&]() -> SDValue { in lowerV16I8Shuffle()
14240 int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1}; in lowerV16I8Shuffle()
14249 // there are two adjacent bytes after we move the low one. in lowerV16I8Shuffle()
14284 int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; in lowerV16I8Shuffle()
14287 int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8); in lowerV16I8Shuffle()
14323 // blends but after all of the single-input lowerings. If the single input in lowerV16I8Shuffle()
14332 // as a PACKUS(AND(),AND()) - which is quicker than UNPACK(PSHUFB(),PSHUFB()). in lowerV16I8Shuffle()
14345 // do so. This avoids using them to handle blends-with-zero which is in lowerV16I8Shuffle()
14358 // FIXME: It might be worth trying to detect if the unpack-feeding in lowerV16I8Shuffle()
14365 // AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8). in lowerV16I8Shuffle()
14376 // Use PALIGNR+Permute if possible - permute might become PSHUFB but the in lowerV16I8Shuffle()
14386 // There are special ways we can lower some single-element blends. in lowerV16I8Shuffle()
14411 for (unsigned i = 0; i != 8; i += 1 << (NumEvenDrops - 1)) in lowerV16I8Shuffle()
14443 // Handle multi-input cases by blending/unpacking single-input shuffles. in lowerV16I8Shuffle()
14448 // The fallback path for single-input shuffles widens this into two v8i16 in lowerV16I8Shuffle()
14453 std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}}; in lowerV16I8Shuffle()
14454 std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}}; in lowerV16I8Shuffle()
14481 // Otherwise just unpack the low half of V into VLoHalf and the high half into in lowerV16I8Shuffle()
14497 /// Dispatching routine to lower various 128-bit x86 vector shuffles.
14499 /// This routine breaks down the specific type of 128-bit shuffle and
14502 MVT VT, SDValue V1, SDValue V2, in lower128BitShuffle() argument
14506 if (VT == MVT::v8bf16) { in lower128BitShuffle()
14509 return DAG.getBitcast(VT, in lower128BitShuffle()
14513 switch (VT.SimpleTy) { in lower128BitShuffle()
14534 /// Generic routine to split vector shuffle into half-sized shuffles.
14539 static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1, in splitAndLowerShuffle() argument
14542 assert(VT.getSizeInBits() >= 256 && in splitAndLowerShuffle()
14543 "Only for 256-bit or wider vector shuffles!"); in splitAndLowerShuffle()
14544 assert(V1.getSimpleValueType() == VT && "Bad operand type!"); in splitAndLowerShuffle()
14545 assert(V2.getSimpleValueType() == VT && "Bad operand type!"); in splitAndLowerShuffle()
14550 int NumElements = VT.getVectorNumElements(); in splitAndLowerShuffle()
14552 MVT ScalarVT = VT.getVectorElementType(); in splitAndLowerShuffle()
14555 // Use splitVector/extractSubVector so that split build-vectors just build two in splitAndLowerShuffle()
14568 // Now create two 4-way blends of these half-width vectors. in splitAndLowerShuffle()
14589 auto CheckHalfBlendUsable = [&](const ArrayRef<int> &HalfMask) -> bool { in splitAndLowerShuffle()
14600 SmallVector<int, 32> V1BlendMask((unsigned)SplitNumElements, -1); in splitAndLowerShuffle()
14601 SmallVector<int, 32> V2BlendMask((unsigned)SplitNumElements, -1); in splitAndLowerShuffle()
14602 SmallVector<int, 32> BlendMask((unsigned)SplitNumElements, -1); in splitAndLowerShuffle()
14606 V2BlendMask[i] = M - NumElements; in splitAndLowerShuffle()
14619 // a minimal number of high-level vector shuffle nodes. in splitAndLowerShuffle()
14638 BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements); in splitAndLowerShuffle()
14657 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); in splitAndLowerShuffle()
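// Worked example (illustrative): splitting a v8i32 shuffle gives
// SplitNumElements == 4; a mask value M == 9 landing in result element 5
// (element 1 of the high half) yields V2BlendMask[1] = 9 - 8 = 1, i.e. take
// element 1 of V2, which lives in V2's low 128-bit half, when forming the
// high half of the result before the final per-half blend.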
14663 /// This is provided as a good fallback for many lowerings of non-single-input
14664 /// shuffles with more than one 128-bit lane. In those cases, we want to select
14665 /// between splitting the shuffle into 128-bit components and stitching those
14666 /// back together vs. extracting the single-input shuffles and blending those
14668 static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleAsSplitOrBlend() argument
14672 assert(!V2.isUndef() && "This routine must not be used to lower single-input " in lowerShuffleAsSplitOrBlend()
14680 int V1BroadcastIdx = -1, V2BroadcastIdx = -1; in lowerShuffleAsSplitOrBlend()
14684 V2BroadcastIdx = M - Size; in lowerShuffleAsSplitOrBlend()
14685 else if (M - Size != V2BroadcastIdx) in lowerShuffleAsSplitOrBlend()
14696 return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget, in lowerShuffleAsSplitOrBlend()
14699 // If the inputs all stem from a single 128-bit lane of each input, then we in lowerShuffleAsSplitOrBlend()
14702 int LaneCount = VT.getSizeInBits() / 128; in lowerShuffleAsSplitOrBlend()
14711 return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, in lowerShuffleAsSplitOrBlend()
14715 // requires that the decomposed single-input shuffles don't end up here. in lowerShuffleAsSplitOrBlend()
14716 return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget, in lowerShuffleAsSplitOrBlend()
14721 // TODO: Extend to support v8f32 (+ 512-bit shuffles).
14722 static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT, in lowerShuffleAsLanePermuteAndSHUFP() argument
14726 assert(VT == MVT::v4f64 && "Only for v4f64 shuffles"); in lowerShuffleAsLanePermuteAndSHUFP()
14728 int LHSMask[4] = {-1, -1, -1, -1}; in lowerShuffleAsLanePermuteAndSHUFP()
14729 int RHSMask[4] = {-1, -1, -1, -1}; in lowerShuffleAsLanePermuteAndSHUFP()
14744 SDValue LHS = DAG.getVectorShuffle(VT, DL, V1, V2, LHSMask); in lowerShuffleAsLanePermuteAndSHUFP()
14745 SDValue RHS = DAG.getVectorShuffle(VT, DL, V1, V2, RHSMask); in lowerShuffleAsLanePermuteAndSHUFP()
14746 return DAG.getNode(X86ISD::SHUFP, DL, VT, LHS, RHS, in lowerShuffleAsLanePermuteAndSHUFP()
14750 /// Lower a vector shuffle crossing multiple 128-bit lanes as
14751 /// a lane permutation followed by a per-lane permutation.
14753 /// This is mainly for cases where we can have non-repeating permutes
14759 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, in lowerShuffleAsLanePermuteAndPermute() argument
14761 int NumElts = VT.getVectorNumElements(); in lowerShuffleAsLanePermuteAndPermute()
14762 int NumLanes = VT.getSizeInBits() / 128; in lowerShuffleAsLanePermuteAndPermute()
14771 auto getSublanePermute = [&](int NumSublanes) -> SDValue { in lowerShuffleAsLanePermuteAndPermute()
14813 // TODO - isShuffleMaskInputInPlace could be extended to something like in lowerShuffleAsLanePermuteAndPermute()
14825 if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1)) in lowerShuffleAsLanePermuteAndPermute()
14835 SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask); in lowerShuffleAsLanePermuteAndPermute()
14836 return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT), in lowerShuffleAsLanePermuteAndPermute()
14848 // Then attempt a solution with 64-bit sublanes (vpermq). in lowerShuffleAsLanePermuteAndPermute()
14852 // If that doesn't work and we have fast variable cross-lane shuffle, in lowerShuffleAsLanePermuteAndPermute()
14853 // attempt 32-bit sublanes (vpermd). in lowerShuffleAsLanePermuteAndPermute()
14874 /// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one
14878 /// single-input cross lane shuffle which is lower than any other fully general
14879 /// cross-lane shuffle strategy I'm aware of. Special cases for each particular
14882 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, in lowerShuffleAsLanePermuteAndShuffle() argument
14884 // FIXME: This should probably be generalized for 512-bit vectors as well. in lowerShuffleAsLanePermuteAndShuffle()
14885 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!"); in lowerShuffleAsLanePermuteAndShuffle()
14892 if (VT == MVT::v4f64 && in lowerShuffleAsLanePermuteAndShuffle()
14894 return lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG); in lowerShuffleAsLanePermuteAndShuffle()
14896 // If there are only inputs from one 128-bit lane, splitting will in fact be in lowerShuffleAsLanePermuteAndShuffle()
14914 // TODO - we could support shuffling V2 in the Flipped input. in lowerShuffleAsLanePermuteAndShuffle()
14921 assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) && in lowerShuffleAsLanePermuteAndShuffle()
14922 "In-lane shuffle mask expected"); in lowerShuffleAsLanePermuteAndShuffle()
14926 if (!AllLanes && !is128BitLaneRepeatedShuffleMask(VT, InLaneMask)) in lowerShuffleAsLanePermuteAndShuffle()
14927 return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, in lowerShuffleAsLanePermuteAndShuffle()
14930 // Flip the lanes, and shuffle the results which should now be in-lane. in lowerShuffleAsLanePermuteAndShuffle()
14931 MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; in lowerShuffleAsLanePermuteAndShuffle()
14935 Flipped = DAG.getBitcast(VT, Flipped); in lowerShuffleAsLanePermuteAndShuffle()
14936 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask); in lowerShuffleAsLanePermuteAndShuffle()
14939 /// Handle lowering 2-lane 128-bit shuffles.
14940 static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, in lowerV2X128Shuffle() argument
14951 MVT MemVT = VT.getHalfNumVectorElementsVT(); in lowerV2X128Shuffle()
14955 VT, MemVT, Ld, Ofs, DAG)) in lowerV2X128Shuffle()
14975 MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); in lowerV2X128Shuffle()
14978 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, in lowerV2X128Shuffle()
14979 getZeroVector(VT, Subtarget, DAG, DL), LoV, in lowerV2X128Shuffle()
14987 // Blends are faster and handle all the non-lane-crossing cases. in lowerV2X128Shuffle()
14988 if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, in lowerV2X128Shuffle()
14995 // Check for patterns which can be matched with a single insert of a 128-bit in lowerV2X128Shuffle()
15001 // this will likely become vinsertf128 which can't fold a 256-bit memop. in lowerV2X128Shuffle()
15003 MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); in lowerV2X128Shuffle()
15007 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec, in lowerV2X128Shuffle()
15017 return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, in lowerV2X128Shuffle()
15023 // Otherwise form a 128-bit permutation. After accounting for undefs, in lowerV2X128Shuffle()
15024 // convert the 64-bit shuffle mask selection values into 128-bit in lowerV2X128Shuffle()
15029 // [1:0] - select 128 bits from sources for low half of destination in lowerV2X128Shuffle()
15030 // [2] - ignore in lowerV2X128Shuffle()
15031 // [3] - zero low half of destination in lowerV2X128Shuffle()
15032 // [5:4] - select 128 bits from sources for high half of destination in lowerV2X128Shuffle()
15033 // [6] - ignore in lowerV2X128Shuffle()
15034 // [7] - zero high half of destination in lowerV2X128Shuffle()
15045 V1 = DAG.getUNDEF(VT); in lowerV2X128Shuffle()
15047 V2 = DAG.getUNDEF(VT); in lowerV2X128Shuffle()
15049 return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, in lowerV2X128Shuffle()
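// For example (illustrative): swapping the two 128-bit halves of a single
// v4f64 input is the widened selection [ 1, 0 ] (high half of V1 into the low
// destination half, low half of V1 into the high one), which encodes as
// VPERM2F128 with immediate 0x01.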
15053 /// Lower a vector shuffle by first fixing the 128-bit lanes and then
15061 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, in lowerShuffleAsLanePermuteAndRepeatedMask() argument
15065 if (is128BitLaneRepeatedShuffleMask(VT, Mask)) in lowerShuffleAsLanePermuteAndRepeatedMask()
15069 int NumLanes = VT.getSizeInBits() / 128; in lowerShuffleAsLanePermuteAndRepeatedMask()
15070 int NumLaneElts = 128 / VT.getScalarSizeInBits(); in lowerShuffleAsLanePermuteAndRepeatedMask()
15071 SmallVector<int, 16> RepeatMask(NumLaneElts, -1); in lowerShuffleAsLanePermuteAndRepeatedMask()
15072 SmallVector<std::array<int, 2>, 2> LaneSrcs(NumLanes, {{-1, -1}}); in lowerShuffleAsLanePermuteAndRepeatedMask()
15077 int Srcs[2] = {-1, -1}; in lowerShuffleAsLanePermuteAndRepeatedMask()
15078 SmallVector<int, 16> InLaneMask(NumLaneElts, -1); in lowerShuffleAsLanePermuteAndRepeatedMask()
15177 SmallVector<int, 16> NewMask(NumElts, -1); in lowerShuffleAsLanePermuteAndRepeatedMask()
15181 int M = -1; in lowerShuffleAsLanePermuteAndRepeatedMask()
15187 SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); in lowerShuffleAsLanePermuteAndRepeatedMask()
15192 cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask) in lowerShuffleAsLanePermuteAndRepeatedMask()
15198 int M = -1; in lowerShuffleAsLanePermuteAndRepeatedMask()
15204 SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); in lowerShuffleAsLanePermuteAndRepeatedMask()
15209 cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask) in lowerShuffleAsLanePermuteAndRepeatedMask()
15214 NewMask[i] = -1; in lowerShuffleAsLanePermuteAndRepeatedMask()
15223 return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask); in lowerShuffleAsLanePermuteAndRepeatedMask()
15246 HalfIdx1 = -1; in getHalfShuffleMask()
15247 HalfIdx2 = -1; in getHalfShuffleMask()
15291 MVT VT = V1.getSimpleValueType(); in getShuffleHalfVectors() local
15292 MVT HalfVT = VT.getHalfNumVectorElementsVT(); in getShuffleHalfVectors()
15313 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1); in getShuffleHalfVectors()
15317 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, in getShuffleHalfVectors()
15321 /// Lower shuffles where an entire half of a 256- or 512-bit vector is UNDEF.
15324 static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleWithUndefHalf() argument
15328 assert((VT.is256BitVector() || VT.is512BitVector()) && in lowerShuffleWithUndefHalf()
15329 "Expected 256-bit or 512-bit vector"); in lowerShuffleWithUndefHalf()
15340 MVT HalfVT = VT.getHalfNumVectorElementsVT(); in lowerShuffleWithUndefHalf()
15346 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi, in lowerShuffleWithUndefHalf()
15356 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi, in lowerShuffleWithUndefHalf()
15376 unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); in lowerShuffleWithUndefHalf()
15379 // Always extract lowers when setting lower - these are all free subreg ops. in lowerShuffleWithUndefHalf()
15385 // AVX2 has efficient 32/64-bit element cross-lane shuffles. in lowerShuffleWithUndefHalf()
15400 // AVX512 has efficient cross-lane shuffles for all legal 512-bit types. in lowerShuffleWithUndefHalf()
15401 if (Subtarget.hasAVX512() && VT.is512BitVector()) in lowerShuffleWithUndefHalf()
15413 // UndefLower - uuuuXXXX: an insert to high half is required if we split this. in lowerShuffleWithUndefHalf()
15415 // AVX2 has efficient 64-bit element cross-lane shuffles. in lowerShuffleWithUndefHalf()
15419 // AVX512 has efficient cross-lane shuffles for all legal 512-bit types. in lowerShuffleWithUndefHalf()
15420 if (Subtarget.hasAVX512() && VT.is512BitVector()) in lowerShuffleWithUndefHalf()
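// For example (illustrative): a v8f32 shuffle with mask
// [ 0, 8, 1, 9, u, u, u, u ] only defines the low 128 bits of the result, so
// it can be lowered as a v4f32 unpcklps of the two extracted low halves and
// then placed back with a free subregister insert into an undef vector.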
15431 /// Handle case where shuffle sources are coming from the same 128-bit lane and
15432 /// every lane can be represented as the same repeating mask - allowing us to
15436 const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, in lowerShuffleAsRepeatedMaskAndLanePermute() argument
15438 int NumElts = VT.getVectorNumElements(); in lowerShuffleAsRepeatedMaskAndLanePermute()
15439 int NumLanes = VT.getSizeInBits() / 128; in lowerShuffleAsRepeatedMaskAndLanePermute()
15446 if (BroadcastSize <= VT.getScalarSizeInBits()) in lowerShuffleAsRepeatedMaskAndLanePermute()
15448 int NumBroadcastElts = BroadcastSize / VT.getScalarSizeInBits(); in lowerShuffleAsRepeatedMaskAndLanePermute()
15451 // accounting for UNDEFs but only references the lowest 128-bit in lowerShuffleAsRepeatedMaskAndLanePermute()
15469 SmallVector<int, 8> RepeatMask((unsigned)NumElts, -1); in lowerShuffleAsRepeatedMaskAndLanePermute()
15474 SDValue RepeatShuf = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatMask); in lowerShuffleAsRepeatedMaskAndLanePermute()
15477 SmallVector<int, 8> BroadcastMask((unsigned)NumElts, -1); in lowerShuffleAsRepeatedMaskAndLanePermute()
15487 return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT), in lowerShuffleAsRepeatedMaskAndLanePermute()
15492 // Bail if the shuffle mask doesn't cross 128-bit lanes. in lowerShuffleAsRepeatedMaskAndLanePermute()
15493 if (!is128BitLaneCrossingShuffleMask(VT, Mask)) in lowerShuffleAsRepeatedMaskAndLanePermute()
15497 if (is128BitLaneRepeatedShuffleMask(VT, Mask)) in lowerShuffleAsRepeatedMaskAndLanePermute()
15507 // can form a repeating shuffle mask (local to each sub-lane). At the same in lowerShuffleAsRepeatedMaskAndLanePermute()
15508 // time, determine the source sub-lane for each destination sub-lane. in lowerShuffleAsRepeatedMaskAndLanePermute()
15509 int TopSrcSubLane = -1; in lowerShuffleAsRepeatedMaskAndLanePermute()
15510 SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1); in lowerShuffleAsRepeatedMaskAndLanePermute()
15516 // Extract the sub-lane mask, check that it all comes from the same lane in lowerShuffleAsRepeatedMaskAndLanePermute()
15518 int SrcLane = -1; in lowerShuffleAsRepeatedMaskAndLanePermute()
15519 SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1); in lowerShuffleAsRepeatedMaskAndLanePermute()
15532 // Whole sub-lane is UNDEF. in lowerShuffleAsRepeatedMaskAndLanePermute()
15536 // Attempt to match against the candidate repeated sub-lane masks. in lowerShuffleAsRepeatedMaskAndLanePermute()
15552 // Merge the sub-lane mask into the matching repeated sub-lane mask. in lowerShuffleAsRepeatedMaskAndLanePermute()
15562 // Track the topmost source sub-lane - by setting the remaining to in lowerShuffleAsRepeatedMaskAndLanePermute()
15570 // Bail if we failed to find a matching repeated sub-lane mask. in lowerShuffleAsRepeatedMaskAndLanePermute()
15578 SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1); in lowerShuffleAsRepeatedMaskAndLanePermute()
15591 // Shuffle each source sub-lane to its destination. in lowerShuffleAsRepeatedMaskAndLanePermute()
15592 SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1); in lowerShuffleAsRepeatedMaskAndLanePermute()
15607 DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask); in lowerShuffleAsRepeatedMaskAndLanePermute()
15609 return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT), in lowerShuffleAsRepeatedMaskAndLanePermute()
15613 // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes in lowerShuffleAsRepeatedMaskAndLanePermute()
15614 // (with PERMQ/PERMPD). On AVX2/AVX512BW targets, permuting 32-bit sub-lanes, in lowerShuffleAsRepeatedMaskAndLanePermute()
15616 // Otherwise we can only permute whole 128-bit lanes. in lowerShuffleAsRepeatedMaskAndLanePermute()
15618 if (Subtarget.hasAVX2() && VT.is256BitVector()) { in lowerShuffleAsRepeatedMaskAndLanePermute()
15622 (!OnlyLowestElts && V2.isUndef() && VT == MVT::v32i8) ? 4 : 2; in lowerShuffleAsRepeatedMaskAndLanePermute()
15624 if (Subtarget.hasBWI() && VT == MVT::v64i8) in lowerShuffleAsRepeatedMaskAndLanePermute()
15634 static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, in matchShuffleWithSHUFPD() argument
15638 int NumElts = VT.getVectorNumElements(); in matchShuffleWithSHUFPD()
15639 assert(VT.getScalarSizeInBits() == 64 && in matchShuffleWithSHUFPD()
15679 static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1, in lowerShuffleWithSHUFPD() argument
15684 assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) && in lowerShuffleWithSHUFPD()
15689 if (!matchShuffleWithSHUFPD(VT, V1, V2, ForceV1Zero, ForceV2Zero, Immediate, in lowerShuffleWithSHUFPD()
15693 // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs. in lowerShuffleWithSHUFPD()
15695 V1 = getZeroVector(VT, Subtarget, DAG, DL); in lowerShuffleWithSHUFPD()
15697 V2 = getZeroVector(VT, Subtarget, DAG, DL); in lowerShuffleWithSHUFPD()
15699 return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, in lowerShuffleWithSHUFPD()
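// For example (illustrative): the v4f64 mask [ 1, 4, 3, 6 ] follows the
// SHUFPD operand pattern (even result elements from V1, odd ones from V2),
// and taking Mask[i] % 2 for each element gives the immediate 0b0101 == 0x5.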
15706 static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT, in lowerShuffleAsVTRUNCAndUnpack() argument
15711 assert(VT == MVT::v32i8 && "Unexpected type!"); in lowerShuffleAsVTRUNCAndUnpack()
15718 if (Zeroable.countl_one() < (Mask.size() - 8)) in lowerShuffleAsVTRUNCAndUnpack()
15746 // Pattern-match interleave(256b v1, 256b v2) -> 512b v3 and lower it into unpck
15748 // 256-bit vectors in earlier isel stages. Therefore, this function matches a
15749 // pair of 256-bit shuffles and makes sure the masks are consecutive.
15754 static SDValue lowerShufflePairAsUNPCKAndPermute(const SDLoc &DL, MVT VT, in lowerShufflePairAsUNPCKAndPermute() argument
15758 if (VT != MVT::v8f32 && VT != MVT::v8i32 && VT != MVT::v16i16 && in lowerShufflePairAsUNPCKAndPermute()
15759 VT != MVT::v32i8) in lowerShufflePairAsUNPCKAndPermute()
15774 int NumElts = VT.getVectorNumElements(); in lowerShufflePairAsUNPCKAndPermute()
15784 for (SDNode *User : V1->uses()) in lowerShufflePairAsUNPCKAndPermute()
15785 if (User->getOpcode() == ISD::VECTOR_SHUFFLE && User->getOperand(0) == V1 && in lowerShufflePairAsUNPCKAndPermute()
15786 User->getOperand(1) == V2) in lowerShufflePairAsUNPCKAndPermute()
15791 // Find out which half of the 512-bit shuffles is each smaller shuffle in lowerShufflePairAsUNPCKAndPermute()
15796 if (IsInterleavingPattern(SVN1->getMask(), 0, NumElts) && in lowerShufflePairAsUNPCKAndPermute()
15797 IsInterleavingPattern(SVN2->getMask(), FirstQtr, ThirdQtr)) { in lowerShufflePairAsUNPCKAndPermute()
15800 } else if (IsInterleavingPattern(SVN1->getMask(), FirstQtr, ThirdQtr) && in lowerShufflePairAsUNPCKAndPermute()
15801 IsInterleavingPattern(SVN2->getMask(), 0, NumElts)) { in lowerShufflePairAsUNPCKAndPermute()
15809 SDValue Unpckl = DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); in lowerShufflePairAsUNPCKAndPermute()
15810 SDValue Unpckh = DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); in lowerShufflePairAsUNPCKAndPermute()
15811 SDValue Perm1 = DAG.getNode(X86ISD::VPERM2X128, DL, VT, Unpckl, Unpckh, in lowerShufflePairAsUNPCKAndPermute()
15813 SDValue Perm2 = DAG.getNode(X86ISD::VPERM2X128, DL, VT, Unpckl, Unpckh, in lowerShufflePairAsUNPCKAndPermute()
15823 /// Handle lowering of 4-lane 64-bit floating point shuffles.
15825 /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
15845 // Use low duplicate instructions for masks that match their pattern. in lowerV4F64Shuffle()
15850 // Non-half-crossing single input shuffles can be lowered with an in lowerV4F64Shuffle()
15863 // Try to create an in-lane repeating shuffle mask and then shuffle the in lowerV4F64Shuffle()
15869 // Try to permute the lanes and then use a per-lane permute. in lowerV4F64Shuffle()
15911 // Try to create an in-lane repeating shuffle mask and then shuffle the in lowerV4F64Shuffle()
15917 // Try to simplify this by merging 128-bit lanes to enable a lane-based in lowerV4F64Shuffle()
15943 /// Handle lowering of 4-lane 64-bit integer shuffles.
15977 // When the shuffle is mirrored between the 128-bit lanes of the unit, we in lowerV4I64Shuffle()
16031 // Try to create an in-lane repeating shuffle mask and then shuffle the in lowerV4I64Shuffle()
16042 // Try to simplify this by merging 128-bit lanes to enable a lane-based in lowerV4I64Shuffle()
16056 /// Handle lowering of 8-lane 32-bit floating point shuffles.
16058 /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
16090 // If the shuffle mask is repeated in each 128-bit lane, we have many more in lowerV8F32Shuffle()
16116 // Try to create an in-lane repeating shuffle mask and then shuffle the in lowerV8F32Shuffle()
16123 // two 128-bit lanes use the variable mask to VPERMILPS. in lowerV8F32Shuffle()
16138 // Try to simplify this by merging 128-bit lanes to enable a lane-based in lowerV8F32Shuffle()
16160 // For non-AVX512, if the Mask is of 16-bit elements in each lane then try to split in lowerV8F32Shuffle()
16178 /// Handle lowering of 8-lane 32-bit integer shuffles.
16207 // For non-AVX512, if the Mask is of 16-bit elements in each lane then try to split in lowerV8I32Shuffle()
16236 // If the shuffle mask is repeated in each 128-bit lane we can use more in lowerV8I32Shuffle()
16237 // efficient instructions that mirror the shuffles across the two 128-bit in lowerV8I32Shuffle()
16280 // Try to create an in-lane repeating shuffle mask and then shuffle the in lowerV8I32Shuffle()
16287 // Try to produce a fixed cross-128-bit lane permute followed by unpack in lowerV8I32Shuffle()
16293 // generate a cross-lane VPERMD instruction. in lowerV8I32Shuffle()
16309 // Try to simplify this by merging 128-bit lanes to enable a lane-based in lowerV8I32Shuffle()
16320 /// Handle lowering of 16-lane 16-bit integer shuffles.
16374 // Try to create an in-lane repeating shuffle mask and then shuffle the in lowerV16I16Shuffle()
16386 // Try to produce a fixed cross-128-bit lane permute followed by unpack in lowerV16I16Shuffle()
16391 // There are no generalized cross-lane shuffle operations available on i16 in lowerV16I16Shuffle()
16404 // As this is a single-input shuffle, the repeated mask should be in lowerV16I16Shuffle()
16416 // AVX512BW can lower to VPERMW (non-VLX will pad to v32i16). in lowerV16I16Shuffle()
16420 // Try to simplify this by merging 128-bit lanes to enable a lane-based in lowerV16I16Shuffle()
16426 // Try to permute the lanes and then use a per-lane permute. in lowerV16I16Shuffle()
16443 /// Handle lowering of 32-lane 8-bit integer shuffles.
16503 // Try to create an in-lane repeating shuffle mask and then shuffle the in lowerV32I8Shuffle()
16509 // There are no generalized cross-lane shuffle operations available on i8 in lowerV32I8Shuffle()
16512 // Try to produce a fixed cross-128-bit lane permute followed by unpack in lowerV32I8Shuffle()
16529 // AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8). in lowerV32I8Shuffle()
16533 // Try to simplify this by merging 128-bit lanes to enable a lane-based in lowerV32I8Shuffle()
16539 // Try to permute the lanes and then use a per-lane permute. in lowerV32I8Shuffle()
16564 /// High-level routine to lower various 256-bit x86 vector shuffles.
16566 /// This routine either breaks down the specific type of a 256-bit x86 vector
16567 /// shuffle or splits it into two 128-bit shuffles and fuses the results back
16569 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, in lower256BitShuffle() argument
16575 int NumElts = VT.getVectorNumElements(); in lower256BitShuffle()
16580 DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) in lower256BitShuffle()
16585 lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) in lower256BitShuffle()
16588 // There is a really nice hard cut-over between AVX1 and AVX2 that means we in lower256BitShuffle()
16590 // querying in the per-vector-type lowering routines. With AVX1 we have in lower256BitShuffle()
16591 // essentially *zero* ability to manipulate a 256-bit vector with integer in lower256BitShuffle()
16594 if (VT.isInteger() && !Subtarget.hasAVX2()) { in lower256BitShuffle()
16595 int ElementBits = VT.getScalarSizeInBits(); in lower256BitShuffle()
16598 // for masking/blending then decompose into 128-bit vectors. in lower256BitShuffle()
16599 if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, in lower256BitShuffle()
16602 if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) in lower256BitShuffle()
16604 return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false); in lower256BitShuffle()
16608 VT.getVectorNumElements()); in lower256BitShuffle()
16611 return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask)); in lower256BitShuffle()
16614 if (VT == MVT::v16f16 || VT == MVT::v16bf16) { in lower256BitShuffle()
16617 return DAG.getBitcast(VT, in lower256BitShuffle()
16621 switch (VT.SimpleTy) { in lower256BitShuffle()
16636 llvm_unreachable("Not a valid 256-bit x86 vector type!"); in lower256BitShuffle()
16640 /// Try to lower a vector shuffle as a shuffle of 128-bit lanes.
16641 static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask, in lowerV4X128Shuffle() argument
16645 assert(VT.getScalarSizeInBits() == 64 && in lowerV4X128Shuffle()
16650 assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle."); in lowerV4X128Shuffle()
16652 // TODO - use Zeroable like we do for lowerV2X128VectorShuffle? in lowerV4X128Shuffle()
16662 MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); in lowerV4X128Shuffle()
16665 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, in lowerV4X128Shuffle()
16666 getZeroVector(VT, Subtarget, DAG, DL), LoV, in lowerV4X128Shuffle()
16670 // Check for patterns which can be matched with a single insert of a 256-bit in lowerV4X128Shuffle()
16675 MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4); in lowerV4X128Shuffle()
16679 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec, in lowerV4X128Shuffle()
16683 // See if this is an insertion of the lower 128-bits of V2 into V1. in lowerV4X128Shuffle()
16685 int V2Index = -1; in lowerV4X128Shuffle()
16687 assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value"); in lowerV4X128Shuffle()
16698 // Make sure we only have a single V2 index and it's the lowest 128-bits. in lowerV4X128Shuffle()
16707 MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); in lowerV4X128Shuffle()
16713 // See if we can widen to a 256-bit lane shuffle; we're going to lose 128-lane in lowerV4X128Shuffle()
16724 SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)}; in lowerV4X128Shuffle()
16725 int PermMask[4] = {-1, -1, -1, -1}; in lowerV4X128Shuffle()
16728 assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value"); in lowerV4X128Shuffle()
16742 return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1], in lowerV4X128Shuffle()
16746 /// Handle lowering of 8-lane 64-bit floating point shuffles.
16756 // Use low duplicate instructions for masks that match their pattern. in lowerV8F64Shuffle()
16761 // Non-half-crossing single input shuffles can be lowered with an in lowerV8F64Shuffle()
16800 /// Handle lowering of 16-lane 32-bit floating point shuffles.
16809 // If the shuffle mask is repeated in each 128-bit lane, we have many more in lowerV16F32Shuffle()
16845 // Try to create an in-lane repeating shuffle mask and then shuffle the in lowerV16F32Shuffle()
16852 // 128-bit lanes and don't lane cross, use variable mask VPERMILPS. in lowerV16F32Shuffle()
16867 /// Handle lowering of 8-lane 64-bit integer shuffles.
16884 // When the shuffle is mirrored between the 128-bit lanes of the unit, we in lowerV8I64Shuffle()
16886 // 128-bit lanes. in lowerV8I64Shuffle()
16940 /// Handle lowering of 16-lane 32-bit integer shuffles.
16970 // If the shuffle mask is repeated in each 128-bit lane we can use more in lowerV16I32Shuffle()
16971 // efficient instructions that mirror the shuffles across the four 128-bit in lowerV16I32Shuffle()
17019 // Try to create an in-lane repeating shuffle mask and then shuffle the in lowerV16I32Shuffle()
17037 /// Handle lowering of 32-lane 16-bit integer shuffles.
17045 assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!"); in lowerV32I16Shuffle()
17082 // As this is a single-input shuffle, the repeated mask should be in lowerV32I16Shuffle()
17101 /// Handle lowering of 64-lane 8-bit integer shuffles.
17109 assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!"); in lowerV64I8Shuffle()
17153 // Try to create an in-lane repeating shuffle mask and then shuffle the in lowerV64I8Shuffle()
17168 // Use PALIGNR+Permute if possible - permute might become PSHUFB but the in lowerV64I8Shuffle()
17181 // Try to simplify this by merging 128-bit lanes to enable a lane-based in lowerV64I8Shuffle()
17195 /// High-level routine to lower various 512-bit x86 vector shuffles.
17197 /// This routine either breaks down the specific type of a 512-bit x86 vector
17198 /// shuffle or splits it into two 256-bit shuffles and fuses the results back
17201 MVT VT, SDValue V1, SDValue V2, in lower512BitShuffle() argument
17206 "Cannot lower 512-bit vectors w/ basic ISA!"); in lower512BitShuffle()
17215 DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) in lower512BitShuffle()
17220 lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) in lower512BitShuffle()
17224 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, in lower512BitShuffle()
17228 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) { in lower512BitShuffle()
17231 if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, in lower512BitShuffle()
17234 if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) in lower512BitShuffle()
17237 return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false); in lower512BitShuffle()
17240 if (VT == MVT::v32f16 || VT == MVT::v32bf16) { in lower512BitShuffle()
17242 return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, in lower512BitShuffle()
17247 return DAG.getBitcast(VT, in lower512BitShuffle()
17255 switch (VT.SimpleTy) { in lower512BitShuffle()
17270 llvm_unreachable("Not a valid 512-bit x86 vector type!"); in lower512BitShuffle()
17275 MVT VT, SDValue V1, SDValue V2, in lower1BitShuffleAsKSHIFTR() argument
17282 int ShiftAmt = -1; in lower1BitShuffleAsKSHIFTR()
17291 // The first non-undef element determines our shift amount. in lower1BitShuffleAsKSHIFTR()
17293 ShiftAmt = M - i; in lower1BitShuffleAsKSHIFTR()
17298 // All non-undef elements must shift by the same amount. in lower1BitShuffleAsKSHIFTR()
17299 if (ShiftAmt != M - i) in lower1BitShuffleAsKSHIFTR()
17308 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, in lower1BitShuffleAsKSHIFTR()
17313 // Returns the shift amount if possible or -1 if not. This is a simplified
17321 if (!Zeroable[j + (Left ? 0 : (Size - Shift))]) in match1BitShuffleAsKSHIFT()
17329 unsigned Low = Left ? 0 : Shift; in match1BitShuffleAsKSHIFT() local
17330 unsigned Len = Size - Shift; in match1BitShuffleAsKSHIFT()
17331 return isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset); in match1BitShuffleAsKSHIFT()
17341 return -1; in match1BitShuffleAsKSHIFT()
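// A minimal standalone C++ sketch (names are illustrative, not from this
// file) of the check the comments above describe: a mask acts as a single
// right shift when every defined element i reads from element i + ShiftAmt
// for one common ShiftAmt; undef (-1) elements impose no constraint.
#include <vector>

static int matchMaskAsRightShift(const std::vector<int> &Mask) {
  int ShiftAmt = -1;
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;                    // undef element: no constraint
    if (ShiftAmt < 0)
      ShiftAmt = M - i;            // first non-undef element decides
    else if (ShiftAmt != M - i)
      return -1;                   // all non-undef elements must agree
  }
  return ShiftAmt > 0 ? ShiftAmt : -1;
}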
17346 // There is no dedicated instruction on AVX-512 that shuffles the masks. in lower1BitShuffle()
17347 // The only way to shuffle bits is to sign-extend the mask vector to SIMD
17350 MVT VT, SDValue V1, SDValue V2, in lower1BitShuffle() argument
17355 "Cannot lower 512-bit vectors w/o basic ISA!"); in lower1BitShuffle()
17362 int Src = -1; in lower1BitShuffle()
17382 if ((int)Zeroable.countl_one() >= (NumElts - SubvecElts)) { in lower1BitShuffle()
17388 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, in lower1BitShuffle()
17389 DAG.getConstant(0, DL, VT), in lower1BitShuffle()
17394 if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget, in lower1BitShuffle()
17407 if (Opcode == X86ISD::KSHIFTR && WideVT != VT) { in lower1BitShuffle()
17411 DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8)); in lower1BitShuffle()
17413 ShiftAmt += WideElts - NumElts; in lower1BitShuffle()
17418 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, in lower1BitShuffle()
17427 if (NumV2Elements == 0 && V1.getOpcode() == ISD::SETCC && V1->hasOneUse()) { in lower1BitShuffle()
17430 ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get(); in lower1BitShuffle()
17434 DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask), in lower1BitShuffle()
17439 switch (VT.SimpleTy) { in lower1BitShuffle()
17449 // Take 512-bit type, more shuffles on KNL. If we have VLX use a 256-bit in lower1BitShuffle()
17454 // Take 512-bit type, unless we are avoiding 512-bit types and have the in lower1BitShuffle()
17455 // 256-bit operation available. in lower1BitShuffle()
17459 // Take 512-bit type, unless we are avoiding 512-bit types and have the in lower1BitShuffle()
17460 // 256-bit operation available. in lower1BitShuffle()
17478 int NumElems = VT.getVectorNumElements(); in lower1BitShuffle()
17481 return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT), in lower1BitShuffle()
17484 return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle); in lower1BitShuffle()
17513 // number of uses of V2 in the low half of the vector. When that is tied, in canonicalizeShuffleMaskWithCommute()
17559 MVT VT = V.getSimpleValueType().getScalarType(); in canCombineAsMaskOperation() local
17560 if ((VT == MVT::i16 || VT == MVT::i8) && !Subtarget.hasBWI()) in canCombineAsMaskOperation()
17564 // are preferable to blendw/blendvb/masked-mov. in canCombineAsMaskOperation()
17565 if ((VT == MVT::i16 || VT == MVT::i8) && in canCombineAsMaskOperation()
17572 switch (V->getOpcode()) { in canCombineAsMaskOperation()
17591 if (!V->hasOneUse()) in canCombineAsMaskOperation()
17609 /// Top-level lowering for x86 vector shuffles.
17619 ArrayRef<int> OrigMask = SVOp->getMask(); in lowerVECTOR_SHUFFLE()
17622 MVT VT = Op.getSimpleValueType(); in lowerVECTOR_SHUFFLE() local
17623 int NumElements = VT.getVectorNumElements(); in lowerVECTOR_SHUFFLE()
17625 bool Is1BitVector = (VT.getVectorElementType() == MVT::i1); in lowerVECTOR_SHUFFLE()
17627 assert((VT.getSizeInBits() != 64 || Is1BitVector) && in lowerVECTOR_SHUFFLE()
17633 return DAG.getUNDEF(VT); in lowerVECTOR_SHUFFLE()
17641 // Check for non-undef masks pointing at an undef vector and make the masks in lowerVECTOR_SHUFFLE()
17649 M = -1; in lowerVECTOR_SHUFFLE()
17650 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); in lowerVECTOR_SHUFFLE()
17657 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && in lowerVECTOR_SHUFFLE()
17660 // We actually see shuffles that are entirely re-arrangements of a set of in lowerVECTOR_SHUFFLE()
17668 return getZeroVector(VT, Subtarget, DAG, DL); in lowerVECTOR_SHUFFLE()
17675 // integers to handle flipping the low and high halves of AVX 256-bit vectors. in lowerVECTOR_SHUFFLE()
17677 if (VT.getScalarSizeInBits() < 64 && !Is1BitVector && in lowerVECTOR_SHUFFLE()
17683 // TODO: Avoid lowering directly from this top-level function: make this in lowerVECTOR_SHUFFLE()
17684 // a query (canLowerAsBroadcast) and defer lowering to the type-based calls. in lowerVECTOR_SHUFFLE()
17685 if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask, in lowerVECTOR_SHUFFLE()
17689 MVT NewEltVT = VT.isFloatingPoint() in lowerVECTOR_SHUFFLE()
17690 ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2) in lowerVECTOR_SHUFFLE()
17691 : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2); in lowerVECTOR_SHUFFLE()
17698 // Modify the new Mask to take all zeros from the all-zero vector. in lowerVECTOR_SHUFFLE()
17699 // Choose indices that are blend-friendly. in lowerVECTOR_SHUFFLE()
17702 "V2's non-undef elements are used?!"); in lowerVECTOR_SHUFFLE()
17708 // Ensure all elements of V2 are zero - isBuildVectorAllZeros permits in lowerVECTOR_SHUFFLE()
17716 VT, DAG.getVectorShuffle(NewVT, DL, V1, V2, WidenedMask)); in lowerVECTOR_SHUFFLE()
17726 Ops, Mask, VT.getSizeInBits(), DL, DAG, Subtarget)) in lowerVECTOR_SHUFFLE()
17727 return DAG.getBitcast(VT, HOp); in lowerVECTOR_SHUFFLE()
17729 V1 = DAG.getBitcast(VT, Ops[0]); in lowerVECTOR_SHUFFLE()
17730 V2 = DAG.getBitcast(VT, Ops[1]); in lowerVECTOR_SHUFFLE()
17742 if (VT.is128BitVector()) in lowerVECTOR_SHUFFLE()
17743 return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); in lowerVECTOR_SHUFFLE()
17745 if (VT.is256BitVector()) in lowerVECTOR_SHUFFLE()
17746 return lower256BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); in lowerVECTOR_SHUFFLE()
17748 if (VT.is512BitVector()) in lowerVECTOR_SHUFFLE()
17749 return lower512BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); in lowerVECTOR_SHUFFLE()
17752 return lower1BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); in lowerVECTOR_SHUFFLE()
17764 MVT VT = Op.getSimpleValueType(); in lowerVSELECTtoVectorShuffle() local
17766 // Only non-legal VSELECTs reach this lowering; convert those into generic in lowerVSELECTtoVectorShuffle()
17767 // shuffles and re-use the shuffle lowering path for blends. in lowerVSELECTtoVectorShuffle()
17771 return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask); in lowerVSELECTtoVectorShuffle()
17783 MVT VT = Op.getSimpleValueType(); in LowerVSELECT() local
17784 if (isSoftF16(VT, Subtarget)) { in LowerVSELECT()
17785 MVT NVT = VT.changeVectorElementTypeToInteger(); in LowerVSELECT()
17786 return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, dl, NVT, Cond, in LowerVSELECT()
17798 // Try to lower this to a blend-style vector shuffle. This can handle all in LowerVSELECT()
17804 // with patterns on the mask registers on AVX-512. in LowerVSELECT()
17814 unsigned EltSize = VT.getScalarSizeInBits(); in LowerVSELECT()
17815 unsigned NumElts = VT.getVectorNumElements(); in LowerVSELECT()
17818 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) in LowerVSELECT()
17821 // If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition in LowerVSELECT()
17822 // into an i1 condition so that we can use the mask-based 512-bit blend in LowerVSELECT()
17824 if (VT.getSizeInBits() == 512) { in LowerVSELECT()
17831 return DAG.getSelect(dl, VT, Mask, LHS, RHS); in LowerVSELECT()
17843 return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS); in LowerVSELECT()
17851 if (EltSize < 32 && VT.is256BitVector() && !Subtarget.hasAVX2() && in LowerVSELECT()
17863 // VSELECT-matching blend, return Op, but if we need to expand, return in LowerVSELECT()
17865 switch (VT.SimpleTy) { in LowerVSELECT()
17885 return DAG.getBitcast(VT, Select); in LowerVSELECT()
17891 MVT VT = Op.getSimpleValueType(); in LowerEXTRACT_VECTOR_ELT_SSE4() local
17900 if (VT.getSizeInBits() == 8) { in LowerEXTRACT_VECTOR_ELT_SSE4()
17909 unsigned IdxVal = Idx->getAsZExtVal(); in LowerEXTRACT_VECTOR_ELT_SSE4()
17912 return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); in LowerEXTRACT_VECTOR_ELT_SSE4()
17915 if (VT == MVT::f32) { in LowerEXTRACT_VECTOR_ELT_SSE4()
17923 SDNode *User = *Op.getNode()->use_begin(); in LowerEXTRACT_VECTOR_ELT_SSE4()
17924 if ((User->getOpcode() != ISD::STORE || isNullConstant(Idx)) && in LowerEXTRACT_VECTOR_ELT_SSE4()
17925 (User->getOpcode() != ISD::BITCAST || in LowerEXTRACT_VECTOR_ELT_SSE4()
17926 User->getValueType(0) != MVT::i32)) in LowerEXTRACT_VECTOR_ELT_SSE4()
17933 if (VT == MVT::i32 || VT == MVT::i64) in LowerEXTRACT_VECTOR_ELT_SSE4()
17940 /// AVX-512 feature.
17957 // Extending v8i1/v16i1 to 512-bit gets better performance on KNL in ExtractBitFromMaskVector()
17971 unsigned IdxVal = IdxC->getZExtValue(); in ExtractBitFromMaskVector()
17988 MVT VT = N->getSimpleValueType(0); in getExtractedDemandedElts() local
17989 unsigned NumElts = VT.getVectorNumElements(); in getExtractedDemandedElts()
17991 for (SDNode *User : N->uses()) { in getExtractedDemandedElts()
17992 switch (User->getOpcode()) { in getExtractedDemandedElts()
17996 if (!isa<ConstantSDNode>(User->getOperand(1))) { in getExtractedDemandedElts()
18000 DemandedElts.setBit(User->getConstantOperandVal(1)); in getExtractedDemandedElts()
18003 if (!User->getValueType(0).isSimple() || in getExtractedDemandedElts()
18004 !User->getValueType(0).isVector()) { in getExtractedDemandedElts()
18036 // (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer) in LowerEXTRACT_VECTOR_ELT()
18044 // | Uops | 0 - DV | 5 | 6 | 7 | | in LowerEXTRACT_VECTOR_ELT()
18045 // --------------------------------------------- in LowerEXTRACT_VECTOR_ELT()
18056 // |Uops| 1 | 2 - D |3 - D | 4 | 5 | | in LowerEXTRACT_VECTOR_ELT()
18057 // --------------------------------------------------------- in LowerEXTRACT_VECTOR_ELT()
18058 // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0 in LowerEXTRACT_VECTOR_ELT()
18059 // |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18] in LowerEXTRACT_VECTOR_ELT()
18066 unsigned IdxVal = IdxC->getZExtValue(); in LowerEXTRACT_VECTOR_ELT()
18068 // If this is a 256-bit vector result, first extract the 128-bit vector and in LowerEXTRACT_VECTOR_ELT()
18069 // then extract the element from the 128-bit vector. in LowerEXTRACT_VECTOR_ELT()
18071 // Get the 128-bit vector. in LowerEXTRACT_VECTOR_ELT()
18080 IdxVal &= ElemsPerChunk - 1; in LowerEXTRACT_VECTOR_ELT()
18087 MVT VT = Op.getSimpleValueType(); in LowerEXTRACT_VECTOR_ELT() local
18089 if (VT == MVT::i16) { in LowerEXTRACT_VECTOR_ELT()
18104 return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); in LowerEXTRACT_VECTOR_ELT()
18111 // Only extract a single element from a v16i8 source - determine the common in LowerEXTRACT_VECTOR_ELT()
18112 // DWORD/WORD that all extractions share, and extract the sub-byte. in LowerEXTRACT_VECTOR_ELT()
18114 if (VT == MVT::i8) { in LowerEXTRACT_VECTOR_ELT()
18118 // Extract either the lowest i32 or any i16, and extract the sub-byte. in LowerEXTRACT_VECTOR_ELT()
18128 return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); in LowerEXTRACT_VECTOR_ELT()
18140 return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); in LowerEXTRACT_VECTOR_ELT()
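// A scalar intrinsics illustration (assumed helper, SSE2 only) of the
// sub-byte extraction mentioned above: when the byte lives in the lowest
// i32, extract that dword with MOVD and shift the requested byte out.
#include <immintrin.h>
#include <stdint.h>

static uint8_t extract_low_byte(__m128i v, unsigned idx /* 0..3 */) {
  uint32_t dw = (uint32_t)_mm_cvtsi128_si32(v);  // lowest 32 bits (MOVD)
  return (uint8_t)(dw >> (8 * (idx & 3)));       // pick the sub-byte
}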
18144 if (VT == MVT::f16 || VT.getSizeInBits() == 32) { in LowerEXTRACT_VECTOR_ELT()
18149 SmallVector<int, 8> Mask(VecVT.getVectorNumElements(), -1); in LowerEXTRACT_VECTOR_ELT()
18152 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, in LowerEXTRACT_VECTOR_ELT()
18156 if (VT.getSizeInBits() == 64) { in LowerEXTRACT_VECTOR_ELT()
18166 int Mask[2] = { 1, -1 }; in LowerEXTRACT_VECTOR_ELT()
18168 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, in LowerEXTRACT_VECTOR_ELT()
18176 /// AVX-512 feature.
18197 // Copy into a k-register, extract to v1i1 and insert_subvector. in InsertBitToMaskVector()
18204 MVT VT = Op.getSimpleValueType(); in LowerINSERT_VECTOR_ELT() local
18205 MVT EltVT = VT.getVectorElementType(); in LowerINSERT_VECTOR_ELT()
18206 unsigned NumElts = VT.getVectorNumElements(); in LowerINSERT_VECTOR_ELT()
18219 MVT IVT = VT.changeVectorElementTypeToInteger(); in LowerINSERT_VECTOR_ELT()
18223 return DAG.getBitcast(VT, Res); in LowerINSERT_VECTOR_ELT()
18229 // possible vector indices, and FP insertion has less gpr->simd traffic. in LowerINSERT_VECTOR_ELT()
18242 SDValue EltSplat = DAG.getSplatBuildVector(VT, dl, N1); in LowerINSERT_VECTOR_ELT()
18249 // inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0. in LowerINSERT_VECTOR_ELT()
18254 if (N2C->getAPIntValue().uge(NumElts)) in LowerINSERT_VECTOR_ELT()
18256 uint64_t IdxVal = N2C->getZExtValue(); in LowerINSERT_VECTOR_ELT()
18259 bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1); in LowerINSERT_VECTOR_ELT()
18262 // Lower insertion of v16i8/v32i8/v16i16 -1 elts as an 'OR' blend. in LowerINSERT_VECTOR_ELT()
18265 ((VT == MVT::v16i8 && !Subtarget.hasSSE41()) || in LowerINSERT_VECTOR_ELT()
18266 ((VT == MVT::v32i8 || VT == MVT::v16i16) && !Subtarget.hasInt256()))) { in LowerINSERT_VECTOR_ELT()
18267 SDValue ZeroCst = DAG.getConstant(0, dl, VT.getScalarType()); in LowerINSERT_VECTOR_ELT()
18268 SDValue OnesCst = DAG.getAllOnesConstant(dl, VT.getScalarType()); in LowerINSERT_VECTOR_ELT()
18271 SDValue CstVector = DAG.getBuildVector(VT, dl, CstVectorElts); in LowerINSERT_VECTOR_ELT()
18272 return DAG.getNode(ISD::OR, dl, VT, N0, CstVector); in LowerINSERT_VECTOR_ELT()
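// A rough intrinsics analogue of the 'OR' blend above (helper name and the
// runtime index are illustrative; the lowering builds the constant vector at
// compile time): inserting a -1 element is an OR with a single-lane mask.
#include <immintrin.h>
#include <stdint.h>

static __m128i insert_allones_byte(__m128i v, unsigned idx /* 0..15 */) {
  uint8_t m[16] = {0};
  m[idx] = 0xFF;                                  // -1 only at the insert index
  return _mm_or_si128(v, _mm_loadu_si128((const __m128i *)m));
}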
18277 (EltSizeInBits >= 16 || (IsZeroElt && !VT.is128BitVector()))) { in LowerINSERT_VECTOR_ELT()
18281 SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl) in LowerINSERT_VECTOR_ELT()
18282 : getOnesVector(VT, DAG, dl); in LowerINSERT_VECTOR_ELT()
18283 return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask); in LowerINSERT_VECTOR_ELT()
18287 // If the vector is wider than 128 bits, extract the 128-bit subvector, insert in LowerINSERT_VECTOR_ELT()
18289 if (VT.is256BitVector() || VT.is512BitVector()) { in LowerINSERT_VECTOR_ELT()
18290 // With a 256-bit vector, we can insert into the zero element efficiently in LowerINSERT_VECTOR_ELT()
18292 if (VT.is256BitVector() && IdxVal == 0) { in LowerINSERT_VECTOR_ELT()
18295 // doing anyway after extracting to a 128-bit vector. in LowerINSERT_VECTOR_ELT()
18298 SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); in LowerINSERT_VECTOR_ELT()
18299 return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, in LowerINSERT_VECTOR_ELT()
18306 "Vectors will always have power-of-two number of elements."); in LowerINSERT_VECTOR_ELT()
18308 // If we are not inserting into the low 128-bit vector chunk, in LowerINSERT_VECTOR_ELT()
18315 SDValue N1SplatVec = DAG.getSplatBuildVector(VT, dl, N1); in LowerINSERT_VECTOR_ELT()
18319 return DAG.getVectorShuffle(VT, dl, N0, N1SplatVec, BlendMask); in LowerINSERT_VECTOR_ELT()
18322 // Get the desired 128-bit vector chunk. in LowerINSERT_VECTOR_ELT()
18327 unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1); in LowerINSERT_VECTOR_ELT()
18335 assert(VT.is128BitVector() && "Only 128-bit vector types should be left!"); in LowerINSERT_VECTOR_ELT()
18341 N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); in LowerINSERT_VECTOR_ELT()
18349 MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32); in LowerINSERT_VECTOR_ELT()
18352 return DAG.getBitcast(VT, N1); in LowerINSERT_VECTOR_ELT()
18358 if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) { in LowerINSERT_VECTOR_ELT()
18360 if (VT == MVT::v8i16) { in LowerINSERT_VECTOR_ELT()
18364 assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector"); in LowerINSERT_VECTOR_ELT()
18369 assert(N1.getValueType() != MVT::i32 && "Unexpected VT"); in LowerINSERT_VECTOR_ELT()
18372 return DAG.getNode(Opc, dl, VT, N0, N1, N2); in LowerINSERT_VECTOR_ELT()
18388 // If this is an insertion of 32-bits into the low 32-bits of in LowerINSERT_VECTOR_ELT()
18393 // generate insertps because blendps does not have a 32-bit memory in LowerINSERT_VECTOR_ELT()
18396 return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, in LowerINSERT_VECTOR_ELT()
18401 return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, in LowerINSERT_VECTOR_ELT()
18423 // If this is a 256-bit vector result, first insert into a 128-bit in LowerSCALAR_TO_VECTOR()
18424 // vector and then insert into the 256-bit vector. in LowerSCALAR_TO_VECTOR()
18426 // Insert into a 128-bit vector. in LowerSCALAR_TO_VECTOR()
18433 // Insert the 128-bit vector. in LowerSCALAR_TO_VECTOR()
18485 // References to absolute symbols are never PC-relative. in getGlobalWrapperKind()
18486 if (GV && GV->isAbsoluteSymbolRef()) in getGlobalWrapperKind()
18489 // The following OpFlags under RIP-rel PIC use RIP. in getGlobalWrapperKind()
18518 CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag); in LowerConstantPool()
18540 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); in LowerJumpTable()
18564 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); in LowerBlockAddress()
18565 int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset(); in LowerBlockAddress()
18591 GV = G->getGlobal(); in LowerGlobalOrExternal()
18592 Offset = G->getOffset(); in LowerGlobalOrExternal()
18595 ExternalSym = ES->getSymbol(); in LowerGlobalOrExternal()
18615 // Suppress the folding if Offset is negative: movl foo-1, %eax is not in LowerGlobalOrExternal()
18648 // If there was a non-zero offset that we didn't fold, create an explicit in LowerGlobalOrExternal()
18673 auto UI = TGA->use_begin(); in GetTLSADDR()
18675 if (UI != TGA->use_end()) in GetTLSADDR()
18676 return SDValue(*UI->use_begin()->use_begin(), 0); in GetTLSADDR()
18678 TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), in GetTLSADDR()
18679 GA->getOffset(), OperandFlags); in GetTLSADDR()
18752 MFI->incNumLocalDynamicTLSAccesses(); in LowerToTLSLocalDynamicModel()
18774 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, in LowerToTLSLocalDynamicModel()
18775 GA->getValueType(0), in LowerToTLSLocalDynamicModel()
18776 GA->getOffset(), OperandFlags); in LowerToTLSLocalDynamicModel()
18789 // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). in LowerToTLSExecModel()
18798 // Most TLS accesses are not RIP relative, even on x86-64. One exception is in LowerToTLSExecModel()
18816 // or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic) in LowerToTLSExecModel()
18818 DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), in LowerToTLSExecModel()
18819 GA->getOffset(), OperandFlags); in LowerToTLSExecModel()
18846 const GlobalValue *GV = GA->getGlobal(); in LowerGlobalTLSAddress()
18887 SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, in LowerGlobalTLSAddress()
18888 GA->getValueType(0), in LowerGlobalTLSAddress()
18889 GA->getOffset(), OpFlag); in LowerGlobalTLSAddress()
18932 // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or in LowerGlobalTLSAddress()
18933 // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly in LowerGlobalTLSAddress()
18949 if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) { in LowerGlobalTLSAddress()
18971 SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, in LowerGlobalTLSAddress()
18972 GA->getValueType(0), in LowerGlobalTLSAddress()
18973 GA->getOffset(), X86II::MO_SECREL); in LowerGlobalTLSAddress()
19000 // offset and returning `true` for TLS-desc currently duplicates both in addressingModeSupportsTLS()
19001 // which is detrimental :-/ in addressingModeSupportsTLS()
19017 // Try to use a packed vector operation to handle i64 on 32-bit targets when
19027 bool IsStrict = Op->isStrictFPOpcode(); in LowerI64IntToFP_AVX512DQ()
19031 MVT VT = Op.getSimpleValueType(); in LowerI64IntToFP_AVX512DQ() local
19034 (VT != MVT::f32 && VT != MVT::f64)) in LowerI64IntToFP_AVX512DQ()
19039 // Using 256-bit to ensure result is 128-bits for f32 case. in LowerI64IntToFP_AVX512DQ()
19042 MVT VecVT = MVT::getVectorVT(VT, NumElts); in LowerI64IntToFP_AVX512DQ()
19049 SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec, in LowerI64IntToFP_AVX512DQ()
19056 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec, in LowerI64IntToFP_AVX512DQ()
19060 // Try to use a packed vector operation to handle i64 on 32-bit targets.
19068 bool IsStrict = Op->isStrictFPOpcode(); in LowerI64IntToFP16()
19071 MVT VT = Op.getSimpleValueType(); in LowerI64IntToFP16() local
19073 if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16) in LowerI64IntToFP16()
19085 SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec, in LowerI64IntToFP16()
19092 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec, in LowerI64IntToFP16()
19120 /// round-trip between XMM and GPR.
19132 // See if we have a 128-bit vector cast op for this type of cast. in vectorizeExtractedCast()
19141 // If we are extracting from a non-zero element, first shuffle the source in vectorizeExtractedCast()
19144 SmallVector<int, 16> Mask(FromVT.getVectorNumElements(), -1); in vectorizeExtractedCast()
19148 // If the source vector is wider than 128-bits, extract the low part. Do not in vectorizeExtractedCast()
19153 // cast (extelt V, 0) --> extelt (cast (extract_subv V)), 0 in vectorizeExtractedCast()
19154 // cast (extelt V, C) --> extelt (cast (extract_subv (shuffle V, [C...]))), 0 in vectorizeExtractedCast()
19161 /// try to vectorize the cast ops. This will avoid an expensive round-trip
19168 MVT VT = CastToFP.getSimpleValueType(); in lowerFPToIntToFP() local
19169 if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector()) in lowerFPToIntToFP()
19178 // See if we have 128-bit vector cast instructions for this type of cast. in lowerFPToIntToFP()
19180 if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) || in lowerFPToIntToFP()
19186 unsigned VTSize = VT.getSizeInBits(); in lowerFPToIntToFP()
19189 MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize); in lowerFPToIntToFP()
19191 // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64. in lowerFPToIntToFP()
19197 // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0 in lowerFPToIntToFP()
19207 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx); in lowerFPToIntToFP()
19213 bool IsStrict = Op->isStrictFPOpcode(); in lowerINT_TO_FP_vXi64()
19214 MVT VT = Op->getSimpleValueType(0); in lowerINT_TO_FP_vXi64() local
19215 SDValue Src = Op->getOperand(IsStrict ? 1 : 0); in lowerINT_TO_FP_vXi64()
19224 // With AVX512DQ, but not VLX we need to widen to get a 512-bit result type. in lowerINT_TO_FP_vXi64()
19225 assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) && in lowerINT_TO_FP_vXi64()
19226 "Unexpected VT!"); in lowerINT_TO_FP_vXi64()
19227 MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64; in lowerINT_TO_FP_vXi64()
19238 {Op->getOperand(0), Src}); in lowerINT_TO_FP_vXi64()
19244 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, in lowerINT_TO_FP_vXi64()
19252 bool IsSigned = Op->getOpcode() == ISD::SINT_TO_FP || in lowerINT_TO_FP_vXi64()
19253 Op->getOpcode() == ISD::STRICT_SINT_TO_FP; in lowerINT_TO_FP_vXi64()
19254 if (VT != MVT::v4f32 || IsSigned) in lowerINT_TO_FP_vXi64()
19278 SDValue SignCvt = DAG.getBuildVector(VT, DL, SignCvts); in lowerINT_TO_FP_vXi64()
19301 bool IsStrict = Op->isStrictFPOpcode(); in promoteXINT_TO_FP()
19303 SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode(); in promoteXINT_TO_FP()
19304 MVT VT = Op.getSimpleValueType(); in promoteXINT_TO_FP() local
19305 MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32; in promoteXINT_TO_FP()
19310 ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other}, in promoteXINT_TO_FP()
19314 return DAG.getNode(ISD::FP_ROUND, dl, VT, in promoteXINT_TO_FP()
19318 static bool isLegalConversion(MVT VT, bool IsSigned, in isLegalConversion() argument
19320 if (VT == MVT::v4i32 && Subtarget.hasSSE2() && IsSigned) in isLegalConversion()
19322 if (VT == MVT::v8i32 && Subtarget.hasAVX() && IsSigned) in isLegalConversion()
19324 if (Subtarget.hasVLX() && (VT == MVT::v4i32 || VT == MVT::v8i32)) in isLegalConversion()
19327 if (VT == MVT::v16i32) in isLegalConversion()
19329 if (VT == MVT::v8i64 && Subtarget.hasDQI()) in isLegalConversion()
19333 (VT == MVT::v2i64 || VT == MVT::v4i64)) in isLegalConversion()
19340 bool IsStrict = Op->isStrictFPOpcode(); in LowerSINT_TO_FP()
19343 SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode(); in LowerSINT_TO_FP()
19345 MVT VT = Op.getSimpleValueType(); in LowerSINT_TO_FP() local
19348 if (isSoftF16(VT, Subtarget)) in LowerSINT_TO_FP()
19363 if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) { in LowerSINT_TO_FP()
19368 X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other}, in LowerSINT_TO_FP()
19371 return DAG.getNode(X86ISD::CVTSI2P, dl, VT, in LowerSINT_TO_FP()
19384 bool UseSSEReg = isScalarFPTypeInSSEReg(VT); in LowerSINT_TO_FP()
19399 if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) { in LowerSINT_TO_FP()
19402 return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, in LowerSINT_TO_FP()
19405 return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext); in LowerSINT_TO_FP()
19408 if (VT == MVT::f128 || !Subtarget.hasX87()) in LowerSINT_TO_FP()
19413 // Bitcasting to f64 here allows us to do a single 64-bit store from in LowerSINT_TO_FP()
19415 // with two 32-bit stores. in LowerSINT_TO_FP()
19428 BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Alignment, DAG); in LowerSINT_TO_FP()
19478 /// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
19487 /// 64-bit unsigned integer to double expansion.
19491 // We can't use this algorithm for strict fp. It produces -0.0 instead of +0.0 in LowerUINT_TO_FP_i64()
19493 // fall back to Expand for when i64 is legal, or use FILD in 32-bit mode. in LowerUINT_TO_FP_i64()
19494 assert(!Op->isStrictFPOpcode() && "Expected non-strict uint_to_fp!"); in LowerUINT_TO_FP_i64()
19526 // Load the 64-bit value into an XMM register. in LowerUINT_TO_FP_i64()
19539 // TODO: Are there any fast-math-flags to propagate here? in LowerUINT_TO_FP_i64()
19547 SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1}); in LowerUINT_TO_FP_i64()
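// Scalar view of what the i64 expansion computes (an assumed illustration;
// the vector form uses XMM bit tricks plus a subtract/shuffle rather than
// two scalar converts): split into 32-bit halves and recombine.
#include <stdint.h>

static double u64_to_double(uint64_t v) {
  double lo = (double)(uint32_t)v;           // low 32 bits, exact in double
  double hi = (double)(uint32_t)(v >> 32);   // high 32 bits, exact in double
  return hi * 4294967296.0 + lo;             // hi * 2^32 + lo, one rounding
}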
19555 /// 32-bit unsigned integer to float expansion.
19559 unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0; in LowerUINT_TO_FP_i32()
19564 // Load the 32-bit value into an XMM register. in LowerUINT_TO_FP_i32()
19581 if (Op.getNode()->isStrictFPOpcode()) { in LowerUINT_TO_FP_i32()
19583 // TODO: Are there any fast-math-flags to propagate here? in LowerUINT_TO_FP_i32()
19599 // TODO: Are there any fast-math-flags to propagate here? in LowerUINT_TO_FP_i32()
19612 bool IsStrict = Op->isStrictFPOpcode(); in lowerUINT_TO_FP_v2i32()
19625 SDValue Res = DAG.getNode(Op->getOpcode(), DL, {MVT::v4f64, MVT::Other}, in lowerUINT_TO_FP_v2i32()
19644 // since double has 52-bits of mantissa. Then subtract 2^52 in floating in lowerUINT_TO_FP_v2i32()
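// A scalar sketch of the 2^52 bias trick referenced above (assumes IEEE-754
// binary64; the helper name is illustrative): OR the u32 into the mantissa
// of 2^52, then subtract 2^52 in floating point to leave exactly the value.
#include <stdint.h>
#include <string.h>

static double u32_to_double(uint32_t v) {
  uint64_t bits = 0x4330000000000000ULL | v;  // double bit pattern: 2^52 + v
  double d;
  memcpy(&d, &bits, sizeof(d));
  return d - 4503599627370496.0;              // subtract 2^52 -> exactly v
}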
19662 bool IsStrict = Op->isStrictFPOpcode(); in lowerUINT_TO_FP_vXi32()
19663 SDValue V = Op->getOperand(IsStrict ? 1 : 0); in lowerUINT_TO_FP_vXi32()
19669 // With AVX512, but not VLX we need to widen to get a 512-bit result type. in lowerUINT_TO_FP_vXi32()
19671 MVT VT = Op->getSimpleValueType(0); in lowerUINT_TO_FP_vXi32() local
19673 // v8i32->v8f64 is legal with AVX512 so just return it. in lowerUINT_TO_FP_vXi32()
19674 if (VT == MVT::v8f64) in lowerUINT_TO_FP_vXi32()
19677 assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) && in lowerUINT_TO_FP_vXi32()
19678 "Unexpected VT!"); in lowerUINT_TO_FP_vXi32()
19679 MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32; in lowerUINT_TO_FP_vXi32()
19680 MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32; in lowerUINT_TO_FP_vXi32()
19690 {Op->getOperand(0), V}); in lowerUINT_TO_FP_vXi32()
19696 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, in lowerUINT_TO_FP_vXi32()
19705 Op->getSimpleValueType(0) == MVT::v4f64) { in lowerUINT_TO_FP_vXi32()
19738 // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); in lowerUINT_TO_FP_vXi32()
19745 if (VecFloatVT != Op->getSimpleValueType(0)) in lowerUINT_TO_FP_vXi32()
19749 // - The vector of constants: in lowerUINT_TO_FP_vXi32()
19750 // -- 0x4b000000 in lowerUINT_TO_FP_vXi32()
19751 // -- 0x53000000 in lowerUINT_TO_FP_vXi32()
19752 // - A shift: in lowerUINT_TO_FP_vXi32()
19753 // -- v >> 16 in lowerUINT_TO_FP_vXi32()
19764 SDValue Low, High; in lowerUINT_TO_FP_vXi32() local
19770 // Low will be bitcasted right away, so do not bother bitcasting back to its in lowerUINT_TO_FP_vXi32()
19772 Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, in lowerUINT_TO_FP_vXi32()
19786 Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow); in lowerUINT_TO_FP_vXi32()
19796 // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); in lowerUINT_TO_FP_vXi32()
19798 // constant, we avoid reassociation in MachineCombiner when unsafe-fp-math is in lowerUINT_TO_FP_vXi32()
19801 // TODO: Are there any fast-math-flags to propagate here? in lowerUINT_TO_FP_vXi32()
19803 SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); in lowerUINT_TO_FP_vXi32()
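// Scalar analogue of the algorithm in the comments above (assumes IEEE-754
// binary32; 0x4b000000 is the bit pattern of 2^23 and 0x53000000 of 2^39):
#include <stdint.h>
#include <string.h>

static float u32_to_float(uint32_t v) {
  uint32_t lo = (v & 0xffffu) | 0x4b000000u;  // float bits: 2^23 + low half
  uint32_t hi = (v >> 16)     | 0x53000000u;  // float bits: 2^39 + high*2^16
  float flo, fhi;
  memcpy(&flo, &lo, sizeof(flo));
  memcpy(&fhi, &hi, sizeof(fhi));
  // Fold both biases into one subtraction; the final add does the rounding.
  return flo + (fhi - (0x1.0p39f + 0x1.0p23f));
}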
19819 unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0; in lowerUINT_TO_FP_vec()
19839 bool IsStrict = Op->isStrictFPOpcode(); in LowerUINT_TO_FP()
19845 MVT DstVT = Op->getSimpleValueType(0); in LowerUINT_TO_FP()
19869 // using VCVTUSI2SS/SD. Same for i64 in 64-bit mode. in LowerUINT_TO_FP()
19873 // Promote i32 to i64 and use a signed conversion on 64-bit targets. in LowerUINT_TO_FP()
19887 // The transform for i64->f64 isn't correct for 0 when rounding to negative in LowerUINT_TO_FP()
19888 // infinity. It produces -0.0, so disable under strictfp. in LowerUINT_TO_FP()
19892 // The transform for i32->f64/f32 isn't correct for 0 when rounding to in LowerUINT_TO_FP()
19901 // Make a 64-bit buffer, and use it to build an FILD. in LowerUINT_TO_FP()
19903 int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex(); in LowerUINT_TO_FP()
19924 // Bitcasting to f64 here allows us to do a single 64-bit store from in LowerUINT_TO_FP()
19926 // with two 32-bit stores. in LowerUINT_TO_FP()
19950 Align CPAlignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlign(); in LowerUINT_TO_FP()
19965 // TODO: Are there any fast-math-flags to propagate here? in LowerUINT_TO_FP()
19999 bool IsStrict = Op->isStrictFPOpcode(); in FP_TO_INTHelper()
20015 // used for the 32-bit subtarget, but also for f80 on a 64-bit target. in FP_TO_INTHelper()
20021 // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST. in FP_TO_INTHelper()
20022 // The low 32 bits of the fist result will have the correct uint32 result. in FP_TO_INTHelper()
20031 // We lower FP->int64 into FISTP64 followed by a load from a temporary in FP_TO_INTHelper()
20052 // FistSrc = (Value - FltOfs); in FP_TO_INTHelper()
20053 // Fist-to-mem64 FistSrc in FP_TO_INTHelper()
20054 // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent in FP_TO_INTHelper()
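// Scalar sketch of the offset scheme spelled out above (assumed helper
// name): subtract 2^63 when the value is too large for a signed convert,
// then add the same amount back to the 64-bit integer result.
#include <stdint.h>

static uint64_t double_to_u64(double x) {
  const double Threshold = 9223372036854775808.0;   // 2^63
  double   FltOfs = (x < Threshold) ? 0.0 : Threshold;
  uint64_t IntOfs = (x < Threshold) ? 0   : 0x8000000000000000ULL;
  return (uint64_t)(int64_t)(x - FltOfs) + IntOfs;  // signed convert + fixup
}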
20116 // FIXME This causes a redundant load/store if the SSE-class value is already in FP_TO_INTHelper()
20152 MVT VT = Op.getSimpleValueType(); in LowerAVXExtend() local
20157 assert(VT.isVector() && InVT.isVector() && "Expected vector type"); in LowerAVXExtend()
20160 assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && in LowerAVXExtend()
20162 assert((VT.getVectorElementType() == MVT::i16 || in LowerAVXExtend()
20163 VT.getVectorElementType() == MVT::i32 || in LowerAVXExtend()
20164 VT.getVectorElementType() == MVT::i64) && in LowerAVXExtend()
20173 if (VT == MVT::v32i16 && !Subtarget.hasBWI()) { in LowerAVXExtend()
20174 assert(InVT == MVT::v32i8 && "Unexpected VT!"); in LowerAVXExtend()
20183 // v8i16 -> v8i32 in LowerAVXExtend()
20184 // Use vpmovzwd for 4 lower elements v8i16 -> v4i32. in LowerAVXExtend()
20185 // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. in LowerAVXExtend()
20188 // v4i32 -> v4i64 in LowerAVXExtend()
20189 // Use vpmovzdq for 4 lower elements v4i32 -> v2i64. in LowerAVXExtend()
20190 // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. in LowerAVXExtend()
20193 MVT HalfVT = VT.getHalfNumVectorElementsVT(); in LowerAVXExtend()
20196 // Short-circuit if we can determine that each 128-bit half is the same value. in LowerAVXExtend()
20199 if (hasIdenticalHalvesShuffleMask(Shuf->getMask())) in LowerAVXExtend()
20200 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpLo); in LowerAVXExtend()
20208 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); in LowerAVXExtend()
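// An intrinsics sketch of the v8i16 -> v8i32 split described above (assumes
// SSE4.1/AVX and is not the exact DAG sequence): zero-extend the low half
// with PMOVZXWD, interleave the high half with zeros, then concatenate.
#include <immintrin.h>

static __m256i zext_v8i16_to_v8i32(__m128i v) {
  __m128i lo = _mm_cvtepu16_epi32(v);                       // vpmovzxwd
  __m128i hi = _mm_unpackhi_epi16(v, _mm_setzero_si128());  // vpunpckhwd
  return _mm256_set_m128i(hi, lo);                          // concat halves
}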
20212 static SDValue SplitAndExtendv16i1(unsigned ExtOpc, MVT VT, SDValue In, in SplitAndExtendv16i1() argument
20214 assert((VT == MVT::v16i8 || VT == MVT::v16i16) && "Unexpected VT."); in SplitAndExtendv16i1()
20222 return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); in SplitAndExtendv16i1()
20228 MVT VT = Op->getSimpleValueType(0); in LowerZERO_EXTEND_Mask() local
20229 SDValue In = Op->getOperand(0); in LowerZERO_EXTEND_Mask()
20232 unsigned NumElts = VT.getVectorNumElements(); in LowerZERO_EXTEND_Mask()
20236 if (VT.getVectorElementType() != MVT::i8) { in LowerZERO_EXTEND_Mask()
20237 SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In); in LowerZERO_EXTEND_Mask()
20238 return DAG.getNode(ISD::SRL, DL, VT, Extend, in LowerZERO_EXTEND_Mask()
20239 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); in LowerZERO_EXTEND_Mask()
20242 // Extend VT if BWI is not supported. in LowerZERO_EXTEND_Mask()
20243 MVT ExtVT = VT; in LowerZERO_EXTEND_Mask()
20247 return SplitAndExtendv16i1(ISD::ZERO_EXTEND, VT, In, DL, DAG); in LowerZERO_EXTEND_Mask()
20252 // Widen to 512-bits if VLX is not supported. in LowerZERO_EXTEND_Mask()
20269 if (VT != ExtVT) { in LowerZERO_EXTEND_Mask()
20274 // Extract back to 128/256-bit if we widened. in LowerZERO_EXTEND_Mask()
20275 if (WideVT != VT) in LowerZERO_EXTEND_Mask()
20276 SelectedVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SelectedVal, in LowerZERO_EXTEND_Mask()
20298 /// AVX2 (Int256) sub-targets require extra shuffling as the PACK*S operates
20299 /// within each 128-bit lane.
20305 assert(DstVT.isVector() && "VT not a vector?"); in truncateVectorWithPACK()
20331 // vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB. in truncateVectorWithPACK()
20339 // Sub-128-bit truncation - widen to 128-bit src and pack in the lower half. in truncateVectorWithPACK()
20340 // On pre-AVX512, pack the src in both halves to help value tracking. in truncateVectorWithPACK()
20369 // 256bit -> 128bit truncate - PACK lower/upper 128-bit subvectors. in truncateVectorWithPACK()
20377 // AVX2: 512bit -> 256bit truncate - PACK lower/upper 256-bit subvectors. in truncateVectorWithPACK()
20378 // AVX2: 512bit -> 128bit truncate - PACK(PACK, PACK). in truncateVectorWithPACK()
20384 // 256-bit PACK(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)), in truncateVectorWithPACK()
20395 // If 512bit -> 128bit truncate another stage. in truncateVectorWithPACK()
20401 assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater"); in truncateVectorWithPACK()
20404 // Avoid CONCAT_VECTORS on sub-128bit nodes as these can fail after in truncateVectorWithPACK()
20419 /// e.g. trunc <8 x i32> X to <8 x i16> -->
20464 // Truncation from 128-bit to vXi32 can be better handled with PSHUFD. in matchTruncateWithPACK()
20465 // Truncation to sub-64-bit vXi16 can be better handled with PSHUFD/PSHUFLW. in matchTruncateWithPACK()
20472 // Prefer to lower v4i64 -> v4i32 as a shuffle unless we can cheaply in matchTruncateWithPACK()
20489 // Pre-SSE41 we can only use PACKUSWB. in matchTruncateWithPACK()
20491 if ((NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) { in matchTruncateWithPACK()
20496 // Truncate with PACKSS if we are truncating a vector with sign-bits in matchTruncateWithPACK()
20501 // Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with in matchTruncateWithPACK()
20509 unsigned MinSignBits = NumSrcEltBits - NumPackedSignBits; in matchTruncateWithPACK()
20518 if (In.getOpcode() == ISD::SRL && In->hasOneUse()) in matchTruncateWithPACK()
20522 return DAG.getNode(ISD::SRA, DL, SrcVT, In->ops()); in matchTruncateWithPACK()
20529 /// This function lowers a vector truncation of 'extended sign-bits' or
20530 /// 'extended zero-bits' values.
20598 // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS in LowerTruncateVecPack()
20599 // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to in LowerTruncateVecPack()
20607 // Special case vXi64 -> vXi16, shuffle to vXi32 and then use PACKSS. in LowerTruncateVecPack()
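// A small intrinsics sketch of the PACK-based truncation (assumes SSE4.1
// and inputs whose upper 16 bits are already known zero, as the zero-bits
// check above requires): two v4i32 halves pack into one v8i16.
#include <immintrin.h>

static __m128i trunc_2xv4i32_to_v8i16(__m128i lo, __m128i hi) {
  return _mm_packus_epi32(lo, hi);  // PACKUSDW: exact when inputs fit in u16
}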
20620 MVT VT = Op.getSimpleValueType(); in LowerTruncateVecI1() local
20623 assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type."); in LowerTruncateVecI1()
20626 unsigned ShiftInx = InVT.getScalarSizeInBits() - 1; in LowerTruncateVecI1()
20639 return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), in LowerTruncateVecI1()
20650 // trying to avoid 512-bit vectors. If we are avoiding 512-bit vectors in LowerTruncateVecI1()
20654 // directly, so we need to shuffle high elements to low and use in LowerTruncateVecI1()
20662 {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}); in LowerTruncateVecI1()
20665 assert(InVT == MVT::v16i16 && "Unexpected VT!"); in LowerTruncateVecI1()
20673 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); in LowerTruncateVecI1()
20675 // We either have 8 elements or we're allowed to use 512-bit vectors. in LowerTruncateVecI1()
20682 ShiftInx = InVT.getScalarSizeInBits() - 1; in LowerTruncateVecI1()
20692 return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT); in LowerTruncateVecI1()
20693 return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE); in LowerTruncateVecI1()
20698 MVT VT = Op.getSimpleValueType(); in LowerTRUNCATE() local
20701 assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && in LowerTRUNCATE()
20706 if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(InVT)) { in LowerTRUNCATE()
20708 VT.is128BitVector() && Subtarget.hasAVX512()) { in LowerTRUNCATE()
20712 // truncate the remainder. We'd rather produce two 64-bit results and in LowerTRUNCATE()
20718 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); in LowerTRUNCATE()
20722 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); in LowerTRUNCATE()
20725 // Pre-AVX512 (or prefer-256bit) see if we can make use of PACKSS/PACKUS. in LowerTRUNCATE()
20727 (InVT.is512BitVector() && VT.is256BitVector())) in LowerTRUNCATE()
20729 LowerTruncateVecPackWithSignBits(VT, In, DL, Subtarget, DAG)) in LowerTRUNCATE()
20732 // Pre-AVX512 see if we can make use of PACKSS/PACKUS. in LowerTRUNCATE()
20734 return LowerTruncateVecPack(VT, In, DL, Subtarget, DAG); in LowerTRUNCATE()
20740 if (VT.getVectorElementType() == MVT::i1) in LowerTRUNCATE()
20747 LowerTruncateVecPackWithSignBits(VT, In, DL, Subtarget, DAG)) in LowerTRUNCATE()
20753 assert(VT == MVT::v32i8 && "Unexpected VT!"); in LowerTRUNCATE()
20759 // asked to avoid 512-bit vectors. The actual promotion to v16i32 will be in LowerTRUNCATE()
20767 assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!"); in LowerTRUNCATE()
20769 if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { in LowerTRUNCATE()
20770 // On AVX2, v4i64 -> v4i32 becomes VPERMD. in LowerTRUNCATE()
20772 static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; in LowerTRUNCATE()
20775 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, in LowerTRUNCATE()
20784 return DAG.getVectorShuffle(VT, DL, DAG.getBitcast(MVT::v4i32, OpLo), in LowerTRUNCATE()
20788 if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) { in LowerTRUNCATE()
20789 // On AVX2, v8i32 -> v8i16 becomes PSHUFB. in LowerTRUNCATE()
20793 -1, -1, -1, -1, -1, -1, -1, -1, in LowerTRUNCATE()
20795 -1, -1, -1, -1, -1, -1, -1, -1 }; in LowerTRUNCATE()
20800 static const int ShufMask2[] = {0, 2, -1, -1}; in LowerTRUNCATE()
20808 ? truncateVectorWithPACKUS(VT, In, DL, Subtarget, DAG) in LowerTRUNCATE()
20809 : truncateVectorWithPACKSS(VT, In, DL, Subtarget, DAG); in LowerTRUNCATE()
20812 if (VT == MVT::v16i8 && InVT == MVT::v16i16) in LowerTRUNCATE()
20813 return truncateVectorWithPACKUS(VT, In, DL, Subtarget, DAG); in LowerTRUNCATE()
20815 llvm_unreachable("All 256->128 cases should have been handled above!"); in LowerTRUNCATE()
20820 static SDValue expandFP_TO_UINT_SSE(MVT VT, SDValue Src, const SDLoc &dl, in expandFP_TO_UINT_SSE() argument
20824 unsigned DstBits = VT.getScalarSizeInBits(); in expandFP_TO_UINT_SSE()
20825 assert(DstBits == 32 && "expandFP_TO_UINT_SSE - only vXi32 supported"); in expandFP_TO_UINT_SSE()
20828 // 2^31-1 ("Small") and from 2^31 to 2^32-1 ("Big"). in expandFP_TO_UINT_SSE()
20829 SDValue Small = DAG.getNode(X86ISD::CVTTP2SI, dl, VT, Src); in expandFP_TO_UINT_SSE()
20831 DAG.getNode(X86ISD::CVTTP2SI, dl, VT, in expandFP_TO_UINT_SSE()
20842 // AVX1 can't use the signsplat masking for 256-bit vectors - we have to in expandFP_TO_UINT_SSE()
20844 if (VT == MVT::v8i32 && !Subtarget.hasAVX2()) { in expandFP_TO_UINT_SSE()
20845 SDValue Overflow = DAG.getNode(ISD::OR, dl, VT, Small, Big); in expandFP_TO_UINT_SSE()
20846 return DAG.getNode(X86ISD::BLENDV, dl, VT, Small, Overflow, Small); in expandFP_TO_UINT_SSE()
20850 DAG.getNode(X86ISD::VSRAI, dl, VT, Small, in expandFP_TO_UINT_SSE()
20851 DAG.getTargetConstant(DstBits - 1, dl, MVT::i8)); in expandFP_TO_UINT_SSE()
20852 return DAG.getNode(ISD::OR, dl, VT, Small, in expandFP_TO_UINT_SSE()
20853 DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown)); in expandFP_TO_UINT_SSE()
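// Scalar analogue of the Small/Big split above (assumed helper; the vector
// code blends both results with a sign-splat mask instead of branching):
#include <stdint.h>

static uint32_t float_to_u32(float x) {
  if (x < 2147483648.0f)                              // "Small": fits in i32
    return (uint32_t)(int32_t)x;
  // "Big": rebase below 2^31, convert signed, then add 2^31 back.
  return (uint32_t)(int32_t)(x - 2147483648.0f) + 0x80000000u;
}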
20857 bool IsStrict = Op->isStrictFPOpcode(); in LowerFP_TO_INT()
20860 MVT VT = Op->getSimpleValueType(0); in LowerFP_TO_INT() local
20862 SDValue Chain = IsStrict ? Op->getOperand(0) : SDValue(); in LowerFP_TO_INT()
20868 MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32; in LowerFP_TO_INT()
20870 return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other}, in LowerFP_TO_INT()
20873 return DAG.getNode(Op.getOpcode(), dl, VT, in LowerFP_TO_INT()
20875 } else if (isTypeLegal(SrcVT) && isLegalConversion(VT, IsSigned, Subtarget)) { in LowerFP_TO_INT()
20879 if (VT.isVector()) { in LowerFP_TO_INT()
20880 if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) { in LowerFP_TO_INT()
20891 // Widen to 512-bits. in LowerFP_TO_INT()
20897 // TODO: Should we just do this for non-strict as well? in LowerFP_TO_INT()
20919 if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16) in LowerFP_TO_INT()
20922 MVT ResVT = VT; in LowerFP_TO_INT()
20923 MVT EleVT = VT.getVectorElementType(); in LowerFP_TO_INT()
20951 if (ResVT != VT) in LowerFP_TO_INT()
20952 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, in LowerFP_TO_INT()
20960 // v8f32/v16f32/v8f64->v8i16/v16i16 need to widen first. in LowerFP_TO_INT()
20961 if (VT.getVectorElementType() == MVT::i16) { in LowerFP_TO_INT()
20965 MVT NVT = VT.changeVectorElementType(MVT::i32); in LowerFP_TO_INT()
20977 Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); in LowerFP_TO_INT()
20984 // v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32. in LowerFP_TO_INT()
20985 if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) { in LowerFP_TO_INT()
20991 // Widen vXi32 fp_to_uint with avx512f to 512-bit source. in LowerFP_TO_INT()
20992 if ((VT == MVT::v4i32 || VT == MVT::v8i32) && in LowerFP_TO_INT()
21001 // TODO: Should we just do this for non-strict as well? in LowerFP_TO_INT()
21015 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, in LowerFP_TO_INT()
21023 // Widen vXi64 fp_to_uint/fp_to_sint with avx512dq to 512-bit source. in LowerFP_TO_INT()
21024 if ((VT == MVT::v2i64 || VT == MVT::v4i64) && in LowerFP_TO_INT()
21031 // TODO: Should we just do this for non-strict as well? in LowerFP_TO_INT()
21045 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, in LowerFP_TO_INT()
21053 if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) { in LowerFP_TO_INT()
21055 // Non-strict nodes without VLX can be widened to v4f32->v4i64 by type in LowerFP_TO_INT()
21077 return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp}); in LowerFP_TO_INT()
21080 return DAG.getNode(Opc, dl, VT, Tmp); in LowerFP_TO_INT()
21085 if ((VT == MVT::v4i32 && SrcVT == MVT::v4f32) || in LowerFP_TO_INT()
21086 (VT == MVT::v4i32 && SrcVT == MVT::v4f64) || in LowerFP_TO_INT()
21087 (VT == MVT::v8i32 && SrcVT == MVT::v8f32)) { in LowerFP_TO_INT()
21089 return expandFP_TO_UINT_SSE(VT, Src, dl, DAG, Subtarget); in LowerFP_TO_INT()
21095 assert(!VT.isVector()); in LowerFP_TO_INT()
21106 if (!IsStrict && ((VT == MVT::i32 && !Subtarget.is64Bit()) || in LowerFP_TO_INT()
21107 (VT == MVT::i64 && Subtarget.is64Bit()))) { in LowerFP_TO_INT()
21108 unsigned DstBits = VT.getScalarSizeInBits(); in LowerFP_TO_INT()
21111 DAG.getConstant(UIntLimit, dl, VT)); in LowerFP_TO_INT()
21115 // (i32) 0 to 2^31-1 ("Small") and from 2^31 to 2^32-1 ("Big"). in LowerFP_TO_INT()
21116 // (i64) 0 to 2^63-1 ("Small") and from 2^63 to 2^64-1 ("Big"). in LowerFP_TO_INT()
21118 DAG.getNode(X86ISD::CVTTS2SI, dl, VT, in LowerFP_TO_INT()
21121 X86ISD::CVTTS2SI, dl, VT, in LowerFP_TO_INT()
21132 ISD::SRA, dl, VT, Small, DAG.getConstant(DstBits - 1, dl, MVT::i8)); in LowerFP_TO_INT()
21133 return DAG.getNode(ISD::OR, dl, VT, Small, in LowerFP_TO_INT()
21134 DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown)); in LowerFP_TO_INT()
21138 if (VT == MVT::i64) in LowerFP_TO_INT()
21141 assert(VT == MVT::i32 && "Unexpected VT!"); in LowerFP_TO_INT()
21143 // Promote i32 to i64 and use a signed operation on 64-bit targets. in LowerFP_TO_INT()
21154 Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); in LowerFP_TO_INT()
21169 if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) { in LowerFP_TO_INT()
21178 Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); in LowerFP_TO_INT()
21192 LC = RTLIB::getFPTOSINT(SrcVT, VT); in LowerFP_TO_INT()
21194 LC = RTLIB::getFPTOUINT(SrcVT, VT); in LowerFP_TO_INT()
21198 makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain); in LowerFP_TO_INT()
21237 EVT DstVT = N->getValueType(0); in LRINT_LLRINTHelper()
21238 SDValue Src = N->getOperand(0); in LRINT_LLRINTHelper()
21256 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); in LRINT_LLRINTHelper()
21285 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT; in LowerFP_TO_INT_SAT()
21288 SDValue Src = Node->getOperand(0); in LowerFP_TO_INT_SAT()
21295 EVT DstVT = Node->getValueType(0); in LowerFP_TO_INT_SAT()
21303 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); in LowerFP_TO_INT_SAT()
21316 // Promote conversions to unsigned 32-bit to 64-bit, because it will allow in LowerFP_TO_INT_SAT()
21329 // floating-point values. in LowerFP_TO_INT_SAT()
21431 bool IsStrict = Op->isStrictFPOpcode(); in LowerFP_EXTEND()
21434 MVT VT = Op.getSimpleValueType(); in LowerFP_EXTEND() local
21439 // Let f16->f80 get lowered to a libcall, except for darwin, where we should in LowerFP_EXTEND()
21441 if (VT == MVT::f128 || (SVT == MVT::f16 && VT == MVT::f80 && in LowerFP_EXTEND()
21453 if (VT != MVT::f32) { in LowerFP_EXTEND()
21456 ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, in LowerFP_EXTEND()
21460 return DAG.getNode(ISD::FP_EXTEND, DL, VT, in LowerFP_EXTEND()
21468 assert(VT == MVT::f32 && SVT == MVT::f16 && "unexpected extend libcall"); in LowerFP_EXTEND()
21470 // Need a libcall, but ABI for f16 is soft-float on MacOS. in LowerFP_EXTEND()
21487 CallingConv::C, EVT(VT).getTypeForEVT(*DAG.getContext()), Callee, in LowerFP_EXTEND()
21531 return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other}, in LowerFP_EXTEND()
21532 {Op->getOperand(0), Res}); in LowerFP_EXTEND()
21533 return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res); in LowerFP_EXTEND()
21534 } else if (VT == MVT::v4f64 || VT == MVT::v8f64) { in LowerFP_EXTEND()
21543 return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other}, in LowerFP_EXTEND()
21544 {Op->getOperand(0), Res}); in LowerFP_EXTEND()
21545 return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res); in LowerFP_EXTEND()
21549 bool IsStrict = Op->isStrictFPOpcode(); in LowerFP_ROUND()
21554 MVT VT = Op.getSimpleValueType(); in LowerFP_ROUND() local
21557 if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80)) in LowerFP_ROUND()
21560 if (VT == MVT::f16 && (SVT == MVT::f64 || SVT == MVT::f32) && in LowerFP_ROUND()
21596 if (VT.getScalarType() == MVT::bf16) { in LowerFP_ROUND()
21604 if (VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) { in LowerFP_ROUND()
21608 if (VT.isVector()) in LowerFP_ROUND()
21622 // FIXME: Should we use zeros for upper elements for non-strict? in LowerFP_ROUND()
21641 bool IsStrict = Op->isStrictFPOpcode(); in LowerFP16_TO_FP()
21644 "Unexpected VT!"); in LowerFP16_TO_FP()
21670 bool IsStrict = Op->isStrictFPOpcode(); in LowerFP_TO_FP16()
21673 "Unexpected VT!"); in LowerFP_TO_FP16()
21686 // FIXME: Should we use zeros for upper elements for non-strict? in LowerFP_TO_FP16()
21754 // clang-format off in lowerAddSubToHorizontalOp()
21761 // clang-format on in lowerAddSubToHorizontalOp()
21780 // Creating a 256-bit horizontal op would be wasteful, and there is no 512-bit in lowerAddSubToHorizontalOp()
21781 // equivalent, so extract the 256/512-bit source op to 128-bit if we can. in lowerAddSubToHorizontalOp()
21788 // add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0 in lowerAddSubToHorizontalOp()
21789 // add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0 in lowerAddSubToHorizontalOp()
21790 // add (extractelt (X, 2), extractelt (X, 3)) --> extractelt (hadd X, X), 1 in lowerAddSubToHorizontalOp()
21791 // sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0 in lowerAddSubToHorizontalOp()
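// An intrinsics-level sketch of the first pattern above (assumes SSE3 and
// <immintrin.h>; the helper name is illustrative only): the sum of the two
// low elements falls out of lane 0 of HADDPS.
#include <immintrin.h>
float SketchSumLowPair(__m128 X) {
  return _mm_cvtss_f32(_mm_hadd_ps(X, X)); // X[0] + X[1]
}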
21812 MVT VT = Op.getSimpleValueType(); in LowerFROUND() local
21815 const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT); in LowerFROUND()
21821 SDValue Adder = DAG.getNode(ISD::FCOPYSIGN, dl, VT, in LowerFROUND()
21822 DAG.getConstantFP(Point5Pred, dl, VT), N0); in LowerFROUND()
21823 N0 = DAG.getNode(ISD::FADD, dl, VT, N0, Adder); in LowerFROUND()
21826 return DAG.getNode(ISD::FTRUNC, dl, VT, N0); in LowerFROUND()
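// A scalar sketch of the same lowering (helper name illustrative only):
// round half away from zero by adding just-under-0.5 with the operand's
// sign, then truncating. Using the float immediately below 0.5 avoids
// inputs such as 0.49999997f rounding up to 1.0 after the addition.
#include <cmath>
float SketchFRound(float X) {
  float Point5Pred = std::nextafter(0.5f, 0.0f);   // largest float < 0.5
  float Adder = std::copysign(Point5Pred, X);
  return std::trunc(X + Adder);
}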
21840 for (SDNode *User : Op->uses()) in LowerFABSorFNEG()
21841 if (User->getOpcode() == ISD::FNEG) in LowerFABSorFNEG()
21845 MVT VT = Op.getSimpleValueType(); in LowerFABSorFNEG() local
21847 bool IsF128 = (VT == MVT::f128); in LowerFABSorFNEG()
21848 assert(VT.isFloatingPoint() && VT != MVT::f80 && in LowerFABSorFNEG()
21849 DAG.getTargetLoweringInfo().isTypeLegal(VT) && in LowerFABSorFNEG()
21853 // decide if we should generate a 16-byte constant mask when we only need 4 or in LowerFABSorFNEG()
21857 // generate a 16-byte vector constant and logic op even for the scalar case. in LowerFABSorFNEG()
21858 // Using a 16-byte mask allows folding the load of the mask with in LowerFABSorFNEG()
21860 bool IsFakeVector = !VT.isVector() && !IsF128; in LowerFABSorFNEG()
21861 MVT LogicVT = VT; in LowerFABSorFNEG()
21863 LogicVT = (VT == MVT::f64) ? MVT::v2f64 in LowerFABSorFNEG()
21864 : (VT == MVT::f32) ? MVT::v4f32 in LowerFABSorFNEG()
21867 unsigned EltBits = VT.getScalarSizeInBits(); in LowerFABSorFNEG()
21871 const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT); in LowerFABSorFNEG()
21881 if (VT.isVector() || IsF128) in LowerFABSorFNEG()
21884 // For the scalar case, extend to a 128-bit vector, perform the logic op, in LowerFABSorFNEG()
21888 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode, in LowerFABSorFNEG()
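// An intrinsics-level sketch of the 16-byte mask trick above (assumes SSE2
// and <immintrin.h>; helper names are illustrative only): fabs clears the
// sign bit with ANDPS and fneg flips it with XORPS, even for a scalar float
// that merely sits in the low lane of an XMM register.
#include <immintrin.h>
__m128 SketchFAbsPS(__m128 X) {
  return _mm_and_ps(X, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)));
}
__m128 SketchFNegPS(__m128 X) {
  return _mm_xor_ps(X, _mm_castsi128_ps(_mm_set1_epi32((int)0x80000000u)));
}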
21898 MVT VT = Op.getSimpleValueType(); in LowerFCOPYSIGN() local
21899 if (Sign.getSimpleValueType().bitsLT(VT)) in LowerFCOPYSIGN()
21900 Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign); in LowerFCOPYSIGN()
21903 if (Sign.getSimpleValueType().bitsGT(VT)) in LowerFCOPYSIGN()
21904 Sign = DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, in LowerFCOPYSIGN()
21909 bool IsF128 = (VT == MVT::f128); in LowerFCOPYSIGN()
21910 assert(VT.isFloatingPoint() && VT != MVT::f80 && in LowerFCOPYSIGN()
21911 DAG.getTargetLoweringInfo().isTypeLegal(VT) && in LowerFCOPYSIGN()
21914 const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT); in LowerFCOPYSIGN()
21916 // Perform all scalar logic operations as 16-byte vectors because there are no in LowerFCOPYSIGN()
21921 bool IsFakeVector = !VT.isVector() && !IsF128; in LowerFCOPYSIGN()
21922 MVT LogicVT = VT; in LowerFCOPYSIGN()
21924 LogicVT = (VT == MVT::f64) ? MVT::v2f64 in LowerFCOPYSIGN()
21925 : (VT == MVT::f32) ? MVT::v4f32 in LowerFCOPYSIGN()
21929 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in LowerFCOPYSIGN()
21945 APFloat APF = Op0CN->getValueAPF(); in LowerFCOPYSIGN()
21957 return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or, in LowerFCOPYSIGN()
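// An intrinsics-level sketch of the mask-based copysign above (assumes SSE2
// and <immintrin.h>; helper name illustrative only):
// result = (Mag & ~SignMask) | (Sign & SignMask).
#include <immintrin.h>
__m128 SketchCopySignPS(__m128 Mag, __m128 Sign) {
  __m128 SignMask = _mm_castsi128_ps(_mm_set1_epi32((int)0x80000000u));
  return _mm_or_ps(_mm_andnot_ps(SignMask, Mag),   // magnitude bits only
                   _mm_and_ps(SignMask, Sign));    // sign bit only
}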
21964 MVT VT = Op.getSimpleValueType(); in LowerFGETSIGN() local
21974 Res = DAG.getZExtOrTrunc(Res, dl, VT); in LowerFGETSIGN()
21975 Res = DAG.getNode(ISD::AND, dl, VT, Res, DAG.getConstant(1, dl, VT)); in LowerFGETSIGN()
21982 // instruction. Since the shift amount is in-range-or-undefined, we know in getBT()
21993 // See if we can use the 32-bit instruction instead of the 64-bit one for a in getBT()
22007 if (BitNo.getOpcode() == ISD::AND && BitNo->hasOneUse()) in getBT()
22066 /// Try to map a 128-bit or larger integer comparison to vector instructions
22068 static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, in combineVectorSizedSetCCEquality() argument
22083 // logically-combined vector-sized operands compared to zero. This pattern may in combineVectorSizedSetCCEquality()
22100 // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands. in combineVectorSizedSetCCEquality()
22101 // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands. in combineVectorSizedSetCCEquality()
22142 auto ScalarToVector = [&](SDValue X) -> SDValue { in combineVectorSizedSetCCEquality()
22170 // This is a bitwise-combined equality comparison of 2 pairs of vectors: in combineVectorSizedSetCCEquality()
22191 return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp), in combineVectorSizedSetCCEquality()
22200 return DAG.getNode(ISD::TRUNCATE, DL, VT, X86SetCC.getValue(0)); in combineVectorSizedSetCCEquality()
22203 // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq in combineVectorSizedSetCCEquality()
22204 // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne in combineVectorSizedSetCCEquality()
22206 "Non 128-bit vector on pre-SSE41 target"); in combineVectorSizedSetCCEquality()
22209 return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC); in combineVectorSizedSetCCEquality()
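// An intrinsics-level sketch of the PCMPEQB+PMOVMSKB pattern above (assumes
// SSE2 and <immintrin.h>; helper name illustrative only): a 16-byte equality
// test with no scalar compares or branches on the data itself.
#include <immintrin.h>
bool SketchEqual128(__m128i A, __m128i B) {
  __m128i Eq = _mm_cmpeq_epi8(A, B);          // 0xFF per equal byte
  return _mm_movemask_epi8(Eq) == 0xFFFF;     // all 16 bytes matched
}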
22217 /// are supported when the pointer SrcMask is non-null.
22218 /// TODO - move this to SelectionDAG?
22224 EVT VT = MVT::Other; in matchScalarReduction() local
22236 if (I->getOpcode() == unsigned(BinOp)) { in matchScalarReduction()
22237 Opnds.push_back(I->getOperand(0)); in matchScalarReduction()
22238 Opnds.push_back(I->getOperand(1)); in matchScalarReduction()
22239 // Re-evaluate the number of nodes to be traversed. in matchScalarReduction()
22244 // Quit if a non-EXTRACT_VECTOR_ELT in matchScalarReduction()
22245 if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT) in matchScalarReduction()
22249 auto *Idx = dyn_cast<ConstantSDNode>(I->getOperand(1)); in matchScalarReduction()
22253 SDValue Src = I->getOperand(0); in matchScalarReduction()
22256 VT = Src.getValueType(); in matchScalarReduction()
22258 if (!SrcOpMap.empty() && VT != SrcOpMap.begin()->first.getValueType()) in matchScalarReduction()
22260 unsigned NumElts = VT.getVectorNumElements(); in matchScalarReduction()
22267 unsigned CIdx = Idx->getZExtValue(); in matchScalarReduction()
22268 if (M->second[CIdx]) in matchScalarReduction()
22270 M->second.setBit(CIdx); in matchScalarReduction()
22276 SrcMask->push_back(SrcOpMap[SrcOp]); in matchScalarReduction()
22292 EVT VT = LHS.getValueType(); in LowerVectorAllEqual() local
22293 unsigned ScalarSize = VT.getScalarSizeInBits(); in LowerVectorAllEqual()
22299 // Quit if not convertible to legal scalar or 128/256-bit vector. in LowerVectorAllEqual()
22300 if (!llvm::has_single_bit<uint32_t>(VT.getSizeInBits())) in LowerVectorAllEqual()
22303 // FCMP may use ISD::SETNE when nnan - early out if we manage to get here. in LowerVectorAllEqual()
22304 if (VT.isFloatingPoint()) in LowerVectorAllEqual()
22320 // For sub-128-bit vector, cast to (legal) integer and compare with zero. in LowerVectorAllEqual()
22321 if (VT.getSizeInBits() < 128) { in LowerVectorAllEqual()
22322 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); in LowerVectorAllEqual()
22343 // Without PTEST, a masked v2i64 or-reduction is not faster than in LowerVectorAllEqual()
22350 // Split down to 128/256/512-bit vector. in LowerVectorAllEqual()
22358 VT = EVT::getVectorVT(*DAG.getContext(), MVT::i64, VT.getSizeInBits() / 64); in LowerVectorAllEqual()
22359 LHS = DAG.getBitcast(VT, LHS); in LowerVectorAllEqual()
22360 RHS = DAG.getBitcast(VT, RHS); in LowerVectorAllEqual()
22364 if (VT.getSizeInBits() > TestSize) { in LowerVectorAllEqual()
22367 // If ICMP(AND(LHS,MASK),MASK) - reduce using AND splits. in LowerVectorAllEqual()
22368 while (VT.getSizeInBits() > TestSize) { in LowerVectorAllEqual()
22370 VT = Split.first.getValueType(); in LowerVectorAllEqual()
22371 LHS = DAG.getNode(ISD::AND, DL, VT, Split.first, Split.second); in LowerVectorAllEqual()
22373 RHS = DAG.getAllOnesConstant(DL, VT); in LowerVectorAllEqual()
22376 // ALLOF(CMPEQ(X,Y)) -> AND(CMPEQ(X[0],Y[0]),CMPEQ(X[1],Y[1]),....) in LowerVectorAllEqual()
22378 VT = MVT::getVectorVT(SVT, VT.getSizeInBits() / SVT.getSizeInBits()); in LowerVectorAllEqual()
22379 LHS = DAG.getBitcast(VT, MaskBits(LHS)); in LowerVectorAllEqual()
22380 RHS = DAG.getBitcast(VT, MaskBits(RHS)); in LowerVectorAllEqual()
22381 EVT BoolVT = VT.changeVectorElementType(MVT::i1); in LowerVectorAllEqual()
22383 V = DAG.getSExtOrTrunc(V, DL, VT); in LowerVectorAllEqual()
22384 while (VT.getSizeInBits() > TestSize) { in LowerVectorAllEqual()
22386 VT = Split.first.getValueType(); in LowerVectorAllEqual()
22387 V = DAG.getNode(ISD::AND, DL, VT, Split.first, Split.second); in LowerVectorAllEqual()
22389 V = DAG.getNOT(DL, V, VT); in LowerVectorAllEqual()
22395 SDValue V = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS); in LowerVectorAllEqual()
22396 while (VT.getSizeInBits() > TestSize) { in LowerVectorAllEqual()
22398 VT = Split.first.getValueType(); in LowerVectorAllEqual()
22399 V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second); in LowerVectorAllEqual()
22402 RHS = DAG.getConstant(0, DL, VT); in LowerVectorAllEqual()
22406 if (UseKORTEST && VT.is512BitVector()) { in LowerVectorAllEqual()
22407 MVT TestVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32); in LowerVectorAllEqual()
22416 MVT TestVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64); in LowerVectorAllEqual()
22423 assert(VT.getSizeInBits() == 128 && "Failure to split to 128-bits"); in LowerVectorAllEqual()
22434 // Check whether an AND/OR'd reduction tree is PTEST-able, or if we can fall back
22449 if (!Subtarget.hasSSE2() || !Op->hasOneUse()) in MatchVectorAllEqualTest()
22452 // Check whether we're masking/truncating an OR-reduction result, in which in MatchVectorAllEqualTest()
22467 Mask = Cst->getAPIntValue(); in MatchVectorAllEqualTest()
22478 // Match icmp(and(extract(X,0),extract(X,1)),-1) allof reduction patterns. in MatchVectorAllEqualTest()
22481 EVT VT = VecIns[0].getValueType(); in MatchVectorAllEqualTest() local
22483 [VT](SDValue V) { return VT == V.getValueType(); }) && in MatchVectorAllEqualTest()
22486 // Quit if not splittable to scalar/128/256/512-bit vector. in MatchVectorAllEqualTest()
22487 if (!llvm::has_single_bit<uint32_t>(VT.getSizeInBits())) in MatchVectorAllEqualTest()
22492 for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; in MatchVectorAllEqualTest()
22498 VecIns.push_back(DAG.getNode(LogicOp, DL, VT, LHS, RHS)); in MatchVectorAllEqualTest()
22502 CmpNull ? DAG.getConstant(0, DL, VT) in MatchVectorAllEqualTest()
22503 : DAG.getAllOnesConstant(DL, VT), in MatchVectorAllEqualTest()
22508 // Match icmp(reduce_and(X),-1) allof reduction patterns. in MatchVectorAllEqualTest()
22528 // Match icmp(bitcast(icmp_eq(X,Y)),-1) reduction patterns. in MatchVectorAllEqualTest()
22533 ISD::CondCode SrcCC = cast<CondCodeSDNode>(Src.getOperand(2))->get(); in MatchVectorAllEqualTest()
22542 // Match icmp(bitcast(vXi1 trunc(Y)),-1) reduction patterns. in MatchVectorAllEqualTest()
22564 for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE; in hasNonFlagsUse()
22568 if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { in hasNonFlagsUse()
22570 UOpNo = User->use_begin().getOperandNo(); in hasNonFlagsUse()
22571 User = *User->use_begin(); in hasNonFlagsUse()
22574 if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC && in hasNonFlagsUse()
22575 !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) in hasNonFlagsUse()
22581 // Transform to an x86-specific ALU node with flags if there is a chance of
22585 for (SDNode *U : Op->uses()) in isProfitableToUseFlagOp()
22586 if (U->getOpcode() != ISD::CopyToReg && in isProfitableToUseFlagOp()
22587 U->getOpcode() != ISD::SETCC && in isProfitableToUseFlagOp()
22588 U->getOpcode() != ISD::STORE) in isProfitableToUseFlagOp()
22614 switch (Op->getOpcode()) { in EmitTest()
22619 if (Op.getNode()->getFlags().hasNoSignedWrap()) in EmitTest()
22644 // non-casted variable when we check for possible users. in EmitTest()
22660 // Otherwise use a regular EFLAGS-setting instruction. in EmitTest()
22662 // clang-format off in EmitTest()
22669 // clang-format on in EmitTest()
22684 return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0), in EmitTest()
22685 Op->getOperand(1)).getValue(1); in EmitTest()
22697 SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands); in EmitTest()
22715 CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!"); in EmitCmp()
22726 // Don't do this if the immediate can fit in 8 bits. in EmitCmp()
22727 if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) || in EmitCmp()
22728 (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) { in EmitCmp()
22750 // TODO: Add sign-bits equivalent for isX86CCSigned(X86CC)? in EmitCmp()
22760 // 0-x == y --> x+y == 0 in EmitCmp()
22761 // 0-x != y --> x+y != 0 in EmitCmp()
22769 // x == 0-y --> x+y == 0 in EmitCmp()
22770 // x != 0-y --> x+y != 0 in EmitCmp()
22785 EVT VT) const { in isXAndYEqZeroPreferableToXAndYEqY()
22786 return !VT.isVector() || Cond != ISD::CondCode::SETEQ; in isXAndYEqZeroPreferableToXAndYEqY()
22791 if (N->getOpcode() == ISD::FDIV) in optimizeFMulOrFDivAsShiftAddBitcast()
22794 EVT FPVT = N->getValueType(0); in optimizeFMulOrFDivAsShiftAddBitcast()
22797 // This indicates a non-free bitcast. in optimizeFMulOrFDivAsShiftAddBitcast()
22799 // integer vector anyway for the int->fp cast. in optimizeFMulOrFDivAsShiftAddBitcast()
22809 EVT VT = Op.getValueType(); in isFsqrtCheap() local
22812 if (VT.getScalarType() == MVT::f16) in isFsqrtCheap()
22816 if (DAG.doesNodeExist(X86ISD::FRSQRT, DAG.getVTList(VT), Op)) in isFsqrtCheap()
22819 if (VT.isVector()) in isFsqrtCheap()
22824 /// The minimum architected relative accuracy is 2^-12. We need one
22825 /// Newton-Raphson step to have a good float result (24 bits of precision).
22832 EVT VT = Op.getValueType(); in getSqrtEstimate() local
22834 // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps. in getSqrtEstimate()
22835 // It is likely not profitable to do this for f64 because a double-precision in getSqrtEstimate()
22842 if ((VT == MVT::f32 && Subtarget.hasSSE1()) || in getSqrtEstimate()
22843 (VT == MVT::v4f32 && Subtarget.hasSSE1() && Reciprocal) || in getSqrtEstimate()
22844 (VT == MVT::v4f32 && Subtarget.hasSSE2() && !Reciprocal) || in getSqrtEstimate()
22845 (VT == MVT::v8f32 && Subtarget.hasAVX()) || in getSqrtEstimate()
22846 (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) { in getSqrtEstimate()
22851 // There is no 512-bit FRSQRT, but there is RSQRT14. in getSqrtEstimate()
22852 unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT; in getSqrtEstimate()
22853 SDValue Estimate = DAG.getNode(Opcode, DL, VT, Op); in getSqrtEstimate()
22855 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Op, Estimate); in getSqrtEstimate()
22859 if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) && in getSqrtEstimate()
22865 if (VT == MVT::f16) { in getSqrtEstimate()
22873 return DAG.getNode(X86ISD::RSQRT14, DL, VT, Op); in getSqrtEstimate()
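// An intrinsics-level sketch of the refinement described above (assumes SSE1
// and <immintrin.h>; helper name illustrative only): one Newton-Raphson step
// E' = E * (1.5 - 0.5 * X * E * E) on top of the ~12-bit RSQRTPS estimate.
#include <immintrin.h>
__m128 SketchRSqrtNR(__m128 X) {
  __m128 E = _mm_rsqrt_ps(X);
  __m128 HalfXE2 = _mm_mul_ps(_mm_set1_ps(0.5f),
                              _mm_mul_ps(X, _mm_mul_ps(E, E)));
  return _mm_mul_ps(E, _mm_sub_ps(_mm_set1_ps(1.5f), HalfXE2));
}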
22878 /// The minimum architected relative accuracy is 2^-12. We need one
22879 /// Newton-Raphson step to have a good float result (24 bits of precision).
22884 EVT VT = Op.getValueType(); in getRecipEstimate() local
22886 // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps. in getRecipEstimate()
22887 // It is likely not profitable to do this for f64 because a double-precision in getRecipEstimate()
22893 if ((VT == MVT::f32 && Subtarget.hasSSE1()) || in getRecipEstimate()
22894 (VT == MVT::v4f32 && Subtarget.hasSSE1()) || in getRecipEstimate()
22895 (VT == MVT::v8f32 && Subtarget.hasAVX()) || in getRecipEstimate()
22896 (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) { in getRecipEstimate()
22899 // real-world code. These defaults are intended to match GCC behavior. in getRecipEstimate()
22900 if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified) in getRecipEstimate()
22906 // There is no 512-bit FRCP, but there is RCP14. in getRecipEstimate()
22907 unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RCP14 : X86ISD::FRCP; in getRecipEstimate()
22908 return DAG.getNode(Opcode, DL, VT, Op); in getRecipEstimate()
22911 if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) && in getRecipEstimate()
22916 if (VT == MVT::f16) { in getRecipEstimate()
22924 return DAG.getNode(X86ISD::RCP14, DL, VT, Op); in getRecipEstimate()
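// The matching sketch for the reciprocal estimate (assumes SSE1 and
// <immintrin.h>; helper name illustrative only): one Newton-Raphson step
// E' = E * (2 - X * E) on top of the ~12-bit RCPPS estimate.
#include <immintrin.h>
__m128 SketchRecipNR(__m128 X) {
  __m128 E = _mm_rcp_ps(X);
  return _mm_mul_ps(E, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(X, E)));
}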
22944 if (isIntDivCheap(N->getValueType(0), Attr)) in BuildSDIVPow2()
22956 EVT VT = N->getValueType(0); in BuildSDIVPow2() local
22958 if (VT != MVT::i16 && VT != MVT::i32 && in BuildSDIVPow2()
22959 !(Subtarget.is64Bit() && VT == MVT::i64)) in BuildSDIVPow2()
22962 // If the divisor is 2 or -2, the default expansion is better. in BuildSDIVPow2()
22964 Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true)) in BuildSDIVPow2()
22993 if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth) in LowerAndToBT()
23001 uint64_t AndRHSVal = AndRHS->getZExtValue(); in LowerAndToBT()
23039 // Check if pre-AVX condcode can be performed by a single FCMP op.
23044 /// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
23052 // 0 - EQ in translateX86FSETCC()
23053 // 1 - LT in translateX86FSETCC()
23054 // 2 - LE in translateX86FSETCC()
23055 // 3 - UNORD in translateX86FSETCC()
23056 // 4 - NEQ in translateX86FSETCC()
23057 // 5 - NLT in translateX86FSETCC()
23058 // 6 - NLE in translateX86FSETCC()
23059 // 7 - ORD in translateX86FSETCC()
23061 // clang-format off in translateX86FSETCC()
23083 // clang-format on in translateX86FSETCC()
23107 /// Break a 256-bit integer VSETCC into two new 128-bit ones and then
23109 static SDValue splitIntVSETCC(EVT VT, SDValue LHS, SDValue RHS, in splitIntVSETCC() argument
23112 assert(VT.isInteger() && VT == LHS.getValueType() && in splitIntVSETCC()
23113 VT == RHS.getValueType() && "Unsupported VTs!"); in splitIntVSETCC()
23127 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); in splitIntVSETCC()
23128 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, in splitIntVSETCC()
23138 MVT VT = Op.getSimpleValueType(); in LowerIntVSETCC_AVX512() local
23139 assert(VT.getVectorElementType() == MVT::i1 && in LowerIntVSETCC_AVX512()
23142 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); in LowerIntVSETCC_AVX512()
23150 return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode); in LowerIntVSETCC_AVX512()
23163 MVT VT = V.getSimpleValueType(); in incDecVectorConstant() local
23164 MVT EltVT = VT.getVectorElementType(); in incDecVectorConstant()
23165 unsigned NumElts = VT.getVectorNumElements(); in incDecVectorConstant()
23169 auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i)); in incDecVectorConstant()
23170 if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT) in incDecVectorConstant()
23174 const APInt &EltC = Elt->getAPIntValue(); in incDecVectorConstant()
23181 NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT)); in incDecVectorConstant()
23184 return DAG.getBuildVector(VT, DL, NewVecC); in incDecVectorConstant()
23191 static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT, in LowerVSETCCWithSUBUS() argument
23198 MVT VET = VT.getVectorElementType(); in LowerVSETCCWithSUBUS()
23210 // Only do this pre-AVX since vpcmp* is no longer destructive. in LowerVSETCCWithSUBUS()
23224 // cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0 in LowerVSETCCWithSUBUS()
23233 // Psubus is better than flip-sign because it requires no inversion. in LowerVSETCCWithSUBUS()
23241 SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1); in LowerVSETCCWithSUBUS()
23242 return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result, in LowerVSETCCWithSUBUS()
23243 DAG.getConstant(0, dl, VT)); in LowerVSETCCWithSUBUS()
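// An intrinsics-level sketch of the PSUBUS trick above (assumes SSE2 and
// <immintrin.h>; helper name illustrative only): for unsigned bytes,
// A <= B exactly when the saturating subtraction A - B clamps to zero.
#include <immintrin.h>
__m128i SketchCmpLE_EPU8(__m128i A, __m128i B) {
  __m128i Diff = _mm_subs_epu8(A, B);                 // PSUBUSB
  return _mm_cmpeq_epi8(Diff, _mm_setzero_si128());   // all-ones where A <= B
}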
23253 MVT VT = Op->getSimpleValueType(0); in LowerVSETCC() local
23254 ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get(); in LowerVSETCC()
23269 // compare like we do for non-strict, we might trigger spurious exceptions in LowerVSETCC()
23272 if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 && in LowerVSETCC()
23276 unsigned Num = VT.getVectorNumElements(); in LowerVSETCC()
23283 // floating-point vector result that matches the operand type. This allows in LowerVSETCC()
23285 VT = Op0.getSimpleValueType(); in LowerVSETCC()
23305 Opc, dl, {VT, MVT::Other}, in LowerVSETCC()
23311 SignalCmp->setFlags(Op->getFlags()); in LowerVSETCC()
23335 Opc, dl, {VT, MVT::Other}, in LowerVSETCC()
23338 Opc, dl, {VT, MVT::Other}, in LowerVSETCC()
23344 Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)); in LowerVSETCC()
23346 Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)); in LowerVSETCC()
23348 Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); in LowerVSETCC()
23352 Opc, dl, {VT, MVT::Other}, in LowerVSETCC()
23357 Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)); in LowerVSETCC()
23365 Opc, dl, {VT, MVT::Other}, in LowerVSETCC()
23370 Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)); in LowerVSETCC()
23373 if (VT.getFixedSizeInBits() > in LowerVSETCC()
23377 EVT CastVT = EVT(VT).changeVectorElementTypeToInteger(); in LowerVSETCC()
23399 assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() && in LowerVSETCC()
23402 // The non-AVX512 code below works under the assumption that source and in LowerVSETCC()
23404 assert((Subtarget.hasAVX512() || (VT == VTOp0)) && in LowerVSETCC()
23408 if (VT.getVectorElementType() == MVT::i1) { in LowerVSETCC()
23409 // In the AVX-512 architecture, setcc returns a mask with i1 elements, in LowerVSETCC()
23417 if (VT.is128BitVector() && Subtarget.hasXOP()) { in LowerVSETCC()
23421 // clang-format off in LowerVSETCC()
23433 // clang-format on in LowerVSETCC()
23440 return DAG.getNode(Opc, dl, VT, Op0, Op1, in LowerVSETCC()
23444 // (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2. in LowerVSETCC()
23452 BC0.getOperand(1), VT.getScalarSizeInBits(), UndefElts, EltBits, in LowerVSETCC()
23456 Op1 = DAG.getBitcast(VT, BC0.getOperand(1)); in LowerVSETCC()
23462 // ICMP_EQ(AND(X,C),C) -> SRA(SHL(X,LOG2(C)),BW-1) iff C is power-of-2. in LowerVSETCC()
23466 if (C1 && C1->getAPIntValue().isPowerOf2()) { in LowerVSETCC()
23467 unsigned BitWidth = VT.getScalarSizeInBits(); in LowerVSETCC()
23468 unsigned ShiftAmt = BitWidth - C1->getAPIntValue().logBase2() - 1; in LowerVSETCC()
23471 Result = DAG.getNode(ISD::SHL, dl, VT, Result, in LowerVSETCC()
23472 DAG.getConstant(ShiftAmt, dl, VT)); in LowerVSETCC()
23473 Result = DAG.getNode(ISD::SRA, dl, VT, Result, in LowerVSETCC()
23474 DAG.getConstant(BitWidth - 1, dl, VT)); in LowerVSETCC()
23479 // Break 256-bit integer vector compare into smaller ones. in LowerVSETCC()
23480 if (VT.is256BitVector() && !Subtarget.hasInt256()) in LowerVSETCC()
23481 return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl); in LowerVSETCC()
23483 // Break 512-bit integer vector compare into smaller ones. in LowerVSETCC()
23485 if (VT.is512BitVector()) in LowerVSETCC()
23486 return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl); in LowerVSETCC()
23489 // not-of-PCMPEQ: in LowerVSETCC()
23490 // X != INT_MIN --> X >s INT_MIN in LowerVSETCC()
23491 // X != INT_MAX --> X <s INT_MAX --> INT_MAX >s X in LowerVSETCC()
23492 // +X != 0 --> +X >s 0 in LowerVSETCC()
23504 // If both operands are known non-negative, then an unsigned compare is the in LowerVSETCC()
23515 TLI.isOperationLegal(ISD::UMIN, VT)) { in LowerVSETCC()
23519 // X > C --> X >= (C+1) --> X == umax(X, C+1) in LowerVSETCC()
23527 // X < C --> X <= (C-1) --> X == umin(X, C-1) in LowerVSETCC()
23537 // clang-format off in LowerVSETCC()
23543 // clang-format on in LowerVSETCC()
23546 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); in LowerVSETCC()
23547 Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result); in LowerVSETCC()
23549 // If the logical-not of the result is required, perform that now. in LowerVSETCC()
23551 Result = DAG.getNOT(dl, Result, VT); in LowerVSETCC()
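// An intrinsics-level sketch of the min/max form above (assumes SSE2 and
// <immintrin.h>; helper name illustrative only): with no unsigned PCMPGT,
// A >= B (unsigned) can be tested as A == max(A, B).
#include <immintrin.h>
__m128i SketchCmpGE_EPU8(__m128i A, __m128i B) {
  return _mm_cmpeq_epi8(_mm_max_epu8(A, B), A);       // PMAXUB + PCMPEQB
}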
23559 LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG)) in LowerVSETCC()
23577 if (VT == MVT::v2i64) { in LowerVSETCC()
23591 return DAG.getBitcast(VT, Result); in LowerVSETCC()
23596 Op1 = DAG.getConstant(-1, dl, MVT::v4i32); in LowerVSETCC()
23602 return DAG.getBitcast(VT, Result); in LowerVSETCC()
23605 // If the i64 elements are sign-extended enough to be representable as i32 in LowerVSETCC()
23616 return DAG.getBitcast(VT, Result); in LowerVSETCC()
23637 // Create masks for only the low parts/high parts of the 64 bit integers. in LowerVSETCC()
23650 return DAG.getBitcast(VT, Result); in LowerVSETCC()
23665 // Make sure the lower and upper halves are both all-ones. in LowerVSETCC()
23673 return DAG.getBitcast(VT, Result); in LowerVSETCC()
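// An intrinsics-level sketch of the 64-bit equality emulation above (assumes
// SSE2 and <immintrin.h>; helper name illustrative only): compare as 32-bit
// lanes, then require both halves of each 64-bit pair to be all-ones.
#include <immintrin.h>
__m128i SketchCmpEQ_EPI64_SSE2(__m128i A, __m128i B) {
  __m128i Eq32 = _mm_cmpeq_epi32(A, B);
  __m128i Swapped = _mm_shuffle_epi32(Eq32, _MM_SHUFFLE(2, 3, 0, 1));
  return _mm_and_si128(Eq32, Swapped);
}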
23680 MVT EltVT = VT.getVectorElementType(); in LowerVSETCC()
23682 VT); in LowerVSETCC()
23683 Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM); in LowerVSETCC()
23684 Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM); in LowerVSETCC()
23687 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); in LowerVSETCC()
23689 // If the logical-not of the result is required, perform that now. in LowerVSETCC()
23691 Result = DAG.getNOT(dl, Result, VT); in LowerVSETCC()
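// An intrinsics-level sketch of the sign-bit flip above (assumes SSE2 and
// <immintrin.h>; helper name illustrative only): x86 only has signed PCMPGT,
// so XOR both sides with the sign mask and compare signed.
#include <immintrin.h>
__m128i SketchCmpGT_EPU8(__m128i A, __m128i B) {
  __m128i SignBit = _mm_set1_epi8((char)0x80);
  return _mm_cmpgt_epi8(_mm_xor_si128(A, SignBit),
                        _mm_xor_si128(B, SignBit));   // unsigned A > B
}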
23708 MVT VT = Op0.getSimpleValueType(); in EmitAVX512Test() local
23709 if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) && in EmitAVX512Test()
23710 !(Subtarget.hasDQI() && VT == MVT::v8i1) && in EmitAVX512Test()
23711 !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))) in EmitAVX512Test()
23725 if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) in EmitAVX512Test()
23727 if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)) in EmitAVX512Test()
23771 // Try to use PTEST/PMOVMSKB for a tree of ANDs/ORs equality-compared with -1/0. in emitFlagsForSetcc()
23804 EVT VT = Op0.getValueType(); in emitFlagsForSetcc() local
23805 if (VT == MVT::i32 || VT == MVT::i64 || Op0->hasOneUse()) { in emitFlagsForSetcc()
23806 SDVTList CmpVTs = DAG.getVTList(VT, MVT::i32); in emitFlagsForSetcc()
23810 DAG.getConstant(0, dl, VT), Op0); in emitFlagsForSetcc()
23816 // (seteq (add X, -1), -1). Similar for setne. in emitFlagsForSetcc()
23845 MVT VT = Op->getSimpleValueType(0); in LowerSETCC() local
23847 if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); in LowerSETCC()
23849 assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); in LowerSETCC()
23855 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get(); in LowerSETCC()
23877 // Attempt to canonicalize SGT/UGT -> SGE/UGE compares with constant which in LowerSETCC()
23880 // equivalent for SLE/ULE -> SLT/ULT isn't likely to happen as we already in LowerSETCC()
23883 // encoding size - so it must either already be an i8 or i32 immediate, or it in LowerSETCC()
23888 const APInt &Op1Val = Op1C->getAPIntValue(); in LowerSETCC()
23939 X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get()); in LowerSETCCCARRY()
24012 assert(Op->getValueType(1) == MVT::i8 && "Unexpected VT!"); in LowerXALUO()
24013 return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC); in LowerXALUO()
24038 return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)); in isTruncWithZeroHighBitsInput()
24047 MVT VT = Op1.getSimpleValueType(); in LowerSELECT() local
24050 if (isSoftF16(VT, Subtarget)) { in LowerSELECT()
24051 MVT NVT = VT.changeTypeToInteger(); in LowerSELECT()
24052 return DAG.getBitcast(VT, DAG.getNode(ISD::SELECT, DL, NVT, Cond, in LowerSELECT()
24060 if (Cond.getOpcode() == ISD::SETCC && isScalarFPTypeInSSEReg(VT) && in LowerSELECT()
24061 VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { in LowerSELECT()
24065 translateX86FSETCC(cast<CondCodeSDNode>(Cond.getOperand(2))->get(), in LowerSELECT()
24072 assert(!VT.isVector() && "Not a scalar type?"); in LowerSELECT()
24073 return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); in LowerSELECT()
24077 SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, in LowerSELECT()
24096 MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; in LowerSELECT()
24101 MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; in LowerSELECT()
24106 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, in LowerSELECT()
24109 SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); in LowerSELECT()
24110 SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); in LowerSELECT()
24111 return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And); in LowerSELECT()
24116 if (isScalarFPTypeInSSEReg(VT) && Subtarget.hasAVX512()) { in LowerSELECT()
24118 return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); in LowerSELECT()
24133 // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y in LowerSELECT()
24134 // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y in LowerSELECT()
24135 // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y in LowerSELECT()
24136 // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y in LowerSELECT()
24137 // (select (and (x , 0x1) == 0), y, (z ^ y) ) -> (-(and (x , 0x1)) & z ) ^ y in LowerSELECT()
24138 // (select (and (x , 0x1) == 0), y, (z | y) ) -> (-(and (x , 0x1)) & z ) | y in LowerSELECT()
24139 // (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x in LowerSELECT()
24140 // (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x in LowerSELECT()
24148 // Special handling for __builtin_ffs(X) - 1 pattern which looks like in LowerSELECT()
24149 // (select (seteq X, 0), -1, (cttz_zero_undef X)). Disable the special in LowerSELECT()
24157 if (Subtarget.canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64) && in LowerSELECT()
24166 // 'X - 1' sets the carry flag if X == 0. in LowerSELECT()
24167 // '0 - X' sets the carry flag if X != 0. in LowerSELECT()
24168 // Convert the carry flag to a -1/0 mask with sbb: in LowerSELECT()
24169 // select (X != 0), -1, Y --> 0 - X; or (sbb), Y in LowerSELECT()
24170 // select (X == 0), Y, -1 --> 0 - X; or (sbb), Y in LowerSELECT()
24171 // select (X != 0), Y, -1 --> X - 1; or (sbb), Y in LowerSELECT()
24172 // select (X == 0), -1, Y --> X - 1; or (sbb), Y in LowerSELECT()
24181 SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, in LowerSELECT()
24184 return DAG.getNode(ISD::OR, DL, VT, SBB, Y); in LowerSELECT()
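// A branchless scalar sketch of the SBB pattern above (helper name
// illustrative only): the borrow out of 0 - X becomes an all-ones/zero mask
// that is then OR'd with Y, i.e. select (X != 0), -1, Y.
unsigned SketchSelectNonZeroAllOnes(unsigned X, unsigned Y) {
  unsigned Mask = 0u - (unsigned)(X != 0);   // -1 if X != 0, else 0
  return Mask | Y;
}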
24208 if (CmpSz > VT.getSizeInBits()) in LowerSELECT()
24209 Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0); in LowerSELECT()
24210 else if (CmpSz < VT.getSizeInBits()) in LowerSELECT()
24211 Neg = DAG.getNode(ISD::AND, DL, VT, in LowerSELECT()
24212 DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)), in LowerSELECT()
24213 DAG.getConstant(1, DL, VT)); in LowerSELECT()
24216 SDValue Mask = DAG.getNegative(Neg, DL, VT); // -(and (x, 0x1)) in LowerSELECT()
24217 SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z in LowerSELECT()
24218 return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); // And Op y in LowerSELECT()
24220 } else if ((VT == MVT::i32 || VT == MVT::i64) && isNullConstant(Op2) && in LowerSELECT()
24221 Cmp.getNode()->hasOneUse() && (CmpOp0 == Op1) && in LowerSELECT()
24224 // (select (x < 0), x, 0) -> ((x >> (size_in_bits(x)-1))) & x in LowerSELECT()
24229 // (select (x > 0), x, 0) -> (~(x >> (size_in_bits(x)-1))) & x in LowerSELECT()
24230 unsigned ShCt = VT.getSizeInBits() - 1; in LowerSELECT()
24231 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, VT); in LowerSELECT()
24232 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, Op1, ShiftAmt); in LowerSELECT()
24234 Shift = DAG.getNOT(DL, Shift, VT); in LowerSELECT()
24235 return DAG.getNode(ISD::AND, DL, VT, Shift, Op1); in LowerSELECT()
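// Scalar sketches of the two shift-mask selects above (helper names
// illustrative only; assumes an arithmetic right shift of negative ints).
int SketchSelectNegOrZero(int X) {   // (x < 0) ? x : 0
  return X & (X >> 31);
}
int SketchSelectPosOrZero(int X) {   // (x > 0) ? x : 0
  return X & ~(X >> 31);
}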
24254 if (VT.isFloatingPoint() && !VT.isVector() && in LowerSELECT()
24255 !isScalarFPTypeInSSEReg(VT) && Subtarget.canUseCMOV()) // FPStack? in LowerSELECT()
24256 IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue()); in LowerSELECT()
24296 // a < b ? -1 : 0 -> RES = ~setcc_carry in LowerSELECT()
24297 // a < b ? 0 : -1 -> RES = setcc_carry in LowerSELECT()
24298 // a >= b ? -1 : 0 -> RES = setcc_carry in LowerSELECT()
24299 // a >= b ? 0 : -1 -> RES = ~setcc_carry in LowerSELECT()
24301 unsigned CondCode = CC->getAsZExtVal(); in LowerSELECT()
24349 return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops, Op->getFlags()); in LowerSELECT()
24355 MVT VT = Op->getSimpleValueType(0); in LowerSIGN_EXTEND_Mask() local
24356 SDValue In = Op->getOperand(0); in LowerSIGN_EXTEND_Mask()
24359 MVT VTElt = VT.getVectorElementType(); in LowerSIGN_EXTEND_Mask()
24360 unsigned NumElts = VT.getVectorNumElements(); in LowerSIGN_EXTEND_Mask()
24362 // Extend VT if the scalar type is i8/i16 and BWI is not supported. in LowerSIGN_EXTEND_Mask()
24363 MVT ExtVT = VT; in LowerSIGN_EXTEND_Mask()
24367 return SplitAndExtendv16i1(Op.getOpcode(), VT, In, dl, DAG); in LowerSIGN_EXTEND_Mask()
24372 // Widen to 512-bits if VLX is not supported. in LowerSIGN_EXTEND_Mask()
24388 SDValue NegOne = DAG.getConstant(-1, dl, WideVT); in LowerSIGN_EXTEND_Mask()
24394 if (VT != ExtVT) { in LowerSIGN_EXTEND_Mask()
24399 // Extract back to 128/256-bit if we widened. in LowerSIGN_EXTEND_Mask()
24400 if (WideVT != VT) in LowerSIGN_EXTEND_Mask()
24401 V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, V, in LowerSIGN_EXTEND_Mask()
24409 SDValue In = Op->getOperand(0); in LowerANY_EXTEND()
24422 // non-SSE4.1 targets. For zero extend this should only handle inputs of
24427 SDValue In = Op->getOperand(0); in LowerEXTEND_VECTOR_INREG()
24428 MVT VT = Op->getSimpleValueType(0); in LowerEXTEND_VECTOR_INREG() local
24431 MVT SVT = VT.getVectorElementType(); in LowerEXTEND_VECTOR_INREG()
24439 if (!(VT.is128BitVector() && Subtarget.hasSSE2()) && in LowerEXTEND_VECTOR_INREG()
24440 !(VT.is256BitVector() && Subtarget.hasAVX()) && in LowerEXTEND_VECTOR_INREG()
24441 !(VT.is512BitVector() && Subtarget.hasAVX512())) in LowerEXTEND_VECTOR_INREG()
24446 unsigned NumElts = VT.getVectorNumElements(); in LowerEXTEND_VECTOR_INREG()
24448 // For 256-bit vectors, we only need the lower (128-bit) half of the input. in LowerEXTEND_VECTOR_INREG()
24449 // For 512-bit vectors, we need 128-bits or 256-bits. in LowerEXTEND_VECTOR_INREG()
24452 // at least 128-bits. in LowerEXTEND_VECTOR_INREG()
24458 // SSE41 targets can use the pmov[sz]x* instructions directly for 128-bit results, in LowerEXTEND_VECTOR_INREG()
24460 // need to be handled here for 256/512-bit results. in LowerEXTEND_VECTOR_INREG()
24462 assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension"); in LowerEXTEND_VECTOR_INREG()
24465 return DAG.getNode(Op.getOpcode(), dl, VT, In); in LowerEXTEND_VECTOR_INREG()
24472 return DAG.getNode(ExtOpc, dl, VT, In); in LowerEXTEND_VECTOR_INREG()
24475 // pre-AVX2 256-bit extensions need to be split into 128-bit instructions. in LowerEXTEND_VECTOR_INREG()
24477 assert(VT.is256BitVector() && "256-bit vector expected"); in LowerEXTEND_VECTOR_INREG()
24478 MVT HalfVT = VT.getHalfNumVectorElementsVT(); in LowerEXTEND_VECTOR_INREG()
24489 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); in LowerEXTEND_VECTOR_INREG()
24494 assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs"); in LowerEXTEND_VECTOR_INREG()
24497 // If the source elements are already all-signbits, we don't need to extend, in LowerEXTEND_VECTOR_INREG()
24505 return DAG.getBitcast(VT, in LowerEXTEND_VECTOR_INREG()
24509 // pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI. in LowerEXTEND_VECTOR_INREG()
24516 MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT; in LowerEXTEND_VECTOR_INREG()
24526 Mask[i * Scale + (Scale - 1)] = i; in LowerEXTEND_VECTOR_INREG()
24531 unsigned SignExtShift = DestWidth - InSVT.getSizeInBits(); in LowerEXTEND_VECTOR_INREG()
24536 if (VT == MVT::v2i64) { in LowerEXTEND_VECTOR_INREG()
24537 assert(Curr.getValueType() == MVT::v4i32 && "Unexpected input VT"); in LowerEXTEND_VECTOR_INREG()
24541 SignExt = DAG.getBitcast(VT, SignExt); in LowerEXTEND_VECTOR_INREG()
24549 MVT VT = Op->getSimpleValueType(0); in LowerSIGN_EXTEND() local
24550 SDValue In = Op->getOperand(0); in LowerSIGN_EXTEND()
24557 assert(VT.isVector() && InVT.isVector() && "Expected vector type"); in LowerSIGN_EXTEND()
24558 assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && in LowerSIGN_EXTEND()
24560 assert((VT.getVectorElementType() == MVT::i16 || in LowerSIGN_EXTEND()
24561 VT.getVectorElementType() == MVT::i32 || in LowerSIGN_EXTEND()
24562 VT.getVectorElementType() == MVT::i64) && in LowerSIGN_EXTEND()
24569 if (VT == MVT::v32i16 && !Subtarget.hasBWI()) { in LowerSIGN_EXTEND()
24570 assert(InVT == MVT::v32i8 && "Unexpected VT!"); in LowerSIGN_EXTEND()
24582 // for v4i32 the high shuffle mask will be {2, 3, -1, -1} in LowerSIGN_EXTEND()
24583 // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 in LowerSIGN_EXTEND()
24584 // concat the vectors to original VT in LowerSIGN_EXTEND()
24585 MVT HalfVT = VT.getHalfNumVectorElementsVT(); in LowerSIGN_EXTEND()
24589 SmallVector<int,8> ShufMask(NumElems, -1); in LowerSIGN_EXTEND()
24596 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); in LowerSIGN_EXTEND()
24599 /// Change a vector store into a pair of half-size vector stores.
24601 SDValue StoredVal = Store->getValue(); in splitVectorStore()
24604 "Expecting 256/512-bit op"); in splitVectorStore()
24611 if (!Store->isSimple()) in splitVectorStore()
24618 SDValue Ptr0 = Store->getBasePtr(); in splitVectorStore()
24622 DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(), in splitVectorStore()
24623 Store->getOriginalAlign(), in splitVectorStore()
24624 Store->getMemOperand()->getFlags()); in splitVectorStore()
24625 SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1, in splitVectorStore()
24626 Store->getPointerInfo().getWithOffset(HalfOffset), in splitVectorStore()
24627 Store->getOriginalAlign(), in splitVectorStore()
24628 Store->getMemOperand()->getFlags()); in splitVectorStore()
24636 SDValue StoredVal = Store->getValue(); in scalarizeVectorStore()
24638 StoredVal.getValueType().is128BitVector() && "Expecting 128-bit op"); in scalarizeVectorStore()
24644 if (!Store->isSimple()) in scalarizeVectorStore()
24655 SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(), in scalarizeVectorStore()
24659 SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr, in scalarizeVectorStore()
24660 Store->getPointerInfo().getWithOffset(Offset), in scalarizeVectorStore()
24661 Store->getOriginalAlign(), in scalarizeVectorStore()
24662 Store->getMemOperand()->getFlags()); in scalarizeVectorStore()
24672 SDValue StoredVal = St->getValue(); in LowerStore()
24678 assert(NumElts <= 8 && "Unexpected VT"); in LowerStore()
24679 assert(!St->isTruncatingStore() && "Expected non-truncating store"); in LowerStore()
24694 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), in LowerStore()
24695 St->getPointerInfo(), St->getOriginalAlign(), in LowerStore()
24696 St->getMemOperand()->getFlags()); in LowerStore()
24699 if (St->isTruncatingStore()) in LowerStore()
24702 // If this is a 256-bit store of concatenated ops, we are better off splitting in LowerStore()
24703 // that store into two 128-bit stores. This avoids spurious use of 256-bit ops in LowerStore()
24719 assert(StoreVT.is64BitVector() && "Unexpected VT"); in LowerStore()
24729 // Widen the vector, cast to a v2x64 type, extract the single 64-bit element in LowerStore()
24737 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), in LowerStore()
24738 St->getPointerInfo(), St->getOriginalAlign(), in LowerStore()
24739 St->getMemOperand()->getFlags()); in LowerStore()
24743 SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()}; in LowerStore()
24745 St->getMemOperand()); in LowerStore()
24767 assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load"); in LowerLoad()
24768 assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT"); in LowerLoad()
24772 SDValue NewLd = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(), in LowerLoad()
24773 Ld->getPointerInfo(), Ld->getOriginalAlign(), in LowerLoad()
24774 Ld->getMemOperand()->getFlags()); in LowerLoad()
24777 assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!"); in LowerLoad()
24813 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); in LowerBRCOND()
24844 // have a fall-through edge, because this requires an explicit in LowerBRCOND()
24846 if (Op.getNode()->hasOneUse()) { in LowerBRCOND()
24847 SDNode *User = *Op.getNode()->use_begin(); in LowerBRCOND()
24851 if (User->getOpcode() == ISD::BR) { in LowerBRCOND()
24852 SDValue FalseBB = User->getOperand(1); in LowerBRCOND()
24854 DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); in LowerBRCOND()
24940 EVT VT = Node->getValueType(0); in LowerDYNAMIC_STACKALLOC() local
24967 SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); in LowerDYNAMIC_STACKALLOC()
24969 Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value in LowerDYNAMIC_STACKALLOC()
24973 DAG.getNode(ISD::AND, dl, VT, Result, in LowerDYNAMIC_STACKALLOC()
24974 DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT)); in LowerDYNAMIC_STACKALLOC()
24998 MF.getInfo<X86MachineFunctionInfo>()->setHasDynAlloca(true); in LowerDYNAMIC_STACKALLOC()
25001 Register SPReg = RegInfo->getStackRegister(); in LowerDYNAMIC_STACKALLOC()
25006 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), in LowerDYNAMIC_STACKALLOC()
25007 DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT)); in LowerDYNAMIC_STACKALLOC()
25025 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerVASTART()
25032 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); in LowerVASTART()
25038 // gp_offset (0 - 6 * 8) in LowerVASTART()
25039 // fp_offset (48 - 48 + 8 * 16) in LowerVASTART()
25047 DAG.getConstant(FuncInfo->getVarArgsGPOffset(), DL, MVT::i32), FIN, in LowerVASTART()
25055 DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN, in LowerVASTART()
25061 SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); in LowerVASTART()
25069 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT); in LowerVASTART()
25079 "LowerVAARG only handles 64-bit va_arg!"); in LowerVAARG()
25089 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerVAARG()
25093 EVT ArgVT = Op.getNode()->getValueType(0); in LowerVAARG()
25137 // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows, in LowerVACOPY()
25139 assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!"); in LowerVACOPY()
25148 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); in LowerVACOPY()
25149 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); in LowerVACOPY()
25181 static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, in getTargetVShiftByConstNode() argument
25184 MVT ElementType = VT.getVectorElementType(); in getTargetVShiftByConstNode()
25188 if (VT != SrcOp.getSimpleValueType()) in getTargetVShiftByConstNode()
25189 SrcOp = DAG.getBitcast(VT, SrcOp); in getTargetVShiftByConstNode()
25198 ShiftAmt = ElementType.getSizeInBits() - 1; in getTargetVShiftByConstNode()
25200 return DAG.getConstant(0, dl, VT); in getTargetVShiftByConstNode()
25204 && "Unknown target vector shift-by-constant node"); in getTargetVShiftByConstNode()
25223 SDValue Amt = DAG.getConstant(ShiftAmt, dl, VT); in getTargetVShiftByConstNode()
25224 if (SDValue C = DAG.FoldConstantArithmetic(ShiftOpc, dl, VT, {SrcOp, Amt})) in getTargetVShiftByConstNode()
25228 return DAG.getNode(Opc, dl, VT, SrcOp, in getTargetVShiftByConstNode()
25233 static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT, in getTargetVShiftNode() argument
25244 SmallVector<int> Mask(AmtVT.getVectorNumElements(), -1); in getTargetVShiftNode()
25249 // Peek through any zext node if we can get back to a 128-bit source. in getTargetVShiftNode()
25260 // The shift uses the entire lower 64-bits of the amount vector, so no need to in getTargetVShiftNode()
25266 // If the shift amount has come from a scalar, then zero-extend the scalar in getTargetVShiftNode()
25275 // then we can zero-extend it by setting all the other mask elements to in getTargetVShiftNode()
25290 // Extract if the shift amount vector is larger than 128-bits. in getTargetVShiftNode()
25296 // Zero-extend bottom element to v2i64 vector type, either by extension or in getTargetVShiftNode()
25307 (128 - AmtVT.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8); in getTargetVShiftNode()
25316 // Change opcode to non-immediate version. in getTargetVShiftNode()
25319 // The return type has to be a 128-bit type with the same element in getTargetVShiftNode()
25321 MVT EltVT = VT.getVectorElementType(); in getTargetVShiftNode()
25325 return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); in getTargetVShiftNode()
25353 // In the case where MaskVT equals v2i1 or v4i1, the low 2 or 4 elements in getMaskNode()
25368 MVT VT = Op.getSimpleValueType(); in getVectorMaskingNode() local
25369 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); in getVectorMaskingNode()
25379 PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); in getVectorMaskingNode()
25380 return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); in getVectorMaskingNode()
25396 if (MaskConst->getZExtValue() & 0x1) in getScalarMaskingNode()
25399 MVT VT = Op.getSimpleValueType(); in getScalarMaskingNode() local
25409 return DAG.getNode(ISD::AND, dl, VT, Op, IMask); in getScalarMaskingNode()
25412 PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); in getScalarMaskingNode()
25413 return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc); in getScalarMaskingNode()
25417 if (!Fn->hasPersonalityFn()) in getSEHRegistrationNodeSize()
25420 // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See in getSEHRegistrationNodeSize()
25422 switch (classifyEHPersonality(Fn->getPersonalityFn())) { in getSEHRegistrationNodeSize()
25428 "can only recover FP for 32-bit MSVC EH personality functions"); in getSEHRegistrationNodeSize()
25435 /// RegNodeBase = EntryEBP - RegNodeSize
25436 /// ParentFP = RegNodeBase - ParentFrameOffset
25450 if (!Fn->hasPersonalityFn()) in recoverFramePointer()
25456 GlobalValue::dropLLVMManglingEscape(Fn->getName())); in recoverFramePointer()
25468 // RegNodeBase = EntryEBP - RegNodeSize in recoverFramePointer()
25469 // ParentFP = RegNodeBase - ParentFrameOffset in recoverFramePointer()
25480 return C->getAPIntValue() == X86::STATIC_ROUNDING::CUR_DIRECTION; in LowerINTRINSIC_WO_CHAIN()
25486 unsigned RC = C->getZExtValue(); in LowerINTRINSIC_WO_CHAIN()
25500 RC = C->getZExtValue(); in LowerINTRINSIC_WO_CHAIN()
25516 MVT VT = Op.getSimpleValueType(); in LowerINTRINSIC_WO_CHAIN() local
25520 SelectionDAG::FlagInserter FlagsInserter(DAG, Op->getFlags()); in LowerINTRINSIC_WO_CHAIN()
25523 switch(IntrData->Type) { in LowerINTRINSIC_WO_CHAIN()
25526 // First, we check if the intrinsic may have non-default rounding mode, in LowerINTRINSIC_WO_CHAIN()
25527 // (IntrData->Opc1 != 0), then we check the rounding mode operand. in LowerINTRINSIC_WO_CHAIN()
25528 unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25539 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), in LowerINTRINSIC_WO_CHAIN()
25547 Opc = IntrData->Opc0; in LowerINTRINSIC_WO_CHAIN()
25549 Opc = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25559 // First, we check if the intrinsic may have non-default rounding mode, in LowerINTRINSIC_WO_CHAIN()
25560 // (IntrData->Opc1 != 0), then we check the rounding mode operand. in LowerINTRINSIC_WO_CHAIN()
25561 unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25573 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), in LowerINTRINSIC_WO_CHAIN()
25581 Opc = IntrData->Opc0; in LowerINTRINSIC_WO_CHAIN()
25583 Opc = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25596 if (IntrData->Type == INTR_TYPE_3OP_IMM8 && in LowerINTRINSIC_WO_CHAIN()
25598 Src3 = DAG.getTargetConstant(Src3->getAsZExtVal() & 0xff, dl, MVT::i8); in LowerINTRINSIC_WO_CHAIN()
25602 // First, we check if the intrinsic may have non-default rounding mode, in LowerINTRINSIC_WO_CHAIN()
25603 // (IntrData->Opc1 != 0), then we check the rounding mode operand. in LowerINTRINSIC_WO_CHAIN()
25604 unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25616 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), in LowerINTRINSIC_WO_CHAIN()
25620 assert(Op.getOperand(4)->getOpcode() == ISD::TargetConstant); in LowerINTRINSIC_WO_CHAIN()
25623 Src4 = DAG.getTargetConstant(Src4->getAsZExtVal() & 0xff, dl, MVT::i8); in LowerINTRINSIC_WO_CHAIN()
25626 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), in LowerINTRINSIC_WO_CHAIN()
25635 // - RC Opcode is specified and in LowerINTRINSIC_WO_CHAIN()
25636 // - RC is not "current direction". in LowerINTRINSIC_WO_CHAIN()
25637 unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25650 DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru, in LowerINTRINSIC_WO_CHAIN()
25661 Opc = IntrData->Opc0; in LowerINTRINSIC_WO_CHAIN()
25663 Opc = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25667 return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), Mask, PassThru, in LowerINTRINSIC_WO_CHAIN()
25675 unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25677 // (1) With suppress-all-exceptions (sae) or rounding mode - 6 operands in LowerINTRINSIC_WO_CHAIN()
25678 // (2) With rounding mode and sae - 7 operands. in LowerINTRINSIC_WO_CHAIN()
25686 DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2, in LowerINTRINSIC_WO_CHAIN()
25692 return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, in LowerINTRINSIC_WO_CHAIN()
25700 unsigned Opc = IntrData->Opc0; in LowerINTRINSIC_WO_CHAIN()
25708 return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, in LowerINTRINSIC_WO_CHAIN()
25722 NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2); in LowerINTRINSIC_WO_CHAIN()
25724 NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, in LowerINTRINSIC_WO_CHAIN()
25739 Opc = IntrData->Opc0; in LowerINTRINSIC_WO_CHAIN()
25741 Opc = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25745 return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), in LowerINTRINSIC_WO_CHAIN()
25754 if (IntrData->Opc1 != 0) { in LowerINTRINSIC_WO_CHAIN()
25758 NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, in LowerINTRINSIC_WO_CHAIN()
25764 NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2); in LowerINTRINSIC_WO_CHAIN()
25773 unsigned Opc = IntrData->Opc0; in LowerINTRINSIC_WO_CHAIN()
25774 if (IntrData->Opc1 != 0) { in LowerINTRINSIC_WO_CHAIN()
25777 Opc = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25782 return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), in LowerINTRINSIC_WO_CHAIN()
25794 Opc = IntrData->Opc0; in LowerINTRINSIC_WO_CHAIN()
25796 Opc = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25800 return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3), in LowerINTRINSIC_WO_CHAIN()
25810 unsigned Opc = IntrData->Opc0; in LowerINTRINSIC_WO_CHAIN()
25811 if (IntrData->Opc1 != 0) { in LowerINTRINSIC_WO_CHAIN()
25814 Opc = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25818 return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3), in LowerINTRINSIC_WO_CHAIN()
25830 return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1); in LowerINTRINSIC_WO_CHAIN()
25837 return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1); in LowerINTRINSIC_WO_CHAIN()
25845 MVT VT = Op.getSimpleValueType(); in LowerINTRINSIC_WO_CHAIN() local
25848 if (IntrData->Type == CFMA_OP_MASKZ) in LowerINTRINSIC_WO_CHAIN()
25849 PassThru = getZeroVector(VT, Subtarget, DAG, dl); in LowerINTRINSIC_WO_CHAIN()
25852 // - RC Opcode is specified and in LowerINTRINSIC_WO_CHAIN()
25853 // - RC is not "current direction". in LowerINTRINSIC_WO_CHAIN()
25855 if (IntrData->Opc1 != 0) { in LowerINTRINSIC_WO_CHAIN()
25859 NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, Src3, in LowerINTRINSIC_WO_CHAIN()
25865 NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3); in LowerINTRINSIC_WO_CHAIN()
25871 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), in LowerINTRINSIC_WO_CHAIN()
25877 SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm); in LowerINTRINSIC_WO_CHAIN()
25893 // First, we check if the intrinsic may have non-default rounding mode, in LowerINTRINSIC_WO_CHAIN()
25894 // (IntrData->Opc1 != 0), then we check the rounding mode operand. in LowerINTRINSIC_WO_CHAIN()
25895 if (IntrData->Opc1 != 0) { in LowerINTRINSIC_WO_CHAIN()
25898 return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1), in LowerINTRINSIC_WO_CHAIN()
25904 return DAG.getNode(IntrData->Opc0, dl, MaskVT, in LowerINTRINSIC_WO_CHAIN()
25914 if (IntrData->Opc1 != 0) { in LowerINTRINSIC_WO_CHAIN()
25917 Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Sae); in LowerINTRINSIC_WO_CHAIN()
25923 Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC); in LowerINTRINSIC_WO_CHAIN()
25935 ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
25942 SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); in LowerINTRINSIC_WO_CHAIN()
26000 // Catch shift-by-constant. in LowerINTRINSIC_WO_CHAIN()
26002 return getTargetVShiftByConstNode(IntrData->Opc0, dl, in LowerINTRINSIC_WO_CHAIN()
26004 CShAmt->getZExtValue(), DAG); in LowerINTRINSIC_WO_CHAIN()
26007 return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), in LowerINTRINSIC_WO_CHAIN()
26019 PassThru = getZeroVector(VT, Subtarget, DAG, dl); in LowerINTRINSIC_WO_CHAIN()
26021 return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru, in LowerINTRINSIC_WO_CHAIN()
26031 SDValue Passthru = (IntrData->Type == FIXUPIMM) in LowerINTRINSIC_WO_CHAIN()
26033 : getZeroVector(VT, Subtarget, DAG, dl); in LowerINTRINSIC_WO_CHAIN()
26035 unsigned Opc = IntrData->Opc0; in LowerINTRINSIC_WO_CHAIN()
26036 if (IntrData->Opc1 != 0) { in LowerINTRINSIC_WO_CHAIN()
26039 Opc = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
26044 SDValue FixupImm = DAG.getNode(Opc, dl, VT, Src1, Src2, Src3, Imm); in LowerINTRINSIC_WO_CHAIN()
26052 assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); in LowerINTRINSIC_WO_CHAIN()
26057 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), in LowerINTRINSIC_WO_CHAIN()
26061 assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); in LowerINTRINSIC_WO_CHAIN()
26066 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), in LowerINTRINSIC_WO_CHAIN()
26070 assert(IntrData->Opc0 == X86ISD::BEXTRI && "Unexpected opcode"); in LowerINTRINSIC_WO_CHAIN()
26075 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), in LowerINTRINSIC_WO_CHAIN()
26080 SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); in LowerINTRINSIC_WO_CHAIN()
26087 Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2), in LowerINTRINSIC_WO_CHAIN()
26091 DAG.getConstant(-1, dl, MVT::i8)); in LowerINTRINSIC_WO_CHAIN()
26092 Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2), in LowerINTRINSIC_WO_CHAIN()
26108 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); in LowerINTRINSIC_WO_CHAIN()
26113 return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), in LowerINTRINSIC_WO_CHAIN()
26123 unsigned Opc = IntrData->Opc0; in LowerINTRINSIC_WO_CHAIN()
26137 Opc = IntrData->Opc1; in LowerINTRINSIC_WO_CHAIN()
26149 return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); in LowerINTRINSIC_WO_CHAIN()
26155 return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru, in LowerINTRINSIC_WO_CHAIN()
26311 SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops())); in LowerINTRINSIC_WO_CHAIN()
26326 SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops())); in LowerINTRINSIC_WO_CHAIN()
26339 SmallVector<SDValue, 5> NewOps(llvm::drop_begin(Op->ops())); in LowerINTRINSIC_WO_CHAIN()
26351 return DAG.getNode(getGlobalWrapperKind(nullptr, /*OpFlags=*/0), dl, VT, in LowerINTRINSIC_WO_CHAIN()
26359 auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal()); in LowerINTRINSIC_WO_CHAIN()
26361 GlobalValue::dropLLVMManglingEscape(Fn->getName())); in LowerINTRINSIC_WO_CHAIN()
26364 // supported on 32-bit Windows, which isn't PIC. in LowerINTRINSIC_WO_CHAIN()
26365 SDValue Result = DAG.getMCSymbol(LSDASym, VT); in LowerINTRINSIC_WO_CHAIN()
26366 return DAG.getNode(X86ISD::Wrapper, dl, VT, Result); in LowerINTRINSIC_WO_CHAIN()
26373 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr); in LowerINTRINSIC_WO_CHAIN()
26386 if (RegInfo->hasBasePointer(MF)) in LowerINTRINSIC_WO_CHAIN()
26387 Reg = RegInfo->getBaseRegister(); in LowerINTRINSIC_WO_CHAIN()
26389 bool CantUseFP = RegInfo->hasStackRealignment(MF); in LowerINTRINSIC_WO_CHAIN()
26391 Reg = RegInfo->getPtrSizedStackRegister(MF); in LowerINTRINSIC_WO_CHAIN()
26393 Reg = RegInfo->getPtrSizedFrameRegister(MF); in LowerINTRINSIC_WO_CHAIN()
26395 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); in LowerINTRINSIC_WO_CHAIN()
26410 Op->getOperand(1), Op->getOperand(2)); in LowerINTRINSIC_WO_CHAIN()
26431 // to 8-bits which may make it no longer out of bounds. in LowerINTRINSIC_WO_CHAIN()
26432 unsigned ShiftAmount = C->getAPIntValue().getLimitedValue(255); in LowerINTRINSIC_WO_CHAIN()
26483 // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). in LowerINTRINSIC_WO_CHAIN()
26505 SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, in getAVX2GatherNode()
26523 MemIntr->getMemoryVT(), MemIntr->getMemOperand()); in getAVX2GatherNode()
26531 MVT VT = Op.getSimpleValueType(); in getGatherNode() local
26538 SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, in getGatherNode()
26541 VT.getVectorNumElements()); in getGatherNode()
26561 MemIntr->getMemoryVT(), MemIntr->getMemOperand()); in getGatherNode()
26575 SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, in getScatterNode()
26592 MemIntr->getMemoryVT(), MemIntr->getMemOperand()); in getScatterNode()
26606 SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, in getPrefetchNode()
26623 /// Returns a Glue value which can be used to add extra copy-from-reg if the
26632 SDValue Chain = N->getOperand(0); in expandIntrinsicWChainHelper()
26636 assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); in expandIntrinsicWChainHelper()
26637 Chain = DAG.getCopyToReg(Chain, DL, SrcReg, N->getOperand(2), Glue); in expandIntrinsicWChainHelper()
26662 // Merge the two 32-bit values into a 64-bit one. in expandIntrinsicWChainHelper()
26670 // Use a buildpair to merge the two 32-bit values into a 64-bit one. in expandIntrinsicWChainHelper()
26685 // The processor's time-stamp counter (a 64-bit MSR) is stored into the in getReadTimeStampCounter()
26686 // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR in getReadTimeStampCounter()
26687 // and the EAX register is loaded with the low-order 32 bits. in getReadTimeStampCounter()
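A minimal scalar sketch (not part of the listing) of the EDX:EAX merge described in the comments above; the helper name is mine.

    #include <cstdint>

    // RDTSC/RDTSCP leave the 64-bit counter split across EDX (high half)
    // and EAX (low half); the lowering rebuilds it with a shift and an OR
    // (or a build_pair on 32-bit targets).
    uint64_t combineTSC(uint32_t EAX, uint32_t EDX) {
      return (uint64_t(EDX) << 32) | EAX;
    }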
26723 EHInfo->EHRegNodeFrameIndex = FINode->getIndex(); in MarkEHRegistrationNode()
26741 EHInfo->EHGuardFrameIndex = FINode->getIndex(); in MarkEHGuard()
26774 // 64-bit targets support extended Swift async frame setup, in isExtendedSwiftAsyncFrameSupported()
26776 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); in isExtendedSwiftAsyncFrameSupported()
26792 X86FI->setHasSwiftAsyncContext(true); in LowerINTRINSIC_W_CHAIN()
26793 SDValue Chain = Op->getOperand(0); in LowerINTRINSIC_W_CHAIN()
26800 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, in LowerINTRINSIC_W_CHAIN()
26805 if (!X86FI->getSwiftAsyncContextFrameIdx()) in LowerINTRINSIC_W_CHAIN()
26806 X86FI->setSwiftAsyncContextFrameIdx( in LowerINTRINSIC_W_CHAIN()
26810 DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), in LowerINTRINSIC_W_CHAIN()
26813 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, in LowerINTRINSIC_W_CHAIN()
26814 Op->getOperand(0)); in LowerINTRINSIC_W_CHAIN()
26861 SDValue Chain = Op->getOperand(0); in LowerINTRINSIC_W_CHAIN()
26880 DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2), in LowerINTRINSIC_W_CHAIN()
26881 Op->getOperand(3), Op->getOperand(4)); in LowerINTRINSIC_W_CHAIN()
26883 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, in LowerINTRINSIC_W_CHAIN()
26904 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, in LowerINTRINSIC_W_CHAIN()
26933 MachineMemOperand *MMO = MemIntr->getMemOperand(); in LowerINTRINSIC_W_CHAIN()
26934 EVT MemVT = MemIntr->getMemoryVT(); in LowerINTRINSIC_W_CHAIN()
26940 return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), in LowerINTRINSIC_W_CHAIN()
26971 MachineMemOperand *MMO = MemIntr->getMemOperand(); in LowerINTRINSIC_W_CHAIN()
26972 EVT MemVT = MemIntr->getMemoryVT(); in LowerINTRINSIC_W_CHAIN()
26981 return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), in LowerINTRINSIC_W_CHAIN()
26994 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, in LowerINTRINSIC_W_CHAIN()
27001 MVT VT = Op.getSimpleValueType(); in LowerINTRINSIC_W_CHAIN() local
27008 MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand(); in LowerINTRINSIC_W_CHAIN()
27011 {Chain, Op1, Op2}, VT, MMO); in LowerINTRINSIC_W_CHAIN()
27013 Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT); in LowerINTRINSIC_W_CHAIN()
27014 return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain); in LowerINTRINSIC_W_CHAIN()
27020 MVT VT = Op.getSimpleValueType(); in LowerINTRINSIC_W_CHAIN() local
27027 SDValue Size = DAG.getConstant(VT.getScalarSizeInBits(), DL, MVT::i32); in LowerINTRINSIC_W_CHAIN()
27028 MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand(); in LowerINTRINSIC_W_CHAIN()
27031 {Chain, Op1, Op2, Size}, VT, MMO); in LowerINTRINSIC_W_CHAIN()
27033 Res = DAG.getZExtOrTrunc(getSETCC(X86::COND_B, Res, DL, DAG), DL, VT); in LowerINTRINSIC_W_CHAIN()
27034 unsigned Imm = Op2->getAsZExtVal(); in LowerINTRINSIC_W_CHAIN()
27036 Res = DAG.getNode(ISD::SHL, DL, VT, Res, in LowerINTRINSIC_W_CHAIN()
27037 DAG.getShiftAmountConstant(Imm, VT, DL)); in LowerINTRINSIC_W_CHAIN()
27038 return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Res, Chain); in LowerINTRINSIC_W_CHAIN()
27048 MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand(); in LowerINTRINSIC_W_CHAIN()
27050 X86ISD::CMPCCXADD, DL, Op->getVTList(), {Chain, Addr, Src1, Src2, CC}, in LowerINTRINSIC_W_CHAIN()
27066 MVT VT = Op2.getSimpleValueType(); in LowerINTRINSIC_W_CHAIN() local
27088 MachineMemOperand *MMO = cast<MemSDNode>(Op)->getMemOperand(); in LowerINTRINSIC_W_CHAIN()
27089 return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), in LowerINTRINSIC_W_CHAIN()
27090 {Chain, Op1, Op2}, VT, MMO); in LowerINTRINSIC_W_CHAIN()
27102 MVT VT = Op2.getSimpleValueType(); in LowerINTRINSIC_W_CHAIN() local
27123 MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand(); in LowerINTRINSIC_W_CHAIN()
27126 {Chain, Op1, Op2}, VT, MMO); in LowerINTRINSIC_W_CHAIN()
27135 switch(IntrData->Type) { in LowerINTRINSIC_W_CHAIN()
27140 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32, MVT::Other); in LowerINTRINSIC_W_CHAIN()
27141 SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0)); in LowerINTRINSIC_W_CHAIN()
27145 SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), in LowerINTRINSIC_W_CHAIN()
27146 DAG.getConstant(1, dl, Op->getValueType(1)), in LowerINTRINSIC_W_CHAIN()
27149 SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops); in LowerINTRINSIC_W_CHAIN()
27152 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, in LowerINTRINSIC_W_CHAIN()
27162 return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, in LowerINTRINSIC_W_CHAIN()
27184 return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, in LowerINTRINSIC_W_CHAIN()
27191 unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0); in LowerINTRINSIC_W_CHAIN()
27203 getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget, in LowerINTRINSIC_W_CHAIN()
27219 expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX, in LowerINTRINSIC_W_CHAIN()
27225 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); in LowerINTRINSIC_W_CHAIN()
27226 SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0)); in LowerINTRINSIC_W_CHAIN()
27229 SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC); in LowerINTRINSIC_W_CHAIN()
27230 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), in LowerINTRINSIC_W_CHAIN()
27244 EVT MemVT = MemIntr->getMemoryVT(); in LowerINTRINSIC_W_CHAIN()
27246 uint16_t TruncationOp = IntrData->Opc0; in LowerINTRINSIC_W_CHAIN()
27251 MemIntr->getMemOperand()); in LowerINTRINSIC_W_CHAIN()
27258 MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED, in LowerINTRINSIC_W_CHAIN()
27266 MemIntr->getMemOperand(), DAG); in LowerINTRINSIC_W_CHAIN()
27272 VMask, MemVT, MemIntr->getMemOperand(), DAG); in LowerINTRINSIC_W_CHAIN()
27296 SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT); in LowerRETURNADDR()
27319 EVT VT = Op.getValueType(); in LowerFRAMEADDR() local
27323 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { in LowerFRAMEADDR()
27327 int FrameAddrIndex = FuncInfo->getFAIndex(); in LowerFRAMEADDR()
27330 unsigned SlotSize = RegInfo->getSlotSize(); in LowerFRAMEADDR()
27333 FuncInfo->setFAIndex(FrameAddrIndex); in LowerFRAMEADDR()
27335 return DAG.getFrameIndex(FrameAddrIndex, VT); in LowerFRAMEADDR()
27339 RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); in LowerFRAMEADDR()
27342 assert(((FrameReg == X86::RBP && VT == MVT::i64) || in LowerFRAMEADDR()
27343 (FrameReg == X86::EBP && VT == MVT::i32)) && in LowerFRAMEADDR()
27345 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); in LowerFRAMEADDR()
27346 while (Depth--) in LowerFRAMEADDR()
27347 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, in LowerFRAMEADDR()
27354 Register X86TargetLowering::getRegisterByName(const char* RegName, LLT VT, in getRegisterByName() argument
27374 Register FrameReg = RegInfo->getPtrSizedFrameRegister(MF); in getRegisterByName()
27390 return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); in LowerFRAME_TO_ARGS_OFFSET()
27421 Register FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); in LowerEH_RETURN()
27429 DAG.getIntPtrConstant(RegInfo->getSlotSize(), in LowerEH_RETURN()
27450 (void)TII->getGlobalBaseReg(&DAG.getMachineFunction()); in lowerEH_SJLJ_SETJMP()
27483 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); in LowerINIT_TRAMPOLINE()
27489 // Large code-model. in LowerINIT_TRAMPOLINE()
27490 const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. in LowerINIT_TRAMPOLINE()
27493 const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7; in LowerINIT_TRAMPOLINE()
27494 const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7; in LowerINIT_TRAMPOLINE()
27538 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); in LowerINIT_TRAMPOLINE()
27539 CallingConv::ID CC = Func->getCallingConv(); in LowerINIT_TRAMPOLINE()
27552 FunctionType *FTy = Func->getFunctionType(); in LowerINIT_TRAMPOLINE()
27553 const AttributeList &Attrs = Func->getAttributes(); in LowerINIT_TRAMPOLINE()
27555 if (!Attrs.isEmpty() && !Func->isVarArg()) { in LowerINIT_TRAMPOLINE()
27559 for (FunctionType::param_iterator I = FTy->param_begin(), in LowerINIT_TRAMPOLINE()
27560 E = FTy->param_end(); I != E; ++I, ++Idx) in LowerINIT_TRAMPOLINE()
27568 report_fatal_error("Nest register in use - reduce number of inreg" in LowerINIT_TRAMPOLINE()
27594 const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7; in LowerINIT_TRAMPOLINE()
27626 01 Round to -inf in LowerGET_ROUNDING()
27631 -1 Undefined in LowerGET_ROUNDING()
27635 3 Round to -inf in LowerGET_ROUNDING()
27637 To perform the conversion, we use a packed lookup table of the four 2-bit in LowerGET_ROUNDING()
27639 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10] in LowerGET_ROUNDING()
27645 MVT VT = Op.getSimpleValueType(); in LowerGET_ROUNDING() local
27679 RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT); in LowerGET_ROUNDING()
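A standalone check of the packed-table conversion documented above; the 0x2d constant comes from the comment, the expected results follow the standard FLT_ROUNDS encoding, and the function name is mine.

    #include <cassert>

    // RC is the x87 rounding-control field (control-word bits 11:10).
    static int fltRoundsFromX87RC(unsigned RC) {
      return (0x2d >> (2 * RC)) & 0x3; // 0x2d == 0b00'10'11'01, indexed by RC
    }

    int main() {
      assert(fltRoundsFromX87RC(0) == 1); // nearest
      assert(fltRoundsFromX87RC(1) == 3); // toward -inf
      assert(fltRoundsFromX87RC(2) == 2); // toward +inf
      assert(fltRoundsFromX87RC(3) == 0); // toward zero
    }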
27688 SDValue Chain = Op.getNode()->getOperand(0); in LowerSET_ROUNDING()
27711 SDValue NewRM = Op.getNode()->getOperand(1); in LowerSET_ROUNDING()
27714 uint64_t RM = CVal->getZExtValue(); in LowerSET_ROUNDING()
27717 // clang-format off in LowerSET_ROUNDING()
27724 // clang-format on in LowerSET_ROUNDING()
27729 // 0 Round to 0 -> 11 in LowerSET_ROUNDING()
27730 // 1 Round to nearest -> 00 in LowerSET_ROUNDING()
27731 // 2 Round to +inf -> 10 in LowerSET_ROUNDING()
27732 // 3 Round to -inf -> 01 in LowerSET_ROUNDING()
27733 // The 2-bit value needs then to be shifted so that it occupies bits 11:10. in LowerSET_ROUNDING()
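The inverse direction, sketched from the mapping listed above; the packed 0x63 constant is my own derivation from that table, not a value quoted from the surrounding code.

    // FLT_ROUNDS mode -> x87 RC field, placed into control-word bits 11:10.
    static unsigned x87ControlBitsFromFltRounds(unsigned Mode) {
      unsigned RC = (0x63 >> (2 * Mode)) & 0x3; // 0 -> 11, 1 -> 00, 2 -> 10, 3 -> 01
      return RC << 10;
    }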
27806 SDValue Chain = Op->getOperand(0); in LowerGET_FPENV_MEM()
27807 SDValue Ptr = Op->getOperand(1); in LowerGET_FPENV_MEM()
27809 EVT MemVT = Node->getMemoryVT(); in LowerGET_FPENV_MEM()
27811 MachineMemOperand *MMO = cast<FPStateAccessSDNode>(Op)->getMemOperand(); in LowerGET_FPENV_MEM()
27822 (MMO->getFlags() & ~MachineMemOperand::MOStore); in LowerGET_FPENV_MEM()
27872 SDValue Chain = Op->getOperand(0); in LowerSET_FPENV_MEM()
27873 SDValue Ptr = Op->getOperand(1); in LowerSET_FPENV_MEM()
27875 EVT MemVT = Node->getMemoryVT(); in LowerSET_FPENV_MEM()
27877 MachineMemOperand *MMO = cast<FPStateAccessSDNode>(Op)->getMemOperand(); in LowerSET_FPENV_MEM()
27885 SDValue Chain = Op.getNode()->getOperand(0); in LowerRESET_FPENV()
27891 // x87 FPU Control Word: mask all floating-point exceptions, sets rounding to in LowerRESET_FPENV()
27900 // MXCSR: mask all floating-point exceptions, sets rounding to nearest, clear in LowerRESET_FPENV()
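For reference, the reset values implied by the two comments above are the architectural defaults; the constants below come from the x86 manuals (FNINIT / MXCSR reset state), not from the listed lines.

    // x87 FCW: all exceptions masked, round to nearest, extended precision.
    constexpr unsigned DefaultFPCW = 0x037F;
    // MXCSR: all exceptions masked, round to nearest, status flags cleared.
    constexpr unsigned DefaultMXCSR = 0x1F80;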
27924 MVT VT = Op.getSimpleValueType(); in LowerVectorCTLZ_AVX512CDI() local
27925 MVT EltVT = VT.getVectorElementType(); in LowerVectorCTLZ_AVX512CDI()
27926 unsigned NumElems = VT.getVectorNumElements(); in LowerVectorCTLZ_AVX512CDI()
27943 SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode); in LowerVectorCTLZ_AVX512CDI()
27944 SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT); in LowerVectorCTLZ_AVX512CDI()
27946 return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta); in LowerVectorCTLZ_AVX512CDI()
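A scalar model of the CDI widening trick above: count leading zeros at i32 width, then subtract the 32 - EltBits delta; std::countl_zero stands in for the vector lzcnt instruction.

    #include <bit>
    #include <cassert>
    #include <cstdint>

    static unsigned ctlz8ViaI32(uint8_t X) {
      return std::countl_zero(uint32_t(X)) - (32 - 8); // Delta = 32 - EltBits
    }

    int main() {
      assert(ctlz8ViaI32(0x00) == 8);
      assert(ctlz8ViaI32(0x01) == 7);
      assert(ctlz8ViaI32(0xFF) == 0);
    }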
27953 MVT VT = Op.getSimpleValueType(); in LowerVectorCTLZInRegLUT() local
27954 int NumElts = VT.getVectorNumElements(); in LowerVectorCTLZInRegLUT()
27955 int NumBytes = NumElts * (VT.getScalarSizeInBits() / 8); in LowerVectorCTLZInRegLUT()
27958 // Per-nibble leading zero PSHUFB lookup table. in LowerVectorCTLZInRegLUT()
27994 // Merge result back from vXi8 back to VT, working on the lo/hi halves in LowerVectorCTLZInRegLUT()
27999 while (CurrVT != VT) { in LowerVectorCTLZInRegLUT()
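A scalar analogue of the per-nibble PSHUFB lookup mentioned above: the leading-zero count of a byte comes from two 16-entry table lookups, taking the low-nibble result only when the high nibble is zero. Names are mine.

    #include <cstdint>

    static unsigned ctlz8ViaNibbleLUT(uint8_t X) {
      static const unsigned LUT[16] = {4, 3, 2, 2, 1, 1, 1, 1,
                                       0, 0, 0, 0, 0, 0, 0, 0};
      unsigned Hi = X >> 4;
      return Hi ? LUT[Hi] : 4 + LUT[X & 0xF];
    }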
28035 MVT VT = Op.getSimpleValueType(); in LowerVectorCTLZ() local
28038 // vXi8 vectors need to be promoted to 512-bits for vXi32. in LowerVectorCTLZ()
28039 (Subtarget.canExtendTo512DQ() || VT.getVectorElementType() != MVT::i8)) in LowerVectorCTLZ()
28042 // Decompose 256-bit ops into smaller 128-bit ops. in LowerVectorCTLZ()
28043 if (VT.is256BitVector() && !Subtarget.hasInt256()) in LowerVectorCTLZ()
28046 // Decompose 512-bit ops into smaller 256-bit ops. in LowerVectorCTLZ()
28047 if (VT.is512BitVector() && !Subtarget.hasBWI()) in LowerVectorCTLZ()
28056 MVT VT = Op.getSimpleValueType(); in LowerCTLZ() local
28057 MVT OpVT = VT; in LowerCTLZ()
28058 unsigned NumBits = VT.getSizeInBits(); in LowerCTLZ()
28062 if (VT.isVector()) in LowerCTLZ()
28066 if (VT == MVT::i8) { in LowerCTLZ()
28078 SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), in LowerCTLZ()
28084 // Finally xor with NumBits-1. in LowerCTLZ()
28086 DAG.getConstant(NumBits - 1, dl, OpVT)); in LowerCTLZ()
28088 if (VT == MVT::i8) in LowerCTLZ()
28095 MVT VT = Op.getSimpleValueType(); in LowerCTTZ() local
28096 unsigned NumBits = VT.getScalarSizeInBits(); in LowerCTTZ()
28100 assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ && in LowerCTTZ()
28104 SDVTList VTs = DAG.getVTList(VT, MVT::i32); in LowerCTTZ()
28112 SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT), in LowerCTTZ()
28115 return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); in LowerCTTZ()
28120 MVT VT = Op.getSimpleValueType(); in lowerAddSub() local
28123 if (VT == MVT::i16 || VT == MVT::i32) in lowerAddSub()
28126 if (VT == MVT::v32i16 || VT == MVT::v64i8) in lowerAddSub()
28131 "Only handle AVX 256-bit vector integer operation"); in lowerAddSub()
28137 MVT VT = Op.getSimpleValueType(); in LowerADDSAT_SUBSAT() local
28142 if (VT == MVT::v32i16 || VT == MVT::v64i8 || in LowerADDSAT_SUBSAT()
28143 (VT.is256BitVector() && !Subtarget.hasInt256())) { in LowerADDSAT_SUBSAT()
28152 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); in LowerADDSAT_SUBSAT()
28154 unsigned BitWidth = VT.getScalarSizeInBits(); in LowerADDSAT_SUBSAT()
28156 if (!TLI.isOperationLegal(ISD::UMAX, VT) || useVPTERNLOG(Subtarget, VT)) { in LowerADDSAT_SUBSAT()
28157 // Handle a special-case with a bit-hack instead of cmp+select: in LowerADDSAT_SUBSAT()
28158 // usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1) in LowerADDSAT_SUBSAT()
28163 if (C && C->getAPIntValue().isSignMask()) { in LowerADDSAT_SUBSAT()
28164 SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT); in LowerADDSAT_SUBSAT()
28165 SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT); in LowerADDSAT_SUBSAT()
28166 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask); in LowerADDSAT_SUBSAT()
28167 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt); in LowerADDSAT_SUBSAT()
28168 return DAG.getNode(ISD::AND, DL, VT, Xor, Sra); in LowerADDSAT_SUBSAT()
28171 if (!TLI.isOperationLegal(ISD::UMAX, VT)) { in LowerADDSAT_SUBSAT()
28172 // usubsat X, Y --> (X >u Y) ? X - Y : 0 in LowerADDSAT_SUBSAT()
28173 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y); in LowerADDSAT_SUBSAT()
28176 if (SetCCResultType == VT && in LowerADDSAT_SUBSAT()
28177 DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits()) in LowerADDSAT_SUBSAT()
28178 return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub); in LowerADDSAT_SUBSAT()
28179 return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT)); in LowerADDSAT_SUBSAT()
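An exhaustive 8-bit check of the two rewrites quoted above, the sign-mask bit-hack and the generic cmp+select form; standalone code, not taken from the file.

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned V = 0; V < 256; ++V) {
        uint8_t X = uint8_t(V);
        // Generic form with Y == SMIN (0x80): (X >u Y) ? X - Y : 0.
        uint8_t Ref = X > 0x80 ? uint8_t(X - 0x80) : 0;
        // Bit-hack form: (X ^ SMIN) & (X s>> (BW-1)).
        uint8_t Hack = uint8_t((X ^ 0x80) & uint8_t(int8_t(X) >> 7));
        assert(Hack == Ref);
      }
    }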
28184 (!VT.isVector() || VT == MVT::v2i64)) { in LowerADDSAT_SUBSAT()
28187 SDValue Zero = DAG.getConstant(0, DL, VT); in LowerADDSAT_SUBSAT()
28190 DAG.getVTList(VT, SetCCResultType), X, Y); in LowerADDSAT_SUBSAT()
28193 SDValue SatMin = DAG.getConstant(MinVal, DL, VT); in LowerADDSAT_SUBSAT()
28194 SDValue SatMax = DAG.getConstant(MaxVal, DL, VT); in LowerADDSAT_SUBSAT()
28197 Result = DAG.getSelect(DL, VT, SumNeg, SatMax, SatMin); in LowerADDSAT_SUBSAT()
28198 return DAG.getSelect(DL, VT, Overflow, Result, SumDiff); in LowerADDSAT_SUBSAT()
28207 MVT VT = Op.getSimpleValueType(); in LowerABS() local
28210 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) { in LowerABS()
28211 // Since X86 does not have CMOV for 8-bit integer, we don't convert in LowerABS()
28212 // 8-bit integer abs to NEG and CMOV. in LowerABS()
28214 SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), in LowerABS()
28215 DAG.getConstant(0, DL, VT), N0); in LowerABS()
28218 return DAG.getNode(X86ISD::CMOV, DL, VT, Ops); in LowerABS()
28221 // ABS(vXi64 X) --> VPBLENDVPD(X, 0-X, X). in LowerABS()
28222 if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) { in LowerABS()
28224 SDValue Neg = DAG.getNegative(Src, DL, VT); in LowerABS()
28225 return DAG.getNode(X86ISD::BLENDV, DL, VT, Src, Neg, Src); in LowerABS()
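A scalar reading of the VPBLENDVPD rewrite above: the blend selects on the sign bit of X itself, so negative lanes take 0 - X and the rest take X. Sketch only, not the DAG form.

    #include <cstdint>

    static int64_t absViaBlend(int64_t X) {
      uint64_t Neg = 0u - uint64_t(X);   // 0 - X with two's-complement wrap
      return (X < 0) ? int64_t(Neg) : X; // BLENDV condition = sign bit of X
    }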
28228 if (VT.is256BitVector() && !Subtarget.hasInt256()) { in LowerABS()
28229 assert(VT.isInteger() && in LowerABS()
28230 "Only handle AVX 256-bit vector integer operation"); in LowerABS()
28234 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) in LowerABS()
28243 MVT VT = Op.getSimpleValueType(); in LowerAVG() local
28247 if (VT.is256BitVector() && !Subtarget.hasInt256()) in LowerAVG()
28250 if (VT == MVT::v32i16 || VT == MVT::v64i8) in LowerAVG()
28259 MVT VT = Op.getSimpleValueType(); in LowerMINMAX() local
28263 if (VT.is256BitVector() && !Subtarget.hasInt256()) in LowerMINMAX()
28266 if (VT == MVT::v32i16 || VT == MVT::v64i8) in LowerMINMAX()
28278 EVT VT = Op.getValueType(); in LowerFMINIMUM_FMAXIMUM() local
28282 uint64_t SizeInBits = VT.getScalarSizeInBits(); in LowerFMINIMUM_FMAXIMUM()
28285 EVT IVT = VT.changeTypeToInteger(); in LowerFMINIMUM_FMAXIMUM()
28295 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); in LowerFMINIMUM_FMAXIMUM()
28301 // Num xNaN +0 -0 in LowerFMINIMUM_FMAXIMUM()
28302 // --------------- --------------- in LowerFMINIMUM_FMAXIMUM()
28304 // X --------------- X --------------- in LowerFMINIMUM_FMAXIMUM()
28305 // xNaN | X | X/Y | -0 | +0 | -0 | in LowerFMINIMUM_FMAXIMUM()
28306 // --------------- --------------- in LowerFMINIMUM_FMAXIMUM()
28317 return CstOp->getValueAPF().bitcastToAPInt() == Zero; in LowerFMINIMUM_FMAXIMUM()
28319 return CstOp->getAPIntValue() == Zero; in LowerFMINIMUM_FMAXIMUM()
28320 if (Op->getOpcode() == ISD::BUILD_VECTOR || in LowerFMINIMUM_FMAXIMUM()
28321 Op->getOpcode() == ISD::SPLAT_VECTOR) { in LowerFMINIMUM_FMAXIMUM()
28322 for (const SDValue &OpVal : Op->op_values()) { in LowerFMINIMUM_FMAXIMUM()
28328 if (!CstOp->getValueAPF().isZero()) in LowerFMINIMUM_FMAXIMUM()
28330 if (CstOp->getValueAPF().bitcastToAPInt() != Zero) in LowerFMINIMUM_FMAXIMUM()
28341 Op->getFlags().hasNoSignedZeros() || in LowerFMINIMUM_FMAXIMUM()
28353 } else if (!VT.isVector() && (VT == MVT::f16 || Subtarget.hasDQI()) && in LowerFMINIMUM_FMAXIMUM()
28354 (Op->getFlags().hasNoNaNs() || IsXNeverNaN || IsYNeverNaN)) { in LowerFMINIMUM_FMAXIMUM()
28359 MVT VectorType = MVT::getVectorVT(VT.getSimpleVT(), 128 / SizeInBits); in LowerFMINIMUM_FMAXIMUM()
28371 NewX = DAG.getSelect(DL, VT, NeedSwap, Y, X); in LowerFMINIMUM_FMAXIMUM()
28372 NewY = DAG.getSelect(DL, VT, NeedSwap, X, Y); in LowerFMINIMUM_FMAXIMUM()
28373 return DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags()); in LowerFMINIMUM_FMAXIMUM()
28376 if (Subtarget.is64Bit() || VT != MVT::f64) { in LowerFMINIMUM_FMAXIMUM()
28381 assert(VT == MVT::f64); in LowerFMINIMUM_FMAXIMUM()
28395 NewX = DAG.getSelect(DL, VT, IsXSigned, X, Y); in LowerFMINIMUM_FMAXIMUM()
28396 NewY = DAG.getSelect(DL, VT, IsXSigned, Y, X); in LowerFMINIMUM_FMAXIMUM()
28398 NewX = DAG.getSelect(DL, VT, IsXSigned, Y, X); in LowerFMINIMUM_FMAXIMUM()
28399 NewY = DAG.getSelect(DL, VT, IsXSigned, X, Y); in LowerFMINIMUM_FMAXIMUM()
28404 Op->getFlags().hasNoNaNs() || (IsXNeverNaN && IsYNeverNaN); in LowerFMINIMUM_FMAXIMUM()
28412 SDValue MinMax = DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags()); in LowerFMINIMUM_FMAXIMUM()
28418 return DAG.getSelect(DL, VT, IsNaN, NewX, MinMax); in LowerFMINIMUM_FMAXIMUM()
28423 MVT VT = Op.getSimpleValueType(); in LowerABD() local
28427 if (VT.is256BitVector() && !Subtarget.hasInt256()) in LowerABD()
28430 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.useBWIRegs()) in LowerABD()
28437 if (VT.isScalarInteger()) { in LowerABD()
28438 unsigned WideBits = std::max<unsigned>(2 * VT.getScalarSizeInBits(), 32u); in LowerABD()
28441 // abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs)))) in LowerABD()
28442 // abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs)))) in LowerABD()
28448 return DAG.getNode(ISD::TRUNCATE, dl, VT, AbsDiff); in LowerABD()
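A scalar check of the two expansions quoted above for i8 elements, widening to i32 before the subtract; helper names are mine.

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    static uint8_t abds8(int8_t L, int8_t R) {
      return uint8_t(std::abs(int32_t(L) - int32_t(R))); // trunc(abs(sub(sext, sext)))
    }
    static uint8_t abdu8(uint8_t L, uint8_t R) {
      return uint8_t(std::abs(int32_t(L) - int32_t(R))); // trunc(abs(sub(zext, zext)))
    }

    int main() {
      assert(abds8(-100, 27) == 127);
      assert(abdu8(200, 60) == 140);
    }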
28459 MVT VT = Op.getSimpleValueType(); in LowerMUL() local
28461 // Decompose 256-bit ops into 128-bit ops. in LowerMUL()
28462 if (VT.is256BitVector() && !Subtarget.hasInt256()) in LowerMUL()
28465 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) in LowerMUL()
28471 // Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16 in LowerMUL()
28473 if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) { in LowerMUL()
28474 unsigned NumElts = VT.getVectorNumElements(); in LowerMUL()
28475 unsigned NumLanes = VT.getSizeInBits() / 128; in LowerMUL()
28478 if ((VT == MVT::v16i8 && Subtarget.hasInt256()) || in LowerMUL()
28479 (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) { in LowerMUL()
28480 MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); in LowerMUL()
28482 ISD::TRUNCATE, dl, VT, in LowerMUL()
28497 for (auto [Idx, Val] : enumerate(B->ops())) { in LowerMUL()
28505 SDValue Mask = DAG.getBitcast(VT, DAG.getConstant(0x00FF, dl, ExVT)); in LowerMUL()
28506 SDValue BLo = DAG.getNode(ISD::AND, dl, VT, Mask, B); in LowerMUL()
28507 SDValue BHi = DAG.getNode(X86ISD::ANDNP, dl, VT, Mask, B); in LowerMUL()
28510 RLo = DAG.getNode(ISD::AND, dl, VT, DAG.getBitcast(VT, RLo), Mask); in LowerMUL()
28513 return DAG.getNode(ISD::OR, dl, VT, RLo, DAG.getBitcast(VT, RHi)); in LowerMUL()
28518 // We're going to mask off the low byte of each result element of the in LowerMUL()
28519 // pmullw, so it doesn't matter what's in the high byte of each 16-bit in LowerMUL()
28521 SDValue Undef = DAG.getUNDEF(VT); in LowerMUL()
28522 SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef)); in LowerMUL()
28523 SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef)); in LowerMUL()
28541 BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef)); in LowerMUL()
28542 BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef)); in LowerMUL()
28548 return getPack(DAG, Subtarget, dl, VT, RLo, RHi); in LowerMUL()
28552 if (VT == MVT::v4i32) { in LowerMUL()
28557 static const int UnpackMask[] = { 1, -1, 3, -1 }; in LowerMUL()
28558 SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask); in LowerMUL()
28559 SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask); in LowerMUL()
28570 Evens = DAG.getBitcast(VT, Evens); in LowerMUL()
28571 Odds = DAG.getBitcast(VT, Odds); in LowerMUL()
28576 return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask); in LowerMUL()
28579 assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) && in LowerMUL()
28603 SDValue Zero = DAG.getConstant(0, dl, VT); in LowerMUL()
28608 AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B); in LowerMUL()
28612 SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG); in LowerMUL()
28613 AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi); in LowerMUL()
28618 SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG); in LowerMUL()
28619 AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B); in LowerMUL()
28622 SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo); in LowerMUL()
28623 Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG); in LowerMUL()
28625 return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi); in LowerMUL()
28629 MVT VT, bool IsSigned, in LowervXi8MulWithUNPCK() argument
28632 SDValue *Low = nullptr) { in LowervXi8MulWithUNPCK() argument
28633 unsigned NumElts = VT.getVectorNumElements(); in LowervXi8MulWithUNPCK()
28635 // For vXi8 we will unpack the low and high half of each 128 bit lane to widen in LowervXi8MulWithUNPCK()
28641 // and use pmullw to calculate the full 16-bit product. in LowervXi8MulWithUNPCK()
28644 // pmulhw to calculate the full 16-bit product. This trick means we don't in LowervXi8MulWithUNPCK()
28648 SDValue Zero = DAG.getConstant(0, dl, VT); in LowervXi8MulWithUNPCK()
28652 ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, A)); in LowervXi8MulWithUNPCK()
28653 AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A)); in LowervXi8MulWithUNPCK()
28655 ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero)); in LowervXi8MulWithUNPCK()
28656 AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero)); in LowervXi8MulWithUNPCK()
28688 BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, B)); in LowervXi8MulWithUNPCK()
28689 BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, B)); in LowervXi8MulWithUNPCK()
28691 BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Zero)); in LowervXi8MulWithUNPCK()
28692 BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Zero)); in LowervXi8MulWithUNPCK()
28701 if (Low) in LowervXi8MulWithUNPCK()
28702 *Low = getPack(DAG, Subtarget, dl, VT, RLo, RHi); in LowervXi8MulWithUNPCK()
28704 return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf*/ true); in LowervXi8MulWithUNPCK()
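A per-element model of the approach LowervXi8MulWithUNPCK's comments describe: each byte pair is widened to 16 bits (sign- or zero-extended per IsSigned), multiplied once, and either the low or the high byte of the 16-bit product is packed back. Sketch only.

    #include <cstdint>

    // Full 16-bit product of two bytes, as pmullw/pmulhw would compute it.
    static uint16_t mul16(uint8_t A, uint8_t B, bool IsSigned) {
      int32_t EA = IsSigned ? int8_t(A) : A;
      int32_t EB = IsSigned ? int8_t(B) : B;
      return uint16_t(EA * EB);
    }

    static uint8_t mulLoByte(uint8_t A, uint8_t B, bool IsSigned) {
      return uint8_t(mul16(A, B, IsSigned));      // the optional *Low result
    }
    static uint8_t mulHiByte(uint8_t A, uint8_t B, bool IsSigned) {
      return uint8_t(mul16(A, B, IsSigned) >> 8); // the packed high-half result
    }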
28710 MVT VT = Op.getSimpleValueType(); in LowerMULH() local
28711 bool IsSigned = Op->getOpcode() == ISD::MULHS; in LowerMULH()
28712 unsigned NumElts = VT.getVectorNumElements(); in LowerMULH()
28716 // Decompose 256-bit ops into 128-bit ops. in LowerMULH()
28717 if (VT.is256BitVector() && !Subtarget.hasInt256()) in LowerMULH()
28720 if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) in LowerMULH()
28723 if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) { in LowerMULH()
28724 assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) || in LowerMULH()
28725 (VT == MVT::v8i32 && Subtarget.hasInt256()) || in LowerMULH()
28726 (VT == MVT::v16i32 && Subtarget.hasAVX512())); in LowerMULH()
28740 const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, in LowerMULH()
28741 9, -1, 11, -1, 13, -1, 15, -1}; in LowerMULH()
28744 DAG.getVectorShuffle(VT, dl, A, A, ArrayRef(&Mask[0], NumElts)); in LowerMULH()
28747 DAG.getVectorShuffle(VT, dl, B, B, ArrayRef(&Mask[0], NumElts)); in LowerMULH()
28756 SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, in LowerMULH()
28761 SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, in LowerMULH()
28770 SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask); in LowerMULH()
28775 SDValue Zero = DAG.getConstant(0, dl, VT); in LowerMULH()
28776 SDValue T1 = DAG.getNode(ISD::AND, dl, VT, in LowerMULH()
28777 DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B); in LowerMULH()
28778 SDValue T2 = DAG.getNode(ISD::AND, dl, VT, in LowerMULH()
28779 DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A); in LowerMULH()
28781 SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2); in LowerMULH()
28782 Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup); in LowerMULH()
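The T1/T2 fixup subtracted above corresponds to this scalar identity: a signed high multiply equals the unsigned one minus B where A is negative and minus A where B is negative. The brute-force check below is mine.

    #include <cassert>
    #include <cstdint>

    static uint32_t mulhu32(uint32_t A, uint32_t B) {
      return uint32_t((uint64_t(A) * B) >> 32);
    }
    static uint32_t mulhs32(int32_t A, int32_t B) {
      return uint32_t(uint64_t(int64_t(A) * int64_t(B)) >> 32);
    }

    int main() {
      const int32_t Vals[] = {0, 1, -1, 7, -7, 123456789, -123456789,
                              INT32_MIN, INT32_MAX};
      for (int32_t A : Vals)
        for (int32_t B : Vals) {
          uint32_t T1 = (A < 0) ? uint32_t(B) : 0; // (0 > A) ? B : 0
          uint32_t T2 = (B < 0) ? uint32_t(A) : 0; // (0 > B) ? A : 0
          assert(mulhs32(A, B) == mulhu32(uint32_t(A), uint32_t(B)) - T1 - T2);
        }
    }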
28789 assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) || in LowerMULH()
28790 (VT == MVT::v64i8 && Subtarget.hasBWI())) && in LowerMULH()
28796 // With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack in LowerMULH()
28799 if ((VT == MVT::v16i8 && Subtarget.hasInt256()) || in LowerMULH()
28800 (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) { in LowerMULH()
28807 return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul); in LowerMULH()
28810 return LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG); in LowerMULH()
28816 MVT VT = Op.getSimpleValueType(); in LowerMULO() local
28819 if (!VT.isVector()) in LowerMULO()
28823 bool IsSigned = Op->getOpcode() == ISD::SMULO; in LowerMULO()
28826 EVT OvfVT = Op->getValueType(1); in LowerMULO()
28828 if ((VT == MVT::v32i8 && !Subtarget.hasInt256()) || in LowerMULO()
28829 (VT == MVT::v64i8 && !Subtarget.hasBWI())) { in LowerMULO()
28848 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); in LowerMULO()
28857 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); in LowerMULO()
28859 if ((VT == MVT::v16i8 && Subtarget.hasInt256()) || in LowerMULO()
28860 (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) { in LowerMULO()
28861 unsigned NumElts = VT.getVectorNumElements(); in LowerMULO()
28868 SDValue Low = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul); in LowerMULO() local
28878 // Fill all 16 bits with the sign bit from the low. in LowerMULO()
28892 High = DAG.getNode(ISD::TRUNCATE, dl, VT, High); in LowerMULO()
28894 DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT)); in LowerMULO()
28911 High = DAG.getNode(ISD::TRUNCATE, dl, VT, High); in LowerMULO()
28921 return DAG.getMergeValues({Low, Ovf}, dl); in LowerMULO()
28924 SDValue Low; in LowerMULO() local
28926 LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG, &Low); in LowerMULO()
28930 // SMULO overflows if the high bits don't match the sign of the low. in LowerMULO()
28932 DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT)); in LowerMULO()
28935 // UMULO overflows if the high bits are non-zero. in LowerMULO()
28937 DAG.getSetCC(dl, SetccVT, High, DAG.getConstant(0, dl, VT), ISD::SETNE); in LowerMULO()
28942 return DAG.getMergeValues({Low, Ovf}, dl); in LowerMULO()
28947 EVT VT = Op.getValueType(); in LowerWin64_i128OP() local
28948 assert(VT.isInteger() && VT.getSizeInBits() == 128 && in LowerWin64_i128OP()
28951 if (isa<ConstantSDNode>(Op->getOperand(1))) { in LowerWin64_i128OP()
28954 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), VT, Result[0], Result[1]); in LowerWin64_i128OP()
28959 switch (Op->getOpcode()) { in LowerWin64_i128OP()
28960 // clang-format off in LowerWin64_i128OP()
28966 // clang-format on in LowerWin64_i128OP()
28974 for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { in LowerWin64_i128OP()
28975 EVT ArgVT = Op->getOperand(i).getValueType(); in LowerWin64_i128OP()
28979 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); in LowerWin64_i128OP()
28984 DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MPI, Align(16)); in LowerWin64_i128OP()
29007 return DAG.getBitcast(VT, CallInfo.first); in LowerWin64_i128OP()
29014 EVT VT = Op.getValueType(); in LowerWin64_FP_TO_INT128() local
29015 bool IsStrict = Op->isStrictFPOpcode(); in LowerWin64_FP_TO_INT128()
29020 assert(VT.isInteger() && VT.getSizeInBits() == 128 && in LowerWin64_FP_TO_INT128()
29024 if (Op->getOpcode() == ISD::FP_TO_SINT || in LowerWin64_FP_TO_INT128()
29025 Op->getOpcode() == ISD::STRICT_FP_TO_SINT) in LowerWin64_FP_TO_INT128()
29026 LC = RTLIB::getFPTOSINT(ArgVT, VT); in LowerWin64_FP_TO_INT128()
29028 LC = RTLIB::getFPTOUINT(ArgVT, VT); in LowerWin64_FP_TO_INT128()
29037 // expected VT (i128). in LowerWin64_FP_TO_INT128()
29040 Result = DAG.getBitcast(VT, Result); in LowerWin64_FP_TO_INT128()
29047 EVT VT = Op.getValueType(); in LowerWin64_INT128_TO_FP() local
29048 bool IsStrict = Op->isStrictFPOpcode(); in LowerWin64_INT128_TO_FP()
29057 if (Op->getOpcode() == ISD::SINT_TO_FP || in LowerWin64_INT128_TO_FP()
29058 Op->getOpcode() == ISD::STRICT_SINT_TO_FP) in LowerWin64_INT128_TO_FP()
29059 LC = RTLIB::getSINTTOFP(ArgVT, VT); in LowerWin64_INT128_TO_FP()
29061 LC = RTLIB::getUINTTOFP(ArgVT, VT); in LowerWin64_INT128_TO_FP()
29070 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); in LowerWin64_INT128_TO_FP()
29077 makeLibCall(DAG, LC, VT, StackPtr, CallOptions, dl, Chain); in LowerWin64_INT128_TO_FP()
29095 (0x8080808080808080ULL >> (64 - (8 * Amt)))); in getGFNICtrlImm()
29097 return getGFNICtrlImm(ISD::SRL, 8 - Amt) | getGFNICtrlImm(ISD::SHL, Amt); in getGFNICtrlImm()
29099 return getGFNICtrlImm(ISD::SHL, 8 - Amt) | getGFNICtrlImm(ISD::SRL, Amt); in getGFNICtrlImm()
29105 SDValue getGFNICtrlMask(unsigned Opcode, SelectionDAG &DAG, const SDLoc &DL, MVT VT, in getGFNICtrlMask() argument
29107 assert(VT.getVectorElementType() == MVT::i8 && in getGFNICtrlMask()
29108 (VT.getSizeInBits() % 64) == 0 && "Illegal GFNI control type"); in getGFNICtrlMask()
29111 for (unsigned I = 0, E = VT.getSizeInBits(); I != E; I += 8) { in getGFNICtrlMask()
29115 return DAG.getBuildVector(VT, DL, MaskBits); in getGFNICtrlMask()
29118 // Return true if the required (according to Opcode) shift-imm form is natively
29120 static bool supportedVectorShiftWithImm(EVT VT, const X86Subtarget &Subtarget, in supportedVectorShiftWithImm() argument
29125 if (!VT.isSimple()) in supportedVectorShiftWithImm()
29128 if (!(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) in supportedVectorShiftWithImm()
29131 if (VT.getScalarSizeInBits() < 16) in supportedVectorShiftWithImm()
29134 if (VT.is512BitVector() && Subtarget.useAVX512Regs() && in supportedVectorShiftWithImm()
29135 (VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI())) in supportedVectorShiftWithImm()
29138 bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) || in supportedVectorShiftWithImm()
29139 (VT.is256BitVector() && Subtarget.hasInt256()); in supportedVectorShiftWithImm()
29142 (VT != MVT::v2i64 && VT != MVT::v4i64)); in supportedVectorShiftWithImm()
29147 // These instructions are defined together with shift-immediate.
29149 bool supportedVectorShiftWithBaseAmnt(EVT VT, const X86Subtarget &Subtarget, in supportedVectorShiftWithBaseAmnt() argument
29151 return supportedVectorShiftWithImm(VT, Subtarget, Opcode); in supportedVectorShiftWithBaseAmnt()
29154 // Return true if the required (according to Opcode) variable-shift form is
29156 static bool supportedVectorVarShift(EVT VT, const X86Subtarget &Subtarget, in supportedVectorVarShift() argument
29161 if (!VT.isSimple()) in supportedVectorVarShift()
29164 if (!(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) in supportedVectorVarShift()
29167 if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16) in supportedVectorVarShift()
29170 // vXi16 supported only on AVX-512, BWI in supportedVectorVarShift()
29171 if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI()) in supportedVectorVarShift()
29175 (Subtarget.useAVX512Regs() || !VT.is512BitVector())) in supportedVectorVarShift()
29178 bool LShift = VT.is128BitVector() || VT.is256BitVector(); in supportedVectorVarShift()
29179 bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64; in supportedVectorVarShift()
29185 MVT VT = Op.getSimpleValueType(); in LowerShiftByScalarImmediate() local
29190 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in LowerShiftByScalarImmediate()
29193 assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type"); in LowerShiftByScalarImmediate()
29194 MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); in LowerShiftByScalarImmediate()
29199 assert((VT != MVT::v4i64 || Subtarget.hasInt256()) && in LowerShiftByScalarImmediate()
29201 return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R); in LowerShiftByScalarImmediate()
29209 ShiftAmt - 32, DAG); in LowerShiftByScalarImmediate()
29210 if (VT == MVT::v2i64) in LowerShiftByScalarImmediate()
29212 if (VT == MVT::v4i64) in LowerShiftByScalarImmediate()
29220 getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG); in LowerShiftByScalarImmediate()
29222 if (VT == MVT::v2i64) in LowerShiftByScalarImmediate()
29224 if (VT == MVT::v4i64) in LowerShiftByScalarImmediate()
29228 return DAG.getBitcast(VT, Ex); in LowerShiftByScalarImmediate()
29238 return DAG.getUNDEF(VT); in LowerShiftByScalarImmediate()
29242 if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) { in LowerShiftByScalarImmediate()
29245 // shl: (shl V, 1) -> (add (freeze V), (freeze V)) in LowerShiftByScalarImmediate()
29247 // R may be undef at run-time, but (shl R, 1) must be an even number (LSB in LowerShiftByScalarImmediate()
29253 return DAG.getNode(ISD::ADD, dl, VT, R, R); in LowerShiftByScalarImmediate()
29256 return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); in LowerShiftByScalarImmediate()
29260 if (((!Subtarget.hasXOP() && VT == MVT::v2i64) || in LowerShiftByScalarImmediate()
29261 (Subtarget.hasInt256() && VT == MVT::v4i64)) && in LowerShiftByScalarImmediate()
29265 // If we're logical shifting an all-signbits value then we can just perform as in LowerShiftByScalarImmediate()
29269 SDValue Mask = DAG.getAllOnesConstant(dl, VT); in LowerShiftByScalarImmediate()
29270 Mask = DAG.getNode(Op.getOpcode(), dl, VT, Mask, Amt); in LowerShiftByScalarImmediate()
29271 return DAG.getNode(ISD::AND, dl, VT, R, Mask); in LowerShiftByScalarImmediate()
29274 if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) || in LowerShiftByScalarImmediate()
29275 (Subtarget.hasBWI() && VT == MVT::v64i8)) { in LowerShiftByScalarImmediate()
29276 unsigned NumElts = VT.getVectorNumElements(); in LowerShiftByScalarImmediate()
29281 // R may be undef at run-time, but (shl R, 1) must be an even number (LSB in LowerShiftByScalarImmediate()
29287 return DAG.getNode(ISD::ADD, dl, VT, R, R); in LowerShiftByScalarImmediate()
29292 SDValue Zeros = DAG.getConstant(0, dl, VT); in LowerShiftByScalarImmediate()
29293 if (VT.is512BitVector()) { in LowerShiftByScalarImmediate()
29294 assert(VT == MVT::v64i8 && "Unexpected element type!"); in LowerShiftByScalarImmediate()
29296 return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP); in LowerShiftByScalarImmediate()
29298 return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R); in LowerShiftByScalarImmediate()
29302 if (VT == MVT::v16i8 && Subtarget.hasXOP()) in LowerShiftByScalarImmediate()
29306 SDValue Mask = getGFNICtrlMask(Op.getOpcode(), DAG, dl, VT, ShiftAmt); in LowerShiftByScalarImmediate()
29307 return DAG.getNode(X86ISD::GF2P8AFFINEQB, dl, VT, R, Mask, in LowerShiftByScalarImmediate()
29315 SHL = DAG.getBitcast(VT, SHL); in LowerShiftByScalarImmediate()
29317 APInt Mask = APInt::getHighBitsSet(8, 8 - ShiftAmt); in LowerShiftByScalarImmediate()
29318 return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getConstant(Mask, dl, VT)); in LowerShiftByScalarImmediate()
29324 SRL = DAG.getBitcast(VT, SRL); in LowerShiftByScalarImmediate()
29326 APInt Mask = APInt::getLowBitsSet(8, 8 - ShiftAmt); in LowerShiftByScalarImmediate()
29327 return DAG.getNode(ISD::AND, dl, VT, SRL, DAG.getConstant(Mask, dl, VT)); in LowerShiftByScalarImmediate()
29331 SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); in LowerShiftByScalarImmediate()
29333 SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT); in LowerShiftByScalarImmediate()
29334 Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); in LowerShiftByScalarImmediate()
29335 Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); in LowerShiftByScalarImmediate()
29346 MVT VT = Op.getSimpleValueType(); in LowerShiftByScalarVariable() local
29353 int BaseShAmtIdx = -1; in LowerShiftByScalarVariable()
29355 if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) in LowerShiftByScalarVariable()
29356 return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, BaseShAmtIdx, in LowerShiftByScalarVariable()
29359 // vXi8 shifts - shift as v8i16 + mask result. in LowerShiftByScalarVariable()
29360 if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) || in LowerShiftByScalarVariable()
29361 (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) || in LowerShiftByScalarVariable()
29362 VT == MVT::v64i8) && in LowerShiftByScalarVariable()
29364 unsigned NumElts = VT.getVectorNumElements(); in LowerShiftByScalarVariable()
29370 // Create the mask using vXi16 shifts. For shift-rights we need to move in LowerShiftByScalarVariable()
29372 SDValue BitMask = DAG.getConstant(-1, dl, ExtVT); in LowerShiftByScalarVariable()
29378 BitMask = DAG.getBitcast(VT, BitMask); in LowerShiftByScalarVariable()
29379 BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask, in LowerShiftByScalarVariable()
29385 Res = DAG.getBitcast(VT, Res); in LowerShiftByScalarVariable()
29386 Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask); in LowerShiftByScalarVariable()
29390 // SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW. in LowerShiftByScalarVariable()
29395 SignMask = DAG.getBitcast(VT, SignMask); in LowerShiftByScalarVariable()
29396 Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask); in LowerShiftByScalarVariable()
29397 Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask); in LowerShiftByScalarVariable()
29411 MVT VT = Amt.getSimpleValueType(); in convertShiftLeftToScale() local
29412 if (!(VT == MVT::v8i16 || VT == MVT::v4i32 || in convertShiftLeftToScale()
29413 (Subtarget.hasInt256() && VT == MVT::v16i16) || in convertShiftLeftToScale()
29414 (Subtarget.hasAVX512() && VT == MVT::v32i16) || in convertShiftLeftToScale()
29415 (!Subtarget.hasAVX512() && VT == MVT::v16i8) || in convertShiftLeftToScale()
29416 (Subtarget.hasInt256() && VT == MVT::v32i8) || in convertShiftLeftToScale()
29417 (Subtarget.hasBWI() && VT == MVT::v64i8))) in convertShiftLeftToScale()
29420 MVT SVT = VT.getVectorElementType(); in convertShiftLeftToScale()
29422 unsigned NumElems = VT.getVectorNumElements(); in convertShiftLeftToScale()
29435 return DAG.getBuildVector(VT, dl, Elts); in convertShiftLeftToScale()
29440 if (VT == MVT::v4i32) { in convertShiftLeftToScale()
29441 Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT)); in convertShiftLeftToScale()
29442 Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, in convertShiftLeftToScale()
29443 DAG.getConstant(0x3f800000U, dl, VT)); in convertShiftLeftToScale()
29445 return DAG.getNode(ISD::FP_TO_SINT, dl, VT, Amt); in convertShiftLeftToScale()
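The v4i32 path above builds the scale 2^Amt by adding the shift amount to the exponent field of 1.0f and converting back to integer; a standalone check of that bit trick (loop bound and names are mine).

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      for (uint32_t Amt = 0; Amt < 31; ++Amt) {
        uint32_t Bits = (Amt << 23) + 0x3f800000U; // exponent of 1.0f plus Amt
        float F;
        std::memcpy(&F, &Bits, sizeof(F));
        assert(uint32_t(F) == (1u << Amt));        // FP_TO_SINT yields 2^Amt
      }
    }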
29449 if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) { in convertShiftLeftToScale()
29450 SDValue Z = DAG.getConstant(0, dl, VT); in convertShiftLeftToScale()
29451 SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z)); in convertShiftLeftToScale()
29452 SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z)); in convertShiftLeftToScale()
29456 return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); in convertShiftLeftToScale()
29457 return getPack(DAG, Subtarget, dl, VT, Lo, Hi); in convertShiftLeftToScale()
29465 MVT VT = Op.getSimpleValueType(); in LowerShift() local
29469 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in LowerShift()
29476 assert(VT.isVector() && "Custom lowering only for vector shifts!"); in LowerShift()
29485 if (supportedVectorVarShift(VT, Subtarget, Opc)) in LowerShift()
29491 if (((VT == MVT::v2i64 && !Subtarget.hasXOP()) || in LowerShift()
29492 (VT == MVT::v4i64 && Subtarget.hasInt256())) && in LowerShift()
29494 SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT); in LowerShift()
29495 SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); in LowerShift()
29496 R = DAG.getNode(ISD::SRL, dl, VT, R, Amt); in LowerShift()
29497 R = DAG.getNode(ISD::XOR, dl, VT, R, M); in LowerShift()
29498 R = DAG.getNode(ISD::SUB, dl, VT, R, M); in LowerShift()
29502 // XOP has 128-bit variable logical/arithmetic shifts. in LowerShift()
29503 // +ve/-ve Amt = shift left/right. in LowerShift()
29504 if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 || in LowerShift()
29505 VT == MVT::v8i16 || VT == MVT::v16i8)) { in LowerShift()
29507 Amt = DAG.getNegative(Amt, dl, VT); in LowerShift()
29509 return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt); in LowerShift()
29511 return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt); in LowerShift()
29514 // 2i64 vector logical shifts can efficiently avoid scalarization - do the in LowerShift()
29515 // shifts per-lane and then shuffle the partial results back together. in LowerShift()
29516 if (VT == MVT::v2i64 && Opc != ISD::SRA) { in LowerShift()
29518 SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0}); in LowerShift()
29519 SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1}); in LowerShift()
29520 SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0); in LowerShift()
29521 SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1); in LowerShift()
29522 return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3}); in LowerShift()
29535 if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32 || in LowerShift()
29536 (VT == MVT::v16i16 && Subtarget.hasInt256()))) { in LowerShift()
29538 unsigned NumElts = VT.getVectorNumElements(); in LowerShift()
29541 SDValue A = Amt->getOperand(i); in LowerShift()
29561 (VT != MVT::v16i16 || in LowerShift()
29562 is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) && in LowerShift()
29563 (VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL || in LowerShift()
29567 if (Cst1 && Cst2 && Cst1->getAPIntValue().ult(EltSizeInBits) && in LowerShift()
29568 Cst2->getAPIntValue().ult(EltSizeInBits)) { in LowerShift()
29569 SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, in LowerShift()
29570 Cst1->getZExtValue(), DAG); in LowerShift()
29571 SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, in LowerShift()
29572 Cst2->getZExtValue(), DAG); in LowerShift()
29573 return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask); in LowerShift()
29581 if (Opc == ISD::SHL && !(VT == MVT::v32i8 && (Subtarget.hasXOP() || in LowerShift()
29584 return DAG.getNode(ISD::MUL, dl, VT, R, Scale); in LowerShift()
29587 // can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt). in LowerShift()
29589 (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256()))) { in LowerShift()
29590 SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT); in LowerShift()
29591 SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt); in LowerShift()
29593 SDValue Zero = DAG.getConstant(0, dl, VT); in LowerShift()
29594 SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ); in LowerShift()
29595 SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale); in LowerShift()
29596 return DAG.getSelect(dl, VT, ZAmt, R, Res); in LowerShift()
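A scalar check of the SRL-as-MULHU rewrite above for 16-bit elements: for 1 <= Amt <= 15, x >> Amt equals the high half of x * 2^(16 - Amt); Amt == 0 needs the separate select seen in the code.

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned Amt = 1; Amt < 16; ++Amt) {
        uint32_t Scale = 1u << (16 - Amt);            // from (NumEltBits - Amt)
        for (uint32_t X = 0; X < 0x10000; X += 257) {
          uint16_t MulHi = uint16_t((X * Scale) >> 16);
          assert(MulHi == uint16_t(X >> Amt));
        }
      }
    }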
29601 // can replace with ISD::MULHS, creating scale factor from (NumEltBits - Amt). in LowerShift()
29603 // of these cases in pre-SSE41/XOP/AVX512 but not both. in LowerShift()
29605 (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256())) && in LowerShift()
29609 SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT); in LowerShift()
29610 SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt); in LowerShift()
29613 DAG.getSetCC(dl, VT, Amt, DAG.getConstant(0, dl, VT), ISD::SETEQ); in LowerShift()
29615 DAG.getSetCC(dl, VT, Amt, DAG.getConstant(1, dl, VT), ISD::SETEQ); in LowerShift()
29617 getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, 1, DAG); in LowerShift()
29618 SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale); in LowerShift()
29619 Res = DAG.getSelect(dl, VT, Amt0, R, Res); in LowerShift()
29620 return DAG.getSelect(dl, VT, Amt1, Sra1, Res); in LowerShift()
29626 // immediate shifts, else we need to zero-extend each lane to the lower i64 in LowerShift()
29629 if (VT == MVT::v4i32) { in LowerShift()
29632 Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0}); in LowerShift()
29633 Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1}); in LowerShift()
29634 Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2}); in LowerShift()
29635 Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3}); in LowerShift()
29639 // just zero-extending, but for SSE just duplicating the top 16-bits is in LowerShift()
29642 SDValue Z = DAG.getConstant(0, dl, VT); in LowerShift()
29643 Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1}); in LowerShift()
29644 Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1}); in LowerShift()
29645 Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1}); in LowerShift()
29646 Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1}); in LowerShift()
29650 {4, 5, 6, 7, -1, -1, -1, -1}); in LowerShift()
29661 SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0)); in LowerShift()
29662 SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1)); in LowerShift()
29663 SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2)); in LowerShift()
29664 SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3)); in LowerShift()
29667 // TODO - ideally shuffle combining would handle this. in LowerShift()
29669 SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1}); in LowerShift()
29670 SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7}); in LowerShift()
29671 return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7}); in LowerShift()
29673 SDValue R01 = DAG.getVectorShuffle(VT, dl, R0, R1, {0, -1, -1, 5}); in LowerShift()
29674 SDValue R23 = DAG.getVectorShuffle(VT, dl, R2, R3, {2, -1, -1, 7}); in LowerShift()
29675 return DAG.getVectorShuffle(VT, dl, R01, R23, {0, 3, 4, 7}); in LowerShift()
29681 // NOTE: We honor preferred vector width before promoting to 512-bits. in LowerShift()
29682 if ((Subtarget.hasInt256() && VT == MVT::v8i16) || in LowerShift()
29683 (Subtarget.canExtendTo512DQ() && VT == MVT::v16i16) || in LowerShift()
29684 (Subtarget.canExtendTo512DQ() && VT == MVT::v16i8) || in LowerShift()
29685 (Subtarget.canExtendTo512BW() && VT == MVT::v32i8) || in LowerShift()
29686 (Subtarget.hasBWI() && Subtarget.hasVLX() && VT == MVT::v16i8)) { in LowerShift()
29687 assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) && in LowerShift()
29690 MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements()); in LowerShift()
29694 return DAG.getNode(ISD::TRUNCATE, dl, VT, in LowerShift()
29701 (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) || in LowerShift()
29702 (VT == MVT::v64i8 && Subtarget.hasBWI())) && in LowerShift()
29704 int NumElts = VT.getVectorNumElements(); in LowerShift()
29716 if (VT == MVT::v16i8 && Subtarget.hasInt256()) { in LowerShift()
29721 return DAG.getZExtOrTrunc(R, dl, VT); in LowerShift()
29736 SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R)); in LowerShift()
29737 SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R)); in LowerShift()
29744 return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR); in LowerShift()
29747 if (VT == MVT::v16i8 || in LowerShift()
29748 (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) || in LowerShift()
29749 (VT == MVT::v64i8 && Subtarget.hasBWI())) { in LowerShift()
29750 MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); in LowerShift()
29753 if (VT.is512BitVector()) { in LowerShift()
29757 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); in LowerShift()
29758 V0 = DAG.getBitcast(VT, V0); in LowerShift()
29759 V1 = DAG.getBitcast(VT, V1); in LowerShift()
29760 Sel = DAG.getBitcast(VT, Sel); in LowerShift()
29761 Sel = DAG.getSetCC(dl, MaskVT, DAG.getConstant(0, dl, VT), Sel, in LowerShift()
29763 return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1)); in LowerShift()
29767 V0 = DAG.getBitcast(VT, V0); in LowerShift()
29768 V1 = DAG.getBitcast(VT, V1); in LowerShift()
29769 Sel = DAG.getBitcast(VT, Sel); in LowerShift()
29771 DAG.getNode(X86ISD::BLENDV, dl, VT, Sel, V0, V1)); in LowerShift()
29773 // On pre-SSE41 targets we test for the sign bit by comparing to in LowerShift()
29774 // zero - a negative value will set all bits of the lanes to true in LowerShift()
29786 Amt = DAG.getBitcast(VT, Amt); in LowerShift()
29790 SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT)); in LowerShift()
29791 R = SignBitSelect(VT, Amt, M, R); in LowerShift()
29794 Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); in LowerShift()
29797 M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT)); in LowerShift()
29798 R = SignBitSelect(VT, Amt, M, R); in LowerShift()
29801 Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); in LowerShift()
29804 M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT)); in LowerShift()
29805 R = SignBitSelect(VT, Amt, M, R); in LowerShift()
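A per-byte model of the three select rounds above. It assumes the shift amount was first moved so that its bit 2 sits in the byte's sign bit (that setup is outside the matches shown here); each round then doubles the amount and blends on the sign bit, exactly like the shl-by-4/2/1 plus SignBitSelect sequence.

    #include <cstdint>

    static uint8_t shlByteVar(uint8_t R, uint8_t Amt) {
      Amt = uint8_t(Amt << 5);          // assumed setup: amount bit 2 -> sign bit
      const int Steps[] = {4, 2, 1};
      for (int Step : Steps) {
        uint8_t M = uint8_t(R << Step);
        if (Amt & 0x80)                 // SignBitSelect / BLENDV on the sign bit
          R = M;
        Amt = uint8_t(Amt + Amt);       // next amount bit into the sign bit
      }
      return R;
    }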
29813 SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt); in LowerShift()
29814 SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt); in LowerShift()
29815 SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R); in LowerShift()
29816 SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R); in LowerShift()
29852 return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); in LowerShift()
29856 if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) { in LowerShift()
29858 SDValue Z = DAG.getConstant(0, dl, VT); in LowerShift()
29859 SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z); in LowerShift()
29860 SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z); in LowerShift()
29861 SDValue RLo = getUnpackl(DAG, dl, VT, Z, R); in LowerShift()
29862 SDValue RHi = getUnpackh(DAG, dl, VT, Z, R); in LowerShift()
29871 return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); in LowerShift()
29874 if (VT == MVT::v8i16) { in LowerShift()
29875 // If we have a constant shift amount, the non-SSE41 path is best as in LowerShift()
29884 MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2); in LowerShift()
29889 VT, DAG.getNode(X86ISD::BLENDV, dl, ExtVT, Sel, V0, V1)); in LowerShift()
29891 // On pre-SSE41 targets we splat the sign bit - a negative value will in LowerShift()
29895 getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Sel, 15, DAG); in LowerShift()
29896 return DAG.getSelect(dl, VT, C, V0, V1); in LowerShift()
29904 ISD::OR, dl, VT, in LowerShift()
29905 getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 4, DAG), in LowerShift()
29906 getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG)); in LowerShift()
29908 Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG); in LowerShift()
29912 SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG); in LowerShift()
29916 Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); in LowerShift()
29919 M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 4, DAG); in LowerShift()
29923 Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); in LowerShift()
29926 M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 2, DAG); in LowerShift()
29930 Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); in LowerShift()
29933 M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 1, DAG); in LowerShift()
29938 // Decompose 256-bit shifts into 128-bit shifts. in LowerShift()
29939 if (VT.is256BitVector()) in LowerShift()
29942 if (VT == MVT::v32i16 || VT == MVT::v64i8) in LowerShift()
29950 MVT VT = Op.getSimpleValueType(); in LowerFunnelShift() local
29958 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in LowerFunnelShift()
29961 if (VT.isVector()) { in LowerFunnelShift()
29964 unsigned NumElts = VT.getVectorNumElements(); in LowerFunnelShift()
29973 return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, in LowerFunnelShift()
29976 return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT, in LowerFunnelShift()
29979 assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 || in LowerFunnelShift()
29980 VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16 || in LowerFunnelShift()
29981 VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) && in LowerFunnelShift()
29984 // fshl(x,y,z) -> (unpack(y,x) << (z & (bw-1))) >> bw. in LowerFunnelShift()
29985 // fshr(x,y,z) -> unpack(y,x) >> (z & (bw-1)). in LowerFunnelShift()
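// A minimal scalar sketch of the two identities above (the helper names and
// the use of a plain 16-bit integer in place of unpack(y,x) are illustrative
// assumptions, not code from this file), shown for 8-bit lanes:
#include <cstdint>

static uint8_t fshl8_via_wide(uint8_t X, uint8_t Y, unsigned Z) {
  unsigned Amt = Z & 7;                    // z & (bw-1)
  uint16_t Wide = (uint16_t(X) << 8) | Y;  // "unpack(y,x)": x in the high half
  return uint8_t((Wide << Amt) >> 8);      // shift left, keep the high byte
}

static uint8_t fshr8_via_wide(uint8_t X, uint8_t Y, unsigned Z) {
  unsigned Amt = Z & 7;
  uint16_t Wide = (uint16_t(X) << 8) | Y;
  return uint8_t(Wide >> Amt);             // shift right, keep the low byte
}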
29991 uint64_t ShXAmt = IsFSHR ? (EltSizeInBits - ShiftAmt) : ShiftAmt; in LowerFunnelShift()
29992 uint64_t ShYAmt = IsFSHR ? ShiftAmt : (EltSizeInBits - ShiftAmt); in LowerFunnelShift()
29998 (useVPTERNLOG(Subtarget, VT) && in LowerFunnelShift()
30001 // bit-select - lower using vXi16 shifts and then perform the bitmask at in LowerFunnelShift()
30003 APInt MaskX = APInt::getHighBitsSet(8, 8 - ShXAmt); in LowerFunnelShift()
30004 APInt MaskY = APInt::getLowBitsSet(8, 8 - ShYAmt); in LowerFunnelShift()
30011 ShX = DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, ShX), in LowerFunnelShift()
30012 DAG.getConstant(MaskX, DL, VT)); in LowerFunnelShift()
30013 ShY = DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, ShY), in LowerFunnelShift()
30014 DAG.getConstant(MaskY, DL, VT)); in LowerFunnelShift()
30015 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY); in LowerFunnelShift()
30018 SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, Op0, in LowerFunnelShift()
30019 DAG.getShiftAmountConstant(ShXAmt, VT, DL)); in LowerFunnelShift()
30020 SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Op1, in LowerFunnelShift()
30021 DAG.getShiftAmountConstant(ShYAmt, VT, DL)); in LowerFunnelShift()
30022 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY); in LowerFunnelShift()
30025 SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT); in LowerFunnelShift()
30026 SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); in LowerFunnelShift()
30037 // Split 256-bit integers on XOP/pre-AVX2 targets. in LowerFunnelShift()
30038 // Split 512-bit integers on non 512-bit BWI targets. in LowerFunnelShift()
30039 if ((VT.is256BitVector() && ((Subtarget.hasXOP() && EltSizeInBits < 16) || in LowerFunnelShift()
30041 (VT.is512BitVector() && !Subtarget.useBWIRegs() && in LowerFunnelShift()
30043 // Pre-mask the amount modulo using the wider vector. in LowerFunnelShift()
30044 Op = DAG.getNode(Op.getOpcode(), DL, VT, Op0, Op1, AmtMod); in LowerFunnelShift()
30050 int ScalarAmtIdx = -1; in LowerFunnelShift()
30056 SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0)); in LowerFunnelShift()
30057 SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0)); in LowerFunnelShift()
30062 return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR); in LowerFunnelShift()
30070 // If per-element shifts are legal, fallback to generic expansion. in LowerFunnelShift()
30071 if (supportedVectorVarShift(VT, Subtarget, ShiftOpc) || Subtarget.hasXOP()) in LowerFunnelShift()
30075 // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw. in LowerFunnelShift()
30076 // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))). in LowerFunnelShift()
30089 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res); in LowerFunnelShift()
30092 // Attempt to fold per-element (ExtVT) shift as unpack(y,x) << zext(z) in LowerFunnelShift()
30095 SDValue Z = DAG.getConstant(0, DL, VT); in LowerFunnelShift()
30096 SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0)); in LowerFunnelShift()
30097 SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0)); in LowerFunnelShift()
30098 SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z)); in LowerFunnelShift()
30099 SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z)); in LowerFunnelShift()
30102 return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR); in LowerFunnelShift()
30109 (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && in LowerFunnelShift()
30116 // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw. in LowerFunnelShift()
30117 // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))). in LowerFunnelShift()
30118 if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) && in LowerFunnelShift()
30120 SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType()); in LowerFunnelShift()
30133 return DAG.getZExtOrTrunc(Res, DL, VT); in LowerFunnelShift()
30136 if (VT == MVT::i8 || ExpandFunnel) in LowerFunnelShift()
30140 if (VT == MVT::i16) { in LowerFunnelShift()
30144 return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt); in LowerFunnelShift()
30152 MVT VT = Op.getSimpleValueType(); in LowerRotate() local
30153 assert(VT.isVector() && "Custom lowering only for vector rotates!"); in LowerRotate()
30159 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in LowerRotate()
30160 int NumElts = VT.getVectorNumElements(); in LowerRotate()
30179 return DAG.getNode(RotOpc, DL, VT, R, in LowerRotate()
30183 // Else, fall-back on VPROLV/VPRORV. in LowerRotate()
30187 // AVX512 VBMI2 vXi16 - lower to funnel shifts. in LowerRotate()
30190 return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt); in LowerRotate()
30193 SDValue Z = DAG.getConstant(0, DL, VT); in LowerRotate()
30198 if (SDValue NegAmt = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {Z, Amt})) in LowerRotate()
30199 return DAG.getNode(ISD::ROTL, DL, VT, R, NegAmt); in LowerRotate()
30203 return DAG.getNode(ISD::ROTL, DL, VT, R, in LowerRotate()
30204 DAG.getNode(ISD::SUB, DL, VT, Z, Amt)); in LowerRotate()
30208 if (IsCstSplat && Subtarget.hasGFNI() && VT.getScalarType() == MVT::i8 && in LowerRotate()
30209 DAG.getTargetLoweringInfo().isTypeLegal(VT)) { in LowerRotate()
30211 SDValue Mask = getGFNICtrlMask(Opcode, DAG, DL, VT, RotAmt); in LowerRotate()
30212 return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, R, Mask, in LowerRotate()
30216 // Split 256-bit integers on XOP/pre-AVX2 targets. in LowerRotate()
30217 if (VT.is256BitVector() && (Subtarget.hasXOP() || !Subtarget.hasAVX2())) in LowerRotate()
30220 // XOP has 128-bit vector variable + immediate rotates. in LowerRotate()
30221 // +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL. in LowerRotate()
30225 assert(VT.is128BitVector() && "Only rotate 128-bit vectors!"); in LowerRotate()
30230 return DAG.getNode(X86ISD::VROTLI, DL, VT, R, in LowerRotate()
30234 // Use general rotate by variable (per-element). in LowerRotate()
30238 // Rotate by a uniform constant - expand back to shifts. in LowerRotate()
30243 uint64_t ShlAmt = IsROTL ? RotAmt : (EltSizeInBits - RotAmt); in LowerRotate()
30244 uint64_t SrlAmt = IsROTL ? (EltSizeInBits - RotAmt) : RotAmt; in LowerRotate()
30245 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, R, in LowerRotate()
30246 DAG.getShiftAmountConstant(ShlAmt, VT, DL)); in LowerRotate()
30247 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, R, in LowerRotate()
30248 DAG.getShiftAmountConstant(SrlAmt, VT, DL)); in LowerRotate()
30249 return DAG.getNode(ISD::OR, DL, VT, Shl, Srl); in LowerRotate()
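// Scalar sketch of the constant-rotate expansion above (illustrative helper,
// not from this file; assumes the rotate amount has already been reduced
// modulo the element width, as the code above does):
#include <cstdint>

static uint8_t rot8_by_constant(uint8_t X, unsigned RotAmt, bool IsROTL) {
  unsigned ShlAmt = IsROTL ? RotAmt : (8 - RotAmt);
  unsigned SrlAmt = IsROTL ? (8 - RotAmt) : RotAmt;
  return uint8_t((X << ShlAmt) | (X >> SrlAmt));  // one SHL, one SRL, one OR
}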
30252 // Split 512-bit integers on non 512-bit BWI targets. in LowerRotate()
30253 if (VT.is512BitVector() && !Subtarget.useBWIRegs()) in LowerRotate()
30257 (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 || in LowerRotate()
30258 ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) && in LowerRotate()
30260 ((VT == MVT::v32i16 || VT == MVT::v64i8) && Subtarget.useBWIRegs())) && in LowerRotate()
30266 SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT); in LowerRotate()
30267 SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); in LowerRotate()
30270 // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw. in LowerRotate()
30271 // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))). in LowerRotate()
30273 int BaseRotAmtIdx = -1; in LowerRotate()
30277 return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt); in LowerRotate()
30280 SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R)); in LowerRotate()
30281 SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R)); in LowerRotate()
30286 return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL); in LowerRotate()
30294 // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw. in LowerRotate()
30295 // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))). in LowerRotate()
30296 // Const vXi16/vXi32 are excluded in favor of MUL-based lowering. in LowerRotate()
30298 !supportedVectorVarShift(VT, Subtarget, ShiftOpc) && in LowerRotate()
30300 SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R)); in LowerRotate()
30301 SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R)); in LowerRotate()
30302 SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z)); in LowerRotate()
30303 SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z)); in LowerRotate()
30306 return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL); in LowerRotate()
30317 // rotl(x,y) -> (((aext(x) << bw) | zext(x)) << (y & (bw-1))) >> bw. in LowerRotate()
30318 // rotr(x,y) -> (((aext(x) << bw) | zext(x)) >> (y & (bw-1))). in LowerRotate()
30333 return DAG.getNode(ISD::TRUNCATE, DL, VT, R); in LowerRotate()
30341 V0 = DAG.getBitcast(VT, V0); in LowerRotate()
30342 V1 = DAG.getBitcast(VT, V1); in LowerRotate()
30343 Sel = DAG.getBitcast(VT, Sel); in LowerRotate()
30345 DAG.getNode(X86ISD::BLENDV, DL, VT, Sel, V0, V1)); in LowerRotate()
30347 // On pre-SSE41 targets we test for the sign bit by comparing to in LowerRotate()
30348 // zero - a negative value will set all bits of the lanes to true in LowerRotate()
30356 if (!IsROTL && !useVPTERNLOG(Subtarget, VT)) { in LowerRotate()
30357 Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt); in LowerRotate()
30369 Amt = DAG.getBitcast(VT, Amt); in LowerRotate()
30374 ISD::OR, DL, VT, in LowerRotate()
30375 DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(4, DL, VT)), in LowerRotate()
30376 DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(4, DL, VT))); in LowerRotate()
30377 R = SignBitSelect(VT, Amt, M, R); in LowerRotate()
30380 Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt); in LowerRotate()
30384 ISD::OR, DL, VT, in LowerRotate()
30385 DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(2, DL, VT)), in LowerRotate()
30386 DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(6, DL, VT))); in LowerRotate()
30387 R = SignBitSelect(VT, Amt, M, R); in LowerRotate()
30390 Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt); in LowerRotate()
30394 ISD::OR, DL, VT, in LowerRotate()
30395 DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(1, DL, VT)), in LowerRotate()
30396 DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(7, DL, VT))); in LowerRotate()
30397 return SignBitSelect(VT, Amt, M, R); in LowerRotate()
30401 bool LegalVarShifts = supportedVectorVarShift(VT, Subtarget, ISD::SHL) && in LowerRotate()
30402 supportedVectorVarShift(VT, Subtarget, ISD::SRL); in LowerRotate()
30405 // Fallback for non-constant AVX2 vXi16 as well. in LowerRotate()
30407 Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); in LowerRotate()
30408 SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT); in LowerRotate()
30409 AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt); in LowerRotate()
30410 SDValue SHL = DAG.getNode(IsROTL ? ISD::SHL : ISD::SRL, DL, VT, R, Amt); in LowerRotate()
30411 SDValue SRL = DAG.getNode(IsROTL ? ISD::SRL : ISD::SHL, DL, VT, R, AmtR); in LowerRotate()
30412 return DAG.getNode(ISD::OR, DL, VT, SHL, SRL); in LowerRotate()
30417 Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt); in LowerRotate()
30422 Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); in LowerRotate()
30434 SDValue Lo = DAG.getNode(ISD::MUL, DL, VT, R, Scale); in LowerRotate()
30435 SDValue Hi = DAG.getNode(ISD::MULHU, DL, VT, R, Scale); in LowerRotate()
30436 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi); in LowerRotate()
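// Scalar sketch (illustrative helper, not from this file) of the MUL/MULHU
// trick above: a rotate-left by r is the low half of the double-width product
// x * 2^r OR'd with its wrapped-around high half.
#include <cstdint>

static uint32_t rotl32_via_mul(uint32_t X, unsigned R) {
  uint64_t Prod = uint64_t(X) << (R & 31);       // x * 2^r as a 64-bit product
  return uint32_t(Prod) | uint32_t(Prod >> 32);  // MUL (low) | MULHU (high)
}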
30440 // to v2i64 results at a time. The upper 32-bits contain the wrapped bits in LowerRotate()
30441 // that can then be OR'd with the lower 32-bits. in LowerRotate()
30442 assert(VT == MVT::v4i32 && "Only v4i32 vector rotate expected"); in LowerRotate()
30443 static const int OddMask[] = {1, -1, 3, -1}; in LowerRotate()
30444 SDValue R13 = DAG.getVectorShuffle(VT, DL, R, R, OddMask); in LowerRotate()
30445 SDValue Scale13 = DAG.getVectorShuffle(VT, DL, Scale, Scale, OddMask); in LowerRotate()
30453 Res02 = DAG.getBitcast(VT, Res02); in LowerRotate()
30454 Res13 = DAG.getBitcast(VT, Res13); in LowerRotate()
30456 return DAG.getNode(ISD::OR, DL, VT, in LowerRotate()
30457 DAG.getVectorShuffle(VT, DL, Res02, Res13, {0, 4, 2, 6}), in LowerRotate()
30458 DAG.getVectorShuffle(VT, DL, Res02, Res13, {1, 5, 3, 7})); in LowerRotate()
30466 unsigned OpWidth = MemType->getPrimitiveSizeInBits(); in needsCmpXchgNb()
30478 Type *MemType = SI->getValueOperand()->getType(); in shouldExpandAtomicStoreInIR()
30480 if (!SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) && in shouldExpandAtomicStoreInIR()
30482 if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && in shouldExpandAtomicStoreInIR()
30486 if (MemType->getPrimitiveSizeInBits() == 128 && Subtarget.is64Bit() && in shouldExpandAtomicStoreInIR()
30498 Type *MemType = LI->getType(); in shouldExpandAtomicLoadInIR()
30500 if (!LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) && in shouldExpandAtomicLoadInIR()
30502 // If this is a 64-bit atomic load on a 32-bit target and SSE2 is enabled, we in shouldExpandAtomicLoadInIR()
30503 // can use movq to do the load. If we have X87 we can load into an 80-bit in shouldExpandAtomicLoadInIR()
30505 if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && in shouldExpandAtomicLoadInIR()
30509 // If this is a 128-bit load with AVX, 128-bit SSE loads/stores are atomic. in shouldExpandAtomicLoadInIR()
30510 if (MemType->getPrimitiveSizeInBits() == 128 && Subtarget.is64Bit() && in shouldExpandAtomicLoadInIR()
30533 if (isPowerOf2_64(C->getZExtValue())) in FindSingleBitChange()
30535 else if (isPowerOf2_64((~C->getValue()).getZExtValue())) in FindSingleBitChange()
30540 // Check if V is some power of 2 pattern known to be non-zero in FindSingleBitChange()
30559 if (I->getOpcode() == Instruction::Shl) { in FindSingleBitChange()
30561 // -X` and some other provable power of 2 patterns that we can use CTZ on in FindSingleBitChange()
30564 // non-zero even where C != 1. Likewise LShr(C, X) and AShr(C, X) may also in FindSingleBitChange()
30565 // be provably a non-zero power of 2. in FindSingleBitChange()
30568 auto *ShiftVal = dyn_cast<ConstantInt>(I->getOperand(0)); in FindSingleBitChange()
30571 if (ShiftVal->equalsInt(1)) in FindSingleBitChange()
30577 Value *BitV = I->getOperand(1); in FindSingleBitChange()
30583 if (*AndC == (I->getType()->getPrimitiveSizeInBits() - 1)) in FindSingleBitChange()
30597 if (AI->use_empty()) in shouldExpandLogicAtomicRMWInIR()
30600 if (AI->getOperation() == AtomicRMWInst::Xor) { in shouldExpandLogicAtomicRMWInIR()
30601 // A ^ SignBit -> A + SignBit. This allows us to use `xadd` which is in shouldExpandLogicAtomicRMWInIR()
30603 if (match(AI->getOperand(1), m_SignMask())) in shouldExpandLogicAtomicRMWInIR()
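// Small check of the identity behind the xadd rewrite above (plain scalar
// arithmetic; the helpers are illustrative, not part of this file): XOR'ing
// in only the sign bit equals adding the sign-bit value, because that add
// leaves the lower bits untouched and the carry out of the top bit wraps
// away.
#include <cstdint>

static uint32_t xor_sign_bit(uint32_t A) { return A ^ 0x80000000u; }
static uint32_t add_sign_bit(uint32_t A) { return A + 0x80000000u; }
// For every A, xor_sign_bit(A) == add_sign_bit(A) in 32-bit modular arithmetic.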
30609 // Note: InstCombinePass can cause a de-optimization here. It replaces the in shouldExpandLogicAtomicRMWInIR()
30613 Instruction *I = AI->user_back(); in shouldExpandLogicAtomicRMWInIR()
30614 auto BitChange = FindSingleBitChange(AI->getValOperand()); in shouldExpandLogicAtomicRMWInIR()
30615 if (BitChange.second == UndefBit || !AI->hasOneUse() || in shouldExpandLogicAtomicRMWInIR()
30616 I->getOpcode() != Instruction::And || in shouldExpandLogicAtomicRMWInIR()
30617 AI->getType()->getPrimitiveSizeInBits() == 8 || in shouldExpandLogicAtomicRMWInIR()
30618 AI->getParent() != I->getParent()) in shouldExpandLogicAtomicRMWInIR()
30621 unsigned OtherIdx = I->getOperand(0) == AI ? 1 : 0; in shouldExpandLogicAtomicRMWInIR()
30624 if (AI == I->getOperand(OtherIdx)) in shouldExpandLogicAtomicRMWInIR()
30629 auto *C1 = cast<ConstantInt>(AI->getValOperand()); in shouldExpandLogicAtomicRMWInIR()
30630 auto *C2 = dyn_cast<ConstantInt>(I->getOperand(OtherIdx)); in shouldExpandLogicAtomicRMWInIR()
30631 if (!C2 || !isPowerOf2_64(C2->getZExtValue())) { in shouldExpandLogicAtomicRMWInIR()
30634 if (AI->getOperation() == AtomicRMWInst::And) { in shouldExpandLogicAtomicRMWInIR()
30635 return ~C1->getValue() == C2->getValue() in shouldExpandLogicAtomicRMWInIR()
30645 auto BitTested = FindSingleBitChange(I->getOperand(OtherIdx)); in shouldExpandLogicAtomicRMWInIR()
30657 if (AI->getOperation() == AtomicRMWInst::And) in shouldExpandLogicAtomicRMWInIR()
30673 switch (AI->getOperation()) { in emitBitTestAtomicRMWIntrinsic()
30689 Instruction *I = AI->user_back(); in emitBitTestAtomicRMWIntrinsic()
30690 LLVMContext &Ctx = AI->getContext(); in emitBitTestAtomicRMWIntrinsic()
30691 Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(), in emitBitTestAtomicRMWIntrinsic()
30695 auto BitTested = FindSingleBitChange(AI->getValOperand()); in emitBitTestAtomicRMWIntrinsic()
30699 auto *C = cast<ConstantInt>(I->getOperand(I->getOperand(0) == AI ? 1 : 0)); in emitBitTestAtomicRMWIntrinsic()
30701 BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_C, AI->getType()); in emitBitTestAtomicRMWIntrinsic()
30703 unsigned Imm = llvm::countr_zero(C->getZExtValue()); in emitBitTestAtomicRMWIntrinsic()
30706 BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_I, AI->getType()); in emitBitTestAtomicRMWIntrinsic()
30715 unsigned ShiftBits = SI->getType()->getPrimitiveSizeInBits(); in emitBitTestAtomicRMWIntrinsic()
30717 Builder.CreateAnd(SI, Builder.getIntN(ShiftBits, ShiftBits - 1)); in emitBitTestAtomicRMWIntrinsic()
30725 Result = Builder.CreateZExtOrTrunc(Result, AI->getType()); in emitBitTestAtomicRMWIntrinsic()
30727 // If the result is only used for zero/non-zero status then we don't need to in emitBitTestAtomicRMWIntrinsic()
30729 for (auto It = I->user_begin(); It != I->user_end(); ++It) { in emitBitTestAtomicRMWIntrinsic()
30731 if (ICmp->isEquality()) { in emitBitTestAtomicRMWIntrinsic()
30732 auto *C0 = dyn_cast<ConstantInt>(ICmp->getOperand(0)); in emitBitTestAtomicRMWIntrinsic()
30733 auto *C1 = dyn_cast<ConstantInt>(ICmp->getOperand(1)); in emitBitTestAtomicRMWIntrinsic()
30736 if ((C0 ? C0 : C1)->isZero()) in emitBitTestAtomicRMWIntrinsic()
30746 I->replaceAllUsesWith(Result); in emitBitTestAtomicRMWIntrinsic()
30747 I->eraseFromParent(); in emitBitTestAtomicRMWIntrinsic()
30748 AI->eraseFromParent(); in emitBitTestAtomicRMWIntrinsic()
30753 if (!AI->hasOneUse()) in shouldExpandCmpArithRMWInIR()
30756 Value *Op = AI->getOperand(1); in shouldExpandCmpArithRMWInIR()
30758 Instruction *I = AI->user_back(); in shouldExpandCmpArithRMWInIR()
30759 AtomicRMWInst::BinOp Opc = AI->getOperation(); in shouldExpandCmpArithRMWInIR()
30764 if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt()))) in shouldExpandCmpArithRMWInIR()
30766 if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes()))) in shouldExpandCmpArithRMWInIR()
30775 if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt()))) in shouldExpandCmpArithRMWInIR()
30777 if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes()))) in shouldExpandCmpArithRMWInIR()
30786 if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt()))) in shouldExpandCmpArithRMWInIR()
30789 if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes()))) in shouldExpandCmpArithRMWInIR()
30797 if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt()))) in shouldExpandCmpArithRMWInIR()
30799 if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_AllOnes()))) in shouldExpandCmpArithRMWInIR()
30813 LLVMContext &Ctx = AI->getContext(); in emitCmpArithAtomicRMWIntrinsic()
30814 ICmpInst *ICI = dyn_cast<ICmpInst>(AI->user_back()); in emitCmpArithAtomicRMWIntrinsic()
30816 TempI = AI->user_back(); in emitCmpArithAtomicRMWIntrinsic()
30817 assert(TempI->hasOneUse() && "Must have one use"); in emitCmpArithAtomicRMWIntrinsic()
30818 ICI = cast<ICmpInst>(TempI->user_back()); in emitCmpArithAtomicRMWIntrinsic()
30821 ICmpInst::Predicate Pred = ICI->getPredicate(); in emitCmpArithAtomicRMWIntrinsic()
30839 switch (AI->getOperation()) { in emitCmpArithAtomicRMWIntrinsic()
30859 Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType()); in emitCmpArithAtomicRMWIntrinsic()
30860 Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(), in emitCmpArithAtomicRMWIntrinsic()
30863 CmpArith, {Addr, AI->getValOperand(), Builder.getInt32((unsigned)CC)}); in emitCmpArithAtomicRMWIntrinsic()
30865 ICI->replaceAllUsesWith(Result); in emitCmpArithAtomicRMWIntrinsic()
30866 ICI->eraseFromParent(); in emitCmpArithAtomicRMWIntrinsic()
30868 TempI->eraseFromParent(); in emitCmpArithAtomicRMWIntrinsic()
30869 AI->eraseFromParent(); in emitCmpArithAtomicRMWIntrinsic()
30875 Type *MemType = AI->getType(); in shouldExpandAtomicRMWInIR()
30879 if (MemType->getPrimitiveSizeInBits() > NativeWidth) { in shouldExpandAtomicRMWInIR()
30884 AtomicRMWInst::BinOp Op = AI->getOperation(); in shouldExpandAtomicRMWInIR()
30912 // These always require a non-trivial set of data operations on x86. We must in shouldExpandAtomicRMWInIR()
30921 Type *MemType = AI->getType(); in lowerIdempotentRMWIntoFencedLoad()
30925 if (MemType->getPrimitiveSizeInBits() > NativeWidth) in lowerIdempotentRMWIntoFencedLoad()
30931 if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand())) in lowerIdempotentRMWIntoFencedLoad()
30932 if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() && in lowerIdempotentRMWIntoFencedLoad()
30933 AI->use_empty()) in lowerIdempotentRMWIntoFencedLoad()
30938 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); in lowerIdempotentRMWIntoFencedLoad()
30939 auto SSID = AI->getSyncScopeID(); in lowerIdempotentRMWIntoFencedLoad()
30942 auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering()); in lowerIdempotentRMWIntoFencedLoad()
30945 // http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence in lowerIdempotentRMWIntoFencedLoad()
30967 // different cache-line to prevent cache-line bouncing. In practice it in lowerIdempotentRMWIntoFencedLoad()
30978 AI->getType(), AI->getPointerOperand(), AI->getAlign()); in lowerIdempotentRMWIntoFencedLoad()
30979 Loaded->setAtomic(Order, SSID); in lowerIdempotentRMWIntoFencedLoad()
30980 AI->replaceAllUsesWith(Loaded); in lowerIdempotentRMWIntoFencedLoad()
30981 AI->eraseFromParent(); in lowerIdempotentRMWIntoFencedLoad()
30996 // See: Intel® 64 and IA-32 Architectures Software Developer’s Manual, in emitLockedStackOp()
31007 // c) To minimize concerns about cross thread stack usage - in particular, in emitLockedStackOp()
31009 // captures state in the TOS frame and accesses it from many threads - in emitLockedStackOp()
31014 // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ in emitLockedStackOp()
31018 const unsigned SPOffset = TFL.has128ByteRedZone(MF) ? -64 : 0; in emitLockedStackOp()
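// Hedged portable illustration of the idea above (this is not the MI-level
// sequence the function emits): a LOCK'ed read-modify-write of a harmless
// slot on the current thread's stack is a full barrier on x86, which is why
// an OR of 0 into [esp - offset] can stand in for MFENCE. This only sketches
// the instruction pattern; it is not a substitute for atomic_thread_fence in
// portable C++ code.
#include <atomic>

static void locked_stack_op_sketch() {
  std::atomic<int> StackSlot{0};                     // lives on this stack frame
  StackSlot.fetch_or(0, std::memory_order_seq_cst);  // typically LOCK OR [slot], 0
}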
31058 // The only fence that needs an instruction is a sequentially-consistent in LowerATOMIC_FENCE()
31059 // cross-thread fence. in LowerATOMIC_FENCE()
31069 // MEMBARRIER is a compiler barrier; it codegens to a no-op. in LowerATOMIC_FENCE()
31097 MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand(); in LowerCMP_SWAP()
31107 return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), in LowerCMP_SWAP()
31149 assert(!Subtarget.is64Bit() && "Expected 32-bit mode"); in LowerBITCAST()
31161 assert(!Subtarget.hasAVX512() && "Should use K-registers with AVX512"); in LowerBITCAST()
31170 SrcVT == MVT::i64) && "Unexpected VT!"); in LowerBITCAST()
31202 /// Compute the horizontal sum of bytes in V for the elements of VT.
31204 /// Requires V to be a byte vector and VT to be an integer vector type with
31205 /// wider elements than V's type. The width of the elements of VT determines
31208 static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, in LowerHorizontalByteSum() argument
31213 MVT EltVT = VT.getVectorElementType(); in LowerHorizontalByteSum()
31218 unsigned VecSize = VT.getSizeInBits(); in LowerHorizontalByteSum()
31227 return DAG.getBitcast(VT, V); in LowerHorizontalByteSum()
31231 // We unpack the low half and high half into i32s interleaved with zeros so in LowerHorizontalByteSum()
31236 SDValue Zeros = DAG.getConstant(0, DL, VT); in LowerHorizontalByteSum()
31237 SDValue V32 = DAG.getBitcast(VT, V); in LowerHorizontalByteSum()
31238 SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros); in LowerHorizontalByteSum() local
31239 SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros); in LowerHorizontalByteSum()
31244 Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, in LowerHorizontalByteSum()
31245 DAG.getBitcast(ByteVecVT, Low), Zeros); in LowerHorizontalByteSum()
31252 DAG.getBitcast(ShortVecVT, Low), in LowerHorizontalByteSum()
31255 return DAG.getBitcast(VT, V); in LowerHorizontalByteSum()
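// Scalar model (illustrative helper, not from this file) of what one PSADBW
// against zero computes per 64-bit group in the code above: the sum of its
// eight unsigned byte lanes, delivered in the low bits of the 64-bit result.
#include <cstdint>

static uint64_t psadbw_group_vs_zero(uint64_t Bytes) {
  uint64_t Sum = 0;
  for (int I = 0; I < 8; ++I)
    Sum += (Bytes >> (8 * I)) & 0xFF;  // |b - 0| == b for unsigned bytes
  return Sum;                          // at most 8 * 255, fits in 16 bits
}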
31265 SDValue ShifterV = DAG.getConstant(8, DL, VT); in LowerHorizontalByteSum()
31266 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), ShifterV); in LowerHorizontalByteSum()
31269 return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), ShifterV); in LowerHorizontalByteSum()
31275 MVT VT = Op.getSimpleValueType(); in LowerVectorCTPOPInRegLUT() local
31276 MVT EltVT = VT.getVectorElementType(); in LowerVectorCTPOPInRegLUT()
31277 int NumElts = VT.getVectorNumElements(); in LowerVectorCTPOPInRegLUT()
31282 // http://wm.ite.pl/articles/sse-popcount.html in LowerVectorCTPOPInRegLUT()
31285 // index into an in-register pre-computed pop count table. We then split up the in LowerVectorCTPOPInRegLUT()
31286 // input vector into two new ones: (1) a vector with only the shifted-right in LowerVectorCTPOPInRegLUT()
31289 // to index the in-register table. Next, both are added and the result is a in LowerVectorCTPOPInRegLUT()
31299 SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec); in LowerVectorCTPOPInRegLUT()
31300 SDValue M0F = DAG.getConstant(0x0F, DL, VT); in LowerVectorCTPOPInRegLUT()
31303 SDValue FourV = DAG.getConstant(4, DL, VT); in LowerVectorCTPOPInRegLUT()
31304 SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV); in LowerVectorCTPOPInRegLUT()
31306 // Low nibbles in LowerVectorCTPOPInRegLUT()
31307 SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F); in LowerVectorCTPOPInRegLUT()
31310 // LUT. After counting low and high nibbles, add the vector to obtain the in LowerVectorCTPOPInRegLUT()
31312 SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles); in LowerVectorCTPOPInRegLUT()
31313 SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles); in LowerVectorCTPOPInRegLUT()
31314 return DAG.getNode(ISD::ADD, DL, VT, HiPopCnt, LoPopCnt); in LowerVectorCTPOPInRegLUT()
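// Scalar sketch of the PSHUFB-based scheme above (the helper and its table
// are illustrative; the lowering keeps the same 16 nibble counts in a vector
// and looks them up per lane with PSHUFB):
#include <cstdint>

static unsigned popcount8_nibble_lut(uint8_t V) {
  static const uint8_t LUT[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                                  1, 2, 2, 3, 2, 3, 3, 4};  // popcount of 0..15
  return LUT[V & 0x0F] + LUT[V >> 4];  // low-nibble count + high-nibble count
}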
31322 MVT VT = Op.getSimpleValueType(); in LowerVectorCTPOP() local
31323 assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) && in LowerVectorCTPOP()
31329 unsigned NumElems = VT.getVectorNumElements(); in LowerVectorCTPOP()
31330 assert((VT.getVectorElementType() == MVT::i8 || in LowerVectorCTPOP()
31331 VT.getVectorElementType() == MVT::i16) && "Unexpected type"); in LowerVectorCTPOP()
31336 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); in LowerVectorCTPOP()
31340 // Decompose 256-bit ops into smaller 128-bit ops. in LowerVectorCTPOP()
31341 if (VT.is256BitVector() && !Subtarget.hasInt256()) in LowerVectorCTPOP()
31344 // Decompose 512-bit ops into smaller 256-bit ops. in LowerVectorCTPOP()
31345 if (VT.is512BitVector() && !Subtarget.hasBWI()) in LowerVectorCTPOP()
31349 if (VT.getScalarType() != MVT::i8) { in LowerVectorCTPOP()
31350 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); in LowerVectorCTPOP()
31353 return LowerHorizontalByteSum(PopCnt8, VT, Subtarget, DAG); in LowerVectorCTPOP()
31365 MVT VT = N.getSimpleValueType(); in LowerCTPOP() local
31369 if (VT.isScalarInteger()) { in LowerCTPOP()
31377 unsigned ActiveBits = Known.getBitWidth() - LZ; in LowerCTPOP()
31378 unsigned ShiftedActiveBits = Known.getBitWidth() - (LZ + TZ); in LowerCTPOP()
31380 // i2 CTPOP - "ctpop(x) --> sub(x, (x >> 1))". in LowerCTPOP()
31383 Op = DAG.getNode(ISD::SRL, DL, VT, Op, in LowerCTPOP()
31384 DAG.getShiftAmountConstant(TZ, VT, DL)); in LowerCTPOP()
31388 DAG.getShiftAmountConstant(1, VT, DL))); in LowerCTPOP()
31389 return DAG.getZExtOrTrunc(Op, DL, VT); in LowerCTPOP()
31392 // i3 CTPOP - perform LUT into i32 integer. in LowerCTPOP()
31395 Op = DAG.getNode(ISD::SRL, DL, VT, Op, in LowerCTPOP()
31396 DAG.getShiftAmountConstant(TZ, VT, DL)); in LowerCTPOP()
31399 DAG.getShiftAmountConstant(1, VT, DL)); in LowerCTPOP()
31404 return DAG.getZExtOrTrunc(Op, DL, VT); in LowerCTPOP()
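// Sketch of the "LUT into an integer" idea used for these tiny widths (the
// packed constant below is derived here for illustration, not copied from the
// lowering): the popcounts of 0..7 are packed two bits per entry into one i32
// and selected with a shift. The i2 case above is even simpler, since
// ctpop(x) == x - (x >> 1) for any 2-bit x.
#include <cstdint>

static unsigned ctpop3_via_packed_lut(unsigned X) {  // X in 0..7
  // Entry i lives in bits [2*i, 2*i+1]; the counts are 0,1,1,2,1,2,2,3.
  const uint32_t LUT = (0u << 0) | (1u << 2) | (1u << 4) | (2u << 6) |
                       (1u << 8) | (2u << 10) | (2u << 12) | (3u << 14);
  return (LUT >> (2 * X)) & 0x3;
}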
31407 // i4 CTPOP - perform LUT into i64 integer. in LowerCTPOP()
31412 Op = DAG.getNode(ISD::SRL, DL, VT, Op, in LowerCTPOP()
31413 DAG.getShiftAmountConstant(TZ, VT, DL)); in LowerCTPOP()
31421 return DAG.getZExtOrTrunc(Op, DL, VT); in LowerCTPOP()
31424 // i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply. in LowerCTPOP()
31428 Op = DAG.getNode(ISD::SRL, DL, VT, Op, in LowerCTPOP()
31429 DAG.getShiftAmountConstant(TZ, VT, DL)); in LowerCTPOP()
31439 return DAG.getZExtOrTrunc(Op, DL, VT); in LowerCTPOP()
31445 assert(VT.isVector() && in LowerCTPOP()
31451 MVT VT = Op.getSimpleValueType(); in LowerBITREVERSE_XOP() local
31457 if (!VT.isVector()) { in LowerBITREVERSE_XOP()
31458 MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits()); in LowerBITREVERSE_XOP()
31461 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Res, in LowerBITREVERSE_XOP()
31465 int NumElts = VT.getVectorNumElements(); in LowerBITREVERSE_XOP()
31466 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; in LowerBITREVERSE_XOP()
31468 // Decompose 256-bit ops into smaller 128-bit ops. in LowerBITREVERSE_XOP()
31469 if (VT.is256BitVector()) in LowerBITREVERSE_XOP()
31472 assert(VT.is128BitVector() && in LowerBITREVERSE_XOP()
31473 "Only 128-bit vector bitreverse lowering supported."); in LowerBITREVERSE_XOP()
31481 for (int j = ScalarSizeInBytes - 1; j >= 0; --j) { in LowerBITREVERSE_XOP()
31492 return DAG.getBitcast(VT, Res); in LowerBITREVERSE_XOP()
31497 MVT VT = Op.getSimpleValueType(); in LowerBITREVERSE() local
31499 if (Subtarget.hasXOP() && !VT.is512BitVector()) in LowerBITREVERSE()
31507 // Split 512-bit ops without BWI so that we can still use the PSHUFB lowering. in LowerBITREVERSE()
31508 if (VT.is512BitVector() && !Subtarget.hasBWI()) in LowerBITREVERSE()
31511 // Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2. in LowerBITREVERSE()
31512 if (VT.is256BitVector() && !Subtarget.hasInt256()) in LowerBITREVERSE()
31516 if (!VT.isVector()) { in LowerBITREVERSE()
31518 (VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i16 || VT == MVT::i8) && in LowerBITREVERSE()
31520 MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits()); in LowerBITREVERSE()
31524 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, in LowerBITREVERSE()
31526 return (VT == MVT::i8) ? Res : DAG.getNode(ISD::BSWAP, DL, VT, Res); in LowerBITREVERSE()
31529 assert(VT.isVector() && VT.getSizeInBits() >= 128); in LowerBITREVERSE()
31532 if (VT.getScalarType() != MVT::i8) { in LowerBITREVERSE()
31533 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); in LowerBITREVERSE()
31534 SDValue Res = DAG.getNode(ISD::BSWAP, DL, VT, In); in LowerBITREVERSE()
31537 return DAG.getBitcast(VT, Res); in LowerBITREVERSE()
31539 assert(VT.isVector() && VT.getScalarType() == MVT::i8 && in LowerBITREVERSE()
31542 unsigned NumElts = VT.getVectorNumElements(); in LowerBITREVERSE()
31546 SDValue Matrix = getGFNICtrlMask(ISD::BITREVERSE, DAG, DL, VT); in LowerBITREVERSE()
31547 return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, In, Matrix, in LowerBITREVERSE()
31553 // 0-15 value (moved to the other nibble). in LowerBITREVERSE()
31554 SDValue NibbleMask = DAG.getConstant(0xF, DL, VT); in LowerBITREVERSE()
31555 SDValue Lo = DAG.getNode(ISD::AND, DL, VT, In, NibbleMask); in LowerBITREVERSE()
31556 SDValue Hi = DAG.getNode(ISD::SRL, DL, VT, In, DAG.getConstant(4, DL, VT)); in LowerBITREVERSE()
31575 SDValue LoMask = DAG.getBuildVector(VT, DL, LoMaskElts); in LowerBITREVERSE()
31576 SDValue HiMask = DAG.getBuildVector(VT, DL, HiMaskElts); in LowerBITREVERSE()
31577 Lo = DAG.getNode(X86ISD::PSHUFB, DL, VT, LoMask, Lo); in LowerBITREVERSE()
31578 Hi = DAG.getNode(X86ISD::PSHUFB, DL, VT, HiMask, Hi); in LowerBITREVERSE()
31579 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi); in LowerBITREVERSE()
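// Scalar sketch of the two-PSHUFB byte reverse built above (the helper and
// its table are illustrative; the lowering's LoMask/HiMask tables store each
// reversed nibble already moved into its destination nibble, rather than
// shifting afterwards as done here):
#include <cstdint>

static uint8_t bitreverse8_nibble_lut(uint8_t V) {
  static const uint8_t RevNibble[16] = {0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
                                        0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF};
  uint8_t Lo = V & 0x0F;                                 // low nibble
  uint8_t Hi = V >> 4;                                   // high nibble
  return uint8_t((RevNibble[Lo] << 4) | RevNibble[Hi]);  // reverse each, swap
}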
31586 MVT VT = Op.getSimpleValueType(); in LowerPARITY() local
31588 // Special case. If the input fits in 8-bits we can use a single 8-bit TEST. in LowerPARITY()
31589 if (VT == MVT::i8 || in LowerPARITY()
31590 DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) { in LowerPARITY()
31597 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp); in LowerPARITY()
31604 if (VT == MVT::i64) { in LowerPARITY()
31605 // Xor the high and low 32-bit halves together using a 32-bit operation. in LowerPARITY()
31613 if (VT != MVT::i16) { in LowerPARITY()
31614 // Xor the high and low 16-bits together using a 32-bit operation. in LowerPARITY()
31619 // If the input is 16-bits, we need to extend to use an i32 shift below. in LowerPARITY()
31623 // Finally xor the low 2 bytes together and use an 8-bit flag-setting xor. in LowerPARITY()
31624 // This should allow an h-reg to be used to save a shift. in LowerPARITY()
31635 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp); in LowerPARITY()
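// Scalar sketch of the folding strategy above (illustrative helper; the
// lowering stops at the 8-bit value and reads the PF flag instead of
// computing the final bit):
#include <cstdint>

static unsigned parity64_by_folding(uint64_t X) {
  uint32_t V = uint32_t(X) ^ uint32_t(X >> 32);  // fold 64 -> 32
  V ^= V >> 16;                                  // fold 32 -> 16
  V ^= V >> 8;                                   // fold 16 -> 8
  V &= 0xFF;
  V ^= V >> 4;                                   // finish the parity of one byte
  V ^= V >> 2;
  V ^= V >> 1;
  return V & 1;
}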
31641 switch (N->getOpcode()) { in lowerAtomicArithWithLOCK()
31661 MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand(); in lowerAtomicArithWithLOCK()
31665 {N->getOperand(0), N->getOperand(1), N->getOperand(2)}, in lowerAtomicArithWithLOCK()
31666 /*MemVT=*/N->getSimpleValueType(0), MMO); in lowerAtomicArithWithLOCK()
31669 /// Lower atomic_load_ops into LOCK-prefixed operations.
31673 SDValue Chain = N->getOperand(0); in lowerAtomicArith()
31674 SDValue LHS = N->getOperand(1); in lowerAtomicArith()
31675 SDValue RHS = N->getOperand(2); in lowerAtomicArith()
31676 unsigned Opc = N->getOpcode(); in lowerAtomicArith()
31677 MVT VT = N->getSimpleValueType(0); in lowerAtomicArith() local
31683 if (N->hasAnyUseOfValue(0)) { in lowerAtomicArith()
31684 // Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to in lowerAtomicArith()
31690 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, in lowerAtomicArith()
31691 DAG.getNegative(RHS, DL, VT), AN->getMemOperand()); in lowerAtomicArith()
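// Tiny check of the rewrite above (plain wraparound arithmetic, illustrative
// helper): subtracting V is the same as adding its two's-complement negation,
// which is what lets a fetch_sub be emitted as LOCK XADD with -V.
#include <cstdint>

static uint32_t sub_as_add_of_negation(uint32_t X, uint32_t V) {
  return X + (0u - V);  // == X - V modulo 2^32
}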
31705 if (Opc == ISD::ATOMIC_LOAD_OR && isNullConstant(RHS) && !AN->isVolatile()) { in lowerAtomicArith()
31711 if (AN->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent && in lowerAtomicArith()
31712 AN->getSyncScopeID() == SyncScope::System) { in lowerAtomicArith()
31717 assert(!N->hasAnyUseOfValue(0)); in lowerAtomicArith()
31719 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), in lowerAtomicArith()
31720 DAG.getUNDEF(VT), NewChain); in lowerAtomicArith()
31722 // MEMBARRIER is a compiler barrier; it codegens to a no-op. in lowerAtomicArith()
31724 assert(!N->hasAnyUseOfValue(0)); in lowerAtomicArith()
31726 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), in lowerAtomicArith()
31727 DAG.getUNDEF(VT), NewChain); in lowerAtomicArith()
31732 assert(!N->hasAnyUseOfValue(0)); in lowerAtomicArith()
31734 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), in lowerAtomicArith()
31735 DAG.getUNDEF(VT), LockOp.getValue(1)); in lowerAtomicArith()
31742 EVT VT = Node->getMemoryVT(); in LowerATOMIC_STORE() local
31745 Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent; in LowerATOMIC_STORE()
31746 bool IsTypeLegal = DAG.getTargetLoweringInfo().isTypeLegal(VT); in LowerATOMIC_STORE()
31759 if (VT == MVT::i128 && Subtarget.is64Bit() && Subtarget.hasAVX()) { in LowerATOMIC_STORE()
31760 SDValue VecVal = DAG.getBitcast(MVT::v2i64, Node->getVal()); in LowerATOMIC_STORE()
31761 Chain = DAG.getStore(Node->getChain(), dl, VecVal, Node->getBasePtr(), in LowerATOMIC_STORE()
31762 Node->getMemOperand()); in LowerATOMIC_STORE()
31767 if (VT == MVT::i64) { in LowerATOMIC_STORE()
31770 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Node->getVal()); in LowerATOMIC_STORE()
31774 SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()}; in LowerATOMIC_STORE()
31776 MVT::i64, Node->getMemOperand()); in LowerATOMIC_STORE()
31778 // First load this into an 80-bit X87 register using a stack temporary. in LowerATOMIC_STORE()
31781 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); in LowerATOMIC_STORE()
31784 Chain = DAG.getStore(Node->getChain(), dl, Node->getVal(), StackPtr, in LowerATOMIC_STORE()
31794 SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()}; in LowerATOMIC_STORE()
31797 StoreOps, MVT::i64, Node->getMemOperand()); in LowerATOMIC_STORE()
31811 // Convert seq_cst store -> xchg in LowerATOMIC_STORE()
31812 // Convert wide store -> swap (-> cmpxchg8b/cmpxchg16b) in LowerATOMIC_STORE()
31813 // FIXME: 16-byte ATOMIC_SWAP isn't actually hooked up at the moment. in LowerATOMIC_STORE()
31814 SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, Node->getMemoryVT(), in LowerATOMIC_STORE()
31815 Node->getOperand(0), Node->getOperand(2), in LowerATOMIC_STORE()
31816 Node->getOperand(1), Node->getMemOperand()); in LowerATOMIC_STORE()
31822 MVT VT = N->getSimpleValueType(0); in LowerADDSUBO_CARRY() local
31826 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) in LowerADDSUBO_CARRY()
31829 SDVTList VTs = DAG.getVTList(VT, MVT::i32); in LowerADDSUBO_CARRY()
31846 if (N->getValueType(1) == MVT::i1) in LowerADDSUBO_CARRY()
31849 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); in LowerADDSUBO_CARRY()
31947 for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) in ExtendToType()
31960 "MGATHER/MSCATTER are supported on AVX-512 arch only"); in LowerMSCATTER()
31963 SDValue Src = N->getValue(); in LowerMSCATTER()
31964 MVT VT = Src.getSimpleValueType(); in LowerMSCATTER() local
31965 assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op"); in LowerMSCATTER()
31968 SDValue Scale = N->getScale(); in LowerMSCATTER()
31969 SDValue Index = N->getIndex(); in LowerMSCATTER()
31970 SDValue Mask = N->getMask(); in LowerMSCATTER()
31971 SDValue Chain = N->getChain(); in LowerMSCATTER()
31972 SDValue BasePtr = N->getBasePtr(); in LowerMSCATTER()
31974 if (VT == MVT::v2f32 || VT == MVT::v2i32) { in LowerMSCATTER()
31979 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); in LowerMSCATTER()
31980 Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT)); in LowerMSCATTER()
31984 N->getMemoryVT(), N->getMemOperand()); in LowerMSCATTER()
31996 // If we don't have VLX and neither the passthru nor the index is 512-bits, we in LowerMSCATTER()
31998 if (!Subtarget.hasVLX() && !VT.is512BitVector() && in LowerMSCATTER()
32000 // Determine how much we need to widen by to get a 512-bit type. in LowerMSCATTER()
32001 unsigned Factor = std::min(512/VT.getSizeInBits(), in LowerMSCATTER()
32003 unsigned NumElts = VT.getVectorNumElements() * Factor; in LowerMSCATTER()
32005 VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); in LowerMSCATTER()
32009 Src = ExtendToType(Src, VT, DAG); in LowerMSCATTER()
32017 N->getMemoryVT(), N->getMemOperand()); in LowerMSCATTER()
32024 MVT VT = Op.getSimpleValueType(); in LowerMLOAD() local
32025 MVT ScalarVT = VT.getScalarType(); in LowerMLOAD()
32026 SDValue Mask = N->getMask(); in LowerMLOAD()
32028 SDValue PassThru = N->getPassThru(); in LowerMLOAD()
32038 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, in LowerMLOAD()
32039 getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(), in LowerMLOAD()
32040 N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(), in LowerMLOAD()
32041 N->isExpandingLoad()); in LowerMLOAD()
32043 SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru); in LowerMLOAD()
32047 assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) && in LowerMLOAD()
32048 "Expanding masked load is supported on AVX-512 target only!"); in LowerMLOAD()
32050 assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) && in LowerMLOAD()
32051 "Expanding masked load is supported for 32 and 64-bit types only!"); in LowerMLOAD()
32053 assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && in LowerMLOAD()
32063 unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits(); in LowerMLOAD()
32075 WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, in LowerMLOAD()
32076 PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), in LowerMLOAD()
32077 N->getExtensionType(), N->isExpandingLoad()); in LowerMLOAD()
32080 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0), in LowerMLOAD()
32089 SDValue DataToStore = N->getValue(); in LowerMSTORE()
32090 MVT VT = DataToStore.getSimpleValueType(); in LowerMSTORE() local
32091 MVT ScalarVT = VT.getScalarType(); in LowerMSTORE()
32092 SDValue Mask = N->getMask(); in LowerMSTORE()
32095 assert((!N->isCompressingStore() || Subtarget.hasAVX512()) && in LowerMSTORE()
32096 "Expanding masked load is supported on AVX-512 target only!"); in LowerMSTORE()
32098 assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) && in LowerMSTORE()
32099 "Expanding masked load is supported for 32 and 64-bit types only!"); in LowerMSTORE()
32101 assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && in LowerMSTORE()
32111 unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); in LowerMSTORE()
32122 return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), in LowerMSTORE()
32123 N->getOffset(), Mask, N->getMemoryVT(), in LowerMSTORE()
32124 N->getMemOperand(), N->getAddressingMode(), in LowerMSTORE()
32125 N->isTruncatingStore(), N->isCompressingStore()); in LowerMSTORE()
32131 "MGATHER/MSCATTER are supported on AVX-512/AVX-2 arch only"); in LowerMGATHER()
32135 MVT VT = Op.getSimpleValueType(); in LowerMGATHER() local
32136 SDValue Index = N->getIndex(); in LowerMGATHER()
32137 SDValue Mask = N->getMask(); in LowerMGATHER()
32138 SDValue PassThru = N->getPassThru(); in LowerMGATHER()
32141 assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op"); in LowerMGATHER()
32147 // If we don't have VLX and neither the passthru nor the index is 512-bits, we in LowerMGATHER()
32149 MVT OrigVT = VT; in LowerMGATHER()
32150 if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && in LowerMGATHER()
32152 // Determine how much we need to widen by to get a 512-bit type. in LowerMGATHER()
32153 unsigned Factor = std::min(512/VT.getSizeInBits(), in LowerMGATHER()
32156 unsigned NumElts = VT.getVectorNumElements() * Factor; in LowerMGATHER()
32158 VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); in LowerMGATHER()
32162 PassThru = ExtendToType(PassThru, VT, DAG); in LowerMGATHER()
32169 PassThru = getZeroVector(VT, Subtarget, DAG, dl); in LowerMGATHER()
32171 SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index, in LowerMGATHER()
32172 N->getScale() }; in LowerMGATHER()
32174 X86ISD::MGATHER, dl, DAG.getVTList(VT, MVT::Other), Ops, N->getMemoryVT(), in LowerMGATHER()
32175 N->getMemOperand()); in LowerMGATHER()
32187 unsigned SrcAS = N->getSrcAddressSpace(); in LowerADDRSPACECAST()
32189 assert(SrcAS != N->getDestAddressSpace() && in LowerADDRSPACECAST()
32209 // no-ops in the case of a null GC strategy (or a GC strategy which does not in LowerGC_TRANSITION()
32214 if (Op->getGluedNode()) in LowerGC_TRANSITION()
32215 Ops.push_back(Op->getOperand(Op->getNumOperands() - 1)); in LowerGC_TRANSITION()
32224 EVT VT = Op.getValueType(); in LowerCVTPS2PH() local
32228 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); in LowerCVTPS2PH()
32232 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); in LowerCVTPS2PH()
32239 // We don't support non-data prefetch without PREFETCHI. in LowerPREFETCH()
32259 // sub-string, e.g. "$12" contains "$1" in getInstrStrFromOpNo()
32261 I = AsmStr.size() - OpNoStr1.size(); in getInstrStrFromOpNo()
32316 // -> in visitMaskedLoad()
32337 // -> in visitMaskedStore()
32352 // clang-format off in LowerOperation()
32504 // clang-format on in LowerOperation()
32514 switch (N->getOpcode()) { in ReplaceNodeResults()
32518 N->dump(&DAG); in ReplaceNodeResults()
32522 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
32526 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); in ReplaceNodeResults()
32529 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); in ReplaceNodeResults()
32534 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
32538 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); in ReplaceNodeResults()
32540 {N->getOperand(0), Lo}); in ReplaceNodeResults()
32542 {N->getOperand(0), Hi}); in ReplaceNodeResults()
32545 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); in ReplaceNodeResults()
32554 assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); in ReplaceNodeResults()
32557 KnownBits Known = DAG.computeKnownBits(N->getOperand(0)); in ReplaceNodeResults()
32561 SDValue Op = DAG.getNode(ISD::SRL, dl, MVT::i64, N->getOperand(0), in ReplaceNodeResults()
32575 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, N->getOperand(0)); in ReplaceNodeResults()
32577 // Bit count should fit in 32-bits, extract it as that and then zero in ReplaceNodeResults()
32588 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
32589 assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && in ReplaceNodeResults()
32590 VT.getVectorElementType() == MVT::i8 && "Unexpected VT!"); in ReplaceNodeResults()
32591 // Pre-promote these to vXi16 to avoid op legalization thinking all 16 in ReplaceNodeResults()
32593 MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); in ReplaceNodeResults()
32594 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0)); in ReplaceNodeResults()
32595 SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1)); in ReplaceNodeResults()
32597 Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); in ReplaceNodeResults()
32598 unsigned NumConcats = 16 / VT.getVectorNumElements(); in ReplaceNodeResults()
32599 SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT)); in ReplaceNodeResults()
32607 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
32608 assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && in ReplaceNodeResults()
32609 VT == MVT::v2i32 && "Unexpected VT!"); in ReplaceNodeResults()
32610 bool IsSigned = N->getOpcode() == ISD::SMULO; in ReplaceNodeResults()
32612 SDValue Op0 = DAG.getNode(ExtOpc, dl, MVT::v2i64, N->getOperand(0)); in ReplaceNodeResults()
32613 SDValue Op1 = DAG.getNode(ExtOpc, dl, MVT::v2i64, N->getOperand(1)); in ReplaceNodeResults()
32618 Hi = DAG.getVectorShuffle(MVT::v4i32, dl, Hi, Hi, {1, 3, -1, -1}); in ReplaceNodeResults()
32619 Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Hi, in ReplaceNodeResults()
32622 // Truncate the low bits of the result. This will become PSHUFD. in ReplaceNodeResults()
32623 Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); in ReplaceNodeResults()
32627 // SMULO overflows if the high bits don't match the sign of the low. in ReplaceNodeResults()
32628 HiCmp = DAG.getNode(ISD::SRA, dl, VT, Res, DAG.getConstant(31, dl, VT)); in ReplaceNodeResults()
32630 // UMULO overflows if the high bits are non-zero. in ReplaceNodeResults()
32631 HiCmp = DAG.getConstant(0, dl, VT); in ReplaceNodeResults()
32633 SDValue Ovf = DAG.getSetCC(dl, N->getValueType(1), Hi, HiCmp, ISD::SETNE); in ReplaceNodeResults()
32637 DAG.getUNDEF(VT)); in ReplaceNodeResults()
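// Scalar sketch of the two overflow tests above (illustrative helpers; a
// 32-bit lane widened to a 64-bit multiply stands in for the v2i32 case, and
// an arithmetic right shift is assumed for the signed compare):
#include <cstdint>

static bool umulo32_overflows(uint32_t A, uint32_t B) {
  uint64_t P = uint64_t(A) * B;
  return uint32_t(P >> 32) != 0;  // UMULO: high bits are non-zero
}

static bool smulo32_overflows(int32_t A, int32_t B) {
  int64_t P = int64_t(A) * B;
  int32_t Lo = static_cast<int32_t>(static_cast<uint32_t>(P));         // low half
  int32_t Hi = static_cast<int32_t>(static_cast<uint64_t>(P) >> 32);   // high half
  return Hi != (Lo >> 31);  // SMULO: high half must equal the sign of the low
}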
32646 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
32647 EVT InVT = N->getOperand(0).getValueType(); in ReplaceNodeResults()
32648 assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 && in ReplaceNodeResults()
32649 "Expected a VT that divides into 128 bits."); in ReplaceNodeResults()
32650 assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && in ReplaceNodeResults()
32658 VT.getVectorElementType(), in ReplaceNodeResults()
32659 NumConcat * VT.getVectorNumElements()); in ReplaceNodeResults()
32662 Ops[0] = N->getOperand(0); in ReplaceNodeResults()
32664 Ops[0] = N->getOperand(1); in ReplaceNodeResults()
32667 SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1); in ReplaceNodeResults()
32676 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
32677 assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX."); in ReplaceNodeResults()
32678 SDValue UNDEF = DAG.getUNDEF(VT); in ReplaceNodeResults()
32680 N->getOperand(0), UNDEF); in ReplaceNodeResults()
32682 N->getOperand(1), UNDEF); in ReplaceNodeResults()
32683 Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS)); in ReplaceNodeResults()
32690 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
32691 if (VT.isVector()) { in ReplaceNodeResults()
32692 assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && in ReplaceNodeResults()
32696 // TODO: Can we do something for non-splat? in ReplaceNodeResults()
32698 if (ISD::isConstantSplatVector(N->getOperand(1).getNode(), SplatVal)) { in ReplaceNodeResults()
32699 unsigned NumConcats = 128 / VT.getSizeInBits(); in ReplaceNodeResults()
32700 SmallVector<SDValue, 8> Ops0(NumConcats, DAG.getUNDEF(VT)); in ReplaceNodeResults()
32701 Ops0[0] = N->getOperand(0); in ReplaceNodeResults()
32702 EVT ResVT = getTypeToTransformTo(*DAG.getContext(), VT); in ReplaceNodeResults()
32705 SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1); in ReplaceNodeResults()
32716 MVT VT = N->getSimpleValueType(0); in ReplaceNodeResults() local
32717 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) in ReplaceNodeResults()
32723 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT(); in ReplaceNodeResults()
32724 SDValue In = N->getOperand(0); in ReplaceNodeResults()
32727 EVT EltVT = VT.getVectorElementType(); in ReplaceNodeResults()
32728 unsigned MinElts = VT.getVectorNumElements(); in ReplaceNodeResults()
32735 matchTruncateWithPACK(PackOpcode, VT, In, dl, DAG, Subtarget)) { in ReplaceNodeResults()
32736 if (SDValue Res = truncateVectorWithPACK(PackOpcode, VT, Src, in ReplaceNodeResults()
32749 SmallVector<int, 16> TruncMask(WidenNumElts, -1); in ReplaceNodeResults()
32772 if (InVT == MVT::v4i64 && VT == MVT::v4i8 && isTypeLegal(MVT::v8i64)) { in ReplaceNodeResults()
32779 if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 && in ReplaceNodeResults()
32791 -1, -1, -1, -1, -1, -1, -1, -1 }); in ReplaceNodeResults()
32814 assert(N->getValueType(0) == MVT::v8i8 && in ReplaceNodeResults()
32819 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
32820 SDValue In = N->getOperand(0); in ReplaceNodeResults()
32822 if (!Subtarget.hasSSE41() && VT == MVT::v4i64 && in ReplaceNodeResults()
32826 assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode"); in ReplaceNodeResults()
32827 // Custom split this so we can extend i8/i16->i32 invec. This is better in ReplaceNodeResults()
32828 // since sign_extend_inreg i8/i16->i64 requires an extend to i32 using in ReplaceNodeResults()
32846 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); in ReplaceNodeResults()
32851 if (VT == MVT::v16i32 || VT == MVT::v8i64) { in ReplaceNodeResults()
32863 In = DAG.getNode(N->getOpcode(), dl, InVT, In); in ReplaceNodeResults()
32869 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); in ReplaceNodeResults()
32870 assert(isTypeLegal(LoVT) && "Split VT not legal?"); in ReplaceNodeResults()
32872 SDValue Lo = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, LoVT, In, DAG); in ReplaceNodeResults()
32882 Hi = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, HiVT, Hi, DAG); in ReplaceNodeResults()
32884 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); in ReplaceNodeResults()
32893 bool IsStrict = N->isStrictFPOpcode(); in ReplaceNodeResults()
32894 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || in ReplaceNodeResults()
32895 N->getOpcode() == ISD::STRICT_FP_TO_SINT; in ReplaceNodeResults()
32896 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
32897 SDValue Src = N->getOperand(IsStrict ? 1 : 0); in ReplaceNodeResults()
32898 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); in ReplaceNodeResults()
32903 EVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32; in ReplaceNodeResults()
32906 DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other}, in ReplaceNodeResults()
32911 Res = DAG.getNode(N->getOpcode(), dl, VT, in ReplaceNodeResults()
32921 if (VT.isVector() && Subtarget.hasFP16() && in ReplaceNodeResults()
32923 EVT EleVT = VT.getVectorElementType(); in ReplaceNodeResults()
32938 DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {N->getOperand(0), Src}); in ReplaceNodeResults()
32965 if (VT.isVector() && VT.getScalarSizeInBits() < 32) { in ReplaceNodeResults()
32966 assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && in ReplaceNodeResults()
32970 unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U); in ReplaceNodeResults()
32972 VT.getVectorNumElements()); in ReplaceNodeResults()
32977 {N->getOperand(0), Src}); in ReplaceNodeResults()
32990 DAG.getValueType(VT.getVectorElementType())); in ReplaceNodeResults()
32997 Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); in ReplaceNodeResults()
33000 unsigned NumConcats = 128 / VT.getSizeInBits(); in ReplaceNodeResults()
33001 MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(), in ReplaceNodeResults()
33002 VT.getVectorNumElements() * NumConcats); in ReplaceNodeResults()
33003 SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT)); in ReplaceNodeResults()
33013 if (VT == MVT::v2i32) { in ReplaceNodeResults()
33017 assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && in ReplaceNodeResults()
33037 // legalization to v8i32<-v8f64. in ReplaceNodeResults()
33044 Opc = N->getOpcode(); in ReplaceNodeResults()
33050 {N->getOperand(0), Src}); in ReplaceNodeResults()
33061 // Custom widen strict v2f32->v2i32 by padding with zeros. in ReplaceNodeResults()
33066 SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4i32, MVT::Other}, in ReplaceNodeResults()
33067 {N->getOperand(0), Src}); in ReplaceNodeResults()
33078 assert(!VT.isVector() && "Vectors should have been handled above!"); in ReplaceNodeResults()
33080 if ((Subtarget.hasDQI() && VT == MVT::i64 && in ReplaceNodeResults()
33085 // If we use a 128-bit result we might need to use a target specific node. in ReplaceNodeResults()
33090 unsigned Opc = N->getOpcode(); in ReplaceNodeResults()
33105 Res = DAG.getNode(Opc, SDLoc(N), Tys, N->getOperand(0), Res); in ReplaceNodeResults()
33109 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx); in ReplaceNodeResults()
33116 if (VT == MVT::i128 && Subtarget.isTargetWin64()) { in ReplaceNodeResults()
33143 bool IsStrict = N->isStrictFPOpcode(); in ReplaceNodeResults()
33144 bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP || in ReplaceNodeResults()
33145 N->getOpcode() == ISD::STRICT_SINT_TO_FP; in ReplaceNodeResults()
33146 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
33147 SDValue Src = N->getOperand(IsStrict ? 1 : 0); in ReplaceNodeResults()
33148 if (VT.getVectorElementType() == MVT::f16 && Subtarget.hasFP16() && in ReplaceNodeResults()
33153 if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32) in ReplaceNodeResults()
33161 {N->getOperand(0), Src}); in ReplaceNodeResults()
33170 if (VT != MVT::v2f32) in ReplaceNodeResults()
33178 {N->getOperand(0), Src}); in ReplaceNodeResults()
33203 {N->getOperand(0), Elt}); in ReplaceNodeResults()
33220 DAG.getVectorShuffle(MVT::v4i32, dl, IsNeg, IsNeg, {1, 3, -1, -1}); in ReplaceNodeResults()
33235 // Custom widen strict v2i32->v2f32 to avoid scalarization. in ReplaceNodeResults()
33239 SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other}, in ReplaceNodeResults()
33240 {N->getOperand(0), Src}); in ReplaceNodeResults()
33255 {N->getOperand(0), Or, VBias}); in ReplaceNodeResults()
33262 // TODO: Are there any fast-math-flags to propagate here? in ReplaceNodeResults()
33270 bool IsStrict = N->isStrictFPOpcode(); in ReplaceNodeResults()
33271 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); in ReplaceNodeResults()
33272 SDValue Src = N->getOperand(IsStrict ? 1 : 0); in ReplaceNodeResults()
33273 SDValue Rnd = N->getOperand(IsStrict ? 2 : 1); in ReplaceNodeResults()
33275 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
33277 if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) { in ReplaceNodeResults()
33282 if (!Subtarget.hasFP16() && VT.getVectorElementType() == MVT::f16) { in ReplaceNodeResults()
33300 EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32; in ReplaceNodeResults()
33315 assert(N->getValueType(0) == MVT::v2f32 && in ReplaceNodeResults()
33319 bool IsStrict = N->isStrictFPOpcode(); in ReplaceNodeResults()
33320 SDValue Src = N->getOperand(IsStrict ? 1 : 0); in ReplaceNodeResults()
33328 {N->getOperand(0), V}); in ReplaceNodeResults()
33337 unsigned IntNo = N->getConstantOperandVal(1); in ReplaceNodeResults()
33365 EVT T = N->getValueType(0); in ReplaceNodeResults()
33369 "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B"); in ReplaceNodeResults()
33373 DAG.SplitScalar(N->getOperand(2), dl, HalfT, HalfT); in ReplaceNodeResults()
33374 cpInL = DAG.getCopyToReg(N->getOperand(0), dl, in ReplaceNodeResults()
33381 DAG.SplitScalar(N->getOperand(3), dl, HalfT, HalfT); in ReplaceNodeResults()
33386 // In 64-bit mode we might need the base pointer in RBX, but we can't know in ReplaceNodeResults()
33391 // live-range. in ReplaceNodeResults()
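        // (Editor's note, not in the source: CMPXCHG16B implicitly uses RDX:RAX as
        //  the comparand and RCX:RBX as the replacement value, which is why RBX has
        //  to be saved and restored here when it also serves as the base pointer.)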
33394 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); in ReplaceNodeResults()
33396 SDValue Ops[] = {swapInH.getValue(0), N->getOperand(1), swapInL, in ReplaceNodeResults()
33403 SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1), in ReplaceNodeResults()
33420 Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1)); in ReplaceNodeResults()
33429 (N->getValueType(0) == MVT::i64 || N->getValueType(0) == MVT::i128) && in ReplaceNodeResults()
33430 "Unexpected VT!"); in ReplaceNodeResults()
33437 if (N->getValueType(0) == MVT::i128) { in ReplaceNodeResults()
33439 SDValue Ld = DAG.getLoad(MVT::v2i64, dl, Node->getChain(), in ReplaceNodeResults()
33440 Node->getBasePtr(), Node->getMemOperand()); in ReplaceNodeResults()
33445 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), in ReplaceNodeResults()
33454 // Then extract the lower 64-bits. in ReplaceNodeResults()
33457 SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; in ReplaceNodeResults()
33459 MVT::i64, Node->getMemOperand()); in ReplaceNodeResults()
33468 // then casts to i64. This avoids a 128-bit stack temporary being in ReplaceNodeResults()
33469 // created by type legalization if we were to cast v4f32->v2i64. in ReplaceNodeResults()
33478 // First load this into an 80-bit X87 register. This will put the whole in ReplaceNodeResults()
33481 SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; in ReplaceNodeResults()
33484 Node->getMemOperand()); in ReplaceNodeResults()
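        // (Editor's note, not in the source: an x87 FILD of a 64-bit integer is exact
        //  because the extended-precision significand is 64 bits wide, so the value
        //  survives the trip through the x87 register without rounding.)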
33492 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); in ReplaceNodeResults()
33531 EVT DstVT = N->getValueType(0); in ReplaceNodeResults()
33532 EVT SrcVT = N->getOperand(0).getValueType(); in ReplaceNodeResults()
33534 // If this is a bitcast from a v64i1 k-register to a i64 on a 32-bit target in ReplaceNodeResults()
33535 // we can split using the k-register rather than memory. in ReplaceNodeResults()
33537 assert(!Subtarget.is64Bit() && "Expected 32-bit mode"); in ReplaceNodeResults()
33554 N->getOperand(0)); in ReplaceNodeResults()
33563 EVT VT = N->getValueType(0); in ReplaceNodeResults() local
33564 if ((VT == MVT::v2f32 || VT == MVT::v2i32) && in ReplaceNodeResults()
33567 SDValue Index = Gather->getIndex(); in ReplaceNodeResults()
33570 assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && in ReplaceNodeResults()
33572 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); in ReplaceNodeResults()
33573 SDValue Mask = Gather->getMask(); in ReplaceNodeResults()
33576 Gather->getPassThru(), in ReplaceNodeResults()
33577 DAG.getUNDEF(VT)); in ReplaceNodeResults()
33585 SDValue Ops[] = { Gather->getChain(), PassThru, Mask, in ReplaceNodeResults()
33586 Gather->getBasePtr(), Index, Gather->getScale() }; in ReplaceNodeResults()
33589 Gather->getMemoryVT(), Gather->getMemOperand()); in ReplaceNodeResults()
33598 // avoids scalarizing in 32-bit mode. In 64-bit mode this avoids an int->fp in ReplaceNodeResults()

33600 MVT VT = N->getSimpleValueType(0); in ReplaceNodeResults() local
33601 assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT"); in ReplaceNodeResults()
33602 assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && in ReplaceNodeResults()
33608 MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64; in ReplaceNodeResults()
33609 SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(), in ReplaceNodeResults()
33610 Ld->getPointerInfo(), Ld->getOriginalAlign(), in ReplaceNodeResults()
33611 Ld->getMemOperand()->getFlags()); in ReplaceNodeResults()
33615 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); in ReplaceNodeResults()
33623 SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()}; in ReplaceNodeResults()
33625 MVT::i64, Ld->getMemOperand()); in ReplaceNodeResults()
33636 assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); in ReplaceNodeResults()
33645 assert(N->getSimpleValueType(0) == MVT::f16 && in ReplaceNodeResults()
33648 SDValue VecOp = N->getOperand(0); in ReplaceNodeResults()
33650 SDValue Split = DAG.getBitcast(ExtVT, N->getOperand(0)); in ReplaceNodeResults()
33652 N->getOperand(1)); in ReplaceNodeResults()
34081 // X86 allows a sign-extended 32-bit immediate field as a displacement. in isLegalAddressingMode()
34097 // If lower 4G is not available, then we must use rip-relative addressing. in isLegalAddressingMode()
34127 unsigned Bits = Ty->getScalarSizeInBits(); in isVectorShiftByScalarCheap()
34151 // These are non-commutative binops. in isBinOp()
34187 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) in isTruncateFree()
34189 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); in isTruncateFree()
34190 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); in isTruncateFree()
34195 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) in allowTruncateForTailCall()
34201 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); in allowTruncateForTailCall()
34230 // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. in isZExtFree()
34231 return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit(); in isZExtFree()
34235 // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. in isZExtFree()
34256 // X86 has 8, 16, and 32-bit zero-extending loads. in isZExtFree()
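        // (Editor's example, not in the source: `movl %esi, %eax` already clears
        //  bits 63:32 of RAX, and movzbl/movzwl/plain movl loads cover the 8/16/32-bit
        //  extending loads, so no separate zero-extension instruction is needed.)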
34267 FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType()); in shouldSinkOperands()
34271 if (I->getOpcode() == Instruction::Mul && in shouldSinkOperands()
34272 VTy->getElementType()->isIntegerTy(64)) { in shouldSinkOperands()
34273 for (auto &Op : I->operands()) { in shouldSinkOperands()
34275 if (any_of(Ops, [&](Use *U) { return U->get() == Op; })) in shouldSinkOperands()
34283 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0)); in shouldSinkOperands()
34298 int ShiftAmountOpNum = -1; in shouldSinkOperands()
34299 if (I->isShift()) in shouldSinkOperands()
34302 if (II->getIntrinsicID() == Intrinsic::fshl || in shouldSinkOperands()
34303 II->getIntrinsicID() == Intrinsic::fshr) in shouldSinkOperands()
34307 if (ShiftAmountOpNum == -1) in shouldSinkOperands()
34310 auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum)); in shouldSinkOperands()
34311 if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 && in shouldSinkOperands()
34312 isVectorShiftByScalarCheap(I->getType())) { in shouldSinkOperands()
34313 Ops.push_back(&I->getOperandUse(ShiftAmountOpNum)); in shouldSinkOperands()
34340 EVT VT) const { in isFMAFasterThanFMulAndFAdd()
34344 VT = VT.getScalarType(); in isFMAFasterThanFMulAndFAdd()
34346 if (!VT.isSimple()) in isFMAFasterThanFMulAndFAdd()
34349 switch (VT.getSimpleVT().SimpleTy) { in isFMAFasterThanFMulAndFAdd()
34368 EVT VT) const { in shouldFoldSelectWithIdentityConstant()
34369 // TODO: This is too general. There are cases where pre-AVX512 codegen would in shouldFoldSelectWithIdentityConstant()
34373 if (!Subtarget.hasVLX() && !VT.is512BitVector()) in shouldFoldSelectWithIdentityConstant()
34375 if (!VT.isVector() || VT.getScalarType() == MVT::i1) in shouldFoldSelectWithIdentityConstant()
34385 bool X86TargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const { in isShuffleMaskLegal()
34386 if (!VT.isSimple()) in isShuffleMaskLegal()
34390 if (VT.getSimpleVT().getScalarType() == MVT::i1) in isShuffleMaskLegal()
34393 // Very little shuffling can be done for 64-bit vectors right now. in isShuffleMaskLegal()
34394 if (VT.getSimpleVT().getSizeInBits() == 64) in isShuffleMaskLegal()
34399 return isTypeLegal(VT.getSimpleVT()); in isShuffleMaskLegal()
34403 EVT VT) const { in isVectorClearMaskLegal()
34405 // vpblendw and vpshufb for 256-bit vectors are not available on AVX1. in isVectorClearMaskLegal()
34407 if (VT == MVT::v32i8 || VT == MVT::v16i16) in isVectorClearMaskLegal()
34411 return isShuffleMaskLegal(Mask, VT); in isVectorClearMaskLegal()
34426 // zero-extensions. in getPreferredSwitchConditionType()
34433 //===----------------------------------------------------------------------===//
34435 //===----------------------------------------------------------------------===//
34442 for (const MachineInstr &mi : llvm::make_range(std::next(Itr), BB->end())) { in isEFLAGSLiveAfter()
34452 for (MachineBasicBlock *Succ : BB->successors()) in isEFLAGSLiveAfter()
34453 if (Succ->isLiveIn(X86::EFLAGS)) in isEFLAGSLiveAfter()
34464 const BasicBlock *BB = MBB->getBasicBlock(); in emitXBegin()
34465 MachineFunction::iterator I = ++MBB->getIterator(); in emitXBegin()
34473 // s0 = -1 in emitXBegin()
34483 MachineFunction *MF = MBB->getParent(); in emitXBegin()
34484 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); in emitXBegin()
34485 MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB); in emitXBegin()
34486 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); in emitXBegin()
34487 MF->insert(I, mainMBB); in emitXBegin()
34488 MF->insert(I, fallMBB); in emitXBegin()
34489 MF->insert(I, sinkMBB); in emitXBegin()
34492 mainMBB->addLiveIn(X86::EFLAGS); in emitXBegin()
34493 fallMBB->addLiveIn(X86::EFLAGS); in emitXBegin()
34494 sinkMBB->addLiveIn(X86::EFLAGS); in emitXBegin()
34498 sinkMBB->splice(sinkMBB->begin(), MBB, in emitXBegin()
34499 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); in emitXBegin()
34500 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); in emitXBegin()
34502 MachineRegisterInfo &MRI = MF->getRegInfo(); in emitXBegin()
34512 BuildMI(thisMBB, MIMD, TII->get(X86::XBEGIN_4)).addMBB(fallMBB); in emitXBegin()
34513 thisMBB->addSuccessor(mainMBB); in emitXBegin()
34514 thisMBB->addSuccessor(fallMBB); in emitXBegin()
34517 // mainDstReg := -1 in emitXBegin()
34518 BuildMI(mainMBB, MIMD, TII->get(X86::MOV32ri), mainDstReg).addImm(-1); in emitXBegin()
34519 BuildMI(mainMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB); in emitXBegin()
34520 mainMBB->addSuccessor(sinkMBB); in emitXBegin()
34526 BuildMI(fallMBB, MIMD, TII->get(X86::XABORT_DEF)); in emitXBegin()
34527 BuildMI(fallMBB, MIMD, TII->get(TargetOpcode::COPY), fallDstReg) in emitXBegin()
34529 fallMBB->addSuccessor(sinkMBB); in emitXBegin()
34533 BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg) in emitXBegin()
34544 // Emit va_arg instruction on X86-64. in EmitVAARGWithCustomInserter()
34546 // Operands to this pseudo-instruction: in EmitVAARGWithCustomInserter()
34548 // 1-5) Input : va_list address (addr, i64mem) in EmitVAARGWithCustomInserter()
34552 // 9 ) EFLAGS (implicit-def) in EmitVAARGWithCustomInserter()
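        // (Editor's sketch, not in the source: on the SysV x86-64 ABI the va_list
        //  address passed to this pseudo points at a record roughly like
        //    struct { unsigned gp_offset;        // 0..48, into the 6 GP save slots
        //             unsigned fp_offset;        // 48..176, into the 8 XMM save slots
        //             void *overflow_arg_area;   // stack args once a save area runs out
        //             void *reg_save_area; };
        //  which is what the offset/overflow blocks below manipulate.)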
34567 MachineFunction *MF = MBB->getParent(); in EmitVAARGWithCustomInserter()
34575 MachineMemOperand *LoadOnlyMMO = MF->getMachineMemOperand( in EmitVAARGWithCustomInserter()
34576 OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOStore); in EmitVAARGWithCustomInserter()
34577 MachineMemOperand *StoreOnlyMMO = MF->getMachineMemOperand( in EmitVAARGWithCustomInserter()
34578 OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOLoad); in EmitVAARGWithCustomInserter()
34582 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); in EmitVAARGWithCustomInserter()
34584 getRegClassFor(getPointerTy(MBB->getParent()->getDataLayout())); in EmitVAARGWithCustomInserter()
34643 const BasicBlock *LLVM_BB = MBB->getBasicBlock(); in EmitVAARGWithCustomInserter()
34644 overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitVAARGWithCustomInserter()
34645 offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitVAARGWithCustomInserter()
34646 endMBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitVAARGWithCustomInserter()
34648 MachineFunction::iterator MBBIter = ++MBB->getIterator(); in EmitVAARGWithCustomInserter()
34651 MF->insert(MBBIter, offsetMBB); in EmitVAARGWithCustomInserter()
34652 MF->insert(MBBIter, overflowMBB); in EmitVAARGWithCustomInserter()
34653 MF->insert(MBBIter, endMBB); in EmitVAARGWithCustomInserter()
34656 endMBB->splice(endMBB->begin(), thisMBB, in EmitVAARGWithCustomInserter()
34657 std::next(MachineBasicBlock::iterator(MI)), thisMBB->end()); in EmitVAARGWithCustomInserter()
34658 endMBB->transferSuccessorsAndUpdatePHIs(thisMBB); in EmitVAARGWithCustomInserter()
34661 thisMBB->addSuccessor(offsetMBB); in EmitVAARGWithCustomInserter()
34662 thisMBB->addSuccessor(overflowMBB); in EmitVAARGWithCustomInserter()
34665 offsetMBB->addSuccessor(endMBB); in EmitVAARGWithCustomInserter()
34666 overflowMBB->addSuccessor(endMBB); in EmitVAARGWithCustomInserter()
34670 BuildMI(thisMBB, MIMD, TII->get(X86::MOV32rm), OffsetReg) in EmitVAARGWithCustomInserter()
34679 BuildMI(thisMBB, MIMD, TII->get(X86::CMP32ri)) in EmitVAARGWithCustomInserter()
34681 .addImm(MaxOffset + 8 - ArgSizeA8); in EmitVAARGWithCustomInserter()
34685 BuildMI(thisMBB, MIMD, TII->get(X86::JCC_1)) in EmitVAARGWithCustomInserter()
34697 TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm), in EmitVAARGWithCustomInserter()
34707 // Zero-extend the offset in EmitVAARGWithCustomInserter()
34709 BuildMI(offsetMBB, MIMD, TII->get(X86::SUBREG_TO_REG), OffsetReg64) in EmitVAARGWithCustomInserter()
34715 BuildMI(offsetMBB, MIMD, TII->get(X86::ADD64rr), OffsetDestReg) in EmitVAARGWithCustomInserter()
34720 BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32rr), OffsetDestReg) in EmitVAARGWithCustomInserter()
34727 BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32ri), NextOffsetReg) in EmitVAARGWithCustomInserter()
34732 BuildMI(offsetMBB, MIMD, TII->get(X86::MOV32mr)) in EmitVAARGWithCustomInserter()
34742 BuildMI(offsetMBB, MIMD, TII->get(X86::JMP_1)) in EmitVAARGWithCustomInserter()
34753 TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm), in EmitVAARGWithCustomInserter()
34768 // aligned_addr = (addr + (align-1)) & ~(align-1) in EmitVAARGWithCustomInserter()
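        // (Editor's example: addr = 0x1003, align = 8 gives
        //  (0x1003 + 7) & ~7 = 0x1008, the next 8-byte boundary.)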
34771 TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri), in EmitVAARGWithCustomInserter()
34774 .addImm(Alignment.value() - 1); in EmitVAARGWithCustomInserter()
34778 TII->get(Subtarget.isTarget64BitLP64() ? X86::AND64ri32 : X86::AND32ri), in EmitVAARGWithCustomInserter()
34781 .addImm(~(uint64_t)(Alignment.value() - 1)); in EmitVAARGWithCustomInserter()
34783 BuildMI(overflowMBB, MIMD, TII->get(TargetOpcode::COPY), OverflowDestReg) in EmitVAARGWithCustomInserter()
34788 // (the overflow address should be kept 8-byte aligned) in EmitVAARGWithCustomInserter()
34792 TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri), in EmitVAARGWithCustomInserter()
34799 TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64mr : X86::MOV32mr)) in EmitVAARGWithCustomInserter()
34810 BuildMI(*endMBB, endMBB->begin(), MIMD, in EmitVAARGWithCustomInserter()
34811 TII->get(X86::PHI), DestReg) in EmitVAARGWithCustomInserter()
34835 SelectItr->addRegisterKilled(X86::EFLAGS, TRI); in checkAndUpdateEFLAGSKill()
34839 // Return true if it is OK for this CMOV pseudo-opcode to be cascaded
34840 // together with other CMOV pseudo-opcodes into a single basic-block with
34885 MachineFunction *MF = TrueMBB->getParent(); in createPHIsForCMOVsInSinkBB()
34886 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); in createPHIsForCMOVsInSinkBB()
34889 X86::CondCode CC = X86::CondCode(MIItBegin->getOperand(3).getImm()); in createPHIsForCMOVsInSinkBB()
34892 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); in createPHIsForCMOVsInSinkBB()
34904 Register DestReg = MIIt->getOperand(0).getReg(); in createPHIsForCMOVsInSinkBB()
34905 Register Op1Reg = MIIt->getOperand(1).getReg(); in createPHIsForCMOVsInSinkBB()
34906 Register Op2Reg = MIIt->getOperand(2).getReg(); in createPHIsForCMOVsInSinkBB()
34911 if (MIIt->getOperand(3).getImm() == OppCC) in createPHIsForCMOVsInSinkBB()
34921 BuildMI(*SinkMBB, SinkInsertionPoint, MIMD, TII->get(X86::PHI), DestReg) in createPHIsForCMOVsInSinkBB()
34966 // because this custom-inserter would have generated: in EmitLoweredCascadedSelect()
35013 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); in EmitLoweredCascadedSelect()
35014 MachineFunction *F = ThisMBB->getParent(); in EmitLoweredCascadedSelect()
35015 MachineBasicBlock *FirstInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredCascadedSelect()
35016 MachineBasicBlock *SecondInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredCascadedSelect()
35017 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredCascadedSelect()
35019 MachineFunction::iterator It = ++ThisMBB->getIterator(); in EmitLoweredCascadedSelect()
35020 F->insert(It, FirstInsertedMBB); in EmitLoweredCascadedSelect()
35021 F->insert(It, SecondInsertedMBB); in EmitLoweredCascadedSelect()
35022 F->insert(It, SinkMBB); in EmitLoweredCascadedSelect()
35027 FirstInsertedMBB->addLiveIn(X86::EFLAGS); in EmitLoweredCascadedSelect()
35034 SecondInsertedMBB->addLiveIn(X86::EFLAGS); in EmitLoweredCascadedSelect()
35035 SinkMBB->addLiveIn(X86::EFLAGS); in EmitLoweredCascadedSelect()
35039 SinkMBB->splice(SinkMBB->begin(), ThisMBB, in EmitLoweredCascadedSelect()
35041 ThisMBB->end()); in EmitLoweredCascadedSelect()
35042 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); in EmitLoweredCascadedSelect()
35045 ThisMBB->addSuccessor(FirstInsertedMBB); in EmitLoweredCascadedSelect()
35047 ThisMBB->addSuccessor(SinkMBB); in EmitLoweredCascadedSelect()
35049 FirstInsertedMBB->addSuccessor(SecondInsertedMBB); in EmitLoweredCascadedSelect()
35051 FirstInsertedMBB->addSuccessor(SinkMBB); in EmitLoweredCascadedSelect()
35053 SecondInsertedMBB->addSuccessor(SinkMBB); in EmitLoweredCascadedSelect()
35057 BuildMI(ThisMBB, MIMD, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC); in EmitLoweredCascadedSelect()
35061 BuildMI(FirstInsertedMBB, MIMD, TII->get(X86::JCC_1)) in EmitLoweredCascadedSelect()
35071 BuildMI(*SinkMBB, SinkMBB->begin(), MIMD, TII->get(X86::PHI), DestReg) in EmitLoweredCascadedSelect()
35095 // diamond control-flow pattern. The incoming instruction knows the in EmitLoweredSelect()
35104 // fallthrough --> FalseMBB in EmitLoweredSelect()
35106 // This code lowers all pseudo-CMOV instructions. Generally it lowers these in EmitLoweredSelect()
35139 // function - EmitLoweredCascadedSelect. in EmitLoweredSelect()
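        // (Editor's sketch, not in the source: the lowering below builds a diamond
        //    ThisMBB:  JCC_1 SinkMBB, CC; fallthrough -> FalseMBB
        //    FalseMBB: fallthrough -> SinkMBB
        //    SinkMBB:  PHIs select between the two CMOV operands based on which
        //              predecessor was actually taken.)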
35153 while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) && in EmitLoweredSelect()
35154 (NextMIIt->getOperand(3).getImm() == CC || in EmitLoweredSelect()
35155 NextMIIt->getOperand(3).getImm() == OppCC)) { in EmitLoweredSelect()
35157 NextMIIt = next_nodbg(NextMIIt, ThisMBB->end()); in EmitLoweredSelect()
35163 if (LastCMOV == &MI && NextMIIt != ThisMBB->end() && in EmitLoweredSelect()
35164 NextMIIt->getOpcode() == MI.getOpcode() && in EmitLoweredSelect()
35165 NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() && in EmitLoweredSelect()
35166 NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() && in EmitLoweredSelect()
35167 NextMIIt->getOperand(1).isKill()) { in EmitLoweredSelect()
35171 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); in EmitLoweredSelect()
35172 MachineFunction *F = ThisMBB->getParent(); in EmitLoweredSelect()
35173 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredSelect()
35174 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredSelect()
35176 MachineFunction::iterator It = ++ThisMBB->getIterator(); in EmitLoweredSelect()
35177 F->insert(It, FalseMBB); in EmitLoweredSelect()
35178 F->insert(It, SinkMBB); in EmitLoweredSelect()
35181 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI); in EmitLoweredSelect()
35182 FalseMBB->setCallFrameSize(CallFrameSize); in EmitLoweredSelect()
35183 SinkMBB->setCallFrameSize(CallFrameSize); in EmitLoweredSelect()
35188 if (!LastCMOV->killsRegister(X86::EFLAGS, /*TRI=*/nullptr) && in EmitLoweredSelect()
35190 FalseMBB->addLiveIn(X86::EFLAGS); in EmitLoweredSelect()
35191 SinkMBB->addLiveIn(X86::EFLAGS); in EmitLoweredSelect()
35199 SinkMBB->push_back(MI.removeFromParent()); in EmitLoweredSelect()
35202 SinkMBB->splice(SinkMBB->end(), ThisMBB, in EmitLoweredSelect()
35204 ThisMBB->end()); in EmitLoweredSelect()
35205 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); in EmitLoweredSelect()
35208 ThisMBB->addSuccessor(FalseMBB); in EmitLoweredSelect()
35210 ThisMBB->addSuccessor(SinkMBB); in EmitLoweredSelect()
35212 FalseMBB->addSuccessor(SinkMBB); in EmitLoweredSelect()
35215 BuildMI(ThisMBB, MIMD, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC); in EmitLoweredSelect()
35226 ThisMBB->erase(MIItBegin, MIItEnd); in EmitLoweredSelect()
35241 MachineFunction *MF = MBB->getParent(); in EmitLoweredProbedAlloca()
35245 const BasicBlock *LLVM_BB = MBB->getBasicBlock(); in EmitLoweredProbedAlloca()
35249 MachineRegisterInfo &MRI = MF->getRegInfo(); in EmitLoweredProbedAlloca()
35250 MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredProbedAlloca()
35251 MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredProbedAlloca()
35252 MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredProbedAlloca()
35254 MachineFunction::iterator MBBIter = ++MBB->getIterator(); in EmitLoweredProbedAlloca()
35255 MF->insert(MBBIter, testMBB); in EmitLoweredProbedAlloca()
35256 MF->insert(MBBIter, blockMBB); in EmitLoweredProbedAlloca()
35257 MF->insert(MBBIter, tailMBB); in EmitLoweredProbedAlloca()
35268 BuildMI(*MBB, {MI}, MIMD, TII->get(TargetOpcode::COPY), TmpStackPtr) in EmitLoweredProbedAlloca()
35272 BuildMI(*MBB, {MI}, MIMD, TII->get(Opc), FinalStackPtr) in EmitLoweredProbedAlloca()
35280 TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) in EmitLoweredProbedAlloca()
35284 BuildMI(testMBB, MIMD, TII->get(X86::JCC_1)) in EmitLoweredProbedAlloca()
35287 testMBB->addSuccessor(blockMBB); in EmitLoweredProbedAlloca()
35288 testMBB->addSuccessor(tailMBB); in EmitLoweredProbedAlloca()
35294   //  Probing loop (schematically): in EmitLoweredProbedAlloca()
35296   //  [free probe] -> [page alloc] -> [alloc probe] -> [tail alloc] -> [dyn probe] -> [page alloc] -> ... in EmitLoweredProbedAlloca()
35298   //  ... looping back until the final stack pointer is reached. in EmitLoweredProbedAlloca()
35304 addRegOffset(BuildMI(blockMBB, MIMD, TII->get(XORMIOpc)), physSPReg, false, 0) in EmitLoweredProbedAlloca()
35307 BuildMI(blockMBB, MIMD, TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr)), in EmitLoweredProbedAlloca()
35312 BuildMI(blockMBB, MIMD, TII->get(X86::JMP_1)).addMBB(testMBB); in EmitLoweredProbedAlloca()
35313 blockMBB->addSuccessor(testMBB); in EmitLoweredProbedAlloca()
35316 BuildMI(tailMBB, MIMD, TII->get(TargetOpcode::COPY), in EmitLoweredProbedAlloca()
35320 tailMBB->splice(tailMBB->end(), MBB, in EmitLoweredProbedAlloca()
35321 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); in EmitLoweredProbedAlloca()
35322 tailMBB->transferSuccessorsAndUpdatePHIs(MBB); in EmitLoweredProbedAlloca()
35323 MBB->addSuccessor(testMBB); in EmitLoweredProbedAlloca()
35335 MachineFunction *MF = BB->getParent(); in EmitLoweredSegAlloca()
35338 const BasicBlock *LLVM_BB = BB->getBasicBlock(); in EmitLoweredSegAlloca()
35340 assert(MF->shouldSplitStack()); in EmitLoweredSegAlloca()
35364 MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredSegAlloca()
35365 MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredSegAlloca()
35366 MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitLoweredSegAlloca()
35368 MachineRegisterInfo &MRI = MF->getRegInfo(); in EmitLoweredSegAlloca()
35370 getRegClassFor(getPointerTy(MF->getDataLayout())); in EmitLoweredSegAlloca()
35380 MachineFunction::iterator MBBIter = ++BB->getIterator(); in EmitLoweredSegAlloca()
35382 MF->insert(MBBIter, bumpMBB); in EmitLoweredSegAlloca()
35383 MF->insert(MBBIter, mallocMBB); in EmitLoweredSegAlloca()
35384 MF->insert(MBBIter, continueMBB); in EmitLoweredSegAlloca()
35386 continueMBB->splice(continueMBB->begin(), BB, in EmitLoweredSegAlloca()
35387 std::next(MachineBasicBlock::iterator(MI)), BB->end()); in EmitLoweredSegAlloca()
35388 continueMBB->transferSuccessorsAndUpdatePHIs(BB); in EmitLoweredSegAlloca()
35392 BuildMI(BB, MIMD, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg); in EmitLoweredSegAlloca()
35393 BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg) in EmitLoweredSegAlloca()
35395 BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr)) in EmitLoweredSegAlloca()
35398 BuildMI(BB, MIMD, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G); in EmitLoweredSegAlloca()
35402 BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), physSPReg) in EmitLoweredSegAlloca()
35404 BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), bumpSPPtrVReg) in EmitLoweredSegAlloca()
35406 BuildMI(bumpMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB); in EmitLoweredSegAlloca()
35410 Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C); in EmitLoweredSegAlloca()
35412 BuildMI(mallocMBB, MIMD, TII->get(X86::MOV64rr), X86::RDI) in EmitLoweredSegAlloca()
35414 BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32)) in EmitLoweredSegAlloca()
35420 BuildMI(mallocMBB, MIMD, TII->get(X86::MOV32rr), X86::EDI) in EmitLoweredSegAlloca()
35422 BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32)) in EmitLoweredSegAlloca()
35428 BuildMI(mallocMBB, MIMD, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg) in EmitLoweredSegAlloca()
35430 BuildMI(mallocMBB, MIMD, TII->get(X86::PUSH32r)).addReg(sizeVReg); in EmitLoweredSegAlloca()
35431 BuildMI(mallocMBB, MIMD, TII->get(X86::CALLpcrel32)) in EmitLoweredSegAlloca()
35438 BuildMI(mallocMBB, MIMD, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg) in EmitLoweredSegAlloca()
35441 BuildMI(mallocMBB, MIMD, TII->get(TargetOpcode::COPY), mallocPtrVReg) in EmitLoweredSegAlloca()
35443 BuildMI(mallocMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB); in EmitLoweredSegAlloca()
35446 BB->addSuccessor(bumpMBB); in EmitLoweredSegAlloca()
35447 BB->addSuccessor(mallocMBB); in EmitLoweredSegAlloca()
35448 mallocMBB->addSuccessor(continueMBB); in EmitLoweredSegAlloca()
35449 bumpMBB->addSuccessor(continueMBB); in EmitLoweredSegAlloca()
35452 BuildMI(*continueMBB, continueMBB->begin(), MIMD, TII->get(X86::PHI), in EmitLoweredSegAlloca()
35469 MachineFunction *MF = BB->getParent(); in EmitLoweredCatchRet()
35475 classifyEHPersonality(MF->getFunction().getPersonalityFn())) && in EmitLoweredCatchRet()
35478 // Only 32-bit EH needs to worry about manually restoring stack pointers. in EmitLoweredCatchRet()
35485 MF->CreateMachineBasicBlock(BB->getBasicBlock()); in EmitLoweredCatchRet()
35486 assert(BB->succ_size() == 1); in EmitLoweredCatchRet()
35487 MF->insert(std::next(BB->getIterator()), RestoreMBB); in EmitLoweredCatchRet()
35488 RestoreMBB->transferSuccessorsAndUpdatePHIs(BB); in EmitLoweredCatchRet()
35489 BB->addSuccessor(RestoreMBB); in EmitLoweredCatchRet()
35494 RestoreMBB->setIsEHPad(true); in EmitLoweredCatchRet()
35496 auto RestoreMBBI = RestoreMBB->begin(); in EmitLoweredCatchRet()
35505 // adjust_stackdown -> TLSADDR -> adjust_stackup. in EmitLoweredTLSAddr()
35507 // inside MC, therefore without the two markers shrink-wrapping in EmitLoweredTLSAddr()
35511 MachineFunction &MF = *BB->getParent(); in EmitLoweredTLSAddr()
35518 BB->insert(MachineBasicBlock::iterator(MI), CallseqStart); in EmitLoweredTLSAddr()
35526 BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd); in EmitLoweredTLSAddr()
35535 // our load from the relocation, sticking it in either RDI (x86-64) in EmitLoweredTLSCall()
35538 MachineFunction *F = BB->getParent(); in EmitLoweredTLSCall()
35546 // FIXME: The 32-bit calls have non-standard calling conventions. Use a in EmitLoweredTLSCall()
35550 Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() : in EmitLoweredTLSCall()
35551 Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); in EmitLoweredTLSCall()
35554 BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI) in EmitLoweredTLSCall()
35561 MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL64m)); in EmitLoweredTLSCall()
35566 BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX) in EmitLoweredTLSCall()
35573 MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m)); in EmitLoweredTLSCall()
35578 BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX) in EmitLoweredTLSCall()
35579 .addReg(TII->getGlobalBaseReg(F)) in EmitLoweredTLSCall()
35585 MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m)); in EmitLoweredTLSCall()
35614 // aliases and are doing non-trivial configuration of the thunk's body. For in getIndirectThunkSymbol()
35615 // example, the Linux kernel will do boot-time hot patching of the thunk in getIndirectThunkSymbol()
35621 // LLVM will generate calls to specific thunks, we merely make a best-effort in getIndirectThunkSymbol()
35626 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); in getIndirectThunkSymbol()
35629 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); in getIndirectThunkSymbol()
35632 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); in getIndirectThunkSymbol()
35635 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); in getIndirectThunkSymbol()
35638 assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); in getIndirectThunkSymbol()
35646 // When targeting an internal COMDAT thunk use an LLVM-specific name. in getIndirectThunkSymbol()
35649 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); in getIndirectThunkSymbol()
35652 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); in getIndirectThunkSymbol()
35655 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); in getIndirectThunkSymbol()
35658 assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); in getIndirectThunkSymbol()
35661 assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); in getIndirectThunkSymbol()
35668 assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); in getIndirectThunkSymbol()
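        // (Editor's note, not in the source: the externally provided thunks follow the
        //  `__x86_indirect_thunk_<reg>` naming used by GCC and the Linux kernel, e.g.
        //  `__x86_indirect_thunk_r11`, while the LLVM-emitted COMDAT fallbacks are
        //  named `__llvm_retpoline_<reg>`, e.g. `__llvm_retpoline_r11`.)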
35684 // Find an available scratch register to hold the callee. On 64-bit, we can in EmitLoweredIndirectThunk()
35686 // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't in EmitLoweredIndirectThunk()
35704 // Choose the first remaining non-zero available register. in EmitLoweredIndirectThunk()
35718 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), AvailableReg) in EmitLoweredIndirectThunk()
35721 MI.setDesc(TII->get(Opc)); in EmitLoweredIndirectThunk()
35722 MachineInstrBuilder(*BB->getParent(), &MI) in EmitLoweredIndirectThunk()
35742 MachineFunction *MF = MBB->getParent(); in emitSetJmpShadowStackFix()
35744 MachineRegisterInfo &MRI = MF->getRegInfo(); in emitSetJmpShadowStackFix()
35752 MVT PVT = getPointerTy(MF->getDataLayout()); in emitSetJmpShadowStackFix()
35756 BuildMI(*MBB, MI, MIMD, TII->get(XorRROpc)) in emitSetJmpShadowStackFix()
35764 BuildMI(*MBB, MI, MIMD, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); in emitSetJmpShadowStackFix()
35768 MIB = BuildMI(*MBB, MI, MIMD, TII->get(PtrStoreOpc)); in emitSetJmpShadowStackFix()
35785 MachineFunction *MF = MBB->getParent(); in emitEHSjLjSetJmp()
35788 MachineRegisterInfo &MRI = MF->getRegInfo(); in emitEHSjLjSetJmp()
35790 const BasicBlock *BB = MBB->getBasicBlock(); in emitEHSjLjSetJmp()
35791 MachineFunction::iterator I = ++MBB->getIterator(); in emitEHSjLjSetJmp()
35804 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); in emitEHSjLjSetJmp()
35811 MVT PVT = getPointerTy(MF->getDataLayout()); in emitEHSjLjSetJmp()
35818 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB in emitEHSjLjSetJmp()
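        // (Editor's sketch, not in the source: the setjmp buffer is laid out as
        //  buf[0] = frame pointer, buf[LabelOffset] = resume address (restoreMBB),
        //  buf[SPOffset] = stack pointer, mirroring the three reloads done in
        //  emitEHSjLjLongJmp further down.)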
35832 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); in emitEHSjLjSetJmp()
35833 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); in emitEHSjLjSetJmp()
35834 MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB); in emitEHSjLjSetJmp()
35835 MF->insert(I, mainMBB); in emitEHSjLjSetJmp()
35836 MF->insert(I, sinkMBB); in emitEHSjLjSetJmp()
35837 MF->push_back(restoreMBB); in emitEHSjLjSetJmp()
35838 restoreMBB->setMachineBlockAddressTaken(); in emitEHSjLjSetJmp()
35843 sinkMBB->splice(sinkMBB->begin(), MBB, in emitEHSjLjSetJmp()
35844 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); in emitEHSjLjSetJmp()
35845 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); in emitEHSjLjSetJmp()
35851 bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) && in emitEHSjLjSetJmp()
35860 MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA64r), LabelReg) in emitEHSjLjSetJmp()
35868 MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA32r), LabelReg) in emitEHSjLjSetJmp()
35869 .addReg(XII->getGlobalBaseReg(MF)) in emitEHSjLjSetJmp()
35878 MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrStoreOpc)); in emitEHSjLjSetJmp()
35891 if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) { in emitEHSjLjSetJmp()
35896 MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup)) in emitEHSjLjSetJmp()
35900 MIB.addRegMask(RegInfo->getNoPreservedMask()); in emitEHSjLjSetJmp()
35901 thisMBB->addSuccessor(mainMBB); in emitEHSjLjSetJmp()
35902 thisMBB->addSuccessor(restoreMBB); in emitEHSjLjSetJmp()
35906 BuildMI(mainMBB, MIMD, TII->get(X86::MOV32r0), mainDstReg); in emitEHSjLjSetJmp()
35907 mainMBB->addSuccessor(sinkMBB); in emitEHSjLjSetJmp()
35910 BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg) in emitEHSjLjSetJmp()
35917 if (RegInfo->hasBasePointer(*MF)) { in emitEHSjLjSetJmp()
35920 X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); in emitEHSjLjSetJmp()
35921 X86FI->setRestoreBasePointer(MF); in emitEHSjLjSetJmp()
35922 Register FramePtr = RegInfo->getFrameRegister(*MF); in emitEHSjLjSetJmp()
35923 Register BasePtr = RegInfo->getBaseRegister(); in emitEHSjLjSetJmp()
35925 addRegOffset(BuildMI(restoreMBB, MIMD, TII->get(Opm), BasePtr), in emitEHSjLjSetJmp()
35926 FramePtr, true, X86FI->getRestoreBasePointerOffset()) in emitEHSjLjSetJmp()
35929 BuildMI(restoreMBB, MIMD, TII->get(X86::MOV32ri), restoreDstReg).addImm(1); in emitEHSjLjSetJmp()
35930 BuildMI(restoreMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB); in emitEHSjLjSetJmp()
35931 restoreMBB->addSuccessor(sinkMBB); in emitEHSjLjSetJmp()
35946 MachineFunction *MF = MBB->getParent(); in emitLongJmpShadowStackFix()
35948 MachineRegisterInfo &MRI = MF->getRegInfo(); in emitLongJmpShadowStackFix()
35954 MVT PVT = getPointerTy(MF->getDataLayout()); in emitLongJmpShadowStackFix()
35981 MachineFunction::iterator I = ++MBB->getIterator(); in emitLongJmpShadowStackFix()
35982 const BasicBlock *BB = MBB->getBasicBlock(); in emitLongJmpShadowStackFix()
35984 MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB); in emitLongJmpShadowStackFix()
35985 MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB); in emitLongJmpShadowStackFix()
35986 MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB); in emitLongJmpShadowStackFix()
35987 MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB); in emitLongJmpShadowStackFix()
35988 MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB); in emitLongJmpShadowStackFix()
35989 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); in emitLongJmpShadowStackFix()
35990 MF->insert(I, checkSspMBB); in emitLongJmpShadowStackFix()
35991 MF->insert(I, fallMBB); in emitLongJmpShadowStackFix()
35992 MF->insert(I, fixShadowMBB); in emitLongJmpShadowStackFix()
35993 MF->insert(I, fixShadowLoopPrepareMBB); in emitLongJmpShadowStackFix()
35994 MF->insert(I, fixShadowLoopMBB); in emitLongJmpShadowStackFix()
35995 MF->insert(I, sinkMBB); in emitLongJmpShadowStackFix()
35998 sinkMBB->splice(sinkMBB->begin(), MBB, MachineBasicBlock::iterator(MI), in emitLongJmpShadowStackFix()
35999 MBB->end()); in emitLongJmpShadowStackFix()
36000 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); in emitLongJmpShadowStackFix()
36002 MBB->addSuccessor(checkSspMBB); in emitLongJmpShadowStackFix()
36006 BuildMI(checkSspMBB, MIMD, TII->get(X86::MOV32r0), ZReg); in emitLongJmpShadowStackFix()
36010 BuildMI(checkSspMBB, MIMD, TII->get(X86::SUBREG_TO_REG), TmpZReg) in emitLongJmpShadowStackFix()
36020 BuildMI(checkSspMBB, MIMD, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); in emitLongJmpShadowStackFix()
36025 BuildMI(checkSspMBB, MIMD, TII->get(TestRROpc)) in emitLongJmpShadowStackFix()
36028 BuildMI(checkSspMBB, MIMD, TII->get(X86::JCC_1)) in emitLongJmpShadowStackFix()
36031 checkSspMBB->addSuccessor(sinkMBB); in emitLongJmpShadowStackFix()
36032 checkSspMBB->addSuccessor(fallMBB); in emitLongJmpShadowStackFix()
36039 BuildMI(fallMBB, MIMD, TII->get(PtrLoadOpc), PrevSSPReg); in emitLongJmpShadowStackFix()
36055 BuildMI(fallMBB, MIMD, TII->get(SubRROpc), SspSubReg) in emitLongJmpShadowStackFix()
36060 BuildMI(fallMBB, MIMD, TII->get(X86::JCC_1)) in emitLongJmpShadowStackFix()
36063 fallMBB->addSuccessor(sinkMBB); in emitLongJmpShadowStackFix()
36064 fallMBB->addSuccessor(fixShadowMBB); in emitLongJmpShadowStackFix()
36070 BuildMI(fixShadowMBB, MIMD, TII->get(ShrRIOpc), SspFirstShrReg) in emitLongJmpShadowStackFix()
36076 BuildMI(fixShadowMBB, MIMD, TII->get(IncsspOpc)).addReg(SspFirstShrReg); in emitLongJmpShadowStackFix()
36080 BuildMI(fixShadowMBB, MIMD, TII->get(ShrRIOpc), SspSecondShrReg) in emitLongJmpShadowStackFix()
36085 BuildMI(fixShadowMBB, MIMD, TII->get(X86::JCC_1)) in emitLongJmpShadowStackFix()
36088 fixShadowMBB->addSuccessor(sinkMBB); in emitLongJmpShadowStackFix()
36089 fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB); in emitLongJmpShadowStackFix()
36094 BuildMI(fixShadowLoopPrepareMBB, MIMD, TII->get(ShlR1Opc), SspAfterShlReg) in emitLongJmpShadowStackFix()
36101 BuildMI(fixShadowLoopPrepareMBB, MIMD, TII->get(MovRIOpc), Value128InReg) in emitLongJmpShadowStackFix()
36103 fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB); in emitLongJmpShadowStackFix()
36109 BuildMI(fixShadowLoopMBB, MIMD, TII->get(X86::PHI), CounterReg) in emitLongJmpShadowStackFix()
36116 BuildMI(fixShadowLoopMBB, MIMD, TII->get(IncsspOpc)).addReg(Value128InReg); in emitLongJmpShadowStackFix()
36120 BuildMI(fixShadowLoopMBB, MIMD, TII->get(DecROpc), DecReg).addReg(CounterReg); in emitLongJmpShadowStackFix()
36123 BuildMI(fixShadowLoopMBB, MIMD, TII->get(X86::JCC_1)) in emitLongJmpShadowStackFix()
36126 fixShadowLoopMBB->addSuccessor(sinkMBB); in emitLongJmpShadowStackFix()
36127 fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB); in emitLongJmpShadowStackFix()
36136 MachineFunction *MF = MBB->getParent(); in emitEHSjLjLongJmp()
36138 MachineRegisterInfo &MRI = MF->getRegInfo(); in emitEHSjLjLongJmp()
36144 MVT PVT = getPointerTy(MF->getDataLayout()); in emitEHSjLjLongJmp()
36154 Register SP = RegInfo->getStackRegister(); in emitEHSjLjLongJmp()
36167 if (MF->getFunction().getParent()->getModuleFlag("cf-protection-return")) { in emitEHSjLjLongJmp()
36172 MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), FP); in emitEHSjLjLongJmp()
36184 MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), Tmp); in emitEHSjLjLongJmp()
36198 MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), SP); in emitEHSjLjLongJmp()
36209 BuildMI(*thisMBB, MI, MIMD, TII->get(IJmpOpc)).addReg(Tmp); in emitEHSjLjLongJmp()
36220 MachineFunction *MF = MBB->getParent(); in SetupEntryBlockForSjLj()
36221 MachineRegisterInfo *MRI = &MF->getRegInfo(); in SetupEntryBlockForSjLj()
36224 MVT PVT = getPointerTy(MF->getDataLayout()); in SetupEntryBlockForSjLj()
36230 bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) && in SetupEntryBlockForSjLj()
36238 VR = MRI->createVirtualRegister(TRC); in SetupEntryBlockForSjLj()
36242 BuildMI(*MBB, MI, MIMD, TII->get(X86::LEA64r), VR) in SetupEntryBlockForSjLj()
36249 BuildMI(*MBB, MI, MIMD, TII->get(X86::LEA32r), VR) in SetupEntryBlockForSjLj()
36250 .addReg(0) /* TII->getGlobalBaseReg(MF) */ in SetupEntryBlockForSjLj()
36257 MachineInstrBuilder MIB = BuildMI(*MBB, MI, MIMD, TII->get(Op)); in SetupEntryBlockForSjLj()
36269 MachineFunction *MF = BB->getParent(); in EmitSjLjDispatchBlock()
36270 MachineRegisterInfo *MRI = &MF->getRegInfo(); in EmitSjLjDispatchBlock()
36272 int FI = MF->getFrameInfo().getFunctionContextIndex(); in EmitSjLjDispatchBlock()
36292 if (!MF->hasCallSiteLandingPad(Sym)) in EmitSjLjDispatchBlock()
36295 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) { in EmitSjLjDispatchBlock()
36309 InvokeBBs.insert(LP->pred_begin(), LP->pred_end()); in EmitSjLjDispatchBlock()
36319 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); in EmitSjLjDispatchBlock()
36320 DispatchBB->setIsEHPad(true); in EmitSjLjDispatchBlock()
36322 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); in EmitSjLjDispatchBlock()
36323 BuildMI(TrapBB, MIMD, TII->get(X86::TRAP)); in EmitSjLjDispatchBlock()
36324 DispatchBB->addSuccessor(TrapBB); in EmitSjLjDispatchBlock()
36326 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); in EmitSjLjDispatchBlock()
36327 DispatchBB->addSuccessor(DispContBB); in EmitSjLjDispatchBlock()
36330 MF->push_back(DispatchBB); in EmitSjLjDispatchBlock()
36331 MF->push_back(DispContBB); in EmitSjLjDispatchBlock()
36332 MF->push_back(TrapBB); in EmitSjLjDispatchBlock()
36340 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE); in EmitSjLjDispatchBlock()
36341 unsigned MJTI = JTI->createJumpTableIndex(LPadList); in EmitSjLjDispatchBlock()
36343 const X86RegisterInfo &RI = TII->getRegisterInfo(); in EmitSjLjDispatchBlock()
36349 X86MachineFunctionInfo *MFI = MF->getInfo<X86MachineFunctionInfo>(); in EmitSjLjDispatchBlock()
36350 MFI->setRestoreBasePointer(MF); in EmitSjLjDispatchBlock()
36355 addRegOffset(BuildMI(DispatchBB, MIMD, TII->get(Op), BP), FP, true, in EmitSjLjDispatchBlock()
36356 MFI->getRestoreBasePointerOffset()) in EmitSjLjDispatchBlock()
36359 BuildMI(DispatchBB, MIMD, TII->get(X86::NOOP)) in EmitSjLjDispatchBlock()
36364 Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass); in EmitSjLjDispatchBlock()
36365 addFrameReference(BuildMI(DispatchBB, MIMD, TII->get(X86::MOV32rm), IReg), FI, in EmitSjLjDispatchBlock()
36367 BuildMI(DispatchBB, MIMD, TII->get(X86::CMP32ri)) in EmitSjLjDispatchBlock()
36370 BuildMI(DispatchBB, MIMD, TII->get(X86::JCC_1)) in EmitSjLjDispatchBlock()
36375 Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass); in EmitSjLjDispatchBlock()
36376 Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); in EmitSjLjDispatchBlock()
36379 BuildMI(DispContBB, MIMD, TII->get(X86::LEA64r), BReg) in EmitSjLjDispatchBlock()
36386 BuildMI(DispContBB, MIMD, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64) in EmitSjLjDispatchBlock()
36394 BuildMI(DispContBB, MIMD, TII->get(X86::JMP64m)) in EmitSjLjDispatchBlock()
36402 Register OReg = MRI->createVirtualRegister(&X86::GR32RegClass); in EmitSjLjDispatchBlock()
36403 Register OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass); in EmitSjLjDispatchBlock()
36404 Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass); in EmitSjLjDispatchBlock()
36407 BuildMI(DispContBB, MIMD, TII->get(X86::MOV32rm), OReg) in EmitSjLjDispatchBlock()
36414 BuildMI(DispContBB, MIMD, TII->get(X86::MOVSX64rr32), OReg64) in EmitSjLjDispatchBlock()
36417 BuildMI(DispContBB, MIMD, TII->get(X86::ADD64rr), TReg) in EmitSjLjDispatchBlock()
36421 BuildMI(DispContBB, MIMD, TII->get(X86::JMP64r)).addReg(TReg); in EmitSjLjDispatchBlock()
36429 BuildMI(DispContBB, MIMD, TII->get(X86::JMP32m)) in EmitSjLjDispatchBlock()
36441 DispContBB->addSuccessor(LP); in EmitSjLjDispatchBlock()
36445 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs(); in EmitSjLjDispatchBlock()
36450 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(), in EmitSjLjDispatchBlock()
36451 MBB->succ_rend()); in EmitSjLjDispatchBlock()
36454 if (MBBS->isEHPad()) { in EmitSjLjDispatchBlock()
36455 MBB->removeSuccessor(MBBS); in EmitSjLjDispatchBlock()
36460 MBB->addSuccessor(DispatchBB); in EmitSjLjDispatchBlock()
36462 // Find the invoke call and mark all of the callee-saved registers as in EmitSjLjDispatchBlock()
36486 // Mark all former landing pads as non-landing pads. The dispatch is the only in EmitSjLjDispatchBlock()
36489 LP->setIsEHPad(false); in EmitSjLjDispatchBlock()
36503 MachineFunction &MF = *BB->getParent(); in emitPatchableEventCall()
36510 BB->insert(MachineBasicBlock::iterator(MI), CallseqStart); in emitPatchableEventCall()
36516 BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd); in emitPatchableEventCall()
36524 MachineFunction *MF = BB->getParent(); in EmitInstrWithCustomInserter()
36591 MF->getFrameInfo().CreateStackObject(2, Align(2), false); in EmitInstrWithCustomInserter()
36592 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FNSTCW16m)), in EmitInstrWithCustomInserter()
36596 Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); in EmitInstrWithCustomInserter()
36597 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOVZX32rm16), OldCW), in EmitInstrWithCustomInserter()
36602 Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); in EmitInstrWithCustomInserter()
36603 BuildMI(*BB, MI, MIMD, TII->get(X86::OR32ri), NewCW) in EmitInstrWithCustomInserter()
36609 MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); in EmitInstrWithCustomInserter()
36610 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16) in EmitInstrWithCustomInserter()
36615 MF->getFrameInfo().CreateStackObject(2, Align(2), false); in EmitInstrWithCustomInserter()
36616 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOV16mr)), in EmitInstrWithCustomInserter()
36621 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)), in EmitInstrWithCustomInserter()
36626 BuildMI(*BB, MI, MIMD, TII->get(X86::ADD_Fp80)) in EmitInstrWithCustomInserter()
36631 BuildMI(*BB, MI, MIMD, TII->get(X86::ADD_Fp80m32)) in EmitInstrWithCustomInserter()
36642 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)), in EmitInstrWithCustomInserter()
36661 MF->getFrameInfo().CreateStackObject(2, Align(2), false); in EmitInstrWithCustomInserter()
36662 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FNSTCW16m)), in EmitInstrWithCustomInserter()
36666 Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); in EmitInstrWithCustomInserter()
36667 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOVZX32rm16), OldCW), in EmitInstrWithCustomInserter()
36671 Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); in EmitInstrWithCustomInserter()
36672 BuildMI(*BB, MI, MIMD, TII->get(X86::OR32ri), NewCW) in EmitInstrWithCustomInserter()
36677 MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); in EmitInstrWithCustomInserter()
36678 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16) in EmitInstrWithCustomInserter()
36683 MF->getFrameInfo().CreateStackObject(2, Align(2), false); in EmitInstrWithCustomInserter()
36684 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOV16mr)), in EmitInstrWithCustomInserter()
36690 TII->get(X86::FLDCW16m)), NewCWFrameIdx); in EmitInstrWithCustomInserter()
36695 // clang-format off in EmitInstrWithCustomInserter()
36706 // clang-format on in EmitInstrWithCustomInserter()
36710 addFullAddress(BuildMI(*BB, MI, MIMD, TII->get(Opc)), AM) in EmitInstrWithCustomInserter()
36714 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)), in EmitInstrWithCustomInserter()
36758 // - which is ESI for i686 - register allocator would not be able to in EmitInstrWithCustomInserter()
36760 // - there never would be enough unreserved registers during regalloc in EmitInstrWithCustomInserter()
36765 // If it is not i686 or there is no base pointer - nothing to do here. in EmitInstrWithCustomInserter()
36766 if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF)) in EmitInstrWithCustomInserter()
36773 assert(TRI->getBaseRegister() == X86::ESI && in EmitInstrWithCustomInserter()
36777 MachineRegisterInfo &MRI = MF->getRegInfo(); in EmitInstrWithCustomInserter()
36778 MVT SPTy = getPointerTy(MF->getDataLayout()); in EmitInstrWithCustomInserter()
36792 while (RMBBI != BB->rend() && in EmitInstrWithCustomInserter()
36793 (RMBBI->definesRegister(X86::EAX, /*TRI=*/nullptr) || in EmitInstrWithCustomInserter()
36794 RMBBI->definesRegister(X86::EBX, /*TRI=*/nullptr) || in EmitInstrWithCustomInserter()
36795 RMBBI->definesRegister(X86::ECX, /*TRI=*/nullptr) || in EmitInstrWithCustomInserter()
36796 RMBBI->definesRegister(X86::EDX, /*TRI=*/nullptr))) { in EmitInstrWithCustomInserter()
36801 BuildMI(*BB, *MBBI, MIMD, TII->get(X86::LEA32r), computedAddrVReg), AM); in EmitInstrWithCustomInserter()
36809 Register BasePtr = TRI->getBaseRegister(); in EmitInstrWithCustomInserter()
36810 if (TRI->hasBasePointer(*MF) && in EmitInstrWithCustomInserter()
36812 if (!BB->isLiveIn(BasePtr)) in EmitInstrWithCustomInserter()
36813 BB->addLiveIn(BasePtr); in EmitInstrWithCustomInserter()
36816 MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); in EmitInstrWithCustomInserter()
36817 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), SaveRBX) in EmitInstrWithCustomInserter()
36819 Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); in EmitInstrWithCustomInserter()
36821 BuildMI(*BB, MI, MIMD, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst); in EmitInstrWithCustomInserter()
36828 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::RBX) in EmitInstrWithCustomInserter()
36831 BuildMI(*BB, MI, MIMD, TII->get(X86::LCMPXCHG16B)); in EmitInstrWithCustomInserter()
36840 Register BasePtr = TRI->getBaseRegister(); in EmitInstrWithCustomInserter()
36844 if (!IsRBX || !TRI->hasBasePointer(*MF)) { in EmitInstrWithCustomInserter()
36845 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::ECX) in EmitInstrWithCustomInserter()
36847 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EAX) in EmitInstrWithCustomInserter()
36849 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EBX) in EmitInstrWithCustomInserter()
36851 BuildMI(*BB, MI, MIMD, TII->get(X86::MWAITXrrr)); in EmitInstrWithCustomInserter()
36854 if (!BB->isLiveIn(BasePtr)) { in EmitInstrWithCustomInserter()
36855 BB->addLiveIn(BasePtr); in EmitInstrWithCustomInserter()
36858 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::ECX) in EmitInstrWithCustomInserter()
36860 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EAX) in EmitInstrWithCustomInserter()
36862 assert(Subtarget.is64Bit() && "Expected 64-bit mode!"); in EmitInstrWithCustomInserter()
36865 MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); in EmitInstrWithCustomInserter()
36866 BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), SaveRBX) in EmitInstrWithCustomInserter()
36869 Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); in EmitInstrWithCustomInserter()
36870 BuildMI(*BB, MI, MIMD, TII->get(X86::MWAITX_SAVE_RBX)) in EmitInstrWithCustomInserter()
36879 assert(Subtarget.is32Bit() && "preallocated only used in 32-bit"); in EmitInstrWithCustomInserter()
36880 auto *MFI = MF->getInfo<X86MachineFunctionInfo>(); in EmitInstrWithCustomInserter()
36881 MFI->setHasPreallocatedCall(true); in EmitInstrWithCustomInserter()
36883 size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId); in EmitInstrWithCustomInserter()
36887 BuildMI(*BB, MI, MIMD, TII->get(X86::SUB32ri), X86::ESP) in EmitInstrWithCustomInserter()
36894 assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit"); in EmitInstrWithCustomInserter()
36897 auto *MFI = MF->getInfo<X86MachineFunctionInfo>(); in EmitInstrWithCustomInserter()
36898 size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx]; in EmitInstrWithCustomInserter()
36902 addRegOffset(BuildMI(*BB, MI, MIMD, TII->get(X86::LEA32r), in EmitInstrWithCustomInserter()
36916 // clang-format off in EmitInstrWithCustomInserter()
36924 // clang-format on in EmitInstrWithCustomInserter()
36927 MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc)); in EmitInstrWithCustomInserter()
36938 BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm)); in EmitInstrWithCustomInserter()
36940 auto *MFI = MF->getInfo<X86MachineFunctionInfo>(); in EmitInstrWithCustomInserter()
36941 MFI->setAMXProgModel(AMXProgModelEnum::DirectReg); in EmitInstrWithCustomInserter()
36945 auto *MFI = MF->getInfo<X86MachineFunctionInfo>(); in EmitInstrWithCustomInserter()
36946 MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA); in EmitInstrWithCustomInserter()
36968 MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc)); in EmitInstrWithCustomInserter()
36976 MIB.add(MI.getOperand(CurOp++)); // index -- stride in EmitInstrWithCustomInserter()
36992 // clang-format off in EmitInstrWithCustomInserter()
36996 // clang-format on in EmitInstrWithCustomInserter()
36998 MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc)); in EmitInstrWithCustomInserter()
37009 //===----------------------------------------------------------------------===//
37011 //===----------------------------------------------------------------------===//
37018 EVT VT = Op.getValueType(); in targetShrinkDemandedConstant() local
37020 unsigned EltSize = VT.getScalarSizeInBits(); in targetShrinkDemandedConstant()
37022 if (VT.isVector()) { in targetShrinkDemandedConstant()
37039 // For vectors - if we have a constant, then try to sign extend. in targetShrinkDemandedConstant()
37042 if (EltSize > ActiveBits && EltSize > 1 && isTypeLegal(VT) && in targetShrinkDemandedConstant()
37047 VT.getVectorNumElements()); in targetShrinkDemandedConstant()
37049 TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(Op), VT, in targetShrinkDemandedConstant()
37052 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, Op.getOperand(0), NewC); in targetShrinkDemandedConstant()
37068 const APInt &Mask = C->getAPIntValue(); in targetShrinkDemandedConstant()
37070 // Clear all non-demanded bits initially. in targetShrinkDemandedConstant()
37094 // and non-demanded bits. in targetShrinkDemandedConstant()
37100 SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT); in targetShrinkDemandedConstant()
37101 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); in targetShrinkDemandedConstant()
37205 EVT VT = Op.getValueType(); in computeKnownBitsForTargetNode() local
37246 if (ShAmt >= VT.getScalarSizeInBits()) { in computeKnownBitsForTargetNode()
37254 ShAmt = VT.getScalarSizeInBits() - 1; in computeKnownBitsForTargetNode()
37261 // Low bits are known zero. in computeKnownBitsForTargetNode()
37277 getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); in computeKnownBitsForTargetNode()
37303 // PSHUFB is being used as a LUT (ctpop etc.) - the target shuffle handling in computeKnownBitsForTargetNode()
37348 assert(VT.getScalarType() == MVT::i64 && in computeKnownBitsForTargetNode()
37375 assert(VT.getVectorElementType() == MVT::i32 && in computeKnownBitsForTargetNode()
37385 assert(VT.getVectorElementType() == MVT::i16 && in computeKnownBitsForTargetNode()
37419 unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0); in computeKnownBitsForTargetNode()
37420 unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8); in computeKnownBitsForTargetNode()
37442 // The result will have at least as many trailing zeros as the non-mask in computeKnownBitsForTargetNode()
37474 // Truncations/Conversions - upper elements are known zero. in computeKnownBitsForTargetNode()
37489 // Strict Conversions - upper elements are known zero. in computeKnownBitsForTargetNode()
37538 switch (Op->getConstantOperandVal(0)) { in computeKnownBitsForTargetNode()
37544 assert(VT.getScalarType() == MVT::i32 && in computeKnownBitsForTargetNode()
37556 assert(VT.getScalarType() == MVT::i16 && in computeKnownBitsForTargetNode()
37568 assert(VT.getScalarType() == MVT::i64 && in computeKnownBitsForTargetNode()
37581 // TODO - use resolveTargetShuffleInputs once we can limit recursive depth. in computeKnownBitsForTargetNode()
37587 unsigned NumElts = VT.getVectorNumElements(); in computeKnownBitsForTargetNode()
37610 if (Ops[OpIdx].getValueType() != VT) { in computeKnownBitsForTargetNode()
37611 // TODO - handle target shuffle ops with different value types. in computeKnownBitsForTargetNode()
37633 EVT VT = Op.getValueType(); in ComputeNumSignBitsForTargetNode() local
37634 unsigned VTBits = VT.getScalarSizeInBits(); in ComputeNumSignBitsForTargetNode()
37648 if (Tmp > (NumSrcBits - VTBits)) in ComputeNumSignBitsForTargetNode()
37649 return Tmp - (NumSrcBits - VTBits); in ComputeNumSignBitsForTargetNode()
37661 auto NumSignBitsPACKSS = [&](SDValue V, const APInt &Elts) -> unsigned { in ComputeNumSignBitsForTargetNode()
37684 if (Tmp > (SrcBits - VTBits)) in ComputeNumSignBitsForTargetNode()
37685 return Tmp - (SrcBits - VTBits); in ComputeNumSignBitsForTargetNode()
37700 return VTBits; // Shifted all bits out --> zero. in ComputeNumSignBitsForTargetNode()
37703 return 1; // Shifted all sign bits out --> unknown. in ComputeNumSignBitsForTargetNode()
37704 return Tmp - ShiftVal.getZExtValue(); in ComputeNumSignBitsForTargetNode()
37710 if (ShiftVal.uge(VTBits - 1)) in ComputeNumSignBitsForTargetNode()
37718 // cmpss/cmpsd return zero/all-bits result values in the bottom element. in ComputeNumSignBitsForTargetNode()
37719 if (VT == MVT::f32 || VT == MVT::f64 || in ComputeNumSignBitsForTargetNode()
37720 ((VT == MVT::v4f32 || VT == MVT::v2f64) && DemandedElts == 1)) in ComputeNumSignBitsForTargetNode()
37729 // Vector compares return zero/all-bits result values. in ComputeNumSignBitsForTargetNode()
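  // An all-zero / all-ones element means every bit matches the sign bit, so
  // these compare results contribute a full VTBits worth of sign bits.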
37750 // TODO - use resolveTargetShuffleInputs once we can limit recursive depth. in ComputeNumSignBitsForTargetNode()
37756 unsigned NumElts = VT.getVectorNumElements(); in ComputeNumSignBitsForTargetNode()
37776 if (Ops[OpIdx].getValueType() != VT) { in ComputeNumSignBitsForTargetNode()
37777 // TODO - handle target shuffle ops with different value types. in ComputeNumSignBitsForTargetNode()
37800 if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP) in unwrapAddress()
37801 return N->getOperand(0); in unwrapAddress()
37806 // specified VT and memory VT. Returns SDValue() on failure.
37807 static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT, in narrowLoadToVZLoad() argument
37810 if (!LN->isSimple()) in narrowLoadToVZLoad()
37813 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in narrowLoadToVZLoad()
37814 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; in narrowLoadToVZLoad()
37816 LN->getPointerInfo(), LN->getOriginalAlign(), in narrowLoadToVZLoad()
37817 LN->getMemOperand()->getFlags()); in narrowLoadToVZLoad()
37831 // Match against a VZEXT_MOVL vXi32 and vXi16 zero-extending instruction. in matchUnaryShuffle()
37834 if ((isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) || in matchUnaryShuffle()
37836 isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) { in matchUnaryShuffle()
37847 // TODO: Add 512-bit vector support (split AVX512F and AVX512BW). in matchUnaryShuffle()
37865 unsigned Len = Scale - 1; in matchUnaryShuffle()
37891 // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS). in matchUnaryShuffle()
37895 isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) { in matchUnaryShuffle()
37926 assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles"); in matchUnaryShuffle()
37948 "AVX512 required for 512-bit vector shuffles"); in matchUnaryShuffle()
37994 // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+). in matchUnaryPermuteShuffle()
38011 // VPERMILPD can permute with a non-repeating shuffle. in matchUnaryPermuteShuffle()
38032 // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here). in matchUnaryPermuteShuffle()
38037 // Narrow the repeated mask to create 32-bit element permutes. in matchUnaryPermuteShuffle()
38075 OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4); in matchUnaryPermuteShuffle()
38189   // Use (SSE41) PACKUSDW if the leading zero bits extend to the lowest 16-bits. in matchBinaryShuffle()
38198   // Use PACKUSWB if the leading zero bits extend to the lowest 8-bits. in matchBinaryShuffle()
38205   // Use PACKSSDW if the sign bits extend to the lowest 16-bits. in matchBinaryShuffle()
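  // For example, a v4i32 whose elements are all known to fit in 16 unsigned
  // bits (e.g. 0x0000FFFF, 0x00000001) can be packed with PACKUSDW losslessly,
  // since the unsigned saturation never fires.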
38231 // non-blended source element is zero in each case. in matchBinaryShuffle()
38441 S0 = (SM_SentinelUndef == M0 ? -1 : 0); in matchBinaryPermuteShuffle()
38442 S1 = (SM_SentinelUndef == M1 ? -1 : 1); in matchBinaryPermuteShuffle()
38445 S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3); in matchBinaryPermuteShuffle()
38446 S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3); in matchBinaryPermuteShuffle()
38449 S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3); in matchBinaryPermuteShuffle()
38450 S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3); in matchBinaryPermuteShuffle()
38457 int ShufMask[4] = {-1, -1, -1, -1}; in matchBinaryPermuteShuffle()
38494 /// chain of single-use x86 shuffle instructions and accumulated the combined
38516 auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) { in combineX86ShuffleChain() argument
38517 if (VT.getSizeInBits() > Op.getValueSizeInBits()) in combineX86ShuffleChain()
38518 Op = widenSubVector(Op, false, Subtarget, DAG, DL, VT.getSizeInBits()); in combineX86ShuffleChain()
38519 else if (VT.getSizeInBits() < Op.getValueSizeInBits()) in combineX86ShuffleChain()
38520 Op = extractSubVector(Op, 0, DAG, DL, VT.getSizeInBits()); in combineX86ShuffleChain()
38521 return DAG.getBitcast(VT, Op); in combineX86ShuffleChain()
38551 // is different from the root element size - this would prevent writemasks in combineX86ShuffleChain()
38555 if (Root.hasOneUse() && Root->use_begin()->getOpcode() == ISD::VSELECT && in combineX86ShuffleChain()
38556 Root->use_begin()->getOperand(0).getScalarValueSizeInBits() == 1) { in combineX86ShuffleChain()
38573 // See if the shuffle is a hidden identity shuffle - repeated args in HOPs in combineX86ShuffleChain()
38588 // Handle 128/256-bit lane shuffles of 512-bit vectors. in combineX86ShuffleChain()
38594 if (isUndefOrZeroInRange(Mask, 1, NumBaseMaskElts - 1)) { in combineX86ShuffleChain()
38615 int PermMask[4] = {-1, -1, -1, -1}; in combineX86ShuffleChain()
38619 assert(ScaledMask[i] >= -1 && "Illegal shuffle sentinel value"); in combineX86ShuffleChain()
38660 // Handle 128-bit lane shuffles of 256-bit vectors. in combineX86ShuffleChain()
38675 // If we're inserting the low subvector, an insert-subvector 'concat' in combineX86ShuffleChain()
38708 // TODO - handle AVX512VL cases with X86ISD::SHUF128. in combineX86ShuffleChain()
38727 // For masks that have been widened to 128-bit elements or more, in combineX86ShuffleChain()
38728 // narrow back down to 64-bit elements. in combineX86ShuffleChain()
38739 // TODO - variable shuffles might need this to be widened again. in combineX86ShuffleChain()
38779 // Attempt to match against broadcast-from-vector. in combineX86ShuffleChain()
38965 // Don't try to re-form single instruction chains under any circumstances now in combineX86ShuffleChain()
38987 // If we have a single input lane-crossing shuffle then lower to VPERMV. in combineX86ShuffleChain()
38996 // AVX512 variants (non-VLX will pad to 512-bit shuffles). in combineX86ShuffleChain()
39011 // Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero in combineX86ShuffleChain()
39012 // vector as the second source (non-VLX will pad to 512-bit shuffles). in combineX86ShuffleChain()
39023 // Adjust shuffle mask - replace SM_SentinelZero with second source index. in combineX86ShuffleChain()
39041 // If we have a dual input lane-crossing shuffle then lower to VPERMV3, in combineX86ShuffleChain()
39042 // (non-VLX will pad to 512-bit shuffles). in combineX86ShuffleChain()
39061 // See if we can combine a single input shuffle with zeros to a bit-mask, in combineX86ShuffleChain()
39089 // the 128-bit lanes use the variable mask to VPERMILPS. in combineX86ShuffleChain()
39106 // With XOP, binary shuffles of 128/256-bit floating point vectors can combine in combineX86ShuffleChain()
39112 // Bits[3] - Match Bit. in combineX86ShuffleChain()
39113 // Bits[2:1] - (Per Lane) PD Shuffle Mask. in combineX86ShuffleChain()
39114 // Bits[2:0] - (Per Lane) PS Shuffle Mask. in combineX86ShuffleChain()
39121 VPerm2Idx.push_back(-1); in combineX86ShuffleChain()
39174 // With XOP, if we have a 128-bit binary input shuffle we can always combine in combineX86ShuffleChain()
39175 // to VPPERM. We match the depth requirement of PSHUFB - VPPERM is never in combineX86ShuffleChain()
39180 // Bits[4:0] - Byte Index (0 - 31) in combineX86ShuffleChain()
39181 // Bits[7:5] - Permute Operation (0 - Source byte, 4 - ZERO) in combineX86ShuffleChain()
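  // e.g. a selector byte of 0x03 copies input byte 3, 0x1F copies input byte
  // 31, and 0x80 (op == 4) forces that destination byte to zero.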
39214 // (non-VLX will pad to 512-bit shuffles) in combineX86ShuffleChain()
39244 // -->
39286 WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef); in combineX86ShuffleChainWithExtract()
39325 // elements, and shrink them to the half-width mask. It does this in a loop in combineX86ShuffleChainWithExtract()
39413 // the HOP args are pre-shuffled. in canonicalizeShuffleMaskWithHorizOp()
39425 if (Src1.getOpcode() == Opcode0 && Src0->isOnlyUserOf(Src1.getNode())) in canonicalizeShuffleMaskWithHorizOp()
39439 // shuffle(hop(x,y),hop(z,w)) -> permute(hop(x,z)) etc. in canonicalizeShuffleMaskWithHorizOp()
39459 int PostMask[4] = {-1, -1, -1, -1}; in canonicalizeShuffleMaskWithHorizOp()
39481 SDValue BC1 = BC[BC.size() - 1]; in canonicalizeShuffleMaskWithHorizOp()
39505 M -= NumElts + (SubLane * NumHalfEltsPerLane); in canonicalizeShuffleMaskWithHorizOp()
39519 M -= NumHalfEltsPerLane; in canonicalizeShuffleMaskWithHorizOp()
39522 M -= NumHalfEltsPerLane; in canonicalizeShuffleMaskWithHorizOp()
39552 // If we are post-shuffling a 256-bit hop and not requiring the upper in canonicalizeShuffleMaskWithHorizOp()
39553 // elements, then try to narrow to a 128-bit hop directly. in canonicalizeShuffleMaskWithHorizOp()
39577 static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops, in combineX86ShufflesConstants() argument
39582 unsigned SizeInBits = VT.getSizeInBits(); in combineX86ShufflesConstants()
39602 llvm::none_of(Ops, [](SDValue SrcOp) { return SrcOp->hasOneUse(); })) in combineX86ShufflesConstants()
39645 return getZeroVector(VT, Subtarget, DAG, DL); in combineX86ShufflesConstants()
39649 if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64)) in combineX86ShufflesConstants()
39659 return DAG.getBitcast(VT, CstOp); in combineX86ShufflesConstants()
39674 /// of single-use shuffle instructions, build a generic model of the cumulative
39681 /// special-purpose shuffle.
39697 /// combine-ordering. To fix this, we should do the redundant instruction
39722 EVT VT = Op.getValueType(); in combineX86ShufflesRecursively() local
39723 if (!VT.isVector() || !VT.isSimple()) in combineX86ShufflesRecursively()
39724 return SDValue(); // Bail if we hit a non-simple non-vector. in combineX86ShufflesRecursively()
39727 if (VT.getVectorElementType() == MVT::f16) in combineX86ShufflesRecursively()
39730 assert((RootSizeInBits % VT.getSizeInBits()) == 0 && in combineX86ShufflesRecursively()
39738 OpDemandedElts.setBit(M - BaseIdx); in combineX86ShufflesRecursively()
39740 if (RootSizeInBits != VT.getSizeInBits()) { in combineX86ShufflesRecursively()
39741 // Op is smaller than Root - extract the demanded elts for the subvector. in combineX86ShufflesRecursively()
39742 unsigned Scale = RootSizeInBits / VT.getSizeInBits(); in combineX86ShufflesRecursively()
39746 .extractBits(RootMask.size() - NumOpMaskElts, NumOpMaskElts) in combineX86ShufflesRecursively()
39752 APIntOps::ScaleBitMask(OpDemandedElts, VT.getVectorNumElements()); in combineX86ShufflesRecursively()
39763 if (llvm::any_of(OpInputs, [VT](SDValue OpInput) { in combineX86ShufflesRecursively()
39764 return OpInput.getValueSizeInBits() > VT.getSizeInBits(); in combineX86ShufflesRecursively()
39772 unsigned NumElts = VT.getVectorNumElements(); in combineX86ShufflesRecursively()
39783 if (RootSizeInBits > VT.getSizeInBits()) { in combineX86ShufflesRecursively()
39784 unsigned NumSubVecs = RootSizeInBits / VT.getSizeInBits(); in combineX86ShufflesRecursively()
39798 OpMask.append((NumSubVecs - 1) * OpMaskSize, SM_SentinelUndef); in combineX86ShufflesRecursively()
39834 auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int { in combineX86ShufflesRecursively()
39840 // Match failed - should we replace an existing Op? in combineX86ShufflesRecursively()
39847 return Ops.size() - 1; in combineX86ShufflesRecursively()
39853 AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1)); in combineX86ShufflesRecursively()
39862 // This function can be performance-critical, so we rely on the power-of-2 in combineX86ShufflesRecursively()
39864 // bit-masks and shifts. in combineX86ShufflesRecursively()
39866 "Non-power-of-2 shuffle mask sizes"); in combineX86ShufflesRecursively()
39868 "Non-power-of-2 shuffle mask sizes"); in combineX86ShufflesRecursively()
39879 assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes"); in combineX86ShufflesRecursively()
39880 assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes"); in combineX86ShufflesRecursively()
39881 assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes"); in combineX86ShufflesRecursively()
39902 : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1)); in combineX86ShufflesRecursively()
39912 RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1); in combineX86ShufflesRecursively()
39921 // Ok, we have non-zero lanes, map them through to one of the Op's inputs. in combineX86ShufflesRecursively()
39924 (RootMaskedIdx & (OpRatio - 1)); in combineX86ShufflesRecursively()
39926 OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1); in combineX86ShufflesRecursively()
39987 if (Ops.size() < (MaxDepth - Depth)) { in combineX86ShufflesRecursively()
39996 if (Ops[i].getNode()->hasOneUse() || in combineX86ShufflesRecursively()
40014 // If constant fold failed and we only have constants - then we have in combineX86ShufflesRecursively()
40015 // multiple uses by a single non-variable shuffle - just bail. in combineX86ShufflesRecursively()
40047 int OpEltIdx = MaskElt - Lo; in combineX86ShufflesRecursively()
40059 NumExpectedVectorElts - NumOpVectorElts, NumOpVectorElts) && in combineX86ShufflesRecursively()
40064 // The Op itself may be of different VT, so we need to scale the mask. in combineX86ShufflesRecursively()
40086 // Reresolve - we might have repeated subvector sources. in combineX86ShufflesRecursively()
40094 // elements, and shrink them to the half-width mask. It does this in a loop in combineX86ShufflesRecursively()
40146 /// Get the PSHUF-style mask from PSHUF node.
40149 /// PSHUF-style masks that can be reused with such instructions.
40151 MVT VT = N.getSimpleValueType(); in getPSHUFShuffleMask() local
40158 // If we have more than 128-bits, only the low 128-bits of shuffle mask in getPSHUFShuffleMask()
40160 if (VT.getSizeInBits() > 128) { in getPSHUFShuffleMask()
40161 int LaneElts = 128 / VT.getScalarSizeInBits(); in getPSHUFShuffleMask()
40163 for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i) in getPSHUFShuffleMask()
40165 assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) && in getPSHUFShuffleMask()
40166 "Mask doesn't repeat in high 128-bit lanes!"); in getPSHUFShuffleMask()
40180 M -= 4; in getPSHUFShuffleMask()
40197 "Called with something other than an x86 128-bit half shuffle!"); in combineRedundantDWordShuffle()
40199 // Walk up a single-use chain looking for a combinable shuffle. Keep a stack in combineRedundantDWordShuffle()
40219 // Check that the low words (being shuffled) are the identity in the in combineRedundantDWordShuffle()
40220 // dword shuffle, and the high words are self-contained. in combineRedundantDWordShuffle()
40230 // dword shuffle, and the low words are self-contained. in combineRedundantDWordShuffle()
40240 // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword in combineRedundantDWordShuffle()
40246 // Search for a half-shuffle which we can combine with. in combineRedundantDWordShuffle()
40250 !V->isOnlyUserOf(V.getOperand(0).getNode())) in combineRedundantDWordShuffle()
40321 // permilps(shufps(load(),x)) --> permilps(shufps(x,load()))
40322 static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL, in combineCommutableSHUFP() argument
40325 if (VT != MVT::v4f32 && VT != MVT::v8f32 && VT != MVT::v16f32) in combineCommutableSHUFP()
40328 // SHUFP(LHS, RHS) -> SHUFP(RHS, LHS) iff LHS is foldable + RHS is not. in combineCommutableSHUFP()
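  // (SHUFPS/SHUFPD can fold a memory operand only as their second source, so
  // commuting moves the loadable operand to where it can actually be folded.)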
40329 auto commuteSHUFP = [&VT, &DL, &DAG](SDValue Parent, SDValue V) { in combineCommutableSHUFP()
40330 if (V.getOpcode() != X86ISD::SHUFP || !Parent->isOnlyUserOf(V.getNode())) in combineCommutableSHUFP()
40340 return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0, in combineCommutableSHUFP()
40348 return DAG.getNode(X86ISD::VPERMILPI, DL, VT, NewSHUFP, in combineCommutableSHUFP()
40358 return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, NewSHUFP, in combineCommutableSHUFP()
40361 return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, N1, in combineCommutableSHUFP()
40364 return DAG.getNode(X86ISD::SHUFP, DL, VT, N0, NewSHUFP, in combineCommutableSHUFP()
40374 // Attempt to fold BLEND(PERMUTE(X),PERMUTE(Y)) -> PERMUTE(BLEND(X,Y))
40377 combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1, ArrayRef<int> BlendMask, in combineBlendOfPermutes() argument
40384 unsigned NumElts = VT.getVectorNumElements(); in combineBlendOfPermutes()
40444 // the blend mask is the same in the 128-bit subvectors (or can widen to in combineBlendOfPermutes()
40446 if (VT == MVT::v16i16) { in combineBlendOfPermutes()
40447 if (!is128BitLaneRepeatedShuffleMask(VT, NewBlendMask) && in combineBlendOfPermutes()
40454 // Don't introduce lane-crossing permutes without AVX2, unless it can be in combineBlendOfPermutes()
40456 if (VT.is256BitVector() && !Subtarget.hasAVX2() && in combineBlendOfPermutes()
40457 isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), in combineBlendOfPermutes()
40463 DAG.getVectorShuffle(VT, DL, DAG.getBitcast(VT, Ops0[0]), in combineBlendOfPermutes()
40464 DAG.getBitcast(VT, Ops1[0]), NewBlendMask); in combineBlendOfPermutes()
40465 return DAG.getVectorShuffle(VT, DL, NewBlend, DAG.getUNDEF(VT), in combineBlendOfPermutes()
40469 // TODO - move this to TLI like isBinOp?
40480 // Canonicalize SHUFFLE(UNARYOP(X)) -> UNARYOP(SHUFFLE(X)).
40481 // Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)).
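  // For example, shuffle(and(x, constant)) can become and(shuffle(x),
  // shuffle(constant)): the shuffle of the constant is folded away, and the
  // remaining shuffle may combine further with x's own shuffles.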
40499 (Op.getOpcode() == Opc && Op->hasOneUse()) || in canonicalizeShuffleWithOp()
40500 (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op->hasOneUse()) || in canonicalizeShuffleWithOp()
40501 (FoldShuf && isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) || in canonicalizeShuffleWithOp()
40530 N->isOnlyUserOf(N.getOperand(0).getNode())) { in canonicalizeShuffleWithOp()
40575 if (N->isOnlyUserOf(N.getOperand(0).getNode()) && in canonicalizeShuffleWithOp()
40576 N->isOnlyUserOf(N.getOperand(1).getNode())) { in canonicalizeShuffleWithOp()
40639 /// Attempt to fold vpermf128(op(),op()) -> op(vpermf128(),vpermf128()).
40645 MVT VT = V.getSimpleValueType(); in canonicalizeLaneShuffleWithRepeatedOps() local
40663 return DAG.getBitcast(VT, Res); in canonicalizeLaneShuffleWithRepeatedOps()
40666 // TODO: Handle v4f64 permutes with different low/high lane masks. in canonicalizeLaneShuffleWithRepeatedOps()
40683 return DAG.getBitcast(VT, Res); in canonicalizeLaneShuffleWithRepeatedOps()
40696 MVT VT = N.getSimpleValueType(); in combineTargetShuffle() local
40701 if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG)) in combineTargetShuffle()
40708 // Turn a 128-bit MOVDDUP of a full vector load into movddup+vzload. in combineTargetShuffle()
40709 if (VT == MVT::v2f64 && Src.hasOneUse() && in combineTargetShuffle()
40730 // TODO - we really need a general SimplifyDemandedVectorElts mechanism. in combineTargetShuffle()
40732 VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) { in combineTargetShuffle()
40733 unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits(); in combineTargetShuffle()
40743 return DAG.getNode(X86ISD::VBROADCAST, DL, VT, in combineTargetShuffle()
40747 // broadcast(bitcast(src)) -> bitcast(broadcast(src)) in combineTargetShuffle()
40748 // 32-bit targets have to bitcast i64 to f64, so better to bitcast upward. in combineTargetShuffle()
40755 VT.getVectorNumElements()); in combineTargetShuffle()
40756 return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC)); in combineTargetShuffle()
40759 // vbroadcast(bitcast(vbroadcast(src))) -> bitcast(vbroadcast(src)) in combineTargetShuffle()
40760 // If we're re-broadcasting a smaller type then broadcast with that type and in combineTargetShuffle()
40766 (VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits()) == 0 && in combineTargetShuffle()
40767 (VT.getSizeInBits() % BCVT.getSizeInBits()) == 0) { in combineTargetShuffle()
40770 VT.getSizeInBits() / BCVT.getScalarSizeInBits()); in combineTargetShuffle()
40771 return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC)); in combineTargetShuffle()
40774 // Reduce broadcast source vector to lowest 128-bits. in combineTargetShuffle()
40776 return DAG.getNode(X86ISD::VBROADCAST, DL, VT, in combineTargetShuffle()
40779 // broadcast(scalar_to_vector(x)) -> broadcast(x). in combineTargetShuffle()
40782 return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0)); in combineTargetShuffle()
40784 // broadcast(extract_vector_elt(x, 0)) -> broadcast(x). in combineTargetShuffle()
40790 return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0)); in combineTargetShuffle()
40792 // Share broadcast with the longest vector and extract low subvector (free). in combineTargetShuffle()
40794 for (SDNode *User : Src->uses()) in combineTargetShuffle()
40795 if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST && in combineTargetShuffle()
40796 Src == User->getOperand(0) && in combineTargetShuffle()
40797 User->getValueSizeInBits(0).getFixedValue() > in combineTargetShuffle()
40798 VT.getFixedSizeInBits()) { in combineTargetShuffle()
40800 VT.getSizeInBits()); in combineTargetShuffle()
40803 // vbroadcast(scalarload X) -> vbroadcast_load X in combineTargetShuffle()
40805 if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) && in combineTargetShuffle()
40808 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in combineTargetShuffle()
40809 SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; in combineTargetShuffle()
40812 LN->getMemoryVT(), LN->getMemOperand()); in combineTargetShuffle()
40839 if (LN->isSimple()) { in combineTargetShuffle()
40840 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in combineTargetShuffle()
40841 SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; in combineTargetShuffle()
40844 LN->getPointerInfo(), LN->getOriginalAlign(), in combineTargetShuffle()
40845 LN->getMemOperand()->getFlags()); in combineTargetShuffle()
40857 if (LN->getMemoryVT().getSizeInBits() == 16) { in combineTargetShuffle()
40858 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in combineTargetShuffle()
40859 SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; in combineTargetShuffle()
40862 LN->getMemoryVT(), LN->getMemOperand()); in combineTargetShuffle()
40881 LN->isSimple()) { in combineTargetShuffle()
40883 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in combineTargetShuffle()
40885 LN->getBasePtr(), TypeSize::getFixed(Offset), DL); in combineTargetShuffle()
40886 SDValue Ops[] = { LN->getChain(), Ptr }; in combineTargetShuffle()
40889 LN->getPointerInfo().getWithOffset(Offset), in combineTargetShuffle()
40890 LN->getOriginalAlign(), in combineTargetShuffle()
40891 LN->getMemOperand()->getFlags()); in combineTargetShuffle()
40900 // vbroadcast(vzload X) -> vbroadcast_load X in combineTargetShuffle()
40903 if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) { in combineTargetShuffle()
40904 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in combineTargetShuffle()
40905 SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; in combineTargetShuffle()
40908 LN->getMemoryVT(), LN->getMemOperand()); in combineTargetShuffle()
40916 // vbroadcast(vector load X) -> vbroadcast_load in combineTargetShuffle()
40922 if (LN->isSimple()) { in combineTargetShuffle()
40923 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in combineTargetShuffle()
40924 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; in combineTargetShuffle()
40927 LN->getPointerInfo(), LN->getOriginalAlign(), in combineTargetShuffle()
40928 LN->getMemOperand()->getFlags()); in combineTargetShuffle()
40946 narrowLoadToVZLoad(LN, VT.getVectorElementType(), VT, DAG)) { in combineTargetShuffle()
40959 if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) { in combineTargetShuffle()
40960 SDVTList Tys = DAG.getVTList(VT, MVT::Other); in combineTargetShuffle()
40961 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; in combineTargetShuffle()
40964 LN->getMemoryVT(), LN->getMemOperand()); in combineTargetShuffle()
40982 MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); in combineTargetShuffle()
40985 return DAG.getBitcast(VT, Movl); in combineTargetShuffle()
40991 // vzext_movl (scalar_to_vector C) --> load [C,0...] in combineTargetShuffle()
40994 // Create a vector constant - scalar constant followed by zeros. in combineTargetShuffle()
40997 unsigned NumElts = VT.getVectorNumElements(); in combineTargetShuffle()
41000 ConstantVec[0] = const_cast<ConstantInt *>(C->getConstantIntValue()); in combineTargetShuffle()
41007 Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign(); in combineTargetShuffle()
41008 return DAG.getLoad(VT, DL, DAG.getEntryNode(), CP, MPI, Alignment, in combineTargetShuffle()
41016 // 128-bit scalar_to_vector. This reduces the number of isel patterns. in combineTargetShuffle()
41023 MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), in combineTargetShuffle()
41025 VT.getScalarSizeInBits()); in combineTargetShuffle()
41028 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, in combineTargetShuffle()
41029 getZeroVector(VT, Subtarget, DAG, DL), Movl, in combineTargetShuffle()
41039 unsigned EltBits = VT.getScalarSizeInBits(); in combineTargetShuffle()
41042 // blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) to narrower types. in combineTargetShuffle()
41048 unsigned Size = VT.getVectorNumElements(); in combineTargetShuffle()
41053 VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0), in combineTargetShuffle()
41061 // --> m3 = blend(m1,m2) in combineTargetShuffle()
41087 return DAG.getNode(X86ISD::BLENDI, DL, VT, in combineTargetShuffle()
41088 DAG.getBitcast(VT, NewLHS), in combineTargetShuffle()
41089 DAG.getBitcast(VT, NewRHS), N.getOperand(2)); in combineTargetShuffle()
41097 // Fold shufps(shuffle(x),shuffle(y)) -> shufps(x,y). in combineTargetShuffle()
41100 if (VT == MVT::v4f32) { in combineTargetShuffle()
41116 Ops[i] = DAG.getBitcast(VT, SubOps[0]); in combineTargetShuffle()
41125 return DAG.getNode(X86ISD::SHUFP, DL, VT, Ops); in combineTargetShuffle()
41131 // vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same number of elements. in combineTargetShuffle()
41135 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in combineTargetShuffle()
41141 return DAG.getBitcast(VT, Res); in combineTargetShuffle()
41146   // If we're permuting the upper 256-bit subvectors of a concatenation, then in combineTargetShuffle()
41148 if (VT.is512BitVector()) { in combineTargetShuffle()
41149 // 512-bit mask uses 4 x i2 indices - if the msb is always set then only the in combineTargetShuffle()
41151 SDValue LHS = N->getOperand(0); in combineTargetShuffle()
41152 SDValue RHS = N->getOperand(1); in combineTargetShuffle()
41153 uint64_t Mask = N->getConstantOperandVal(2); in combineTargetShuffle()
41167 return DAG.getNode(X86ISD::SHUF128, DL, VT, NewLHS ? NewLHS : LHS, in combineTargetShuffle()
41174 // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)). in combineTargetShuffle()
41175 SDValue LHS = N->getOperand(0); in combineTargetShuffle()
41176 SDValue RHS = N->getOperand(1); in combineTargetShuffle()
41181 return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT, in combineTargetShuffle()
41184 N->getOperand(2))); in combineTargetShuffle()
41188 // Fold vperm2x128(op(),op()) -> op(vperm2x128(),vperm2x128()). in combineTargetShuffle()
41193 // vperm2x128(concat(X,Y),concat(Z,W)) --> concat X,Y etc. in combineTargetShuffle()
41212 MVT SubVT = VT.getHalfNumVectorElementsVT(); in combineTargetShuffle()
41215 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi); in combineTargetShuffle()
41225 if (N0->hasOneUse()) { in combineTargetShuffle()
41237 if (InnerVT.getScalarSizeInBits() <= VT.getScalarSizeInBits()) { in combineTargetShuffle()
41238 SDValue Res = DAG.getNode(Opcode, DL, VT, in combineTargetShuffle()
41239 DAG.getBitcast(VT, V.getOperand(0)), N1); in combineTargetShuffle()
41242 return DAG.getBitcast(VT, Res); in combineTargetShuffle()
41260 // MOVS*(N0, OP(N0, N1)) --> MOVS*(N0, SCALAR_TO_VECTOR(OP(N0[0], N1[0]))) in combineTargetShuffle()
41271 MVT SVT = VT.getVectorElementType(); in combineTargetShuffle()
41276 SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl); in combineTargetShuffle()
41277 return DAG.getNode(Opcode, DL, VT, N0, SclVec); in combineTargetShuffle()
41284 assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32"); in combineTargetShuffle()
41294 return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1, in combineTargetShuffle()
41299 return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), in combineTargetShuffle()
41309 // Zero/UNDEF insertion - zero out element and remove dependency. in combineTargetShuffle()
41311 return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), in combineTargetShuffle()
41319 return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, in combineTargetShuffle()
41366 return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, in combineTargetShuffle()
41375 if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) { in combineTargetShuffle()
41376 SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(), in combineTargetShuffle()
41377 MemIntr->getBasePtr(), in combineTargetShuffle()
41378 MemIntr->getMemOperand()); in combineTargetShuffle()
41379 SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, in combineTargetShuffle()
41380 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, in combineTargetShuffle()
41405 VT.getScalarType(), NVT.getSizeInBits() / VT.getScalarSizeInBits()); in combineTargetShuffle()
41410 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Perm, in combineTargetShuffle()
41420 // Nuke no-op shuffles that show up after combining. in combineTargetShuffle()
41431 assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!"); in combineTargetShuffle()
41435 // dwords as otherwise it would have been removed as a no-op. in combineTargetShuffle()
41441 MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); in combineTargetShuffle()
41445 return DAG.getBitcast(VT, V); in combineTargetShuffle()
41450 // only works when we have a PSHUFD followed by two half-shuffles. in combineTargetShuffle()
41474 V = DAG.getBitcast(VT, D.getOperand(0)); in combineTargetShuffle()
41477 DL, VT, V, V); in combineTargetShuffle()
41499 int ParitySrc[2] = {-1, -1}; in isAddSubOrSubAddMask()
41538 EVT VT = N->getValueType(0); in isAddSubOrSubAdd() local
41540 if (!Subtarget.hasSSE3() || !TLI.isTypeLegal(VT) || in isAddSubOrSubAdd()
41541 !VT.getSimpleVT().isFloatingPoint()) in isAddSubOrSubAdd()
41544 // We only handle target-independent shuffles. in isAddSubOrSubAdd()
41547 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) in isAddSubOrSubAdd()
41550 SDValue V1 = N->getOperand(0); in isAddSubOrSubAdd()
41551 SDValue V2 = N->getOperand(1); in isAddSubOrSubAdd()
41560 if (!V1->hasOneUse() || !V2->hasOneUse()) in isAddSubOrSubAdd()
41567 LHS = V1->getOperand(0); RHS = V1->getOperand(1); in isAddSubOrSubAdd()
41568 if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) && in isAddSubOrSubAdd()
41569 (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS)) in isAddSubOrSubAdd()
41573 LHS = V2->getOperand(0); RHS = V2->getOperand(1); in isAddSubOrSubAdd()
41574 if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) && in isAddSubOrSubAdd()
41575 (V1->getOperand(0) != RHS || V1->getOperand(1) != LHS)) in isAddSubOrSubAdd()
41579 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask(); in isAddSubOrSubAdd()
41585 IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD in isAddSubOrSubAdd()
41586 : V2->getOpcode() == ISD::FADD; in isAddSubOrSubAdd()
41597 // We only handle target-independent shuffles. in combineShuffleToFMAddSub()
41600 if (N->getOpcode() != ISD::VECTOR_SHUFFLE) in combineShuffleToFMAddSub()
41603 MVT VT = N->getSimpleValueType(0); in combineShuffleToFMAddSub() local
41605 if (!Subtarget.hasAnyFMA() || !TLI.isTypeLegal(VT)) in combineShuffleToFMAddSub()
41609 SDValue Op0 = N->getOperand(0); in combineShuffleToFMAddSub()
41610 SDValue Op1 = N->getOperand(1); in combineShuffleToFMAddSub()
41622 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask(); in combineShuffleToFMAddSub()
41630 return DAG.getNode(Opcode, DL, VT, FMAdd.getOperand(0), FMAdd.getOperand(1), in combineShuffleToFMAddSub()
41634 /// Try to combine a shuffle into a target-specific add-sub or
41635 /// mul-add-sub node.
41647 MVT VT = N->getSimpleValueType(0); in combineShuffleToAddSubOrFMAddSub() local
41653 return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2); in combineShuffleToAddSubOrFMAddSub()
41659 // Do not generate X86ISD::ADDSUB node for 512-bit types even though in combineShuffleToAddSubOrFMAddSub()
41661 // X86 targets with 512-bit ADDSUB instructions! in combineShuffleToAddSubOrFMAddSub()
41662 if (VT.is512BitVector()) in combineShuffleToAddSubOrFMAddSub()
41668 if (VT.getVectorElementType() == MVT::f16) in combineShuffleToAddSubOrFMAddSub()
41671 return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); in combineShuffleToAddSubOrFMAddSub()
41676 // if we can express this as a single-source shuffle, that's preferable.
41683 EVT VT = N->getValueType(0); in combineShuffleOfConcatUndef() local
41685 // We only care about shuffles of 128/256-bit vectors of 32/64-bit values. in combineShuffleOfConcatUndef()
41686 if (!VT.is128BitVector() && !VT.is256BitVector()) in combineShuffleOfConcatUndef()
41689 if (VT.getVectorElementType() != MVT::i32 && in combineShuffleOfConcatUndef()
41690 VT.getVectorElementType() != MVT::i64 && in combineShuffleOfConcatUndef()
41691 VT.getVectorElementType() != MVT::f32 && in combineShuffleOfConcatUndef()
41692 VT.getVectorElementType() != MVT::f64) in combineShuffleOfConcatUndef()
41695 SDValue N0 = N->getOperand(0); in combineShuffleOfConcatUndef()
41696 SDValue N1 = N->getOperand(1); in combineShuffleOfConcatUndef()
41708 int NumElts = VT.getVectorNumElements(); in combineShuffleOfConcatUndef()
41711 for (int Elt : SVOp->getMask()) in combineShuffleOfConcatUndef()
41712 Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2)); in combineShuffleOfConcatUndef()
41714 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0), in combineShuffleOfConcatUndef()
41716 return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask); in combineShuffleOfConcatUndef()
41720 /// low half of each source vector and does not set any high half elements in
41723 EVT VT = Shuf->getValueType(0); in narrowShuffle() local
41724 if (!DAG.getTargetLoweringInfo().isTypeLegal(Shuf->getValueType(0))) in narrowShuffle()
41726 if (!VT.is256BitVector() && !VT.is512BitVector()) in narrowShuffle()
41730 ArrayRef<int> Mask = Shuf->getMask(); in narrowShuffle()
41734 // Check if the shuffle mask accesses only the low half of each input vector in narrowShuffle()
41735 // (half-index output is 0 or 2). in narrowShuffle()
41742 // Create a half-width shuffle to replace the unnecessarily wide shuffle. in narrowShuffle()
41744 // subregister (zmm<->ymm or ymm<->xmm) ops. That leaves us with a shuffle in narrowShuffle()
41747 return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0), in narrowShuffle()
41748 Shuf->getOperand(1), HalfMask, HalfIdx1, in narrowShuffle()
41762 EVT VT = N->getValueType(0); in combineShuffle() local
41764 if (TLI.isTypeLegal(VT) && !isSoftF16(VT, Subtarget)) in combineShuffle()
41771 VT, SDValue(N, 0), dl, DAG, Subtarget, /*IsAfterLegalize*/ true)) in combineShuffle()
41783 if (isTargetShuffle(N->getOpcode())) { in combineShuffle()
41789 // instructions into higher-order shuffles. We do this after combining in combineShuffle()
41797 // TODO - merge this into combineX86ShufflesRecursively. in combineShuffle()
41798 APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); in combineShuffle()
41802 // Canonicalize SHUFFLE(UNARYOP(X)) -> UNARYOP(SHUFFLE(X)). in combineShuffle()
41803 // Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)). in combineShuffle()
41838 if (!Load || !Load->getBasePtr().hasOneUse()) in SimplifyDemandedVectorEltsForTargetShuffle()
41845 Type *CTy = C->getType(); in SimplifyDemandedVectorEltsForTargetShuffle()
41846 if (!CTy->isVectorTy() || in SimplifyDemandedVectorEltsForTargetShuffle()
41847 CTy->getPrimitiveSizeInBits() != Mask.getValueSizeInBits()) in SimplifyDemandedVectorEltsForTargetShuffle()
41850 // Handle scaling for i64 elements on 32-bit targets. in SimplifyDemandedVectorEltsForTargetShuffle()
41851 unsigned NumCstElts = cast<FixedVectorType>(CTy)->getNumElements(); in SimplifyDemandedVectorEltsForTargetShuffle()
41860 Constant *Elt = C->getAggregateElement(i); in SimplifyDemandedVectorEltsForTargetShuffle()
41862 ConstVecOps.push_back(UndefValue::get(Elt->getType())); in SimplifyDemandedVectorEltsForTargetShuffle()
41878 Load->getAlign()); in SimplifyDemandedVectorEltsForTargetShuffle()
41887 EVT VT = Op.getValueType(); in SimplifyDemandedVectorEltsForTargetNode() local
41938 assert(VT.getScalarType() == MVT::i64 && in SimplifyDemandedVectorEltsForTargetNode()
41955 Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS)); in SimplifyDemandedVectorEltsForTargetNode()
41963 // We only need the bottom 64-bits of the (128-bit) shift amount. in SimplifyDemandedVectorEltsForTargetNode()
41969 // only the bottom 64-bits are only ever used. in SimplifyDemandedVectorEltsForTargetNode()
41970 bool AssumeSingleUse = llvm::all_of(Amt->uses(), [&Amt](SDNode *Use) { in SimplifyDemandedVectorEltsForTargetNode()
41971 unsigned UseOpc = Use->getOpcode(); in SimplifyDemandedVectorEltsForTargetNode()
41974 Use->getOperand(0) != Amt; in SimplifyDemandedVectorEltsForTargetNode()
41994 // Fold shift(0,x) -> 0 in SimplifyDemandedVectorEltsForTargetNode()
41997 Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op))); in SimplifyDemandedVectorEltsForTargetNode()
42004 Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1))); in SimplifyDemandedVectorEltsForTargetNode()
42020 // Fold shift(0,x) -> 0 in SimplifyDemandedVectorEltsForTargetNode()
42023 Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op))); in SimplifyDemandedVectorEltsForTargetNode()
42049 assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount"); in SimplifyDemandedVectorEltsForTargetNode()
42050 unsigned ShiftAmt = Amt->getZExtValue(); in SimplifyDemandedVectorEltsForTargetNode()
42062 int Diff = ShiftAmt - C1; in SimplifyDemandedVectorEltsForTargetNode()
42064 Diff = -Diff; in SimplifyDemandedVectorEltsForTargetNode()
42071 Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA)); in SimplifyDemandedVectorEltsForTargetNode()
42088 assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount"); in SimplifyDemandedVectorEltsForTargetNode()
42089 unsigned ShiftAmt = Amt->getZExtValue(); in SimplifyDemandedVectorEltsForTargetNode()
42101 int Diff = ShiftAmt - C1; in SimplifyDemandedVectorEltsForTargetNode()
42103 Diff = -Diff; in SimplifyDemandedVectorEltsForTargetNode()
42110 Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA)); in SimplifyDemandedVectorEltsForTargetNode()
42132 int NumElts = VT.getVectorNumElements(); in SimplifyDemandedVectorEltsForTargetNode()
42133 int EltSizeInBits = VT.getScalarSizeInBits(); in SimplifyDemandedVectorEltsForTargetNode()
42144 // We can't assume an undef src element gives an undef dst - the in SimplifyDemandedVectorEltsForTargetNode()
42180 Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS)); in SimplifyDemandedVectorEltsForTargetNode()
42204 getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); in SimplifyDemandedVectorEltsForTargetNode()
42215 // TODO - pass on known zero/undef. in SimplifyDemandedVectorEltsForTargetNode()
42218 // TODO - we should do this for all target/faux shuffles ops. in SimplifyDemandedVectorEltsForTargetNode()
42228 TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1)); in SimplifyDemandedVectorEltsForTargetNode()
42241 getHorizDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); in SimplifyDemandedVectorEltsForTargetNode()
42252 // TODO - pass on known zero/undef. in SimplifyDemandedVectorEltsForTargetNode()
42265 TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1)); in SimplifyDemandedVectorEltsForTargetNode()
42288 VT.getSimpleVT(), Op.getOperand(0), Op.getOperand(1), BlendMask, in SimplifyDemandedVectorEltsForTargetNode()
42326 MVT SVT = VT.getSimpleVT().getVectorElementType(); in SimplifyDemandedVectorEltsForTargetNode()
42330 SDValue Elt = TLO.DAG.getLoad(SVT, DL, Mem->getChain(), Mem->getBasePtr(), in SimplifyDemandedVectorEltsForTargetNode()
42331 Mem->getMemOperand()); in SimplifyDemandedVectorEltsForTargetNode()
42332 SDValue Vec = TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Elt); in SimplifyDemandedVectorEltsForTargetNode()
42333 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Vec)); in SimplifyDemandedVectorEltsForTargetNode()
42344 if (Src.getValueType() != VT) in SimplifyDemandedVectorEltsForTargetNode()
42345 Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG, in SimplifyDemandedVectorEltsForTargetNode()
42355 // TODO - we should do this for all target/faux shuffles ops. in SimplifyDemandedVectorEltsForTargetNode()
42358 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); in SimplifyDemandedVectorEltsForTargetNode()
42381 // For 256/512-bit ops that are 128/256-bit ops glued together, if we do not in SimplifyDemandedVectorEltsForTargetNode()
42382 // demand any of the high elements, then narrow the op to 128/256-bits: e.g. in SimplifyDemandedVectorEltsForTargetNode()
42383 // (op ymm0, ymm1) --> insert undef, (op xmm0, xmm1), 0 in SimplifyDemandedVectorEltsForTargetNode()
42384 if ((VT.is256BitVector() || VT.is512BitVector()) && in SimplifyDemandedVectorEltsForTargetNode()
42386 unsigned SizeInBits = VT.getSizeInBits(); in SimplifyDemandedVectorEltsForTargetNode()
42389 // See if 512-bit ops only use the bottom 128-bits. in SimplifyDemandedVectorEltsForTargetNode()
42390 if (VT.is512BitVector() && DemandedElts.lshr(NumElts / 4) == 0) in SimplifyDemandedVectorEltsForTargetNode()
42400 EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), in SimplifyDemandedVectorEltsForTargetNode()
42401 ExtSizeInBits / VT.getScalarSizeInBits()); in SimplifyDemandedVectorEltsForTargetNode()
42403 return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0, in SimplifyDemandedVectorEltsForTargetNode()
42409 EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), in SimplifyDemandedVectorEltsForTargetNode()
42410 ExtSizeInBits / VT.getScalarSizeInBits()); in SimplifyDemandedVectorEltsForTargetNode()
42412 SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)}; in SimplifyDemandedVectorEltsForTargetNode()
42414 X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MemIntr->getMemoryVT(), in SimplifyDemandedVectorEltsForTargetNode()
42415 MemIntr->getMemOperand()); in SimplifyDemandedVectorEltsForTargetNode()
42418 return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0, in SimplifyDemandedVectorEltsForTargetNode()
42424 EVT MemVT = MemIntr->getMemoryVT(); in SimplifyDemandedVectorEltsForTargetNode()
42428 TLO.DAG.getLoad(MemVT, DL, MemIntr->getChain(), in SimplifyDemandedVectorEltsForTargetNode()
42429 MemIntr->getBasePtr(), MemIntr->getMemOperand()); in SimplifyDemandedVectorEltsForTargetNode()
42432 return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Ld, 0, in SimplifyDemandedVectorEltsForTargetNode()
42436 EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), in SimplifyDemandedVectorEltsForTargetNode()
42437 ExtSizeInBits / VT.getScalarSizeInBits()); in SimplifyDemandedVectorEltsForTargetNode()
42441 insertSubVector(TLO.DAG.getUNDEF(VT), BcstLd, 0, in SimplifyDemandedVectorEltsForTargetNode()
42462 SDValue UndefVec = TLO.DAG.getUNDEF(VT); in SimplifyDemandedVectorEltsForTargetNode()
42470 if (VT == MVT::v4f64 || VT == MVT::v4i64) { in SimplifyDemandedVectorEltsForTargetNode()
42476 SDValue UndefVec = TLO.DAG.getUNDEF(VT); in SimplifyDemandedVectorEltsForTargetNode()
42489 Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, DL)); in SimplifyDemandedVectorEltsForTargetNode()
42494 SDValue UndefVec = TLO.DAG.getUNDEF(VT); in SimplifyDemandedVectorEltsForTargetNode()
42506 // (Non-Lane Crossing) Target Shuffles. in SimplifyDemandedVectorEltsForTargetNode()
42544 MVT ExtVT = VT.getSimpleVT(); in SimplifyDemandedVectorEltsForTargetNode()
42548 SDValue UndefVec = TLO.DAG.getUNDEF(VT); in SimplifyDemandedVectorEltsForTargetNode()
42572 llvm::any_of(OpInputs, [VT](SDValue V) { in SimplifyDemandedVectorEltsForTargetNode()
42573 return VT.getSizeInBits() != V.getValueSizeInBits() || in SimplifyDemandedVectorEltsForTargetNode()
42589 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); in SimplifyDemandedVectorEltsForTargetNode()
42594 Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op))); in SimplifyDemandedVectorEltsForTargetNode()
42598 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, OpInputs[Src])); in SimplifyDemandedVectorEltsForTargetNode()
42603 if (OpInputs[Src].getValueType() != VT) in SimplifyDemandedVectorEltsForTargetNode()
42610 int M = OpMask[i] - Lo; in SimplifyDemandedVectorEltsForTargetNode()
42615 // TODO - Propagate input undef/zero elts. in SimplifyDemandedVectorEltsForTargetNode()
42625 // can handle - so pretend its Depth == 0 again, and reduce the max depth in SimplifyDemandedVectorEltsForTargetNode()
42638 {Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth, in SimplifyDemandedVectorEltsForTargetNode()
42653 EVT VT = Op.getValueType(); in SimplifyDemandedBitsForTargetNode() local
42676 // Don't mask bits on 32-bit AVX512 targets which might lose a broadcast. in SimplifyDemandedBitsForTargetNode()
42695 // PMULUDQ(X,1) -> AND(X,(1<<32)-1) 'getZeroExtendInReg'. in SimplifyDemandedBitsForTargetNode()
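  // PMULUDQ multiplies only the low 32 bits of each 64-bit lane and
  // zero-extends the product, so multiplying by 1 is just a mask of each lane
  // with 0xFFFFFFFF.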
42700 SDValue Mask = TLO.DAG.getConstant(DemandedMask, DL, VT); in SimplifyDemandedBitsForTargetNode()
42701 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, DL, VT, LHS, Mask)); in SimplifyDemandedBitsForTargetNode()
42704 // Aggressively peek through ops to get at the demanded low bits. in SimplifyDemandedBitsForTargetNode()
42713 Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, DemandedLHS, DemandedRHS)); in SimplifyDemandedBitsForTargetNode()
42756 int Diff = ShAmt - Shift2Amt; in SimplifyDemandedBitsForTargetNode()
42762 NewOpc, SDLoc(Op), VT, Op0.getOperand(0), in SimplifyDemandedBitsForTargetNode()
42771 unsigned UpperDemandedBits = BitWidth - OriginalDemandedBits.countr_zero(); in SimplifyDemandedBitsForTargetNode()
42772 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits) in SimplifyDemandedBitsForTargetNode()
42782 // Low bits known zero. in SimplifyDemandedBitsForTargetNode()
42808 unsigned ShAmt = Op1->getAsZExtVal(); in SimplifyDemandedBitsForTargetNode()
42818 // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1 in SimplifyDemandedBitsForTargetNode()
42842 if (Known.Zero[BitWidth - ShAmt - 1] || in SimplifyDemandedBitsForTargetNode()
42845 Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1)); in SimplifyDemandedBitsForTargetNode()
42848 if (Known.One[BitWidth - ShAmt - 1]) in SimplifyDemandedBitsForTargetNode()
42869 return TLO.CombineTo(Op, TLO.DAG.getNode(X86ISD::BLENDV, SDLoc(Op), VT, in SimplifyDemandedBitsForTargetNode()
42881 if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) { in SimplifyDemandedBitsForTargetNode()
42882 unsigned Idx = CIdx->getZExtValue(); in SimplifyDemandedBitsForTargetNode()
42886         // bits from the implicit zext - simplify to zero. in SimplifyDemandedBitsForTargetNode()
42889 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); in SimplifyDemandedBitsForTargetNode()
42905 Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1))); in SimplifyDemandedBitsForTargetNode()
42919 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) { in SimplifyDemandedBitsForTargetNode()
42920 unsigned Idx = CIdx->getZExtValue(); in SimplifyDemandedBitsForTargetNode()
42946 // TODO - add known bits handling. in SimplifyDemandedBitsForTargetNode()
42949 getPackDemandedElts(VT, OriginalDemandedElts, DemandedLHS, DemandedRHS); in SimplifyDemandedBitsForTargetNode()
42960 // Attempt to avoid multi-use ops if we don't need anything from them. in SimplifyDemandedBitsForTargetNode()
42968 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, Op0, Op1)); in SimplifyDemandedBitsForTargetNode()
42971 // TODO - add general PACKSS/PACKUS SimplifyDemandedBits support. in SimplifyDemandedBitsForTargetNode()
42986 Src->hasOneUse()) { in SimplifyDemandedBitsForTargetNode()
42990 MVT NewVT = MVT::getVectorVT(NewSrcVT, VT.getVectorNumElements() * 2); in SimplifyDemandedBitsForTargetNode()
42993 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, NewBcst)); in SimplifyDemandedBitsForTargetNode()
42998 // icmp sgt(0, R) == ashr(R, BitWidth-1). in SimplifyDemandedBitsForTargetNode()
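  // Both forms produce an all-ones mask exactly when R is negative and zero
  // otherwise, e.g. for i8 R = -5: ashr(-5, 7) == 0xFF and (0 > -5) is true.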
43012 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); in SimplifyDemandedBitsForTargetNode()
43014 // See if we only demand bits from the lower 128-bit vector. in SimplifyDemandedBitsForTargetNode()
43018 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); in SimplifyDemandedBitsForTargetNode()
43029 Known.Zero.setHighBits(BitWidth - NumElts); in SimplifyDemandedBitsForTargetNode()
43038 if (KnownSrc.One[SrcBits - 1]) in SimplifyDemandedBitsForTargetNode()
43040 else if (KnownSrc.Zero[SrcBits - 1]) in SimplifyDemandedBitsForTargetNode()
43043     // Attempt to avoid multi-use ops if we don't need anything from them. in SimplifyDemandedBitsForTargetNode()
43046 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); in SimplifyDemandedBitsForTargetNode()
43060 bool AssumeSingleUse = (Op0 == Op1) && Op->isOnlyUserOf(Op0.getNode()); in SimplifyDemandedBitsForTargetNode()
43084 // Only bottom 16-bits of the control bits are required. in SimplifyDemandedBitsForTargetNode()
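  // The BEXTR control word encodes the start bit in bits[7:0] and the extract
  // length in bits[15:8]; higher control bits are ignored by the instruction.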
43087 uint64_t Val1 = Cst1->getZExtValue(); in SimplifyDemandedBitsForTargetNode()
43092 Op, TLO.DAG.getNode(X86ISD::BEXTR, DL, VT, Op0, in SimplifyDemandedBitsForTargetNode()
43093 TLO.DAG.getConstant(MaskedVal1, DL, VT))); in SimplifyDemandedBitsForTargetNode()
43096 unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0); in SimplifyDemandedBitsForTargetNode()
43097 unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8); in SimplifyDemandedBitsForTargetNode()
43124 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); in SimplifyDemandedBitsForTargetNode()
43134 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); in SimplifyDemandedBitsForTargetNode()
43152 // The result will have at least as many trailing zeros as the non-mask in SimplifyDemandedBitsForTargetNode()
43168 EVT VT = Op.getValueType(); in SimplifyMultipleUseDemandedBitsForTargetNode() local
43177 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) && in SimplifyMultipleUseDemandedBitsForTargetNode()
43178 !DemandedElts[CIdx->getZExtValue()]) in SimplifyMultipleUseDemandedBitsForTargetNode()
43189 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero(); in SimplifyMultipleUseDemandedBitsForTargetNode()
43190 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits) in SimplifyMultipleUseDemandedBitsForTargetNode()
43201 // icmp sgt(0, R) == ashr(R, BitWidth-1). in SimplifyMultipleUseDemandedBitsForTargetNode()
43246 llvm::all_of(ShuffleOps, [VT](SDValue V) { in SimplifyMultipleUseDemandedBitsForTargetNode()
43247 return VT.getSizeInBits() == V.getValueSizeInBits(); in SimplifyMultipleUseDemandedBitsForTargetNode()
43251 return DAG.getUNDEF(VT); in SimplifyMultipleUseDemandedBitsForTargetNode()
43253 return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op)); in SimplifyMultipleUseDemandedBitsForTargetNode()
43275 return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countr_zero()]); in SimplifyMultipleUseDemandedBitsForTargetNode()
43394 // clang-format off in getAltBitOpcode()
43399 // clang-format on in getAltBitOpcode()
43404 // Helper to adjust v4i32 MOVMSK expansion to work with SSE1-only targets.
43415 cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT) { in adjustBitcastSrcVectorSSE1()
43466 // ->
43470 static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, in combineBitcastvxi1() argument
43483 return DAG.getZExtOrTrunc(V, DL, VT); in combineBitcastvxi1()
43499 cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT && in combineBitcastvxi1()
43508 // With AVX512 vxi1 types are legal and we prefer using k-regs. in combineBitcastvxi1()
43522 EVT SubVT = VT.getIntegerVT( in combineBitcastvxi1()
43525 EVT IntVT = VT.getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); in combineBitcastvxi1()
43526 return DAG.getBitcast(VT, DAG.getNode(ISD::ANY_EXTEND, DL, IntVT, V)); in combineBitcastvxi1()
43532 // v8f64. So all legal 128-bit and 256-bit vectors are covered except for in combineBitcastvxi1()
43538 // avoid sign-extending to this type entirely. in combineBitcastvxi1()
43552 // sign-extend to a 256-bit operation to avoid truncation. in combineBitcastvxi1()
43562 // sign-extend to a 256-bit operation to match the compare. in combineBitcastvxi1()
43563 // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over in combineBitcastvxi1()
43564 // 256-bit because the shuffle is cheaper than sign extending the result of in combineBitcastvxi1()
43575 // it is not profitable to sign-extend to 256-bit because this will in combineBitcastvxi1()
43576 // require an extra cross-lane shuffle which is more expensive than in combineBitcastvxi1()
43577 // truncating the result of the compare to 128-bits. in combineBitcastvxi1()
43615 return DAG.getBitcast(VT, V); in combineBitcastvxi1()
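combineBitcastvxi1 above rewrites a bitcast of a vXi1 compare into a MOVMSK that packs the per-lane sign bits into a scalar mask. A small SSE2 sketch of the same idea at the intrinsics level (illustrative only; bytesEqualMask is an assumed helper name, not from the sources):

#include <cassert>
#include <cstdint>
#include <emmintrin.h>

static uint16_t bytesEqualMask(const uint8_t *A, const uint8_t *B) {
  __m128i VA = _mm_loadu_si128(reinterpret_cast<const __m128i *>(A));
  __m128i VB = _mm_loadu_si128(reinterpret_cast<const __m128i *>(B));
  __m128i Eq = _mm_cmpeq_epi8(VA, VB); // 0x00 / 0xFF per byte lane
  // MOVMSK packs the 16 lane sign bits into one scalar mask.
  return static_cast<uint16_t>(_mm_movemask_epi8(Eq));
}

int main() {
  uint8_t A[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  uint8_t B[16] = {1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15, 0};
  // Even-indexed lanes match, odd lanes differ -> mask 0b0101...01.
  assert(bytesEqualMask(A, B) == 0x5555u);
  return 0;
}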
43629 if (!In.isUndef() && (In->getAsZExtVal() & 0x1)) in combinevXi1ConstantToInteger()
43639 assert(N->getOpcode() == ISD::BITCAST && "Expected a bitcast"); in combineCastedMaskArithmetic()
43644 // Only do this if we have k-registers. in combineCastedMaskArithmetic()
43648 EVT DstVT = N->getValueType(0); in combineCastedMaskArithmetic()
43649 SDValue Op = N->getOperand(0); in combineCastedMaskArithmetic()
43695 unsigned NumElts = BV->getNumOperands(); in createMMXBuildVector()
43696 SDValue Splat = BV->getSplatValue(); in createMMXBuildVector()
43720 // Broadcast - use (PUNPCKL+)PSHUFW to broadcast single element. in createMMXBuildVector()
43728 // Unpack v8i8 to splat i8 elements to lowest 16-bits. in createMMXBuildVector()
43736 // Use PSHUFW to repeat 16-bit elements. in createMMXBuildVector()
43747 Ops.push_back(CreateMMXElement(BV->getOperand(i))); in createMMXBuildVector()
43772 static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL, in combineBitcastToBoolVector() argument
43783 // Bitcast from a vector/float/double, we can cheaply bitcast to VT. in combineBitcastToBoolVector()
43787 return DAG.getBitcast(VT, Src); in combineBitcastToBoolVector()
43792 if (C->isZero()) in combineBitcastToBoolVector()
43793 return DAG.getConstant(0, DL, VT); in combineBitcastToBoolVector()
43794 if (C->isAllOnes()) in combineBitcastToBoolVector()
43795 return DAG.getAllOnesConstant(DL, VT); in combineBitcastToBoolVector()
43806 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N0, in combineBitcastToBoolVector()
43819 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, in combineBitcastToBoolVector()
43820 Opc == ISD::ANY_EXTEND ? DAG.getUNDEF(VT) in combineBitcastToBoolVector()
43821 : DAG.getConstant(0, DL, VT), in combineBitcastToBoolVector()
43829 if (SDValue N0 = combineBitcastToBoolVector(VT, V.getOperand(0), DL, DAG, in combineBitcastToBoolVector()
43831 if (SDValue N1 = combineBitcastToBoolVector(VT, V.getOperand(1), DL, DAG, in combineBitcastToBoolVector()
43833 return DAG.getNode(Opc, DL, VT, N0, N1); in combineBitcastToBoolVector()
43839 if ((VT == MVT::v8i1 && !Subtarget.hasDQI()) || in combineBitcastToBoolVector()
43840 ((VT == MVT::v32i1 || VT == MVT::v64i1) && !Subtarget.hasBWI())) in combineBitcastToBoolVector()
43844 if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget, in combineBitcastToBoolVector()
43847 X86ISD::KSHIFTL, DL, VT, N0, in combineBitcastToBoolVector()
43848 DAG.getTargetConstant(Amt->getZExtValue(), DL, MVT::i8)); in combineBitcastToBoolVector()
43855 if (SDNode *Alt = DAG.getNodeIfExists(ISD::BITCAST, DAG.getVTList(VT), {V})) in combineBitcastToBoolVector()
43864 SDValue N0 = N->getOperand(0); in combineBitcast()
43865 EVT VT = N->getValueType(0); in combineBitcast() local
43871 // -> in combineBitcast()
43877 if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget)) in combineBitcast()
43882 if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && in combineBitcast()
43886 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0, in combineBitcast()
43892 if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && in combineBitcast()
43902 SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1); in combineBitcast()
43906 SmallVector<SDValue, 4> Ops(N0->op_begin(), N0->op_end()); in combineBitcast()
43910 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); in combineBitcast()
43919 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); in combineBitcast()
43924 if (VT.isVector() && VT.getScalarType() == MVT::i1 && in combineBitcast()
43925 SrcVT.isScalarInteger() && TLI.isTypeLegal(VT)) { in combineBitcast()
43927 combineBitcastToBoolVector(VT, N0, SDLoc(N), DAG, Subtarget)) in combineBitcast()
43937 if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.hasAVX512() && in combineBitcast()
43941 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, in combineBitcast()
43948 VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) { in combineBitcast()
43951 unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits(); in combineBitcast()
43954 MVT MemVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(MemSize) in combineBitcast()
43956 MVT LoadVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(SrcVTSize) in combineBitcast()
43961 SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() }; in combineBitcast()
43964 MemVT, BCast->getMemOperand()); in combineBitcast()
43966 return DAG.getBitcast(VT, ResNode); in combineBitcast()
43972 // avoiding store-load conversions. in combineBitcast()
43973 if (VT == MVT::x86mmx) { in combineBitcast()
43981 // Handle zero-extension of i32 with MOVD. in combineBitcast()
43983 return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT, in combineBitcast()
43986 // TODO - investigate supporting sext 32-bit immediates on x86_64. in combineBitcast()
43988 return DAG.getBitcast(VT, DAG.getConstantFP(F64, DL, MVT::f64)); in combineBitcast()
43991 // Detect bitcasts to x86mmx low word. in combineBitcast()
44006 return DAG.getNode(X86ISD::MMX_MOVW2D, dl, VT, N00); in combineBitcast()
44010 // Detect bitcasts of 64-bit build vectors and convert to a in combineBitcast()
44024 return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT, in combineBitcast()
44033 return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT, in combineBitcast()
44040 if (Subtarget.hasAVX512() && VT.isScalarInteger() && in combineBitcast()
44046 if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && VT.isVector() && in combineBitcast()
44047 VT.getVectorElementType() == MVT::i1) { in combineBitcast()
44049 if (C->isAllOnes()) in combineBitcast()
44050 return DAG.getConstant(1, SDLoc(N0), VT); in combineBitcast()
44051 if (C->isZero()) in combineBitcast()
44052 return DAG.getConstant(0, SDLoc(N0), VT); in combineBitcast()
44057 // Turn it into a sign bit compare that produces a k-register. This avoids in combineBitcast()
44060 VT.isVector() && VT.getVectorElementType() == MVT::i1 && in combineBitcast()
44061 isPowerOf2_32(VT.getVectorNumElements())) { in combineBitcast()
44062 unsigned NumElts = VT.getVectorNumElements(); in combineBitcast()
44084 if (EVT(CmpVT) == VT) in combineBitcast()
44087 // Pad with zeroes up to original VT to replace the zeroes that were in combineBitcast()
44092 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Ops); in combineBitcast()
44098 // remove GPR<->K-register crossings. in combineBitcast()
44103 // floating-point operand into a floating-point logic operation. This may in combineBitcast()
44109 // clang-format off in combineBitcast()
44114 // clang-format on in combineBitcast()
44118 if (!((Subtarget.hasSSE1() && VT == MVT::f32) || in combineBitcast()
44119 (Subtarget.hasSSE2() && VT == MVT::f64) || in combineBitcast()
44120 (Subtarget.hasFP16() && VT == MVT::f16) || in combineBitcast()
44121 (Subtarget.hasSSE2() && VT.isInteger() && VT.isVector() && in combineBitcast()
44122 TLI.isTypeLegal(VT)))) in combineBitcast()
44129 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y)) in combineBitcast()
44132 LogicOp0.getOperand(0).getValueType() == VT && in combineBitcast()
44134 SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1); in combineBitcast()
44135 unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode(); in combineBitcast()
44136 return DAG.getNode(Opcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1); in combineBitcast()
44138 // bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y) in combineBitcast()
44141 LogicOp1.getOperand(0).getValueType() == VT && in combineBitcast()
44143 SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0); in combineBitcast()
44144 unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode(); in combineBitcast()
44145 return DAG.getNode(Opcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0); in combineBitcast()
44161 auto IsFreeTruncation = [](SDValue &Op) -> bool { in detectExtMul()
44168 return (BV && BV->isConstant()); in detectExtMul()
44186 SDValue AbsOp1 = Abs->getOperand(0); in detectZextAbsDiff()
44193 // Check if the operands of the sub are zero-extended from vectors of i8. in detectZextAbsDiff()
44223 // "Zero-extend" the i8 vectors. This is not a per-element zext, rather we in createVPDPBUSD()
44237 MVT VT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32); in createVPDPBUSD() local
44238 return DAG.getNode(X86ISD::VPDPBUSD, DL, VT, Ops); in createVPDPBUSD()
44256 // "Zero-extend" the i8 vectors. This is not a per-element zext, rather we in createPSADBW()
44269 MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64); in createPSADBW() local
44270 return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops); in createPSADBW()
44285 EVT ExtractVT = Extract->getValueType(0); in combineMinMaxReduction()
44304 // First, reduce the source down to 128-bit, applying BinOp to lo/hi. in combineMinMaxReduction()
44331 // v16i8 UMIN will leave the upper element as zero, performing zero-extension in combineMinMaxReduction()
44359 EVT ExtractVT = Extract->getValueType(0); in combinePredicateReduction()
44387 // Special case for (pre-legalization) vXi1 reductions. in combinePredicateReduction()
44391 ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get(); in combinePredicateReduction()
44394 // For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y. in combinePredicateReduction()
44395 // For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y. in combinePredicateReduction()
44425 // FIXME: Better handling of k-registers or 512-bit vectors? in combinePredicateReduction()
44466 // parity -> (PARITY(MOVMSK X)) in combinePredicateReduction()
44474 // any_of -> MOVMSK != 0 in combinePredicateReduction()
44478 // all_of -> MOVMSK == ((1 << NumElts) - 1) in combinePredicateReduction()
44485 // negate to get the final 0/-1 mask value. in combinePredicateReduction()
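The all_of/any_of reductions above boil down to one MOVMSK plus a scalar compare: any_of is "mask != 0" and all_of is "mask == (1 << NumElts) - 1". A hedged SSE2 sketch of that shape (helper names are illustrative, not from the sources):

#include <cassert>
#include <emmintrin.h>

static bool anyZeroByte(__m128i V) {
  int Mask = _mm_movemask_epi8(_mm_cmpeq_epi8(V, _mm_setzero_si128()));
  return Mask != 0;      // any_of -> MOVMSK != 0
}

static bool allZeroBytes(__m128i V) {
  int Mask = _mm_movemask_epi8(_mm_cmpeq_epi8(V, _mm_setzero_si128()));
  return Mask == 0xFFFF; // all_of -> MOVMSK == (1 << 16) - 1
}

int main() {
  __m128i AllZero = _mm_setzero_si128();
  __m128i OneNonZero = _mm_set_epi32(0, 0, 0, 0x01000000); // one nonzero byte
  assert(allZeroBytes(AllZero) && anyZeroByte(AllZero));
  assert(!allZeroBytes(OneNonZero) && anyZeroByte(OneNonZero));
  return 0;
}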
44497 EVT ExtractVT = Extract->getValueType(0); in combineVPDPBUSDPattern()
44503 EVT VT = Extract->getOperand(0).getValueType(); in combineVPDPBUSDPattern() local
44504 if (!isPowerOf2_32(VT.getVectorNumElements())) in combineVPDPBUSDPattern()
44512 // done by vpdpbusd compute a signed 16-bit product that will be sign extended in combineVPDPBUSDPattern()
44537 unsigned Stages = Log2_32(VT.getVectorNumElements()); in combineVPDPBUSDPattern()
44543 for (unsigned i = Stages - StageBias; i > 0; --i) { in combineVPDPBUSDPattern()
44544 SmallVector<int, 16> Mask(DpElems, -1); in combineVPDPBUSDPattern()
44545 for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j) in combineVPDPBUSDPattern()
44560 Extract->getOperand(1)); in combineVPDPBUSDPattern()
44569 EVT ExtractVT = Extract->getValueType(0); in combineBasicSADPattern()
44575 EVT VT = Extract->getOperand(0).getValueType(); in combineBasicSADPattern() local
44576 if (!isPowerOf2_32(VT.getVectorNumElements())) in combineBasicSADPattern()
44597 // abs-diff pattern. in combineBasicSADPattern()
44601 // Check whether we have an abs-diff pattern feeding into the select. in combineBasicSADPattern()
44612 unsigned Stages = Log2_32(VT.getVectorNumElements()); in combineBasicSADPattern()
44617 for (unsigned i = Stages - 3; i > 0; --i) { in combineBasicSADPattern()
44618 SmallVector<int, 16> Mask(SadElems, -1); in combineBasicSADPattern()
44619 for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j) in combineBasicSADPattern()
44634 Extract->getOperand(1)); in combineBasicSADPattern()
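The SAD reduction above relies on PSADBW with an all-zero second operand, in which case the "sum of absolute differences" degenerates to a plain byte sum accumulated per 64-bit half. A minimal SSE2 sketch (sumBytes16 is an assumed name; this is not the DAG combine itself):

#include <cassert>
#include <cstdint>
#include <emmintrin.h>

static uint32_t sumBytes16(const uint8_t *P) {
  __m128i V = _mm_loadu_si128(reinterpret_cast<const __m128i *>(P));
  // PSADBW against zero: each 64-bit half receives the sum of its 8 bytes.
  __m128i Sad = _mm_sad_epu8(V, _mm_setzero_si128());
  // Fold the high partial sum onto the low one and read it out.
  __m128i Hi = _mm_unpackhi_epi64(Sad, Sad);
  return static_cast<uint32_t>(_mm_cvtsi128_si32(_mm_add_epi32(Sad, Hi)));
}

int main() {
  uint8_t Data[16];
  uint32_t Expected = 0;
  for (int I = 0; I != 16; ++I) {
    Data[I] = static_cast<uint8_t>(10 * I + 3);
    Expected += Data[I];
  }
  assert(sumBytes16(Data) == Expected);
  return 0;
}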
44638 // integer, that requires a potentially expensive XMM -> GPR transfer.
44643 // to a single-use of the loaded vector. For the reasons above, we
44649 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT && in combineExtractFromVectorLoad()
44653 EVT VT = N->getValueType(0); in combineExtractFromVectorLoad() local
44655 bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) { in combineExtractFromVectorLoad()
44656 return Use->getOpcode() == ISD::STORE || in combineExtractFromVectorLoad()
44657 Use->getOpcode() == ISD::INSERT_VECTOR_ELT || in combineExtractFromVectorLoad()
44658 Use->getOpcode() == ISD::SCALAR_TO_VECTOR; in combineExtractFromVectorLoad()
44662 if (LoadVec && ISD::isNormalLoad(LoadVec) && VT.isInteger() && in combineExtractFromVectorLoad()
44663 VecVT.getVectorElementType() == VT && in combineExtractFromVectorLoad()
44665 DCI.isAfterLegalizeDAG() && !LikelyUsedAsVector && LoadVec->isSimple()) { in combineExtractFromVectorLoad()
44667 DAG, LoadVec->getBasePtr(), VecVT, DAG.getVectorIdxConstant(Idx, dl)); in combineExtractFromVectorLoad()
44668 unsigned PtrOff = VT.getSizeInBits() * Idx / 8; in combineExtractFromVectorLoad()
44669 MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff); in combineExtractFromVectorLoad()
44670 Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff); in combineExtractFromVectorLoad()
44672 DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment, in combineExtractFromVectorLoad()
44673 LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo()); in combineExtractFromVectorLoad()
44690 SDValue Src = N->getOperand(0); in combineExtractWithShuffle()
44691 SDValue Idx = N->getOperand(1); in combineExtractWithShuffle()
44693 EVT VT = N->getValueType(0); in combineExtractWithShuffle() local
44703 const APInt &IdxC = N->getConstantOperandAPInt(1); in combineExtractWithShuffle()
44713 if (SrcOpVT.isScalarInteger() && VT.isInteger() && in combineExtractWithShuffle()
44717 // TODO support non-zero offsets. in combineExtractWithShuffle()
44720 SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT); in combineExtractWithShuffle()
44731 if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth && in combineExtractWithShuffle()
44732 VT.getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) { in combineExtractWithShuffle()
44733 SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(), in combineExtractWithShuffle()
44734 MemIntr->getBasePtr(), in combineExtractWithShuffle()
44735 MemIntr->getPointerInfo(), in combineExtractWithShuffle()
44736 MemIntr->getOriginalAlign(), in combineExtractWithShuffle()
44737 MemIntr->getMemOperand()->getFlags()); in combineExtractWithShuffle()
44745 if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() && in combineExtractWithShuffle()
44760 Scl = DAG.getZExtOrTrunc(Scl, dl, VT); in combineExtractWithShuffle()
44772 return DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(ExtractVT, Src), in combineExtractWithShuffle()
44776 // We can only legally extract other elements from 128-bit vectors and in in combineExtractWithShuffle()
44777 // certain circumstances, depending on SSE-level. in combineExtractWithShuffle()
44787 unsigned LaneOffset = (Idx & ~(NumEltsPerLane - 1)) * EltSizeInBits; in combineExtractWithShuffle()
44791 Idx &= (NumEltsPerLane - 1); in combineExtractWithShuffle()
44844 // If narrowing/widening failed, see if we can extract+zero-extend. in combineExtractWithShuffle()
44855 if (!isUndefOrZeroInRange(Mask, ScaledIdx + 1, Scale - 1)) in combineExtractWithShuffle()
44866 return DAG.getUNDEF(VT); in combineExtractWithShuffle()
44869 return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT) in combineExtractWithShuffle()
44870 : DAG.getConstant(0, dl, VT); in combineExtractWithShuffle()
44875 return DAG.getZExtOrTrunc(V, dl, VT); in combineExtractWithShuffle()
44877 if (N->getOpcode() == ISD::EXTRACT_VECTOR_ELT && ExtractVT == SrcVT) in combineExtractWithShuffle()
44889 assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract"); in scalarizeExtEltFP()
44890 SDValue Vec = ExtElt->getOperand(0); in scalarizeExtEltFP()
44891 SDValue Index = ExtElt->getOperand(1); in scalarizeExtEltFP()
44892 EVT VT = ExtElt->getValueType(0); in scalarizeExtEltFP() local
44896 // non-zero element because the shuffle+scalar op will be cheaper? in scalarizeExtEltFP()
44897 if (!Vec.hasOneUse() || !isNullConstant(Index) || VecVT.getScalarType() != VT) in scalarizeExtEltFP()
44901 // extract, the condition code), so deal with those as a special-case. in scalarizeExtEltFP()
44902 if (Vec.getOpcode() == ISD::SETCC && VT == MVT::i1) { in scalarizeExtEltFP()
44907 // extract (setcc X, Y, CC), 0 --> setcc (extract X, 0), (extract Y, 0), CC in scalarizeExtEltFP()
44913 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1, Vec.getOperand(2)); in scalarizeExtEltFP()
44916 if (!(VT == MVT::f16 && Subtarget.hasFP16()) && VT != MVT::f32 && in scalarizeExtEltFP()
44917 VT != MVT::f64) in scalarizeExtEltFP()
44930 // ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0) in scalarizeExtEltFP()
44935 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, in scalarizeExtEltFP()
44937 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, in scalarizeExtEltFP()
44939 return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2); in scalarizeExtEltFP()
44942 // TODO: This switch could include FNEG and the x86-specific FP logic ops in scalarizeExtEltFP()
44973 // extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ... in scalarizeExtEltFP()
44976 for (SDValue Op : Vec->ops()) in scalarizeExtEltFP()
44977 ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index)); in scalarizeExtEltFP()
44978 return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps); in scalarizeExtEltFP()
44990 assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller"); in combineArithReduction()
45002 SDValue Index = ExtElt->getOperand(1); in combineArithReduction()
45006 EVT VT = ExtElt->getValueType(0); in combineArithReduction() local
45008 if (VecVT.getScalarType() != VT) in combineArithReduction()
45015 // Extend v4i8/v8i8 vector to v16i8, with undef upper 64-bits. in combineArithReduction()
45033 // vXi8 mul reduction - promote to vXi16 mul reduction. in combineArithReduction()
45035 if (VT != MVT::i8 || NumElts < 4 || !isPowerOf2_32(NumElts)) in combineArithReduction()
45056 {4, 5, 6, 7, -1, -1, -1, -1})); in combineArithReduction()
45059 {2, 3, -1, -1, -1, -1, -1, -1})); in combineArithReduction()
45062 {1, -1, -1, -1, -1, -1, -1, -1})); in combineArithReduction()
45064 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); in combineArithReduction()
45067 // vXi8 add reduction - sub 128-bit vector. in combineArithReduction()
45073 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); in combineArithReduction()
45076 // Must be a >=128-bit vector with pow2 elements. in combineArithReduction()
45080 // vXi8 add reduction - sum lo/hi halves then use PSADBW. in combineArithReduction()
45081 if (VT == MVT::i8) { in combineArithReduction()
45092 {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}); in combineArithReduction()
45097 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); in combineArithReduction()
45101 // If the source vector values are 0-255, then we can use PSADBW to in combineArithReduction()
45121 MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64); in combineArithReduction() local
45123 return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops[0], Zero); in combineArithReduction()
45138 SDValue RdxHi = DAG.getVectorShuffle(MVT::v2i64, DL, Rdx, Rdx, {1, -1}); in combineArithReduction()
45142 VecVT = MVT::getVectorVT(VT.getSimpleVT(), 128 / VT.getSizeInBits()); in combineArithReduction()
45144 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); in combineArithReduction()
45153 // 256-bit horizontal instructions operate on 128-bit chunks rather than in combineArithReduction()
45156 // TODO: We could extend this to handle 512-bit or even longer vectors. in combineArithReduction()
45169 // extract (add (shuf X), X), 0 --> extract (hadd X, X), 0 in combineArithReduction()
45174 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); in combineArithReduction()
45180 /// scalars back, while for x64 we should use 64-bit extracts and shifts.
45187 SDValue InputVector = N->getOperand(0); in combineExtractVectorElt()
45188 SDValue EltIdx = N->getOperand(1); in combineExtractVectorElt()
45192 EVT VT = N->getValueType(0); in combineExtractVectorElt() local
45194 bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT; in combineExtractVectorElt()
45196 unsigned NumEltBits = VT.getScalarSizeInBits(); in combineExtractVectorElt()
45199 if (CIdx && CIdx->getAPIntValue().uge(NumSrcElts)) in combineExtractVectorElt()
45200 return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT); in combineExtractVectorElt()
45203 if (CIdx && VT.isInteger()) { in combineExtractVectorElt()
45210 uint64_t Idx = CIdx->getZExtValue(); in combineExtractVectorElt()
45212 return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT); in combineExtractVectorElt()
45213 return DAG.getConstant(EltBits[Idx].zext(NumEltBits), dl, VT); in combineExtractVectorElt()
45216 // Convert extract_element(bitcast(<X x i1>) -> bitcast(extract_subvector()). in combineExtractVectorElt()
45224 DAG.getIntPtrConstant(CIdx->getZExtValue() * NumEltBits, dl)); in combineExtractVectorElt()
45225 return DAG.getBitcast(VT, Sub); in combineExtractVectorElt()
45235 // PEXTR*(PINSR*(v, s, c), c) -> s (with implicit zext handling). in combineExtractVectorElt()
45243 return DAG.getZExtOrTrunc(Scl, dl, VT); in combineExtractVectorElt()
45246 // TODO - Remove this once we can handle the implicit zero-extension of in combineExtractVectorElt()
45253 if (VT == MVT::i64 && SrcVT == MVT::v1i64 && in combineExtractVectorElt()
45257 return DAG.getBitcast(VT, InputVector); in combineExtractVectorElt()
45260 if (VT == MVT::i32 && SrcVT == MVT::v2i32 && in combineExtractVectorElt()
45269 // pre-legalization, in combineExtractVectorElt()
45293 N, InputVector.getValueType(), InputVector, CIdx->getZExtValue(), in combineExtractVectorElt()
45312 if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && in combineExtractVectorElt()
45313 Use->getOperand(0).getResNo() == ResNo && in combineExtractVectorElt()
45314 Use->getValueType(0) == MVT::i1) { in combineExtractVectorElt()
45316 IsVar |= !isa<ConstantSDNode>(Use->getOperand(1)); in combineExtractVectorElt()
45322 if (all_of(InputVector->uses(), IsBoolExtract) && in combineExtractVectorElt()
45328 // extractelement vXi1 X, MaskIdx --> ((movmsk X) & Mask) == Mask in combineExtractVectorElt()
45330 SDValue MaskIdx = DAG.getZExtOrTrunc(Use->getOperand(1), dl, MVT::i8); in combineExtractVectorElt()
45342 // Attempt to fold extract(trunc(x),c) -> trunc(extract(x,c)). in combineExtractVectorElt()
45349 return DAG.getAnyExtOrTrunc(NewExt, dl, VT); in combineExtractVectorElt()
45359 unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N0, SelectionDAG &DAG, in combineToExtendBoolVectorInReg() argument
45369 EVT SVT = VT.getScalarType(); in combineToExtendBoolVectorInReg()
45373 // Input type must be extending a bool vector (bit-casted from a scalar in combineToExtendBoolVectorInReg()
45375 if (!VT.isVector()) in combineToExtendBoolVectorInReg()
45389 unsigned NumElts = VT.getVectorNumElements(); in combineToExtendBoolVectorInReg()
45395 // must split it down into sub-sections for broadcasting. For example: in combineToExtendBoolVectorInReg()
45396 // i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections. in combineToExtendBoolVectorInReg()
45397 // i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections. in combineToExtendBoolVectorInReg()
45402 Vec = DAG.getBitcast(VT, Vec); in combineToExtendBoolVectorInReg()
45406 Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask); in combineToExtendBoolVectorInReg()
45420 Vec = DAG.getBitcast(VT, Vec); in combineToExtendBoolVectorInReg()
45422 // For smaller scalar integers, we can simply any-extend it to the vector in combineToExtendBoolVectorInReg()
45426 Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl); in combineToExtendBoolVectorInReg()
45428 Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask); in combineToExtendBoolVectorInReg()
45438 SDValue BitMask = DAG.getBuildVector(VT, DL, Bits); in combineToExtendBoolVectorInReg()
45439 Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask); in combineToExtendBoolVectorInReg()
45444 Vec = DAG.getSExtOrTrunc(Vec, DL, VT); in combineToExtendBoolVectorInReg()
45447 // zero-extension. in combineToExtendBoolVectorInReg()
45450 return DAG.getNode(ISD::SRL, DL, VT, Vec, in combineToExtendBoolVectorInReg()
45451 DAG.getConstant(EltSizeInBits - 1, DL, VT)); in combineToExtendBoolVectorInReg()
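combineToExtendBoolVectorInReg above broadcasts the scalar, ANDs each lane with its own bit, and then rebuilds 0/-1 (or 0/1) lanes. A scalar sketch of the underlying bit-to-lane idea; boolBitsToLanes is an illustrative helper, and the compare here stands in for whichever sign/zero-extension step the combine actually emits:

#include <cassert>
#include <cstdint>

static void boolBitsToLanes(uint8_t Bits, int8_t Lanes[8]) {
  for (int I = 0; I != 8; ++I) {
    uint8_t LaneBit = static_cast<uint8_t>(1u << I); // per-lane bit mask
    // Broadcast + AND keeps only bit I in lane I; the compare then rebuilds
    // an all-ones (-1) or all-zeros lane, matching the sign-extend path.
    Lanes[I] = (Bits & LaneBit) == LaneBit ? int8_t(-1) : int8_t(0);
  }
}

int main() {
  int8_t Lanes[8];
  boolBitsToLanes(0xB2, Lanes); // bits 1, 4, 5, 7 are set
  assert(Lanes[0] == 0 && Lanes[1] == -1 && Lanes[4] == -1 && Lanes[7] == -1);
  return 0;
}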
45454 /// If a vector select has an operand that is -1 or 0, try to simplify the
45461 SDValue Cond = N->getOperand(0); in combineVSelectWithAllOnesOrZeros()
45462 SDValue LHS = N->getOperand(1); in combineVSelectWithAllOnesOrZeros()
45463 SDValue RHS = N->getOperand(2); in combineVSelectWithAllOnesOrZeros()
45464 EVT VT = LHS.getValueType(); in combineVSelectWithAllOnesOrZeros() local
45468 if (N->getOpcode() != ISD::VSELECT) in combineVSelectWithAllOnesOrZeros()
45482 if (VT.isFloatingPoint()) in combineVSelectWithAllOnesOrZeros()
45483 return DAG.getConstantFP(0.0, DL, VT); in combineVSelectWithAllOnesOrZeros()
45484 return DAG.getConstant(0, DL, VT); in combineVSelectWithAllOnesOrZeros()
45491 // vector floating-point selects. in combineVSelectWithAllOnesOrZeros()
45492 if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) in combineVSelectWithAllOnesOrZeros()
45502 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) == in combineVSelectWithAllOnesOrZeros()
45509 cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType()); in combineVSelectWithAllOnesOrZeros()
45522 // vselect Cond, 111..., 000... -> Cond in combineVSelectWithAllOnesOrZeros()
45524 return DAG.getBitcast(VT, Cond); in combineVSelectWithAllOnesOrZeros()
45529 // vselect Cond, 111..., X -> or Cond, X in combineVSelectWithAllOnesOrZeros()
45533 return DAG.getBitcast(VT, Or); in combineVSelectWithAllOnesOrZeros()
45536 // vselect Cond, X, 000... -> and Cond, X in combineVSelectWithAllOnesOrZeros()
45540 return DAG.getBitcast(VT, And); in combineVSelectWithAllOnesOrZeros()
45543 // vselect Cond, 000..., X -> andn Cond, X in combineVSelectWithAllOnesOrZeros()
45547 // The canonical form differs for i1 vectors - x86andnp is not used in combineVSelectWithAllOnesOrZeros()
45553 return DAG.getBitcast(VT, AndN); in combineVSelectWithAllOnesOrZeros()
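The folds listed above all follow from modelling a 0/-1 condition mask with plain bitwise ops: select becomes (X & M) | (Y & ~M). A scalar sketch that checks the four cases (sel is an illustrative helper, not the LLVM lowering):

#include <cassert>
#include <cstdint>

// vselect with a 0 / all-ones mask M, modelled on a single 32-bit lane.
static uint32_t sel(uint32_t M, uint32_t X, uint32_t Y) {
  return (X & M) | (Y & ~M);
}

int main() {
  const uint32_t X = 0x12345678u, Y = 0x9abcdef0u;
  for (uint32_t M : {0u, ~0u}) {
    assert(sel(M, ~0u, 0u) == M);      // vselect M, 111..., 000... -> M
    assert(sel(M, ~0u, Y) == (M | Y)); // vselect M, 111..., Y -> or M, Y
    assert(sel(M, X, 0u) == (M & X));  // vselect M, X, 000... -> and M, X
    assert(sel(M, 0u, Y) == (~M & Y)); // vselect M, 000..., Y -> andn M, Y
  }
  return 0;
}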
45560 /// and concatenate the result to eliminate a wide (256-bit) vector instruction:
45561 /// vselect Cond, (concat T0, T1), (concat F0, F1) -->
45565 unsigned Opcode = N->getOpcode(); in narrowVectorSelect()
45569 // TODO: Split 512-bit vectors too? in narrowVectorSelect()
45570 EVT VT = N->getValueType(0); in narrowVectorSelect() local
45571 if (!VT.is256BitVector()) in narrowVectorSelect()
45575 SDValue Cond = N->getOperand(0); in narrowVectorSelect()
45576 SDValue TVal = N->getOperand(1); in narrowVectorSelect()
45577 SDValue FVal = N->getOperand(2); in narrowVectorSelect()
45587 return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Cond, TVal, FVal}, makeBlend, in narrowVectorSelect()
45593 SDValue Cond = N->getOperand(0); in combineSelectOfTwoConstants()
45594 SDValue LHS = N->getOperand(1); in combineSelectOfTwoConstants()
45595 SDValue RHS = N->getOperand(2); in combineSelectOfTwoConstants()
45603 EVT VT = N->getValueType(0); in combineSelectOfTwoConstants() local
45604 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) in combineSelectOfTwoConstants()
45608 // this with a wider condition value (post-legalization it becomes an i8), in combineSelectOfTwoConstants()
45613 // A power-of-2 multiply is just a shift. LEA also cheaply handles multiply by in combineSelectOfTwoConstants()
45615 // TODO: For constants that overflow or do not differ by power-of-2 or small in combineSelectOfTwoConstants()
45617 const APInt &TrueVal = TrueC->getAPIntValue(); in combineSelectOfTwoConstants()
45618 const APInt &FalseVal = FalseC->getAPIntValue(); in combineSelectOfTwoConstants()
45620 // We have a more efficient lowering for "(X == 0) ? Y : -1" using SBB. in combineSelectOfTwoConstants()
45623 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); in combineSelectOfTwoConstants()
45635 ((VT == MVT::i32 || VT == MVT::i64) && in combineSelectOfTwoConstants()
45646 // select Cond, TC, FC --> (zext(Cond) * (TC - FC)) + FC in combineSelectOfTwoConstants()
45647 SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); in combineSelectOfTwoConstants()
45649 // Multiply condition by the difference if non-one. in combineSelectOfTwoConstants()
45651 R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT)); in combineSelectOfTwoConstants()
45653 // Add the base if non-zero. in combineSelectOfTwoConstants()
45654 if (!FalseC->isZero()) in combineSelectOfTwoConstants()
45655 R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0)); in combineSelectOfTwoConstants()
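The rewrite above replaces a two-constant select with arithmetic on the zero-extended condition: select Cond, TC, FC becomes zext(Cond) * (TC - FC) + FC. A scalar sanity check of the identity (the real combine additionally restricts the constants, e.g. to differences a shift or LEA can handle):

#include <cassert>
#include <cstdint>

static int64_t selectByMath(bool Cond, int64_t TC, int64_t FC) {
  // select Cond, TC, FC --> zext(Cond) * (TC - FC) + FC
  return static_cast<int64_t>(Cond) * (TC - FC) + FC;
}

int main() {
  assert(selectByMath(true, 40, 12) == 40);
  assert(selectByMath(false, 40, 12) == 12);
  assert(selectByMath(true, -5, 7) == -5);
  assert(selectByMath(false, -5, 7) == 7);
  return 0;
}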
45663 /// If this is a *dynamic* select (non-constant condition) and we can match
45672 SDValue Cond = N->getOperand(0); in combineVSelectToBLENDV()
45673 if ((N->getOpcode() != ISD::VSELECT && in combineVSelectToBLENDV()
45674 N->getOpcode() != X86ISD::BLENDV) || in combineVSelectToBLENDV()
45680 EVT VT = N->getValueType(0); in combineVSelectToBLENDV() local
45686 // cases where a *dynamic* blend will fail even though a constant-condition in combineVSelectToBLENDV()
45689 // Potentially, we should combine constant-condition vselect nodes in combineVSelectToBLENDV()
45690 // pre-legalization into shuffles and not mark as many types as custom in combineVSelectToBLENDV()
45692 if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) in combineVSelectToBLENDV()
45694 // FIXME: We don't support i16-element blends currently. We could and in combineVSelectToBLENDV()
45696 // rather than just the high bit and using an i8-element blend. in combineVSelectToBLENDV()
45697 if (VT.getVectorElementType() == MVT::i16) in combineVSelectToBLENDV()
45700 if (VT.is128BitVector() && !Subtarget.hasSSE41()) in combineVSelectToBLENDV()
45703 if (VT == MVT::v32i8 && !Subtarget.hasAVX2()) in combineVSelectToBLENDV()
45705 // There are no 512-bit blend instructions that use sign bits. in combineVSelectToBLENDV()
45706 if (VT.is512BitVector()) in combineVSelectToBLENDV()
45710 // and don't ever optimize vector selects that map to AVX512 mask-registers. in combineVSelectToBLENDV()
45715 for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end(); in combineVSelectToBLENDV()
45717 if ((UI->getOpcode() != ISD::VSELECT && in combineVSelectToBLENDV()
45718 UI->getOpcode() != X86ISD::BLENDV) || in combineVSelectToBLENDV()
45739 for (SDNode *U : Cond->uses()) { in combineVSelectToBLENDV()
45740 if (U->getOpcode() == X86ISD::BLENDV) in combineVSelectToBLENDV()
45743 SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0), in combineVSelectToBLENDV()
45744 Cond, U->getOperand(1), U->getOperand(2)); in combineVSelectToBLENDV()
45754 return DAG.getNode(X86ISD::BLENDV, DL, N->getValueType(0), V, in combineVSelectToBLENDV()
45755 N->getOperand(1), N->getOperand(2)); in combineVSelectToBLENDV()
45767 // (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
45770 // ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
45771 // ( M ? -X : X) == ((X ^ M ) + (M & 1))
45777 EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL, in combineLogicBlendIntoConditionalNegate() argument
45782 "Mask must be zero/all-bits"); in combineLogicBlendIntoConditionalNegate()
45790 return N->getOpcode() == ISD::SUB && N->getOperand(1) == V && in combineLogicBlendIntoConditionalNegate()
45791 ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()); in combineLogicBlendIntoConditionalNegate()
45808 // (vselect M, (sub (0, X), X) -> (sub (xor X, M), M) in combineLogicBlendIntoConditionalNegate()
45811 // above, -(vselect M, (sub 0, X), X), and therefore the replacement in combineLogicBlendIntoConditionalNegate()
45813 // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the in combineLogicBlendIntoConditionalNegate()
45819 return DAG.getBitcast(VT, Res); in combineLogicBlendIntoConditionalNegate()
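The identity documented above is just two's-complement negation driven by a 0/all-ones mask: (X ^ M) + (M & 1) flips the bits and adds one exactly when M is all-ones. A scalar check (condNegate is an illustrative helper):

#include <cassert>
#include <cstdint>

static int32_t condNegate(int32_t X, uint32_t M) {
  // (M ? -X : X) == ((X ^ M) + (M & 1)) when M is 0 or all-ones.
  return static_cast<int32_t>((static_cast<uint32_t>(X) ^ M) + (M & 1u));
}

int main() {
  for (int X : {0, 1, -1, 42, -1234567}) {
    assert(condNegate(X, 0u) == X);   // M == 0        -> X unchanged
    assert(condNegate(X, ~0u) == -X); // M == all-ones -> -X
  }
  return 0;
}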
45826 if (N->getOpcode() != ISD::VSELECT) in commuteSelect()
45829 SDValue Cond = N->getOperand(0); in commuteSelect()
45830 SDValue LHS = N->getOperand(1); in commuteSelect()
45831 SDValue RHS = N->getOperand(2); in commuteSelect()
45843 // (vselect M, L, R) -> (vselect ~M, R, L) in commuteSelect()
45845 ISD::getSetCCInverse(cast<CondCodeSDNode>(Cond.getOperand(2))->get(), in commuteSelect()
45852 /// Do target-specific dag combines on SELECT and VSELECT nodes.
45857 SDValue Cond = N->getOperand(0); in combineSelect()
45858 SDValue LHS = N->getOperand(1); in combineSelect()
45859 SDValue RHS = N->getOperand(2); in combineSelect()
45873 EVT VT = LHS.getValueType(); in combineSelect() local
45878 // Attempt to combine (select M, (sub 0, X), X) -> (sub (xor X, M), M). in combineSelect()
45879 // Limit this to cases of non-constant masks that createShuffleMaskFromVSELECT in combineSelect()
45882 CondVT.getScalarSizeInBits() == VT.getScalarSizeInBits() && in combineSelect()
45885 if (SDValue V = combineLogicBlendIntoConditionalNegate(VT, Cond, RHS, LHS, in combineSelect()
45891 (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::BLENDV)) { in combineSelect()
45894 N->getOpcode() == X86ISD::BLENDV)) in combineSelect()
45895 return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask); in combineSelect()
45898 // fold vselect(cond, pshufb(x), pshufb(y)) -> or (pshufb(x), pshufb(y)) in combineSelect()
45901 if (N->getOpcode() == ISD::VSELECT && CondVT.isVector() && in combineSelect()
45904 MVT SimpleVT = VT.getSimpleVT(); in combineSelect()
45910 int NumElts = VT.getVectorNumElements(); in combineSelect()
45922 LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0), in combineSelect()
45924 RHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, RHS.getOperand(0), in combineSelect()
45926 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); in combineSelect()
45933 // ignored in unsafe-math mode). in combineSelect()
45935 if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && in combineSelect()
45936 VT != MVT::f80 && VT != MVT::f128 && !isSoftF16(VT, Subtarget) && in combineSelect()
45937 (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && in combineSelect()
45939 (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) { in combineSelect()
45940 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); in combineSelect()
46012 // Check for x CC y ? y : x -- a min/max with reversed arms. in combineSelect()
46080 return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS); in combineSelect()
46087 // fold (select (setcc (and X, 1), 0, seteq), Y, Z) -> (select (and X, 1), Z, Y) in combineSelect()
46088 if (Subtarget.hasAVX512() && N->getOpcode() == ISD::SELECT && in combineSelect()
46089 Cond.getOpcode() == ISD::SETCC && (VT == MVT::f32 || VT == MVT::f64)) { in combineSelect()
46090 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); in combineSelect()
46098 return DAG.getNode(ISD::SELECT, DL, VT, AndNode, RHS, LHS); in combineSelect()
46111 (VT.getVectorElementType() == MVT::i8 || in combineSelect()
46112 VT.getVectorElementType() == MVT::i16)) { in combineSelect()
46113 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); in combineSelect()
46114 return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS); in combineSelect()
46117 // AVX512 - Extend select with zero to merge with target shuffle. in combineSelect()
46118 // select(mask, extract_subvector(shuffle(x)), zero) --> in combineSelect()
46120 // TODO - support non-target shuffles as well. in combineSelect()
46141 VT.getSizeInBits()); in combineSelect()
46143 VT.getSizeInBits()); in combineSelect()
46148 return extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits()); in combineSelect()
46155 if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC && in combineSelect()
46160 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); in combineSelect()
46163 // (x > 0) ? x : 0 -> (x >= 0) ? x : 0 in combineSelect()
46164 // (x < -1) ? x : -1 -> (x <= -1) ? x : -1 in combineSelect()
46167 // ((a - b) > 0) ? (a - b) : 0 -> ((a - b) >= 0) ? (a - b) : 0 in combineSelect()
46178 // (x s> 1) ? x : 1 -> (x s>= 1) ? x : 1 -> (x s> 0) ? x : 1 in combineSelect()
46179 // (x u> 1) ? x : 1 -> (x u>= 1) ? x : 1 -> (x != 0) ? x : 1 in combineSelect()
46186 return DAG.getSelect(DL, VT, Cond, LHS, RHS); in combineSelect()
46191 return DAG.getSelect(DL, VT, Cond, LHS, RHS); in combineSelect()
46198 // --> (select (cmpuge Cond0, Cond1), LHS, Y) in combineSelect()
46200 // --> (select (cmpsle Cond0, Cond1), LHS, Y) in combineSelect()
46206 cast<CondCodeSDNode>(InnerSetCC.getOperand(2))->get(); in combineSelect()
46212 // clang-format off in combineSelect()
46218 // clang-format on in combineSelect()
46222 return DAG.getSelect(DL, VT, Cond, LHS, RHS.getOperand(2)); in combineSelect()
46231 if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() && in combineSelect()
46238 return DAG.getSelect(DL, VT, CondNew, RHS, LHS); in combineSelect()
46243 if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::BITCAST && in combineSelect()
46245 TLI.isTypeLegal(VT.getScalarType())) { in combineSelect()
46246 EVT ExtCondVT = VT.changeVectorElementTypeToInteger(); in combineSelect()
46250 return DAG.getSelect(DL, VT, ExtCond, LHS, RHS); in combineSelect()
46255 // with out-of-bounds clamping. in combineSelect()
46259 // to bitwidth-1 for unsigned shifts, effectively performing a maximum left in combineSelect()
46260 // shift of bitwidth-1 positions, and returns zero for unsigned right shifts in combineSelect()
46261 // exceeding bitwidth-1. in combineSelect()
46262 if (N->getOpcode() == ISD::VSELECT) { in combineSelect()
46264 // fold select(icmp_ult(amt,BW),shl(x,amt),0) -> avx2 psllv(x,amt) in combineSelect()
46265 // fold select(icmp_ult(amt,BW),srl(x,amt),0) -> avx2 psrlv(x,amt) in combineSelect()
46267 supportedVectorVarShift(VT, Subtarget, LHS.getOpcode()) && in combineSelect()
46270 m_SpecificInt(VT.getScalarSizeInBits()), in combineSelect()
46274 DL, VT, LHS.getOperand(0), LHS.getOperand(1)); in combineSelect()
46276 // fold select(icmp_uge(amt,BW),0,shl(x,amt)) -> avx2 psllv(x,amt) in combineSelect()
46277 // fold select(icmp_uge(amt,BW),0,srl(x,amt)) -> avx2 psrlv(x,amt) in combineSelect()
46279 supportedVectorVarShift(VT, Subtarget, RHS.getOpcode()) && in combineSelect()
46282 m_SpecificInt(VT.getScalarSizeInBits()), in combineSelect()
46286 DL, VT, RHS.getOperand(0), RHS.getOperand(1)); in combineSelect()
46291 if (!TLI.isTypeLegal(VT) || isSoftF16(VT, Subtarget)) in combineSelect()
46303 // select(~Cond, X, Y) -> select(Cond, Y, X) in combineSelect()
46306 return DAG.getNode(N->getOpcode(), DL, VT, in combineSelect()
46309 // pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the in combineSelect()
46316 return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS); in combineSelect()
46325 if (N->getOpcode() == ISD::SELECT && VT.isVector() && in combineSelect()
46326 VT.getVectorElementType() == MVT::i1 && in combineSelect()
46327 (DCI.isBeforeLegalize() || (VT != MVT::v64i1 || Subtarget.is64Bit()))) { in combineSelect()
46328 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements()); in combineSelect()
46348 return DAG.getBitcast(VT, Select); in combineSelect()
46355 // This can lower using a vector shift bit-hack rather than mask and compare. in combineSelect()
46357 N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && in combineSelect()
46361 cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ && in combineSelect()
46362 Cond.getOperand(0).getValueType() == VT) { in combineSelect()
46363 // The 'and' mask must be composed of power-of-2 constants. in combineSelect()
46366 if (C && C->getAPIntValue().isPowerOf2()) { in combineSelect()
46367 // vselect (X & C == 0), LHS, RHS --> vselect (X & C != 0), RHS, LHS in combineSelect()
46370 return DAG.getSelect(DL, VT, NotCond, RHS, LHS); in combineSelect()
46373 // If we have a non-splat but still powers-of-2 mask, AVX1 can use pmulld in combineSelect()
46374 // and AVX2 can use vpsllv{dq}. 8-bit lacks a proper shift or multiply. in combineSelect()
46375 // 16-bit lacks a proper blendv. in combineSelect()
46376 unsigned EltBitWidth = VT.getScalarSizeInBits(); in combineSelect()
46378 TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) || in combineSelect()
46383 return C->getAPIntValue().isPowerOf2(); in combineSelect()
46385 // Create a left-shift constant to get the mask bits over to the sign-bit. in combineSelect()
46388 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { in combineSelect()
46390 ShlVals.push_back(EltBitWidth - 1 - in combineSelect()
46391 MaskVal->getAPIntValue().exactLogBase2()); in combineSelect()
46393 // vsel ((X & C) == 0), LHS, RHS --> vsel ((shl X, C') < 0), RHS, LHS in combineSelect()
46394 SDValue ShlAmt = getConstVector(ShlVals, VT.getSimpleVT(), DAG, DL); in combineSelect()
46395 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And.getOperand(0), ShlAmt); in combineSelect()
46398 return DAG.getSelect(DL, VT, NewCond, RHS, LHS); in combineSelect()
46414 // This combine only operates on CMP-like nodes. in combineSetCCAtomicArith()
46416 (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0)))) in combineSetCCAtomicArith()
46426 // (icmp slt x, 0) -> (icmp sle (add x, 1), 0) in combineSetCCAtomicArith()
46427 // (icmp sge x, 0) -> (icmp sgt (add x, 1), 0) in combineSetCCAtomicArith()
46428 // (icmp sle x, 0) -> (icmp slt (sub x, 1), 0) in combineSetCCAtomicArith()
46429 // (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0) in combineSetCCAtomicArith()
46433 // - XOR/OR/AND (if they were made to survive AtomicExpand) in combineSetCCAtomicArith()
46434 // - LHS != 1 in combineSetCCAtomicArith()
46453 APInt Addend = OpRHSC->getAPIntValue(); in combineSetCCAtomicArith()
46455 Addend = -Addend; in combineSetCCAtomicArith()
46461 APInt Comparison = CmpRHSC->getAPIntValue(); in combineSetCCAtomicArith()
46462 APInt NegAddend = -Addend; in combineSetCCAtomicArith()
46477 APInt DecComparison = Comparison - 1; in combineSetCCAtomicArith()
46499 AN->getMemOperand()); in combineSetCCAtomicArith()
46515 else if (CC == X86::COND_G && Addend == -1) in combineSetCCAtomicArith()
46517 else if (CC == X86::COND_LE && Addend == -1) in combineSetCCAtomicArith()
46540 // CMP(X,0) -> signbit test in checkSignTestSetCCCombine()
46545 // TODO: Remove one use limit once sdiv-fix regressions are fixed. in checkSignTestSetCCCombine()
46551 // OR(X,Y) -> see if only one operand contributes to the signbit. in checkSignTestSetCCCombine()
46552 // TODO: XOR(X,Y) -> see if only one operand contributes to the signbit. in checkSignTestSetCCCombine()
46600 // This combine only operates on CMP-like nodes. in checkBoolTestSetCCCombine()
46602 (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0)))) in checkBoolTestSetCCCombine()
46626 if (C->getZExtValue() == 1) { in checkBoolTestSetCCCombine()
46629 } else if (C->getZExtValue() != 0) in checkBoolTestSetCCCombine()
46639 int OpIdx = -1; in checkBoolTestSetCCCombine()
46691 if (FVal && FVal->getZExtValue() != 0) { in checkBoolTestSetCCCombine()
46692 if (FVal->getZExtValue() != 1) in checkBoolTestSetCCCombine()
46699 if (FValIsFalse && TVal->getZExtValue() != 1) in checkBoolTestSetCCCombine()
46701 if (!FValIsFalse && TVal->getZExtValue() != 0) in checkBoolTestSetCCCombine()
46720 if (Cond->getOpcode() == X86ISD::CMP) { in checkBoolTestAndOrSetCCCombine()
46721 if (!isNullConstant(Cond->getOperand(1))) in checkBoolTestAndOrSetCCCombine()
46724 Cond = Cond->getOperand(0); in checkBoolTestAndOrSetCCCombine()
46730 switch (Cond->getOpcode()) { in checkBoolTestAndOrSetCCCombine()
46738 SetCC0 = Cond->getOperand(0); in checkBoolTestAndOrSetCCCombine()
46739 SetCC1 = Cond->getOperand(1); in checkBoolTestAndOrSetCCCombine()
46746 SetCC0->getOperand(1) != SetCC1->getOperand(1)) in checkBoolTestAndOrSetCCCombine()
46749 CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0); in checkBoolTestAndOrSetCCCombine()
46750 CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0); in checkBoolTestAndOrSetCCCombine()
46751 Flags = SetCC0->getOperand(1); in checkBoolTestAndOrSetCCCombine()
46755 // When legalizing carry, we create carries via add X, -1
46785 CarryOp1.getNode()->hasOneUse() && in combineCarryThroughADD()
46789 DAG.getNode(X86ISD::SUB, SDLoc(CarryOp1), CarryOp1->getVTList(), in combineCarryThroughADD()
46829 MVT VT = EFLAGS.getSimpleValueType(); in combinePTESTCC() local
46840 // testc -> testz. in combinePTESTCC()
46844 // !testc -> !testz. in combinePTESTCC()
46848 // testz -> testc. in combinePTESTCC()
46852 // !testz -> !testc. in combinePTESTCC()
46857 // testnzc -> testnzc (no change). in combinePTESTCC()
46867 return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, in combinePTESTCC()
46873 // TESTC(X,~X) == TESTC(X,-1) in combinePTESTCC()
46878 EFLAGS.getOpcode(), DL, VT, DAG.getBitcast(OpVT, NotOp1), in combinePTESTCC()
46889 return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, in combinePTESTCC()
46899 return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, in combinePTESTCC()
46907 return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, in combinePTESTCC()
46912 // If every element is an all-sign value, see if we can use TESTP/MOVMSK in combinePTESTCC()
46920 assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result"); in combinePTESTCC()
46932 return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Res, Res); in combinePTESTCC()
46949 // TESTZ(-1,X) == TESTZ(X,X) in combinePTESTCC()
46951 return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op1, Op1); in combinePTESTCC()
46953 // TESTZ(X,-1) == TESTZ(X,X) in combinePTESTCC()
46955 return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0); in combinePTESTCC()
46957 // TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y) in combinePTESTCC()
46969 return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, in combinePTESTCC()
46985 // Handle eq/ne against -1 (all_of). in combineSetCCMOVMSK()
46996 const APInt &CmpVal = CmpConstant->getAPIntValue(); in combineSetCCMOVMSK()
47027 bool IsOneUse = CmpOp.getNode()->hasOneUse(); in combineSetCCMOVMSK()
47030 // signbits extend down to all the sub-elements as well. in combineSetCCMOVMSK()
47044 DAG.ComputeNumSignBits(BC) > (BCNumEltBits - NumEltBits)) { in combineSetCCMOVMSK()
47053 // MOVMSK(CONCAT(X,Y)) == 0 -> MOVMSK(OR(X,Y)). in combineSetCCMOVMSK()
47054 // MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)). in combineSetCCMOVMSK()
47055 // MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)). in combineSetCCMOVMSK()
47056 // MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)). in combineSetCCMOVMSK()
47074 // MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X). in combineSetCCMOVMSK()
47075 // MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X). in combineSetCCMOVMSK()
47076 // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(XOR(X,Y),XOR(X,Y)). in combineSetCCMOVMSK()
47077 // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(XOR(X,Y),XOR(X,Y)). in combineSetCCMOVMSK()
47089 // Check for 256-bit split vector cases. in combineSetCCMOVMSK()
47117 // PMOVMSKB(PACKSSBW(X, undef)) -> PMOVMSKB(BITCAST_v16i8(X)) & 0xAAAA. in combineSetCCMOVMSK()
47131 // -> PMOVMSKB(BITCAST_v32i8(X)) & 0xAAAAAAAA. in combineSetCCMOVMSK()
47159 // MOVMSK(SHUFFLE(X,u)) -> MOVMSK(X) iff every element is referenced. in combineSetCCMOVMSK()
47165 // on the "low" elements take place during other simplifications. in combineSetCCMOVMSK()
47190 // MOVMSKPS(V) !=/== 0 -> TESTPS(V,V) in combineSetCCMOVMSK()
47191 // MOVMSKPD(V) !=/== 0 -> TESTPD(V,V) in combineSetCCMOVMSK()
47192 // MOVMSKPS(V) !=/== -1 -> TESTPS(V,V) in combineSetCCMOVMSK()
47193 // MOVMSKPD(V) !=/== -1 -> TESTPD(V,V) in combineSetCCMOVMSK()
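The CONCAT folds above work because the movemask of an OR is the bitwise OR of the per-half movemasks (and likewise for AND), so the "any bit set" and "all bits set" tests can be done on a single half-width vector. An SSE2 sketch, illustrative only:

#include <cassert>
#include <emmintrin.h>

int main() {
  __m128i X = _mm_set_epi32(-1, 0, -1, 0);
  __m128i Y = _mm_set_epi32(0, -1, -1, 0);
  // MOVMSK(CONCAT(X,Y)) == 0  <=>  MOVMSK(OR(X,Y)) == 0
  bool AnyConcat = (_mm_movemask_epi8(X) | _mm_movemask_epi8(Y)) != 0;
  bool AnyOr = _mm_movemask_epi8(_mm_or_si128(X, Y)) != 0;
  assert(AnyConcat == AnyOr);
  // MOVMSK(CONCAT(X,Y)) == -1  <=>  MOVMSK(AND(X,Y)) == -1
  bool AllConcat = (_mm_movemask_epi8(X) & _mm_movemask_epi8(Y)) == 0xFFFF;
  bool AllAnd = _mm_movemask_epi8(_mm_and_si128(X, Y)) == 0xFFFF;
  assert(AllConcat == AllAnd);
  return 0;
}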
47244 SDValue FalseOp = N->getOperand(0); in combineCMov()
47245 SDValue TrueOp = N->getOperand(1); in combineCMov()
47246 X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); in combineCMov()
47247 SDValue Cond = N->getOperand(3); in combineCMov()
47249 // cmov X, X, ?, ? --> X in combineCMov()
47262 return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); in combineCMov()
47273 if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) { in combineCMov()
47279 // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0. in combineCMov()
47282 if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { in combineCMov()
47286 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond); in combineCMov()
47288 unsigned ShAmt = TrueC->getAPIntValue().logBase2(); in combineCMov()
47294 // Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst. This is efficient in combineCMov()
47296 if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { in combineCMov()
47301 FalseC->getValueType(0), Cond); in combineCMov()
47309 if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { in combineCMov()
47310 APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); in combineCMov()
47311 assert(Diff.getBitWidth() == N->getValueType(0).getSizeInBits() && in combineCMov()
47333 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), in combineCMov()
47340 // Add the base if non-zero. in combineCMov()
47341 if (FalseC->getAPIntValue() != 0) in combineCMov()
47351 // (select (x != c), e, c) -> select (x != c), e, x), in combineCMov()
47352 // (select (x == c), c, e) -> select (x == c), x, e) in combineCMov()
47356 // The rationale for this change is that the conditional-move from a constant in combineCMov()
47357 // needs two instructions; however, conditional-move from a register needs in combineCMov()
47361 // some instruction-combining opportunities. This opt needs to be in combineCMov()
47383 return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); in combineCMov()
47396 Cond.getOpcode() == X86ISD::SUB && Cond->hasOneUse()) { in combineCMov()
47401 if (Cond0 == TrueOp && Sub1C && Sub1C->getZExtValue() == 2) { in combineCMov()
47402 EVT CondVT = Cond->getValueType(0); in combineCMov()
47403 EVT OuterVT = N->getValueType(0); in combineCMov()
47406 DAG.getNode(X86ISD::SUB, DL, Cond->getVTList(), Cond.getOperand(0), in combineCMov()
47415 // (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2) in combineCMov()
47416 // (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2) in combineCMov()
47444 SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps); in combineCMov()
47447 SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); in combineCMov()
47452 // Fold (CMOV C1, (ADD (CTTZ X), C2), (X != 0)) -> in combineCMov()
47453 // (ADD (CMOV C1-C2, (CTTZ X), (X != 0)), C2) in combineCMov()
47454 // Or (CMOV (ADD (CTTZ X), C2), C1, (X == 0)) -> in combineCMov()
47455 // (ADD (CMOV (CTTZ X), C1-C2, (X == 0)), C2) in combineCMov()
47475 EVT VT = N->getValueType(0); in combineCMov() local
47477 SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1)); in combineCMov()
47479 DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), in combineCMov()
47481 return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1)); in combineCMov()
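Two of the constant CMOV rewrites above have simple scalar counterparts: a power-of-two/zero pair becomes a zero-extended flag shifted left, and a cst+1/cst pair becomes the flag plus the constant. A small sanity check of both identities (sketch only, not the lowering itself):

#include <cassert>
#include <cstdint>

int main() {
  for (bool C : {false, true}) {
    uint64_t Flag = C ? 1u : 0u;                // zext(setcc(C))
    assert((C ? 8u : 0u) == (Flag << 3));       // pow2/0   -> zext << log2
    assert((C ? 101u : 100u) == (Flag + 100u)); // cst+1/cst -> zext + cst
  }
  return 0;
}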
47492 EVT VT = N->getOperand(0).getValueType(); in canReduceVMulWidth() local
47493 if (VT.getScalarSizeInBits() != 32) in canReduceVMulWidth()
47496 assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2"); in canReduceVMulWidth()
47500 SDValue Opd = N->getOperand(i); in canReduceVMulWidth()
47508 // When ranges are from -128 to 127, use MULS8 mode. in canReduceVMulWidth()
47514 // When ranges are from -32768 to 32767, use MULS16 mode. in canReduceVMulWidth()
47542 /// -128 to 127, and the scalar value range of %4 is also -128 to 127,
47548 /// -32768 to 32767, and the scalar value range of %4 is also -32768 to 32767,
47572 SDValue N0 = N->getOperand(0); in reduceVMULWidth()
47573 SDValue N1 = N->getOperand(1); in reduceVMULWidth()
47574 EVT VT = N->getOperand(0).getValueType(); in reduceVMULWidth() local
47575 unsigned NumElts = VT.getVectorNumElements(); in reduceVMULWidth()
47591 DL, VT, MulLo); in reduceVMULWidth()
47619 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi); in reduceVMULWidth()
47623 EVT VT, const SDLoc &DL) { in combineMulSpecial() argument
47626 SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), in combineMulSpecial()
47627 DAG.getConstant(Mult, DL, VT)); in combineMulSpecial()
47628 Result = DAG.getNode(ISD::SHL, DL, VT, Result, in combineMulSpecial()
47630 Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, in combineMulSpecial()
47631 N->getOperand(0)); in combineMulSpecial()
47636 SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), in combineMulSpecial()
47637 DAG.getConstant(Mul1, DL, VT)); in combineMulSpecial()
47638 Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, Result, in combineMulSpecial()
47639 DAG.getConstant(Mul2, DL, VT)); in combineMulSpecial()
47640 Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, in combineMulSpecial()
47641 N->getOperand(0)); in combineMulSpecial()
47659 return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), in combineMulSpecial()
47684 return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), in combineMulSpecial()
47693 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) { in combineMulSpecial()
47696 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1))); in combineMulSpecial()
47697 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMulSpecial()
47699 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMulSpecial()
47701 return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2); in combineMulSpecial()
47720 EVT VT = N->getValueType(0); in combineMulToPMADDWD() local
47723 if (!VT.isVector() || VT.getVectorElementType() != MVT::i32) in combineMulToPMADDWD()
47728 unsigned NumElts = VT.getVectorNumElements(); in combineMulToPMADDWD()
47736 SDValue N0 = N->getOperand(0); in combineMulToPMADDWD()
47737 SDValue N1 = N->getOperand(1); in combineMulToPMADDWD()
47772 // Mask off upper 16-bits of sign-extended constants. in combineMulToPMADDWD()
47774 return DAG.getNode(ISD::AND, DL, VT, Op, DAG.getConstant(0xFFFF, DL, VT)); in combineMulToPMADDWD()
47775 if (Op.getOpcode() == ISD::SIGN_EXTEND && N->isOnlyUserOf(Op.getNode())) { in combineMulToPMADDWD()
47778 if (Src.getScalarValueSizeInBits() == 16 && VT.getSizeInBits() <= 128) in combineMulToPMADDWD()
47779 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Src); in combineMulToPMADDWD()
47780 // Convert sext(vXi8) to zext(vXi16 sext(vXi8)) on pre-SSE41 targets in combineMulToPMADDWD()
47783 EVT ExtVT = VT.changeVectorElementType(MVT::i16); in combineMulToPMADDWD()
47785 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Src); in combineMulToPMADDWD()
47790 N->isOnlyUserOf(Op.getNode())) { in combineMulToPMADDWD()
47793 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Src); in combineMulToPMADDWD()
47797 N->isOnlyUserOf(Op.getNode())) { in combineMulToPMADDWD()
47798 return DAG.getNode(X86ISD::VSRLI, DL, VT, Op.getOperand(0), in combineMulToPMADDWD()
47819 return SplitOpsAndApply(DAG, Subtarget, DL, VT, {N0, N1}, PMADDWDBuilder); in combineMulToPMADDWD()
47827 EVT VT = N->getValueType(0); in combineMulToPMULDQ() local
47830 if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 || in combineMulToPMULDQ()
47831 VT.getVectorNumElements() < 2 || in combineMulToPMULDQ()
47832 !isPowerOf2_32(VT.getVectorNumElements())) in combineMulToPMULDQ()
47835 SDValue N0 = N->getOperand(0); in combineMulToPMULDQ()
47836 SDValue N1 = N->getOperand(1); in combineMulToPMULDQ()
47838 // MULDQ returns the 64-bit result of the signed multiplication of the lower in combineMulToPMULDQ()
47839 // 32-bits. We can lower with this if the sign bits stretch that far. in combineMulToPMULDQ()
47846 return SplitOpsAndApply(DAG, Subtarget, DL, VT, {N0, N1}, PMULDQBuilder, in combineMulToPMULDQ()
47857 return SplitOpsAndApply(DAG, Subtarget, DL, VT, {N0, N1}, PMULUDQBuilder, in combineMulToPMULDQ()
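The PMULDQ fold above is valid because, when both 64-bit operands are just sign-extended 32-bit values, the full 64-bit product is determined by the low 32 bits alone, which is exactly what PMULDQ computes per lane. A scalar model of one lane (pmuldqLane is an illustrative helper):

#include <cassert>
#include <cstdint>

static int64_t pmuldqLane(int64_t A, int64_t B) {
  // Per lane, PMULDQ multiplies the sign-extended low 32 bits of each input.
  return static_cast<int64_t>(static_cast<int32_t>(A)) *
         static_cast<int64_t>(static_cast<int32_t>(B));
}

int main() {
  const int64_t As[] = {-7, 100000, -2147483647 - 1}; // all fit in i32
  const int64_t Bs[] = {3, -45678, 2147483647};
  for (int64_t A : As)
    for (int64_t B : Bs)
      assert(pmuldqLane(A, B) == A * B); // equal because A and B fit in i32
  return 0;
}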
47867 EVT VT = N->getValueType(0); in combineMul() local
47876 if (DCI.isBeforeLegalize() && VT.isVector()) in combineMul()
47891 if (VT != MVT::i64 && VT != MVT::i32 && in combineMul()
47892 (!VT.isVector() || !VT.isSimple() || !VT.isInteger())) in combineMul()
47896 N->getOperand(1), /*AllowUndefs*/ true, /*AllowTrunc*/ false); in combineMul()
47899 if (VT.isVector()) in combineMul()
47900 if (auto *RawC = getTargetConstantFromNode(N->getOperand(1))) in combineMul()
47901 if (auto *SplatC = RawC->getSplatValue()) in combineMul()
47903 C = &(SplatCI->getValue()); in combineMul()
47905 if (!C || C->getBitWidth() != VT.getScalarSizeInBits()) in combineMul()
47908 C = &(CNode->getAPIntValue()); in combineMul()
47911 if (isPowerOf2_64(C->getZExtValue())) in combineMul()
47914 int64_t SignMulAmt = C->getSExtValue(); in combineMul()
47916 uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt; in combineMul()
47919 if (VT == MVT::i64 || VT == MVT::i32) { in combineMul()
47921 NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), in combineMul()
47922 DAG.getConstant(AbsMulAmt, DL, VT)); in combineMul()
47924 NewMul = DAG.getNegative(NewMul, DL, VT); in combineMul()
47947 if (isPowerOf2_64(MulAmt2) && !(SignMulAmt >= 0 && N->hasOneUse() && in combineMul()
47948 N->use_begin()->getOpcode() == ISD::ADD)) in combineMul()
47956 NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMul()
47959 NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), in combineMul()
47960 DAG.getConstant(MulAmt1, DL, VT)); in combineMul()
47963 NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul, in combineMul()
47966 NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, in combineMul()
47967 DAG.getConstant(MulAmt2, DL, VT)); in combineMul()
47971 NewMul = DAG.getNegative(NewMul, DL, VT); in combineMul()
47973 NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL); in combineMul()
47976 EVT ShiftVT = VT.isVector() ? VT : MVT::i8; in combineMul()
47977 assert(C->getZExtValue() != 0 && in combineMul()
47978 C->getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) && in combineMul()
47981 if (isPowerOf2_64(AbsMulAmt - 1)) { in combineMul()
47984 ISD::ADD, DL, VT, N->getOperand(0), in combineMul()
47985 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMul()
47986 DAG.getConstant(Log2_64(AbsMulAmt - 1), DL, ShiftVT))); in combineMul()
47988 NewMul = DAG.getNegative(NewMul, DL, VT); in combineMul()
47990 // (mul x, 2^N - 1) => (sub (shl x, N), x) in combineMul()
47992 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMul()
47996 NewMul = DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), NewMul); in combineMul()
47998 NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); in combineMul()
47999 } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2) && in combineMul()
48000 (!VT.isVector() || Subtarget.fastImmVectorShift())) { in combineMul()
48003 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMul()
48004 DAG.getConstant(Log2_64(AbsMulAmt - 2), DL, ShiftVT)); in combineMul()
48006 ISD::ADD, DL, VT, NewMul, in combineMul()
48007 DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), N->getOperand(0))); in combineMul()
48009 (!VT.isVector() || Subtarget.fastImmVectorShift())) { in combineMul()
48010 // (mul x, 2^N - 2) => (sub (shl x, N), (add x, x)) in combineMul()
48012 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMul()
48015 ISD::SUB, DL, VT, NewMul, in combineMul()
48016 DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), N->getOperand(0))); in combineMul()
48017 } else if (SignMulAmt >= 0 && VT.isVector() && in combineMul()
48019 uint64_t AbsMulAmtLowBit = AbsMulAmt & (-AbsMulAmt); in combineMul()
48022 if (isPowerOf2_64(AbsMulAmt - AbsMulAmtLowBit)) { in combineMul()
48023 ShiftAmt1 = AbsMulAmt - AbsMulAmtLowBit; in combineMul()
48032 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMul()
48035 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMul()
48037 NewMul = DAG.getNode(*Opc, DL, VT, Shift1, Shift2); in combineMul()
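// [Illustrative examples, not from the upstream source] for the shift-based
// decompositions above:
//   (mul x, 9)  -> (add (shl x, 3), x)            // 2^3 + 1
//   (mul x, 7)  -> (sub (shl x, 3), x)            // 2^3 - 1
//   (mul x, 10) -> (add (shl x, 3), (add x, x))   // 2^3 + 2
//   (mul x, 6)  -> (sub (shl x, 3), (add x, x))   // 2^3 - 2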
48055 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && in combineShiftToPMULH()
48062 SDValue ShiftOperand = N->getOperand(0); in combineShiftToPMULH()
48067 EVT VT = N->getValueType(0); in combineShiftToPMULH() local
48068 if (!VT.isVector() || VT.getVectorElementType().getSizeInBits() < 32) in combineShiftToPMULH()
48073 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), ShiftAmt) || in combineShiftToPMULH()
48097 ExtOpc = N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; in combineShiftToPMULH()
48098 return DAG.getNode(ExtOpc, DL, VT, Mulh); in combineShiftToPMULH()
48104 SDValue N0 = N->getOperand(0); in combineShiftLeft()
48105 SDValue N1 = N->getOperand(1); in combineShiftLeft()
48107 EVT VT = N0.getValueType(); in combineShiftLeft() local
48108 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in combineShiftLeft()
48112 // with out-of-bounds clamping. in combineShiftLeft()
48114 supportedVectorVarShift(VT, Subtarget, ISD::SHL)) { in combineShiftLeft()
48118 // fold shl(select(icmp_ult(amt,BW),x,0),amt) -> avx2 psllv(x,amt) in combineShiftLeft()
48122 return DAG.getNode(X86ISD::VSHLV, DL, VT, N00, N1); in combineShiftLeft()
48124 // fold shl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psllv(x,amt) in combineShiftLeft()
48128 return DAG.getNode(X86ISD::VSHLV, DL, VT, N01, N1); in combineShiftLeft()
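// [Illustrative note, not from the upstream source] The select is removable
// because the AVX2 variable shifts (VPSLLV*) already write 0 to any element
// whose shift amount is >= the element bit width, which is exactly what the
// icmp_ult/icmp_uge select was providing.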
48132 // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)) in combineShiftLeft()
48134 if (VT.isInteger() && !VT.isVector() && in combineShiftLeft()
48139 Mask <<= N1C->getAPIntValue(); in combineShiftLeft()
48141 // We can handle cases concerning bit-widening nodes containing setcc_c if in combineShiftLeft()
48147 // zext(setcc_c) -> i32 0x0000FFFF in combineShiftLeft()
48148 // c1 -> i32 0x0000FFFF in combineShiftLeft()
48149 // c2 -> i32 0x00000001 in combineShiftLeft()
48150 // (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE in combineShiftLeft()
48151 // (and setcc_c, (c1 << c2)) -> i32 0x0000FFFE in combineShiftLeft()
48163 return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT)); in combineShiftLeft()
48172 SDValue N0 = N->getOperand(0); in combineShiftRightArithmetic()
48173 SDValue N1 = N->getOperand(1); in combineShiftRightArithmetic()
48174 EVT VT = N0.getValueType(); in combineShiftRightArithmetic() local
48175 unsigned Size = VT.getSizeInBits(); in combineShiftRightArithmetic()
48181 // fold sra(x,umin(amt,bw-1)) -> avx2 psrav(x,amt) in combineShiftRightArithmetic()
48182 if (supportedVectorVarShift(VT, Subtarget, ISD::SRA)) { in combineShiftRightArithmetic()
48185 m_SpecificInt(VT.getScalarSizeInBits() - 1)))) in combineShiftRightArithmetic()
48186 return DAG.getNode(X86ISD::VSRAV, DL, VT, N0, ShrAmtVal); in combineShiftRightArithmetic()
48190 // into (SHL (sext_in_reg X), ShlConst - SraConst) in combineShiftRightArithmetic()
48192 // or (SRA (sext_in_reg X), SraConst - ShlConst) in combineShiftRightArithmetic()
48194 // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows in combineShiftRightArithmetic()
48203 if (VT.isVector() || N1.getOpcode() != ISD::Constant || in combineShiftRightArithmetic()
48210 APInt ShlConst = N01->getAsAPIntVal(); in combineShiftRightArithmetic()
48211 APInt SraConst = N1->getAsAPIntVal(); in combineShiftRightArithmetic()
48221 // Only deal with (Size - ShlConst) being equal to 8, 16 or 32. in combineShiftRightArithmetic()
48222 if (ShiftSize >= Size || ShlConst != Size - ShiftSize) in combineShiftRightArithmetic()
48225 DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); in combineShiftRightArithmetic()
48229 return DAG.getNode(ISD::SHL, DL, VT, NN, in combineShiftRightArithmetic()
48230 DAG.getConstant(ShlConst - SraConst, DL, CVT)); in combineShiftRightArithmetic()
48231 return DAG.getNode(ISD::SRA, DL, VT, NN, in combineShiftRightArithmetic()
48232 DAG.getConstant(SraConst - ShlConst, DL, CVT)); in combineShiftRightArithmetic()
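// [Illustrative examples, not from the upstream source] with VT == i32 and
// SVT == i8 (Size - ShlConst == 8):
//   (sra (shl X, 24), 27) -> (sra (sext_in_reg X, i8), 3)
//   (sra (shl X, 24), 20) -> (shl (sext_in_reg X, i8), 4)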
48241 SDValue N0 = N->getOperand(0); in combineShiftRightLogical()
48242 SDValue N1 = N->getOperand(1); in combineShiftRightLogical()
48243 EVT VT = N0.getValueType(); in combineShiftRightLogical() local
48244 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in combineShiftRightLogical()
48251 // with out-of-bounds clamping. in combineShiftRightLogical()
48253 supportedVectorVarShift(VT, Subtarget, ISD::SRL)) { in combineShiftRightLogical()
48257 // fold srl(select(icmp_ult(amt,BW),x,0),amt) -> avx2 psrlv(x,amt) in combineShiftRightLogical()
48261 return DAG.getNode(X86ISD::VSRLV, DL, VT, N00, N1); in combineShiftRightLogical()
48263 // fold srl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psrlv(x,amt) in combineShiftRightLogical()
48267 return DAG.getNode(X86ISD::VSRLV, DL, VT, N01, N1); in combineShiftRightLogical()
48277 // TODO: This is a generic DAG combine that became an x86-only combine to in combineShiftRightLogical()
48278 // avoid shortcomings in other folds such as bswap, bit-test ('bt'), and in combineShiftRightLogical()
48279 // and-not ('andn'). in combineShiftRightLogical()
48288 // If we can shrink the constant mask below 8-bits or 32-bits, then this in combineShiftRightLogical()
48290 // from improved known-bits analysis or instruction selection. in combineShiftRightLogical()
48291 APInt MaskVal = AndC->getAPIntValue(); in combineShiftRightLogical()
48300 APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue()); in combineShiftRightLogical()
48305 // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC) in combineShiftRightLogical()
48306 SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT); in combineShiftRightLogical()
48307 SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1); in combineShiftRightLogical()
48308 return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask); in combineShiftRightLogical()
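// [Illustrative example, not from the upstream source]
//   (srl (and X, 0xFF0), 4) -> (and (srl X, 4), 0xFF)
// The shifted mask fits in fewer bits (here 8), which may allow a shorter
// immediate encoding and feeds better known-bits information to later folds.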
48315 unsigned Opcode = N->getOpcode(); in combineHorizOpWithShuffle()
48319 EVT VT = N->getValueType(0); in combineHorizOpWithShuffle() local
48320 SDValue N0 = N->getOperand(0); in combineHorizOpWithShuffle()
48321 SDValue N1 = N->getOperand(1); in combineHorizOpWithShuffle()
48325 N->isOnlyUserOf(N0.getNode()) ? peekThroughOneUseBitcasts(N0) : N0; in combineHorizOpWithShuffle()
48327 N->isOnlyUserOf(N1.getNode()) ? peekThroughOneUseBitcasts(N1) : N1; in combineHorizOpWithShuffle()
48334 if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) { in combineHorizOpWithShuffle()
48342 // shuffle to a v4X64 width - we can probably relax this in the future. in combineHorizOpWithShuffle()
48347 MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32; in combineHorizOpWithShuffle()
48351 SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi); in combineHorizOpWithShuffle()
48354 return DAG.getBitcast(VT, Res); in combineHorizOpWithShuffle()
48360 // Attempt to fold HOP(SHUFFLE(X,Y),SHUFFLE(Z,W)) -> SHUFFLE(HOP()). in combineHorizOpWithShuffle()
48361 if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) { in combineHorizOpWithShuffle()
48385 int PostShuffle[4] = {-1, -1, -1, -1}; in combineHorizOpWithShuffle()
48408 MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32; in combineHorizOpWithShuffle()
48409 SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS); in combineHorizOpWithShuffle()
48412 return DAG.getBitcast(VT, Res); in combineHorizOpWithShuffle()
48417 // Attempt to fold HOP(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(HOP(X,Y)). in combineHorizOpWithShuffle()
48418 if (VT.is256BitVector() && Subtarget.hasInt256()) { in combineHorizOpWithShuffle()
48444 MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; in combineHorizOpWithShuffle()
48445 SDValue Res = DAG.getNode(Opcode, DL, VT, DAG.getBitcast(SrcVT, Op00), in combineHorizOpWithShuffle()
48449 return DAG.getBitcast(VT, Res); in combineHorizOpWithShuffle()
48460 unsigned Opcode = N->getOpcode(); in combineVectorPack()
48464 EVT VT = N->getValueType(0); in combineVectorPack() local
48465 SDValue N0 = N->getOperand(0); in combineVectorPack()
48466 SDValue N1 = N->getOperand(1); in combineVectorPack()
48467 unsigned NumDstElts = VT.getVectorNumElements(); in combineVectorPack()
48468 unsigned DstBitsPerElt = VT.getScalarSizeInBits(); in combineVectorPack()
48479 if ((N0.isUndef() || N->isOnlyUserOf(N0.getNode())) && in combineVectorPack()
48480 (N1.isUndef() || N->isOnlyUserOf(N1.getNode())) && in combineVectorPack()
48487 unsigned NumLanes = VT.getSizeInBits() / 128; in combineVectorPack()
48527 return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N)); in combineVectorPack()
48530 // Try to fold PACK(SHUFFLE(),SHUFFLE()) -> SHUFFLE(PACK()). in combineVectorPack()
48534 // Try to fold PACKSS(NOT(X),NOT(Y)) -> NOT(PACKSS(X,Y)). in combineVectorPack()
48545 DAG.getNode(X86ISD::PACKSS, DL, VT, DAG.getBitcast(SrcVT, Not0), in combineVectorPack()
48547 return DAG.getNOT(DL, Pack, VT); in combineVectorPack()
48554 N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 && in combineVectorPack()
48560 return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0)); in combineVectorPack()
48566 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat); in combineVectorPack()
48570 // Try to fold PACK(EXTEND(X),EXTEND(Y)) -> CONCAT(X,Y) subvectors. in combineVectorPack()
48571 if (VT.is128BitVector()) { in combineVectorPack()
48588 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1); in combineVectorPack()
48596 return getEXTEND_VECTOR_INREG(ExtOpc, SDLoc(N), VT, N0.getOperand(0), in combineVectorPack()
48611 assert((X86ISD::HADD == N->getOpcode() || X86ISD::FHADD == N->getOpcode() || in combineVectorHADDSUB()
48612 X86ISD::HSUB == N->getOpcode() || X86ISD::FHSUB == N->getOpcode()) && in combineVectorHADDSUB()
48616 MVT VT = N->getSimpleValueType(0); in combineVectorHADDSUB() local
48617 SDValue LHS = N->getOperand(0); in combineVectorHADDSUB()
48618 SDValue RHS = N->getOperand(1); in combineVectorHADDSUB()
48620 // HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y))). in combineVectorHADDSUB()
48621 if (LHS != RHS && LHS.getOpcode() == N->getOpcode() && in combineVectorHADDSUB()
48624 N->isOnlyUserOf(LHS.getNode()) && N->isOnlyUserOf(RHS.getNode())) { in combineVectorHADDSUB()
48635 MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32); in combineVectorHADDSUB()
48643 return DAG.getNode(N->getOpcode(), DL, VT, DAG.getBitcast(VT, NewLHS), in combineVectorHADDSUB()
48644 DAG.getBitcast(VT, NewRHS)); in combineVectorHADDSUB()
48649 // Try to fold HOP(SHUFFLE(),SHUFFLE()) -> SHUFFLE(HOP()). in combineVectorHADDSUB()
48659 assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() || in combineVectorShiftVar()
48660 X86ISD::VSRL == N->getOpcode()) && in combineVectorShiftVar()
48662 EVT VT = N->getValueType(0); in combineVectorShiftVar() local
48663 SDValue N0 = N->getOperand(0); in combineVectorShiftVar()
48664 SDValue N1 = N->getOperand(1); in combineVectorShiftVar()
48666 // Shift zero -> zero. in combineVectorShiftVar()
48668 return DAG.getConstant(0, SDLoc(N), VT); in combineVectorShiftVar()
48676 unsigned X86Opc = getTargetVShiftUniformOpcode(N->getOpcode(), false); in combineVectorShiftVar()
48677 return getTargetVShiftByConstNode(X86Opc, SDLoc(N), VT.getSimpleVT(), N0, in combineVectorShiftVar()
48682 APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); in combineVectorShiftVar()
48692 unsigned Opcode = N->getOpcode(); in combineVectorShiftImm()
48697 EVT VT = N->getValueType(0); in combineVectorShiftImm() local
48698 SDValue N0 = N->getOperand(0); in combineVectorShiftImm()
48699 SDValue N1 = N->getOperand(1); in combineVectorShiftImm()
48700 unsigned NumBitsPerElt = VT.getScalarSizeInBits(); in combineVectorShiftImm()
48701 assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 && in combineVectorShiftImm()
48705 // (shift undef, X) -> 0 in combineVectorShiftImm()
48707 return DAG.getConstant(0, SDLoc(N), VT); in combineVectorShiftImm()
48711 unsigned ShiftVal = N->getConstantOperandVal(1); in combineVectorShiftImm()
48714 return DAG.getConstant(0, SDLoc(N), VT); in combineVectorShiftImm()
48715 ShiftVal = NumBitsPerElt - 1; in combineVectorShiftImm()
48718 // (shift X, 0) -> X in combineVectorShiftImm()
48722 // (shift 0, C) -> 0 in combineVectorShiftImm()
48726 return DAG.getConstant(0, SDLoc(N), VT); in combineVectorShiftImm()
48728 // (VSRAI -1, C) -> -1 in combineVectorShiftImm()
48732 return DAG.getConstant(-1, SDLoc(N), VT); in combineVectorShiftImm()
48740 return DAG.getConstant(0, SDLoc(N), VT); in combineVectorShiftImm()
48741 NewShiftVal = NumBitsPerElt - 1; in combineVectorShiftImm()
48743 return DAG.getNode(Opcode, SDLoc(N), VT, N0.getOperand(0), in combineVectorShiftImm()
48747 // (shift (shift X, C2), C1) -> (shift X, (C1 + C2)) in combineVectorShiftImm()
48751 // (shl (add X, X), C) -> (shl X, (C + 1)) in combineVectorShiftImm()
48765 // psrad(pshufd(psllq(X,63),1,1,3,3),31) -> in combineVectorShiftImm()
48770 N0->hasOneUse()) { in combineVectorShiftImm()
48777 Src = DAG.getBitcast(VT, Src); in combineVectorShiftImm()
48778 Src = DAG.getNode(X86ISD::PSHUFD, DL, VT, Src, in combineVectorShiftImm()
48780 Src = DAG.getNode(X86ISD::VSHLI, DL, VT, Src, N1); in combineVectorShiftImm()
48781 Src = DAG.getNode(X86ISD::VSRAI, DL, VT, Src, N1); in combineVectorShiftImm()
48793 assert(EltBits.size() == VT.getVectorNumElements() && in combineVectorShiftImm()
48811 return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N)); in combineVectorShiftImm()
48815 if (N->isOnlyUserOf(N0.getNode())) { in combineVectorShiftImm()
48819 // Fold (shift (logic X, C2), C1) -> (logic (shift X, C1), (shift C2, C1)) in combineVectorShiftImm()
48823 BC->isOnlyUserOf(BC.getOperand(1).getNode()) && in combineVectorShiftImm()
48827 SDValue LHS = DAG.getNode(Opcode, DL, VT, in combineVectorShiftImm()
48828 DAG.getBitcast(VT, BC.getOperand(0)), N1); in combineVectorShiftImm()
48829 return DAG.getNode(BC.getOpcode(), DL, VT, LHS, RHS); in combineVectorShiftImm()
48845 EVT VT = N->getValueType(0); in combineVectorInsert() local
48846 unsigned Opcode = N->getOpcode(); in combineVectorInsert()
48847 assert(((Opcode == X86ISD::PINSRB && VT == MVT::v16i8) || in combineVectorInsert()
48848 (Opcode == X86ISD::PINSRW && VT == MVT::v8i16) || in combineVectorInsert()
48852 SDValue Vec = N->getOperand(0); in combineVectorInsert()
48853 SDValue Scl = N->getOperand(1); in combineVectorInsert()
48854 SDValue Idx = N->getOperand(2); in combineVectorInsert()
48856 // Fold insert_vector_elt(undef, elt, 0) --> scalar_to_vector(elt). in combineVectorInsert()
48858 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Scl); in combineVectorInsert()
48861 unsigned NumBitsPerElt = VT.getScalarSizeInBits(); in combineVectorInsert()
48869 if (VT.isSimple() && DCI.isAfterLegalizeDAG()) { in combineVectorInsert()
48880 /// OR -> CMPNEQSS.
48889 SDValue N0 = N->getOperand(0); in combineCompareEqual()
48890 SDValue N1 = N->getOperand(1); in combineCompareEqual()
48899 SDValue CMP00 = CMP0->getOperand(0); in combineCompareEqual()
48900 SDValue CMP01 = CMP0->getOperand(1); in combineCompareEqual()
48901 EVT VT = CMP00.getValueType(); in combineCompareEqual() local
48903 if (VT == MVT::f32 || VT == MVT::f64 || in combineCompareEqual()
48904 (VT == MVT::f16 && Subtarget.hasFP16())) { in combineCompareEqual()
48907 for (const SDNode *U : N->uses()) { in combineCompareEqual()
48911 switch (U->getOpcode()) { in combineCompareEqual()
48951 N->getSimpleValueType(0)); in combineCompareEqual()
48961 // On a 32-bit target, we cannot bitcast the 64-bit float to a in combineCompareEqual()
48962 // 64-bit integer, since that's not a legal type. Since in combineCompareEqual()
48987 /// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
48989 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP"); in combineAndNotIntoANDNP()
48991 MVT VT = N->getSimpleValueType(0); in combineAndNotIntoANDNP() local
48992 if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) in combineAndNotIntoANDNP()
48996 SDValue N0 = N->getOperand(0); in combineAndNotIntoANDNP()
48997 SDValue N1 = N->getOperand(1); in combineAndNotIntoANDNP()
49008 X = DAG.getBitcast(VT, X); in combineAndNotIntoANDNP()
49009 Y = DAG.getBitcast(VT, Y); in combineAndNotIntoANDNP()
49010 return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y); in combineAndNotIntoANDNP()
49015 /// (insert_vector_elt undef, (xor X, -1), Z), undef), Y
49016 /// ->
49021 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDNP"); in combineAndShuffleNot()
49023 EVT VT = N->getValueType(0); in combineAndShuffleNot() local
49026 if (!((VT.is128BitVector() && Subtarget.hasSSE2()) || in combineAndShuffleNot()
49027 ((VT.is256BitVector() || VT.is512BitVector()) && Subtarget.hasAVX()))) in combineAndShuffleNot()
49032 // TODO: SVN->hasOneUse() is a strong condition. It can be relaxed if all in combineAndShuffleNot()
49033 // end-users are ISD::AND including cases in combineAndShuffleNot()
49035 if (!SVN || !SVN->hasOneUse() || !SVN->isSplat() || in combineAndShuffleNot()
49036 !SVN->getOperand(1).isUndef()) { in combineAndShuffleNot()
49039 SDValue IVEN = SVN->getOperand(0); in combineAndShuffleNot()
49044 IVEN.getConstantOperandAPInt(2) != SVN->getSplatIndex()) in combineAndShuffleNot()
49052 return DAG.getVectorShuffle(SVN->getValueType(0), SDLoc(SVN), NotIVEN, in combineAndShuffleNot()
49053 SVN->getOperand(1), SVN->getMask()); in combineAndShuffleNot()
49059 SDValue N0 = N->getOperand(0); in combineAndShuffleNot()
49060 SDValue N1 = N->getOperand(1); in combineAndShuffleNot()
49072 X = DAG.getBitcast(VT, X); in combineAndShuffleNot()
49073 Y = DAG.getBitcast(VT, Y); in combineAndShuffleNot()
49078 if (!Subtarget.useAVX512Regs() && VT.is512BitVector() && in combineAndShuffleNot()
49079 TLI.isTypeLegal(VT.getHalfNumVectorElementsVT(*DAG.getContext()))) { in combineAndShuffleNot()
49087 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, {LoV, HiV}); in combineAndShuffleNot()
49090 if (TLI.isTypeLegal(VT)) in combineAndShuffleNot()
49091 return DAG.getNode(X86ISD::ANDNP, DL, VT, {X, Y}); in combineAndShuffleNot()
49096 // Try to widen AND, OR and XOR nodes to VT in order to remove casts around
49100 // Given a target type \p VT, we generate
49102 // given x, y and z are of type \p VT. We can do so, if operands are either
49103 // truncates from VT types, the second operand is a vector of constants or can
49105 static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, in PromoteMaskArithmetic() argument
49118 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT)) in PromoteMaskArithmetic()
49121 if (SDValue NN0 = PromoteMaskArithmetic(N0, DL, VT, DAG, Depth + 1)) in PromoteMaskArithmetic()
49129 if (N0.getOperand(0).getValueType() != VT) in PromoteMaskArithmetic()
49135 if (SDValue NN1 = PromoteMaskArithmetic(N1, DL, VT, DAG, Depth + 1)) in PromoteMaskArithmetic()
49140 N1.getOperand(0).getValueType() == VT; in PromoteMaskArithmetic()
49144 DAG.FoldConstantArithmetic(ISD::ZERO_EXTEND, DL, VT, {N1})) in PromoteMaskArithmetic()
49150 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1); in PromoteMaskArithmetic()
49154 // register. In most cases we actually compare or select YMM-sized registers
49157 // Even with AVX-512 this is still useful for removing casts around logical
49162 EVT VT = N.getValueType(); in PromoteMaskArithmetic() local
49163 assert(VT.isVector() && "Expected vector type"); in PromoteMaskArithmetic()
49172 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, 0); in PromoteMaskArithmetic()
49182 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, in PromoteMaskArithmetic()
49190 // clang-format off in convertIntLogicToFPLogicOpcode()
49195 // clang-format on in convertIntLogicToFPLogicOpcode()
49200 /// If both input operands of a logic op are being cast from floating-point
49201 /// types or FP compares, try to convert this into a floating-point logic node
49206 EVT VT = N->getValueType(0); in convertIntLogicToFPLogic() local
49207 SDValue N0 = N->getOperand(0); in convertIntLogicToFPLogic()
49208 SDValue N1 = N->getOperand(1); in convertIntLogicToFPLogic()
49227 unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode()); in convertIntLogicToFPLogic()
49229 return DAG.getBitcast(VT, FPLogic); in convertIntLogicToFPLogic()
49232 if (VT != MVT::i1 || N0.getOpcode() != ISD::SETCC || !N0.hasOneUse() || in convertIntLogicToFPLogic()
49236 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0.getOperand(2))->get(); in convertIntLogicToFPLogic()
49237 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get(); in convertIntLogicToFPLogic()
49247 // logic (setcc N00, N01), (setcc N10, N11) --> in convertIntLogicToFPLogic()
49261 SDValue Logic = DAG.getNode(N->getOpcode(), DL, BoolVecVT, Setcc0, Setcc1); in convertIntLogicToFPLogic()
49262 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Logic, ZeroIndex); in convertIntLogicToFPLogic()
49265 // Attempt to fold BITOP(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(BITOP(X,Y))
49266 // to reduce XMM->GPR traffic.
49268 unsigned Opc = N->getOpcode(); in combineBitOpWithMOVMSK()
49272 SDValue N0 = N->getOperand(0); in combineBitOpWithMOVMSK()
49273 SDValue N1 = N->getOperand(1); in combineBitOpWithMOVMSK()
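// [Illustrative note, not from the upstream source] MOVMSK only gathers the
// per-element sign bits, so bitwise ops commute with it, e.g.
//   (and (movmsk X), (movmsk Y)) -> (movmsk (and X, Y))
// leaving a single XMM->GPR transfer instead of two.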
49299 // Attempt to fold BITOP(SHIFT(X,Z),SHIFT(Y,Z)) -> SHIFT(BITOP(X,Y),Z).
49303 unsigned Opc = N->getOpcode(); in combineBitOpWithShift()
49307 SDValue N0 = N->getOperand(0); in combineBitOpWithShift()
49308 SDValue N1 = N->getOperand(1); in combineBitOpWithShift()
49309 EVT VT = N->getValueType(0); in combineBitOpWithShift() local
49321 if (BCOpc != BC1->getOpcode() || BCVT != BC1.getValueType()) in combineBitOpWithShift()
49335 return DAG.getBitcast(VT, Shift); in combineBitOpWithShift()
49343 // BITOP(PACKSS(X,Z),PACKSS(Y,W)) --> PACKSS(BITOP(X,Y),BITOP(Z,W)).
49346 unsigned Opc = N->getOpcode(); in combineBitOpWithPACK()
49350 SDValue N0 = N->getOperand(0); in combineBitOpWithPACK()
49351 SDValue N1 = N->getOperand(1); in combineBitOpWithPACK()
49352 EVT VT = N->getValueType(0); in combineBitOpWithPACK() local
49382 return DAG.getBitcast(VT, DAG.getNode(X86ISD::PACKSS, DL, DstVT, LHS, RHS)); in combineBitOpWithPACK()
49385 /// If this is a zero/all-bits result that is bitwise-anded with a low bits
49387 /// with a shift-right to eliminate loading the vector constant mask value.
49390 SDValue Op0 = peekThroughBitcasts(N->getOperand(0)); in combineAndMaskToShift()
49391 SDValue Op1 = peekThroughBitcasts(N->getOperand(1)); in combineAndMaskToShift()
49392 EVT VT = Op0.getValueType(); in combineAndMaskToShift() local
49393 if (VT != Op1.getValueType() || !VT.isSimple() || !VT.isInteger()) in combineAndMaskToShift()
49397 // shift and "andn". This saves a materialization of a -1 vector constant. in combineAndMaskToShift()
49400 // and (pcmpgt X, -1), Y --> pandn (vsrai X, BitWidth - 1), Y in combineAndMaskToShift()
49405 if (N->getValueType(0) == VT && in combineAndMaskToShift()
49406 supportedVectorShiftWithImm(VT, Subtarget, ISD::SRA)) { in combineAndMaskToShift()
49420 getTargetVShiftByConstNode(X86ISD::VSRAI, DL, VT.getSimpleVT(), X, in combineAndMaskToShift()
49421 VT.getScalarSizeInBits() - 1, DAG); in combineAndMaskToShift()
49422 return DAG.getNode(X86ISD::ANDNP, DL, VT, Sra, Y); in combineAndMaskToShift()
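// [Illustrative note, not from the upstream source] Per lane,
// (pcmpgt X, -1) is all-ones iff X is non-negative, while
// (vsrai X, BitWidth-1) is all-ones iff X is negative, so the AND with the
// former equals ANDNP with the latter and the -1 splat constant no longer
// has to be materialised.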
49434 if (!supportedVectorShiftWithImm(VT, Subtarget, ISD::SRL)) in combineAndMaskToShift()
49437 unsigned EltBitWidth = VT.getScalarSizeInBits(); in combineAndMaskToShift()
49443 SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8); in combineAndMaskToShift()
49444 SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT, Op0, ShAmt); in combineAndMaskToShift()
49445 return DAG.getBitcast(N->getValueType(0), Shift); in combineAndMaskToShift()
49451 if (Ld->isIndexed()) in getIndexFromUnindexedLoad()
49454 SDValue Base = Ld->getBasePtr(); in getIndexFromUnindexedLoad()
49468 static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) { in hasBZHI() argument
49470 (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())); in hasBZHI()
49474 // 'and-load' sequence.
49478 // int array[SIZE] = {0x0, 0x1, 0x3, 0x7, 0xF ..., 2^(SIZE-1) - 1}
49485 MVT VT = Node->getSimpleValueType(0); in combineAndLoadToBZHI() local
49489 if (!hasBZHI(Subtarget, VT)) in combineAndLoadToBZHI()
49494 SDValue N = Node->getOperand(i); in combineAndLoadToBZHI()
49501 const Value *MemOp = Ld->getMemOperand()->getValue(); in combineAndLoadToBZHI()
49507 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) { in combineAndLoadToBZHI()
49508 if (GV->isConstant() && GV->hasDefinitiveInitializer()) { in combineAndLoadToBZHI()
49510 Constant *Init = GV->getInitializer(); in combineAndLoadToBZHI()
49511 Type *Ty = Init->getType(); in combineAndLoadToBZHI()
49513 !Ty->getArrayElementType()->isIntegerTy() || in combineAndLoadToBZHI()
49514 Ty->getArrayElementType()->getScalarSizeInBits() != in combineAndLoadToBZHI()
49515 VT.getSizeInBits() || in combineAndLoadToBZHI()
49516 Ty->getArrayNumElements() > in combineAndLoadToBZHI()
49517 Ty->getArrayElementType()->getScalarSizeInBits()) in combineAndLoadToBZHI()
49521 uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); in combineAndLoadToBZHI()
49524 auto *Elem = cast<ConstantInt>(Init->getAggregateElement(j)); in combineAndLoadToBZHI()
49525 if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) { in combineAndLoadToBZHI()
49533 // Do the transformation (For 32-bit type): in combineAndLoadToBZHI()
49534 // -> (and (load arr[idx]), inp) in combineAndLoadToBZHI()
49535 // <- (and (srl 0xFFFFFFFF, (sub 32, idx))) in combineAndLoadToBZHI()
49537 SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0); in combineAndLoadToBZHI()
49538 SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32); in combineAndLoadToBZHI()
49549 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT); in combineAndLoadToBZHI()
49550 SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub); in combineAndLoadToBZHI()
49552 return DAG.getNode(ISD::AND, dl, VT, Inp, LShr); in combineAndLoadToBZHI()
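// [Illustrative sketch, not from the upstream source] For source like
//   uint32_t f(uint32_t x, unsigned i) { return x & masks[i]; }
// where masks[i] == (1u << i) - 1, the AND of the loaded table entry is
// rebuilt as x & (0xFFFFFFFF >> (32 - i)), a form intended to be matched to
// a single BZHI, removing the table load entirely. ("masks" is a made-up name.)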
49562 // where the setcc will freely zero the upper bits of the k-register. We can replace the
49567 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!"); in combineScalarAndWithMaskSetcc()
49569 EVT VT = N->getValueType(0); in combineScalarAndWithMaskSetcc() local
49573 auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(1)); in combineScalarAndWithMaskSetcc()
49578 assert(!VT.isVector() && "Expected scalar VT!"); in combineScalarAndWithMaskSetcc()
49580 SDValue Src = N->getOperand(0); in combineScalarAndWithMaskSetcc()
49612 !C1->getAPIntValue().isMask(SubVecVT.getVectorNumElements())) in combineScalarAndWithMaskSetcc()
49634 // and cast it back to VT. in combineScalarAndWithMaskSetcc()
49642 return DAG.getZExtOrTrunc(DAG.getBitcast(IntVT, Concat), dl, VT); in combineScalarAndWithMaskSetcc()
49651 // Only do this re-ordering if op has one use. in getBMIMatchingOp()
49666 Op.getOperand(1 - OpIdx)); in getBMIMatchingOp()
49682 // BLSR: (and x, (add x, -1)) in getBMIMatchingOp()
49683 // BLSMSK: (xor x, (add x, -1)) in getBMIMatchingOp()
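// [Illustrative example, not from the upstream source] with x == 0b01011000:
//   x - 1              == 0b01010111
//   BLSR:   x & (x - 1) == 0b01010000   (lowest set bit cleared)
//   BLSMSK: x ^ (x - 1) == 0b00001111   (mask up to and including that bit)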
49692 EVT VT = N->getValueType(0); in combineBMILogicOp() local
49694 if (!Subtarget.hasBMI() || !VT.isScalarInteger() || in combineBMILogicOp()
49695 (VT != MVT::i32 && VT != MVT::i64)) in combineBMILogicOp()
49698 assert(N->getOpcode() == ISD::AND || N->getOpcode() == ISD::XOR); in combineBMILogicOp()
49703 getBMIMatchingOp(N->getOpcode(), DAG, N->getOperand(OpIdx), in combineBMILogicOp()
49704 N->getOperand(1 - OpIdx), 0)) in combineBMILogicOp()
49715 // -> in combineX86SubCmpForFlags()
49721 // -> in combineX86SubCmpForFlags()
49727 SDValue SetCC = N->getOperand(0); in combineX86SubCmpForFlags()
49733 SDNode *BrCond = *Flag->uses().begin(); in combineX86SubCmpForFlags()
49734 if (BrCond->getOpcode() != X86ISD::BRCOND) in combineX86SubCmpForFlags()
49737 if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) != in combineX86SubCmpForFlags()
49744 if (N->getOpcode() == X86ISD::SUB) in combineX86SubCmpForFlags()
49745 X = DAG.getMergeValues({N->getOperand(0), X}, SDLoc(N)); in combineX86SubCmpForFlags()
49749 static_cast<X86::CondCode>(CCN->getAsAPIntVal().getSExtValue()); in combineX86SubCmpForFlags()
49755 SmallVector<SDValue> Ops(BrCond->op_values()); in combineX86SubCmpForFlags()
49756 if (isNullConstant(N->getOperand(1))) in combineX86SubCmpForFlags()
49758 else if (isOneConstant(N->getOperand(1))) in combineX86SubCmpForFlags()
49764 DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops); in combineX86SubCmpForFlags()
49765 // Avoid self-assign error b/c CC1 can be `e/ne`. in combineX86SubCmpForFlags()
49775 // -> in combineAndOrForCcmpCtest()
49779 // -> in combineAndOrForCcmpCtest()
49787 SDValue SetCC0 = N->getOperand(0); in combineAndOrForCcmpCtest()
49788 SDValue SetCC1 = N->getOperand(1); in combineAndOrForCcmpCtest()
49793 auto GetCombineToOpc = [&](SDValue V) -> unsigned { in combineAndOrForCcmpCtest()
49819 bool IsOR = N->getOpcode() == ISD::OR; in combineAndOrForCcmpCtest()
49830 static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue()); in combineAndOrForCcmpCtest()
49854 SDValue N0 = N->getOperand(0); in combineAnd()
49855 SDValue N1 = N->getOperand(1); in combineAnd()
49856 EVT VT = N->getValueType(0); in combineAnd() local
49861 if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { in combineAnd()
49868 // Use a 32-bit and+zext if upper bits known zero. in combineAnd()
49869 if (VT == MVT::i64 && Subtarget.is64Bit() && !isa<ConstantSDNode>(N1)) { in combineAnd()
49880 // Match all-of bool scalar reductions into a bitcast/movmsk + cmp. in combineAnd()
49882 if (VT == MVT::i1) { in combineAnd()
49903 // `(-x << C0) & C1` in combineAnd()
49905 // `(x * (Pow2_Ceil(C1) - (1 << C0))) & C1` in combineAnd()
49918 const APInt &MulC = N01C->getAPIntValue(); in combineAnd()
49919 const APInt &AndC = N1C->getAPIntValue(); in combineAnd()
49920 APInt MulCLowBit = MulC & (-MulC); in combineAnd()
49923 SDValue Neg = DAG.getNegative(N0.getOperand(0), dl, VT); in combineAnd()
49925 assert(MulCLowBitLog != -1 && in combineAnd()
49927 SDValue Shift = DAG.getNode(ISD::SHL, dl, VT, Neg, in combineAnd()
49928 DAG.getConstant(MulCLowBitLog, dl, VT)); in combineAnd()
49929 return DAG.getNode(ISD::AND, dl, VT, Shift, N1); in combineAnd()
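// [Illustrative example, not from the upstream source] with C1 == 0xF
// (Pow2_Ceil == 16) and C0 == 2, the multiplier is 16 - 4 == 12 and
//   (x * 12) & 0xF  ==  ((0 - x) << 2) & 0xF   for all x,
// because 12 == -4 (mod 16) and the mask only keeps bits below 16.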
49970 // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2)) in combineAnd()
49971 // iff c2 is all/no bits mask - i.e. a select-with-zero mask. in combineAnd()
49973 if (VT.isVector() && getTargetConstantFromNode(N1)) { in combineAnd()
49977 DAG.ComputeNumSignBits(N1) == VT.getScalarSizeInBits() && in combineAnd()
49978 N0->hasOneUse() && N0.getOperand(1)->hasOneUse()) { in combineAnd()
49979 SDValue MaskMul = DAG.getNode(ISD::AND, dl, VT, N0.getOperand(1), N1); in combineAnd()
49980 return DAG.getNode(Opc0, dl, VT, N0.getOperand(0), MaskMul); in combineAnd()
49984 // Fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y), COND_B) iff Y is not a constant in combineAnd()
49986 if (isOneConstant(N1) && N0->hasOneUse()) { in combineAnd()
49990 Src.getOperand(0)->hasOneUse()) in combineAnd()
50011 if (!(Subtarget.hasBMI2() && !ContainsNOT && VT.getSizeInBits() >= 32)) in combineAnd()
50013 return DAG.getZExtOrTrunc(getSETCC(X86CC, BT, dl, DAG), dl, VT); in combineAnd()
50017 if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { in combineAnd()
50028 int NumElts = VT.getVectorNumElements(); in combineAnd()
50029 int EltSizeInBits = VT.getScalarSizeInBits(); in combineAnd()
50038 // We can't assume an undef src element gives an undef dst - the in combineAnd()
50059 if (N->getOpcode() != ISD::DELETED_NODE) in combineAnd()
50067 return DAG.getNode(ISD::AND, dl, VT, NewN0 ? NewN0 : N0, in combineAnd()
50072 if ((VT.getScalarSizeInBits() % 8) == 0 && in combineAnd()
50074 isa<ConstantSDNode>(N0.getOperand(1)) && N0->hasOneUse()) { in combineAnd()
50082 if (VT == SrcVecVT.getScalarType() && N0->isOnlyUserOf(SrcVec.getNode()) && in combineAnd()
50105 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Shuffle, in combineAnd()
50116 // Canonicalize OR(AND(X,C),AND(Y,~C)) -> OR(AND(X,C),ANDNP(C,Y))
50119 assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); in canonicalizeBitSelect()
50121 MVT VT = N->getSimpleValueType(0); in canonicalizeBitSelect() local
50122 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in canonicalizeBitSelect()
50123 if (!VT.isVector() || (EltSizeInBits % 8) != 0) in canonicalizeBitSelect()
50126 SDValue N0 = peekThroughBitcasts(N->getOperand(0)); in canonicalizeBitSelect()
50127 SDValue N1 = peekThroughBitcasts(N->getOperand(1)); in canonicalizeBitSelect()
50133 if (!(Subtarget.hasXOP() || useVPTERNLOG(Subtarget, VT) || in canonicalizeBitSelect()
50150 // TODO - add UNDEF elts support. in canonicalizeBitSelect()
50159 if (useVPTERNLOG(Subtarget, VT)) { in canonicalizeBitSelect()
50160 // Emit a VPTERNLOG node directly - 0xCA is the imm code for A?B:C. in canonicalizeBitSelect()
50161 // VPTERNLOG is only available as vXi32/64-bit types. in canonicalizeBitSelect()
50164 MVT::getVectorVT(OpSVT, VT.getSizeInBits() / OpSVT.getSizeInBits()); in canonicalizeBitSelect()
50171 return DAG.getBitcast(VT, Res); in canonicalizeBitSelect()
50174 SDValue X = N->getOperand(0); in canonicalizeBitSelect()
50176 DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)), in canonicalizeBitSelect()
50177 DAG.getBitcast(VT, N1.getOperand(0))); in canonicalizeBitSelect()
50178 return DAG.getNode(ISD::OR, DL, VT, X, Y); in canonicalizeBitSelect()
50183 if (N->getOpcode() != ISD::OR) in matchLogicBlend()
50186 SDValue N0 = N->getOperand(0); in matchLogicBlend()
50187 SDValue N1 = N->getOperand(1); in matchLogicBlend()
50208 // TODO: Attempt to match against AND(XOR(-1,M),Y) as well, waiting for in matchLogicBlend()
50223 assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); in combineLogicBlendIntoPBLENDV()
50225 EVT VT = N->getValueType(0); in combineLogicBlendIntoPBLENDV() local
50226 if (!((VT.is128BitVector() && Subtarget.hasSSE2()) || in combineLogicBlendIntoPBLENDV()
50227 (VT.is256BitVector() && Subtarget.hasInt256()))) in combineLogicBlendIntoPBLENDV()
50249 if (SDValue Res = combineLogicBlendIntoConditionalNegate(VT, Mask, X, Y, DL, in combineLogicBlendIntoPBLENDV()
50261 MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8; in combineLogicBlendIntoPBLENDV()
50267 return DAG.getBitcast(VT, Mask); in combineLogicBlendIntoPBLENDV()
50278 EVT VT = Cmp.getOperand(0).getValueType(); in lowerX86CmpEqZeroToCtlzSrl() local
50279 unsigned Log2b = Log2_32(VT.getSizeInBits()); in lowerX86CmpEqZeroToCtlzSrl()
50281 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0)); in lowerX86CmpEqZeroToCtlzSrl()
50282 // The result of the shift is true or false, and on X86, the 32-bit in lowerX86CmpEqZeroToCtlzSrl()
50300 if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast()) in combineOrCmpEqZeroToCtlzSrl()
50304 return (N->getOpcode() == ISD::OR && N->hasOneUse()); in combineOrCmpEqZeroToCtlzSrl()
50307 // Check the zero extend is extending to 32-bit or more. The code generated by in combineOrCmpEqZeroToCtlzSrl()
50308 // srl(ctlz) for 16-bit or less variants of the pattern would require extra in combineOrCmpEqZeroToCtlzSrl()
50310 if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) || in combineOrCmpEqZeroToCtlzSrl()
50311 !isORCandidate(N->getOperand(0))) in combineOrCmpEqZeroToCtlzSrl()
50316 return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() && in combineOrCmpEqZeroToCtlzSrl()
50317 X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E && in combineOrCmpEqZeroToCtlzSrl()
50318 N->getOperand(1).getOpcode() == X86ISD::CMP && in combineOrCmpEqZeroToCtlzSrl()
50319 isNullConstant(N->getOperand(1).getOperand(1)) && in combineOrCmpEqZeroToCtlzSrl()
50320 N->getOperand(1).getValueType().bitsGE(MVT::i32); in combineOrCmpEqZeroToCtlzSrl()
50323 SDNode *OR = N->getOperand(0).getNode(); in combineOrCmpEqZeroToCtlzSrl()
50324 SDValue LHS = OR->getOperand(0); in combineOrCmpEqZeroToCtlzSrl()
50325 SDValue RHS = OR->getOperand(1); in combineOrCmpEqZeroToCtlzSrl()
50332 OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode(); in combineOrCmpEqZeroToCtlzSrl()
50333 LHS = OR->getOperand(0); in combineOrCmpEqZeroToCtlzSrl()
50334 RHS = OR->getOperand(1); in combineOrCmpEqZeroToCtlzSrl()
50358 LHS = OR->getOperand(0); in combineOrCmpEqZeroToCtlzSrl()
50359 RHS = OR->getOperand(1); in combineOrCmpEqZeroToCtlzSrl()
50361 if (RHS->getOpcode() == ISD::OR) in combineOrCmpEqZeroToCtlzSrl()
50369 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); in combineOrCmpEqZeroToCtlzSrl()
50375 if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse()) in foldMaskedMergeImpl()
50377 SDValue NotOp = And0_L->getOperand(0); in foldMaskedMergeImpl()
50384 // --> ((And0_R ^ And1_R) & NotOp) ^ And0_R in foldMaskedMergeImpl()
50385 EVT VT = And1_L->getValueType(0); in foldMaskedMergeImpl() local
50386 SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R); in foldMaskedMergeImpl()
50387 SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R); in foldMaskedMergeImpl()
50388 SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp); in foldMaskedMergeImpl()
50389 SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R); in foldMaskedMergeImpl()
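// [Illustrative note, not from the upstream source] This is the usual
// masked-merge identity: where NotOp has a 1 bit the result picks And1_R,
// where it has a 0 bit it picks And0_R, matching
// (~NotOp & And0_R) | (NotOp & And1_R). And0_R is frozen because it is now
// used twice.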
50396 /// "and-not" operation. This function is intended to be called from a
50399 // Note that masked-merge variants using XOR or ADD expressions are in foldMaskedMerge()
50401 assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node"); in foldMaskedMerge()
50402 SDValue N0 = Node->getOperand(0); in foldMaskedMerge()
50403 if (N0->getOpcode() != ISD::AND || !N0->hasOneUse()) in foldMaskedMerge()
50405 SDValue N1 = Node->getOperand(1); in foldMaskedMerge()
50406 if (N1->getOpcode() != ISD::AND || !N1->hasOneUse()) in foldMaskedMerge()
50410 SDValue N00 = N0->getOperand(0); in foldMaskedMerge()
50411 SDValue N01 = N0->getOperand(1); in foldMaskedMerge()
50412 SDValue N10 = N1->getOperand(0); in foldMaskedMerge()
50413 SDValue N11 = N1->getOperand(1); in foldMaskedMerge()
50429 static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT, in combineAddOrSubToADCOrSBB() argument
50433 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) in combineAddOrSubToADCOrSBB()
50436 // Look through a one-use zext. in combineAddOrSubToADCOrSBB()
50453 // If X is -1 or 0, then we have an opportunity to avoid constants required in in combineAddOrSubToADCOrSBB()
50457 if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) || in combineAddOrSubToADCOrSBB()
50458 (IsSub && CC == X86::COND_B && ConstantX->isZero())) { in combineAddOrSubToADCOrSBB()
50459 // This is a complicated way to get -1 or 0 from the carry flag: in combineAddOrSubToADCOrSBB()
50460 // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax in combineAddOrSubToADCOrSBB()
50461 // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax in combineAddOrSubToADCOrSBB()
50462 return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, in combineAddOrSubToADCOrSBB()
50467 if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) || in combineAddOrSubToADCOrSBB()
50468 (IsSub && CC == X86::COND_A && ConstantX->isZero())) { in combineAddOrSubToADCOrSBB()
50473 // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB in combineAddOrSubToADCOrSBB()
50474 // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB in combineAddOrSubToADCOrSBB()
50476 X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), in combineAddOrSubToADCOrSBB()
50479 return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, in combineAddOrSubToADCOrSBB()
50487 // X + SETB Z --> adc X, 0 in combineAddOrSubToADCOrSBB()
50488 // X - SETB Z --> sbb X, 0 in combineAddOrSubToADCOrSBB()
50490 DAG.getVTList(VT, MVT::i32), X, in combineAddOrSubToADCOrSBB()
50491 DAG.getConstant(0, DL, VT), EFLAGS); in combineAddOrSubToADCOrSBB()
50504 if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && in combineAddOrSubToADCOrSBB()
50508 DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), in combineAddOrSubToADCOrSBB()
50512 DAG.getVTList(VT, MVT::i32), X, in combineAddOrSubToADCOrSBB()
50513 DAG.getConstant(0, DL, VT), NewEFLAGS); in combineAddOrSubToADCOrSBB()
50518 // X + SETAE --> sbb X, -1 in combineAddOrSubToADCOrSBB()
50519 // X - SETAE --> adc X, -1 in combineAddOrSubToADCOrSBB()
50521 DAG.getVTList(VT, MVT::i32), X, in combineAddOrSubToADCOrSBB()
50522 DAG.getConstant(-1, DL, VT), EFLAGS); in combineAddOrSubToADCOrSBB()
50526 // X + SETBE --> sbb X, -1 in combineAddOrSubToADCOrSBB()
50527 // X - SETBE --> adc X, -1 in combineAddOrSubToADCOrSBB()
50534 if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() && in combineAddOrSubToADCOrSBB()
50538 DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), in combineAddOrSubToADCOrSBB()
50542 DAG.getVTList(VT, MVT::i32), X, in combineAddOrSubToADCOrSBB()
50543 DAG.getConstant(-1, DL, VT), NewEFLAGS); in combineAddOrSubToADCOrSBB()
50558 // If X is -1 or 0, then we have an opportunity to avoid constants required in in combineAddOrSubToADCOrSBB()
50561 // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with in combineAddOrSubToADCOrSBB()
50563 // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z) in combineAddOrSubToADCOrSBB()
50564 // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z) in combineAddOrSubToADCOrSBB()
50565 if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) || in combineAddOrSubToADCOrSBB()
50566 (!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) { in combineAddOrSubToADCOrSBB()
50570 return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, in combineAddOrSubToADCOrSBB()
50575 // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb' in combineAddOrSubToADCOrSBB()
50577 // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1) in combineAddOrSubToADCOrSBB()
50578 // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1) in combineAddOrSubToADCOrSBB()
50579 if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) || in combineAddOrSubToADCOrSBB()
50580 (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) { in combineAddOrSubToADCOrSBB()
50584 return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, in combineAddOrSubToADCOrSBB()
50596 SDVTList VTs = DAG.getVTList(VT, MVT::i32); in combineAddOrSubToADCOrSBB()
50598 // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1) in combineAddOrSubToADCOrSBB()
50599 // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1) in combineAddOrSubToADCOrSBB()
50602 DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1)); in combineAddOrSubToADCOrSBB()
50604 // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1) in combineAddOrSubToADCOrSBB()
50605 // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1) in combineAddOrSubToADCOrSBB()
50607 DAG.getConstant(0, DL, VT), Cmp1.getValue(1)); in combineAddOrSubToADCOrSBB()
50615 bool IsSub = N->getOpcode() == ISD::SUB; in combineAddOrSubToADCOrSBB()
50616 SDValue X = N->getOperand(0); in combineAddOrSubToADCOrSBB()
50617 SDValue Y = N->getOperand(1); in combineAddOrSubToADCOrSBB()
50618 EVT VT = N->getValueType(0); in combineAddOrSubToADCOrSBB() local
50620 if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, X, Y, DAG)) in combineAddOrSubToADCOrSBB()
50624 if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, Y, X, DAG)) { in combineAddOrSubToADCOrSBB()
50626 ADCOrSBB = DAG.getNegative(ADCOrSBB, DL, VT); in combineAddOrSubToADCOrSBB()
50635 assert((N->getOpcode() == ISD::XOR || N->getOpcode() == ISD::OR) && in combineOrXorWithSETCC()
50647 bool IsSub = N->getOpcode() == ISD::XOR; in combineOrXorWithSETCC()
50648 bool N1COdd = N1C->getZExtValue() & 1; in combineOrXorWithSETCC()
50651 EVT VT = N->getValueType(0); in combineOrXorWithSETCC() local
50652 if (SDValue R = combineAddOrSubToADCOrSBB(IsSub, DL, VT, N1, N0, DAG)) in combineOrXorWithSETCC()
50658 // not(pcmpeq(and(X,CstPow2),0)) -> pcmpeq(and(X,CstPow2),CstPow2) in combineOrXorWithSETCC()
50659 if (N->getOpcode() == ISD::XOR && N0.getOpcode() == X86ISD::PCMPEQ && in combineOrXorWithSETCC()
50663 MVT VT = N->getSimpleValueType(0); in combineOrXorWithSETCC() local
50667 VT.getScalarSizeInBits(), UndefElts, in combineOrXorWithSETCC()
50674 return DAG.getNode(X86ISD::PCMPEQ, SDLoc(N), VT, N0.getOperand(0), in combineOrXorWithSETCC()
50685 SDValue N0 = N->getOperand(0); in combineOr()
50686 SDValue N1 = N->getOperand(1); in combineOr()
50687 EVT VT = N->getValueType(0); in combineOr() local
50692 if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { in combineOr()
50699 // Match any-of bool scalar reductions into a bitcast/movmsk + cmp. in combineOr()
50701 if (VT == MVT::i1) { in combineOr()
50749 // (0 - SetCC) | C -> (zext (not SetCC)) * (C + 1) - 1 if we can get a LEA out of it. in combineOr()
50750 if ((VT == MVT::i32 || VT == MVT::i64) && in combineOr()
50759 uint64_t Val = CN->getZExtValue(); in combineOr()
50765 SDValue R = DAG.getZExtOrTrunc(NotCond, dl, VT); in combineOr()
50766 R = DAG.getNode(ISD::MUL, dl, VT, R, DAG.getConstant(Val + 1, dl, VT)); in combineOr()
50767 R = DAG.getNode(ISD::SUB, dl, VT, R, DAG.getConstant(1, dl, VT)); in combineOr()
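// [Illustrative example, not from the upstream source] with C == 3:
//   SetCC == 1: (0 - 1) | 3 == -1, and (zext !1) * 4 - 1 == -1
//   SetCC == 0: (0 - 0) | 3 ==  3, and (zext !0) * 4 - 1 ==  3
// When C + 1 is an LEA-friendly scale the multiply costs no extra instruction.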
50774 // Combine OR(X,KSHIFTL(Y,Elts/2)) -> CONCAT_VECTORS(X,Y) == KUNPCK(X,Y). in combineOr()
50775 // Combine OR(KSHIFTL(X,Elts/2),Y) -> CONCAT_VECTORS(Y,X) == KUNPCK(Y,X). in combineOr()
50776 // iff the upper elements of the non-shifted arg are zero. in combineOr()
50779 unsigned NumElts = VT.getVectorNumElements(); in combineOr()
50786 ISD::CONCAT_VECTORS, dl, VT, in combineOr()
50794 ISD::CONCAT_VECTORS, dl, VT, in combineOr()
50800 if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { in combineOr()
50811 int NumElts = VT.getVectorNumElements(); in combineOr()
50812 int EltSizeInBits = VT.getScalarSizeInBits(); in combineOr()
50824 if (N->getOpcode() != ISD::DELETED_NODE) in combineOr()
50831 if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1) in combineOr()
50842 /// XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
50844 /// SETGT(X, -1)
50847 EVT ResultType = N->getValueType(0); in foldXorTruncShiftIntoCmp()
50851 SDValue N0 = N->getOperand(0); in foldXorTruncShiftIntoCmp()
50852 SDValue N1 = N->getOperand(1); in foldXorTruncShiftIntoCmp()
50874 Shift.getConstantOperandAPInt(1) != (ShiftTy.getSizeInBits() - 1)) in foldXorTruncShiftIntoCmp()
50877 // Create a greater-than comparison against -1. in foldXorTruncShiftIntoCmp()
50887 DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT); in foldXorTruncShiftIntoCmp()
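// [Illustrative note, not from the upstream source] For i64, (X >> 63) is 1
// exactly when X is negative, so xor'ing the truncated bit with 1 yields
// "X is non-negative", which is precisely setgt X, -1.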
50894 /// xor (sra X, elt_size(X)-1), -1
50896 /// pcmpgt X, -1
50902 EVT VT = N->getValueType(0); in foldVectorXorShiftIntoCmp() local
50903 if (!VT.isSimple()) in foldVectorXorShiftIntoCmp()
50906 switch (VT.getSimpleVT().SimpleTy) { in foldVectorXorShiftIntoCmp()
50907 // clang-format off in foldVectorXorShiftIntoCmp()
50917 // clang-format on in foldVectorXorShiftIntoCmp()
50922 SDValue Shift = N->getOperand(0); in foldVectorXorShiftIntoCmp()
50923 SDValue Ones = N->getOperand(1); in foldVectorXorShiftIntoCmp()
50932 ShiftAmt->getAPIntValue() != (Shift.getScalarValueSizeInBits() - 1)) in foldVectorXorShiftIntoCmp()
50935 // Create a greater-than comparison against -1. We don't use the more obvious in foldVectorXorShiftIntoCmp()
50936 // greater-than-or-equal-to-zero because SSE/AVX don't have that instruction. in foldVectorXorShiftIntoCmp()
50937 return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT); in foldVectorXorShiftIntoCmp()
50956 static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG, in detectUSatPattern() argument
50960 // Saturation with truncation. We truncate from InVT to VT. in detectUSatPattern()
50961 assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() && in detectUSatPattern()
50965 auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue { in detectUSatPattern()
50976 if (C2.isMask(VT.getScalarSizeInBits())) in detectUSatPattern()
50981 if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits())) in detectUSatPattern()
50986 if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) && in detectUSatPattern()
51003 static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) { in detectSSatPattern() argument
51004 unsigned NumDstBits = VT.getScalarSizeInBits(); in detectSSatPattern()
51009 const APInt &Limit) -> SDValue { in detectSSatPattern()
51037 static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, in combineTruncateWithSat() argument
51040 if (!Subtarget.hasSSE2() || !VT.isVector()) in combineTruncateWithSat()
51043 EVT SVT = VT.getVectorElementType(); in combineTruncateWithSat()
51047 // If we're clamping a signed 32-bit vector to 0-255 and the 32-bit vector is in combineTruncateWithSat()
51048 // split across two registers, we can use a packusdw+perm to clamp to 0-65535 in combineTruncateWithSat()
51050 // clip to 0-255. in combineTruncateWithSat()
51052 InVT == MVT::v16i32 && VT == MVT::v16i8) { in combineTruncateWithSat()
51053 if (SDValue USatVal = detectSSatPattern(In, VT, true)) { in combineTruncateWithSat()
51058 return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid); in combineTruncateWithSat()
51064 // For 256-bit or smaller vectors, we require VLX. in combineTruncateWithSat()
51066 // If the result type is 256-bits or larger and we have disabled 512-bit in combineTruncateWithSat()
51072 !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256); in combineTruncateWithSat()
51074 if (!PreferAVX512 && VT.getVectorNumElements() > 1 && in combineTruncateWithSat()
51075 isPowerOf2_32(VT.getVectorNumElements()) && in combineTruncateWithSat()
51078 if (SDValue USatVal = detectSSatPattern(In, VT, true)) { in combineTruncateWithSat()
51079 // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW). in combineTruncateWithSat()
51081 EVT MidVT = VT.changeVectorElementType(MVT::i16); in combineTruncateWithSat()
51085 SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG, in combineTruncateWithSat()
51090 return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG, in combineTruncateWithSat()
51093 if (SDValue SSatVal = detectSSatPattern(In, VT)) in combineTruncateWithSat()
51094 return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG, in combineTruncateWithSat()
51104 if (SDValue SSatVal = detectSSatPattern(In, VT)) { in combineTruncateWithSat()
51107 } else if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL)) { in combineTruncateWithSat()
51112 unsigned ResElts = VT.getVectorNumElements(); in combineTruncateWithSat()
51129 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, in combineTruncateWithSat()
51142 EVT RegVT = Ld->getValueType(0); in combineConstantPoolLoads()
51143 SDValue Ptr = Ld->getBasePtr(); in combineConstantPoolLoads()
51144 SDValue Chain = Ld->getChain(); in combineConstantPoolLoads()
51145 ISD::LoadExtType Ext = Ld->getExtensionType(); in combineConstantPoolLoads()
51147 if (Ext != ISD::NON_EXTLOAD || !Subtarget.hasAVX() || !Ld->isSimple()) in combineConstantPoolLoads()
51170 for (SDNode *User : Chain->uses()) { in combineConstantPoolLoads()
51173 (User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD || in combineConstantPoolLoads()
51174 User->getOpcode() == X86ISD::VBROADCAST_LOAD || in combineConstantPoolLoads()
51176 UserLd->getChain() == Chain && !User->hasAnyUseOfValue(1) && in combineConstantPoolLoads()
51177 User->getValueSizeInBits(0).getFixedValue() > in combineConstantPoolLoads()
51179 EVT UserVT = User->getValueType(0); in combineConstantPoolLoads()
51180 SDValue UserPtr = UserLd->getBasePtr(); in combineConstantPoolLoads()
51186 unsigned LdSize = LdC->getType()->getPrimitiveSizeInBits(); in combineConstantPoolLoads()
51187 unsigned UserSize = UserC->getType()->getPrimitiveSizeInBits(); in combineConstantPoolLoads()
51216 EVT RegVT = Ld->getValueType(0); in combineLoad()
51217 EVT MemVT = Ld->getMemoryVT(); in combineLoad()
51221 // For chips with slow 32-byte unaligned loads, break the 32-byte operation in combineLoad()
51222 // into two 16-byte operations. Also split non-temporal aligned loads on in combineLoad()
51223 // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads. in combineLoad()
51224 ISD::LoadExtType Ext = Ld->getExtensionType(); in combineLoad()
51228 ((Ld->isNonTemporal() && !Subtarget.hasInt256() && in combineLoad()
51229 Ld->getAlign() >= Align(16)) || in combineLoad()
51231 *Ld->getMemOperand(), &Fast) && in combineLoad()
51238 SDValue Ptr1 = Ld->getBasePtr(); in combineLoad()
51244 DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(), in combineLoad()
51245 Ld->getOriginalAlign(), in combineLoad()
51246 Ld->getMemOperand()->getFlags()); in combineLoad()
51247 SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr2, in combineLoad()
51248 Ld->getPointerInfo().getWithOffset(HalfOffset), in combineLoad()
51249 Ld->getOriginalAlign(), in combineLoad()
51250 Ld->getMemOperand()->getFlags()); in combineLoad()
51258 // Bool vector load - attempt to cast to an integer, as we have good in combineLoad()
51265 SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(), in combineLoad()
51266 Ld->getPointerInfo(), in combineLoad()
51267 Ld->getOriginalAlign(), in combineLoad()
51268 Ld->getMemOperand()->getFlags()); in combineLoad()
51276 if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() && in combineLoad()
51278 SDValue Ptr = Ld->getBasePtr(); in combineLoad()
51279 SDValue Chain = Ld->getChain(); in combineLoad()
51280 for (SDNode *User : Chain->uses()) { in combineLoad()
51283 User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD && in combineLoad()
51284 UserLd->getChain() == Chain && UserLd->getBasePtr() == Ptr && in combineLoad()
51285 UserLd->getMemoryVT().getSizeInBits() == MemVT.getSizeInBits() && in combineLoad()
51286 !User->hasAnyUseOfValue(1) && in combineLoad()
51287 User->getValueSizeInBits(0).getFixedValue() > in combineLoad()
51301 unsigned AddrSpace = Ld->getAddressSpace(); in combineLoad()
51305 if (PtrVT != Ld->getBasePtr().getSimpleValueType()) { in combineLoad()
51307 DAG.getAddrSpaceCast(dl, PtrVT, Ld->getBasePtr(), AddrSpace, 0); in combineLoad()
51308 return DAG.getExtLoad(Ext, dl, RegVT, Ld->getChain(), Cast, in combineLoad()
51309 Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(), in combineLoad()
51310 Ld->getMemOperand()->getFlags()); in combineLoad()
51319 /// Otherwise, return -1.
51329 if (!BV || BV->getValueType(0).getVectorElementType() != MVT::i1) in getOneTrueElt()
51330 return -1; in getOneTrueElt()
51332 int TrueIndex = -1; in getOneTrueElt()
51333 unsigned NumElts = BV->getValueType(0).getVectorNumElements(); in getOneTrueElt()
51335 const SDValue &Op = BV->getOperand(i); in getOneTrueElt()
51340 return -1; in getOneTrueElt()
51341 if (ConstNode->getAPIntValue().countr_one() >= 1) { in getOneTrueElt()
51344 return -1; in getOneTrueElt()
51359 int TrueMaskElt = getOneTrueElt(MaskedOp->getMask()); in getParamsForOneTrueMaskedElt()
51365 EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType(); in getParamsForOneTrueMaskedElt()
51367 Addr = MaskedOp->getBasePtr(); in getParamsForOneTrueMaskedElt()
51375 Alignment = commonAlignment(MaskedOp->getOriginalAlign(), in getParamsForOneTrueMaskedElt()
51380 /// If exactly one element of the mask is set for a non-extending masked load,
51382 /// Note: It is expected that the degenerate cases of an all-zeros or all-ones
51388 assert(ML->isUnindexed() && "Unexpected indexed masked load!"); in reduceMaskedLoadToScalarLoad()
51389 // TODO: This is not x86-specific, so it could be lifted to DAGCombiner. in reduceMaskedLoadToScalarLoad()
51402 EVT VT = ML->getValueType(0); in reduceMaskedLoadToScalarLoad() local
51403 EVT EltVT = VT.getVectorElementType(); in reduceMaskedLoadToScalarLoad()
51405 EVT CastVT = VT; in reduceMaskedLoadToScalarLoad()
51408 CastVT = VT.changeVectorElementType(EltVT); in reduceMaskedLoadToScalarLoad()
51412 DAG.getLoad(EltVT, DL, ML->getChain(), Addr, in reduceMaskedLoadToScalarLoad()
51413 ML->getPointerInfo().getWithOffset(Offset), in reduceMaskedLoadToScalarLoad()
51414 Alignment, ML->getMemOperand()->getFlags()); in reduceMaskedLoadToScalarLoad()
51416 SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru()); in reduceMaskedLoadToScalarLoad()
51421 Insert = DAG.getBitcast(VT, Insert); in reduceMaskedLoadToScalarLoad()
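
reduceMaskedLoadToScalarLoad rewrites a masked load whose mask has exactly one set element into one scalar load plus an insert into the pass-through value. A minimal scalar model of that rewrite, assuming a 4-element integer vector for illustration:

#include <array>
#include <cassert>

// Masked load whose mask has exactly one true element: do one scalar load
// from that element's address and insert it into the pass-through vector.
std::array<int, 4> one_elt_masked_load(const int *mem, int true_idx,
                                       std::array<int, 4> pass_thru) {
  pass_thru[true_idx] = mem[true_idx];
  return pass_thru;
}

int main() {
  int mem[4] = {10, 20, 30, 40};
  std::array<int, 4> pt = {-1, -1, -1, -1};
  std::array<int, 4> r = one_elt_masked_load(mem, 2, pt);
  assert(r[0] == -1 && r[1] == -1 && r[2] == 30 && r[3] == -1);
}
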
51428 assert(ML->isUnindexed() && "Unexpected indexed masked load!"); in combineMaskedLoadConstantMask()
51429 if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode())) in combineMaskedLoadConstantMask()
51433 EVT VT = ML->getValueType(0); in combineMaskedLoadConstantMask() local
51438 unsigned NumElts = VT.getVectorNumElements(); in combineMaskedLoadConstantMask()
51439 BuildVectorSDNode *MaskBV = cast<BuildVectorSDNode>(ML->getMask()); in combineMaskedLoadConstantMask()
51440 bool LoadFirstElt = !isNullConstant(MaskBV->getOperand(0)); in combineMaskedLoadConstantMask()
51441 bool LoadLastElt = !isNullConstant(MaskBV->getOperand(NumElts - 1)); in combineMaskedLoadConstantMask()
51443 SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(), in combineMaskedLoadConstantMask()
51444 ML->getMemOperand()); in combineMaskedLoadConstantMask()
51445 SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, in combineMaskedLoadConstantMask()
51446 ML->getPassThru()); in combineMaskedLoadConstantMask()
51452 // (for example, vblendvps -> vblendps). in combineMaskedLoadConstantMask()
51454 // Don't try this if the pass-through operand is already undefined. That would in combineMaskedLoadConstantMask()
51456 if (ML->getPassThru().isUndef()) in combineMaskedLoadConstantMask()
51459 if (ISD::isBuildVectorAllZeros(ML->getPassThru().getNode())) in combineMaskedLoadConstantMask()
51462 // The new masked load has an undef pass-through operand. The select uses the in combineMaskedLoadConstantMask()
51463 // original pass-through operand. in combineMaskedLoadConstantMask()
51465 VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(), in combineMaskedLoadConstantMask()
51466 DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(), in combineMaskedLoadConstantMask()
51467 ML->getAddressingMode(), ML->getExtensionType()); in combineMaskedLoadConstantMask()
51468 SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, in combineMaskedLoadConstantMask()
51469 ML->getPassThru()); in combineMaskedLoadConstantMask()
51480 if (Mld->isExpandingLoad()) in combineMaskedLoad()
51483 if (Mld->getExtensionType() == ISD::NON_EXTLOAD) { in combineMaskedLoad()
51494 // If the mask value has been legalized to a non-boolean vector, try to in combineMaskedLoad()
51496 SDValue Mask = Mld->getMask(); in combineMaskedLoad()
51498 EVT VT = Mld->getValueType(0); in combineMaskedLoad() local
51500 APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits())); in combineMaskedLoad()
51502 if (N->getOpcode() != ISD::DELETED_NODE) in combineMaskedLoad()
51509 VT, SDLoc(N), Mld->getChain(), Mld->getBasePtr(), Mld->getOffset(), in combineMaskedLoad()
51510 NewMask, Mld->getPassThru(), Mld->getMemoryVT(), Mld->getMemOperand(), in combineMaskedLoad()
51511 Mld->getAddressingMode(), Mld->getExtensionType()); in combineMaskedLoad()
51517 /// If exactly one element of the mask is set for a non-truncating masked store,
51519 /// Note: It is expected that the degenerate cases of an all-zeros or all-ones
51524 // TODO: This is not x86-specific, so it could be lifted to DAGCombiner. in reduceMaskedStoreToScalarStore()
51536 SDValue Value = MS->getValue(); in reduceMaskedStoreToScalarStore()
51537 EVT VT = Value.getValueType(); in reduceMaskedStoreToScalarStore() local
51538 EVT EltVT = VT.getVectorElementType(); in reduceMaskedStoreToScalarStore()
51541 EVT CastVT = VT.changeVectorElementType(EltVT); in reduceMaskedStoreToScalarStore()
51548 return DAG.getStore(MS->getChain(), DL, Extract, Addr, in reduceMaskedStoreToScalarStore()
51549 MS->getPointerInfo().getWithOffset(Offset), in reduceMaskedStoreToScalarStore()
51550 Alignment, MS->getMemOperand()->getFlags()); in reduceMaskedStoreToScalarStore()
51557 if (Mst->isCompressingStore()) in combineMaskedStore()
51560 EVT VT = Mst->getValue().getValueType(); in combineMaskedStore() local
51564 if (Mst->isTruncatingStore()) in combineMaskedStore()
51570 // If the mask value has been legalized to a non-boolean vector, try to in combineMaskedStore()
51572 SDValue Mask = Mst->getMask(); in combineMaskedStore()
51574 APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits())); in combineMaskedStore()
51576 if (N->getOpcode() != ISD::DELETED_NODE) in combineMaskedStore()
51582 return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Mst->getValue(), in combineMaskedStore()
51583 Mst->getBasePtr(), Mst->getOffset(), NewMask, in combineMaskedStore()
51584 Mst->getMemoryVT(), Mst->getMemOperand(), in combineMaskedStore()
51585 Mst->getAddressingMode()); in combineMaskedStore()
51588 SDValue Value = Mst->getValue(); in combineMaskedStore()
51589 if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() && in combineMaskedStore()
51591 Mst->getMemoryVT())) { in combineMaskedStore()
51592 return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), in combineMaskedStore()
51593 Mst->getBasePtr(), Mst->getOffset(), Mask, in combineMaskedStore()
51594 Mst->getMemoryVT(), Mst->getMemOperand(), in combineMaskedStore()
51595 Mst->getAddressingMode(), true); in combineMaskedStore()
51605 EVT StVT = St->getMemoryVT(); in combineStore()
51607 SDValue StoredVal = St->getValue(); in combineStore()
51608 EVT VT = StoredVal.getValueType(); in combineStore() local
51612 if (!Subtarget.hasAVX512() && VT == StVT && VT.isVector() && in combineStore()
51613 VT.getVectorElementType() == MVT::i1) { in combineStore()
51615 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements()); in combineStore()
51618 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), in combineStore()
51619 St->getPointerInfo(), St->getOriginalAlign(), in combineStore()
51620 St->getMemOperand()->getFlags()); in combineStore()
51624 // This will avoid a copy to k-register. in combineStore()
51625 if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() && in combineStore()
51631 return DAG.getStore(St->getChain(), dl, Val, in combineStore()
51632 St->getBasePtr(), St->getPointerInfo(), in combineStore()
51633 St->getOriginalAlign(), in combineStore()
51634 St->getMemOperand()->getFlags()); in combineStore()
51638 if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && in combineStore()
51640 unsigned NumConcats = 8 / VT.getVectorNumElements(); in combineStore()
51642 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT)); in combineStore()
51645 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), in combineStore()
51646 St->getPointerInfo(), St->getOriginalAlign(), in combineStore()
51647 St->getMemOperand()->getFlags()); in combineStore()
51651 if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 || in combineStore()
51652 VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) && in combineStore()
51654   // If it's a v64i1 store without 64-bit support, we need two stores. in combineStore()
51655 if (!DCI.isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) { in combineStore()
51657 StoredVal->ops().slice(0, 32)); in combineStore()
51660 StoredVal->ops().slice(32, 32)); in combineStore()
51663 SDValue Ptr0 = St->getBasePtr(); in combineStore()
51667 DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(), in combineStore()
51668 St->getOriginalAlign(), in combineStore()
51669 St->getMemOperand()->getFlags()); in combineStore()
51671 DAG.getStore(St->getChain(), dl, Hi, Ptr1, in combineStore()
51672 St->getPointerInfo().getWithOffset(4), in combineStore()
51673 St->getOriginalAlign(), in combineStore()
51674 St->getMemOperand()->getFlags()); in combineStore()
51679 return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), in combineStore()
51680 St->getPointerInfo(), St->getOriginalAlign(), in combineStore()
51681 St->getMemOperand()->getFlags()); in combineStore()
51684 // If we are saving a 32-byte vector and 32-byte stores are slow, such as on in combineStore()
51685 // Sandy Bridge, perform two 16-byte stores. in combineStore()
51687 if (VT.is256BitVector() && StVT == VT && in combineStore()
51688 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, in combineStore()
51689 *St->getMemOperand(), &Fast) && in combineStore()
51691 unsigned NumElems = VT.getVectorNumElements(); in combineStore()
51698 // Split under-aligned vector non-temporal stores. in combineStore()
51699 if (St->isNonTemporal() && StVT == VT && in combineStore()
51700 St->getAlign().value() < VT.getStoreSize()) { in combineStore()
51701 // ZMM/YMM nt-stores - either it can be stored as a series of shorter in combineStore()
51703 if (VT.is256BitVector() || VT.is512BitVector()) { in combineStore()
51704 unsigned NumElems = VT.getVectorNumElements(); in combineStore()
51710 // XMM nt-stores - scalarize this to f64 nt-stores on SSE4A, else i32/i64 in combineStore()
51712 if (VT.is128BitVector() && Subtarget.hasSSE2()) { in combineStore()
51720 // Try to optimize v16i16->v16i8 truncating stores when BWI is not in combineStore()
51722 if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() && in combineStore()
51723 St->getValue().getOpcode() == ISD::TRUNCATE && in combineStore()
51724 St->getValue().getOperand(0).getValueType() == MVT::v16i16 && in combineStore()
51726 St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) { in combineStore()
51728 St->getValue().getOperand(0)); in combineStore()
51729 return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(), in combineStore()
51730 MVT::v16i8, St->getMemOperand()); in combineStore()
51734 if (!St->isTruncatingStore() && in combineStore()
51738 TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) { in combineStore()
51740 return EmitTruncSStore(IsSigned, St->getChain(), in combineStore()
51741 dl, StoredVal.getOperand(0), St->getBasePtr(), in combineStore()
51742 VT, St->getMemOperand(), DAG); in combineStore()
51746 if (!St->isTruncatingStore()) { in combineStore()
51766 if (NumTruncBits == VT.getSizeInBits() && in combineStore()
51768 return DAG.getTruncStore(St->getChain(), dl, Src, St->getBasePtr(), in combineStore()
51769 TruncVT, St->getMemOperand()); in combineStore()
51778 if (St->isTruncatingStore() && VT.isVector()) { in combineStore()
51779 if (TLI.isTruncStoreLegal(VT, StVT)) { in combineStore()
51780 if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT())) in combineStore()
51781 return EmitTruncSStore(true /* Signed saturation */, St->getChain(), in combineStore()
51782 dl, Val, St->getBasePtr(), in combineStore()
51783 St->getMemoryVT(), St->getMemOperand(), DAG); in combineStore()
51784 if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(), in combineStore()
51786 return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), in combineStore()
51787 dl, Val, St->getBasePtr(), in combineStore()
51788 St->getMemoryVT(), St->getMemOperand(), DAG); in combineStore()
51795 unsigned AddrSpace = St->getAddressSpace(); in combineStore()
51799 if (PtrVT != St->getBasePtr().getSimpleValueType()) { in combineStore()
51801 DAG.getAddrSpaceCast(dl, PtrVT, St->getBasePtr(), AddrSpace, 0); in combineStore()
51803 St->getChain(), dl, StoredVal, Cast, St->getPointerInfo(), StVT, in combineStore()
51804 St->getOriginalAlign(), St->getMemOperand()->getFlags(), in combineStore()
51805 St->getAAInfo()); in combineStore()
51809 // Turn load->store of MMX types into GPR load/stores. This avoids clobbering in combineStore()
51814 // Similarly, turn load->store of i64 into double load/stores in 32-bit mode. in combineStore()
51815 if (VT.getSizeInBits() != 64) in combineStore()
51826 if (VT == MVT::i64 && isa<LoadSDNode>(St->getValue()) && in combineStore()
51827 cast<LoadSDNode>(St->getValue())->isSimple() && in combineStore()
51828 St->getChain().hasOneUse() && St->isSimple()) { in combineStore()
51829 auto *Ld = cast<LoadSDNode>(St->getValue()); in combineStore()
51835 if (!Ld->hasNUsesOfValue(1, 0)) in combineStore()
51841 SDValue NewLd = DAG.getLoad(MVT::f64, LdDL, Ld->getChain(), in combineStore()
51842 Ld->getBasePtr(), Ld->getMemOperand()); in combineStore()
51846 return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(), in combineStore()
51847 St->getMemOperand()); in combineStore()
51850 // This is similar to the above case, but here we handle a scalar 64-bit in combineStore()
51851 // integer store that is extracted from a vector on a 32-bit target. in combineStore()
51852 // If we have SSE2, then we can treat it like a floating-point double in combineStore()
51856 if (VT == MVT::i64 && in combineStore()
51857 St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) { in combineStore()
51858 SDValue OldExtract = St->getOperand(1); in combineStore()
51865 return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(), in combineStore()
51866 St->getPointerInfo(), St->getOriginalAlign(), in combineStore()
51867 St->getMemOperand()->getFlags()); in combineStore()
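
For the v64i1-store-without-64-bit-support case above, the value is split into two 32-bit halves stored at offsets 0 and 4. A standalone sketch of that split, assuming the little-endian layout x86 uses (this is only a byte-level model, not the DAG code):

#include <cassert>
#include <cstdint>
#include <cstring>

// Store a 64-bit mask as two 32-bit stores: low half at +0, high half at +4.
void store_v64i1_split(uint8_t *mem, uint64_t mask) {
  uint32_t lo = (uint32_t)mask;
  uint32_t hi = (uint32_t)(mask >> 32);
  std::memcpy(mem + 0, &lo, 4);
  std::memcpy(mem + 4, &hi, 4);
}

int main() {
  uint8_t a[8], b[8];
  uint64_t mask = 0x0123456789abcdefULL;
  std::memcpy(a, &mask, 8);          // one 64-bit store
  store_v64i1_split(b, mask);        // two 32-bit stores
  assert(std::memcmp(a, b, 8) == 0); // same bytes on a little-endian target
}
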
51878 SDValue StoredVal = N->getOperand(1); in combineVEXTRACT_STORE()
51879 MVT VT = StoredVal.getSimpleValueType(); in combineVEXTRACT_STORE() local
51880 EVT MemVT = St->getMemoryVT(); in combineVEXTRACT_STORE()
51883 unsigned StElts = MemVT.getSizeInBits() / VT.getScalarSizeInBits(); in combineVEXTRACT_STORE()
51884 APInt DemandedElts = APInt::getLowBitsSet(VT.getVectorNumElements(), StElts); in combineVEXTRACT_STORE()
51888 if (N->getOpcode() != ISD::DELETED_NODE) in combineVEXTRACT_STORE()
51905 /// A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
51907 /// A horizontal-op B, for some already available A and B, and if so then LHS is
51925 // which is A horizontal-op B. in isHorizontalBinOp()
51927 MVT VT = LHS.getSimpleValueType(); in isHorizontalBinOp() local
51928 assert((VT.is128BitVector() || VT.is256BitVector()) && in isHorizontalBinOp()
51930 unsigned NumElts = VT.getVectorNumElements(); in isHorizontalBinOp()
51967 // LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1> in isHorizontalBinOp()
51968 // NOTE: A default initialized SDValue represents an UNDEF of type VT. in isHorizontalBinOp()
52024 // AVX defines horizontal add/sub to operate independently on 128-bit lanes, in isHorizontalBinOp()
52025 // so we just repeat the inner loop if this is a 256-bit op. in isHorizontalBinOp()
52026 unsigned Num128BitChunks = VT.getSizeInBits() / 128; in isHorizontalBinOp()
52046 // Compute the post-shuffle mask index based on where the element in isHorizontalBinOp()
52050 ((Base % NumElts) & ~(NumEltsPer128BitChunk - 1)); in isHorizontalBinOp()
52052 // The low half of the 128-bit result must choose from A. in isHorizontalBinOp()
52053 // The high half of the 128-bit result must choose from B, in isHorizontalBinOp()
52069 // Avoid 128-bit multi lane shuffles if pre-AVX2 and FP (integer will split). in isHorizontalBinOp()
52070 if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() && in isHorizontalBinOp()
52071 isMultiLaneShuffleMask(128, VT.getScalarSizeInBits(), PostShuffleMask)) in isHorizontalBinOp()
52077 return User->getOpcode() == HOpcode && User->getValueType(0) == VT; in isHorizontalBinOp()
52080 ForceHorizOp || (llvm::any_of(NewLHS->uses(), FoundHorizUser) && in isHorizontalBinOp()
52081 llvm::any_of(NewRHS->uses(), FoundHorizUser)); in isHorizontalBinOp()
52091 LHS = DAG.getBitcast(VT, NewLHS); in isHorizontalBinOp()
52092 RHS = DAG.getBitcast(VT, NewRHS); in isHorizontalBinOp()
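
The element layout the isHorizontalBinOp doc comment above describes, written as a scalar reference for a 4-element horizontal add (a model of HADDPS on one 128-bit lane):

#include <array>
#include <cassert>

// Horizontal add of two 4-element vectors:
//   result = < a0+a1, a2+a3, b0+b1, b2+b3 >
std::array<float, 4> hadd(const std::array<float, 4> &a,
                          const std::array<float, 4> &b) {
  return {a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]};
}

int main() {
  std::array<float, 4> r = hadd({1.f, 2.f, 3.f, 4.f}, {10.f, 20.f, 30.f, 40.f});
  assert(r[0] == 3.f && r[1] == 7.f && r[2] == 30.f && r[3] == 70.f);
}
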
52099 EVT VT = N->getValueType(0); in combineToHorizontalAddSub() local
52100 unsigned Opcode = N->getOpcode(); in combineToHorizontalAddSub()
52105 return N->hasOneUse() && in combineToHorizontalAddSub()
52106 N->use_begin()->getOpcode() == ISD::VECTOR_SHUFFLE && in combineToHorizontalAddSub()
52107 (N->use_begin()->getOperand(0).getOpcode() == HorizOpcode || in combineToHorizontalAddSub()
52108 N->use_begin()->getOperand(1).getOpcode() == HorizOpcode); in combineToHorizontalAddSub()
52114 if ((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || in combineToHorizontalAddSub()
52115 (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) { in combineToHorizontalAddSub()
52116 SDValue LHS = N->getOperand(0); in combineToHorizontalAddSub()
52117 SDValue RHS = N->getOperand(1); in combineToHorizontalAddSub()
52121 SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS); in combineToHorizontalAddSub()
52123 HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp, in combineToHorizontalAddSub()
52124 DAG.getUNDEF(VT), PostShuffleMask); in combineToHorizontalAddSub()
52131 if (Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32 || in combineToHorizontalAddSub()
52132 VT == MVT::v16i16 || VT == MVT::v8i32)) { in combineToHorizontalAddSub()
52133 SDValue LHS = N->getOperand(0); in combineToHorizontalAddSub()
52134 SDValue RHS = N->getOperand(1); in combineToHorizontalAddSub()
52142 SDValue HorizBinOp = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, in combineToHorizontalAddSub()
52145 HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp, in combineToHorizontalAddSub()
52146 DAG.getUNDEF(VT), PostShuffleMask); in combineToHorizontalAddSub()
52158 // <i32 -2147483648[float -0.000000e+00]> 0
52160 // <(load 4 from constant-pool)> t0, t29
52171 EVT VT = N->getValueType(0); in combineFMulcFCMulc() local
52172 SDValue LHS = N->getOperand(0); in combineFMulcFCMulc()
52173 SDValue RHS = N->getOperand(1); in combineFMulcFCMulc()
52175 N->getOpcode() == X86ISD::VFCMULC ? X86ISD::VFMULC : X86ISD::VFCMULC; in combineFMulcFCMulc()
52177 if (LHS->getOpcode() == ISD::BITCAST && RHS.hasOneUse()) { in combineFMulcFCMulc()
52179 if (XOR->getOpcode() == ISD::XOR && XOR.hasOneUse()) { in combineFMulcFCMulc()
52189 SDValue I2F = DAG.getBitcast(VT, LHS.getOperand(0).getOperand(0)); in combineFMulcFCMulc()
52190 SDValue FCMulC = DAG.getNode(CombineOpcode, SDLoc(N), VT, RHS, I2F); in combineFMulcFCMulc()
52191 r = DAG.getBitcast(VT, FCMulC); in combineFMulcFCMulc()
52228 if (N->getOpcode() != ISD::FADD || !Subtarget.hasFP16() || in combineFaddCFmul()
52229 !AllowContract(N->getFlags())) in combineFaddCFmul()
52232 EVT VT = N->getValueType(0); in combineFaddCFmul() local
52233 if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16) in combineFaddCFmul()
52236 SDValue LHS = N->getOperand(0); in combineFaddCFmul()
52237 SDValue RHS = N->getOperand(1); in combineFaddCFmul()
52242 &HasNoSignedZero](SDValue N) -> bool { in combineFaddCFmul()
52247 if (Op0.hasOneUse() && AllowContract(Op0->getFlags())) { in combineFaddCFmul()
52255 ((ISD::isBuildVectorAllZeros(Op0->getOperand(2).getNode()) && in combineFaddCFmul()
52256 HasNoSignedZero(Op0->getFlags())) || in combineFaddCFmul()
52257 IsVectorAllNegativeZero(Op0->getOperand(2)))) { in combineFaddCFmul()
52274 MVT CVT = MVT::getVectorVT(MVT::f32, VT.getVectorNumElements() / 2); in combineFaddCFmul()
52280 DAG.getNode(NewOp, SDLoc(N), CVT, MulOp0, MulOp1, FAddOp1, N->getFlags()); in combineFaddCFmul()
52281 return DAG.getBitcast(VT, CFmul); in combineFaddCFmul()
52284 /// Do target-specific dag combines on floating-point adds/subs.
52298 EVT VT = N->getValueType(0); in combineLRINT_LLRINT() local
52299 SDValue Src = N->getOperand(0); in combineLRINT_LLRINT()
52303 if (!Subtarget.hasDQI() || !Subtarget.hasVLX() || VT != MVT::v2i64 || in combineLRINT_LLRINT()
52307 return DAG.getNode(X86ISD::CVTP2SI, DL, VT, in combineLRINT_LLRINT()
52312 /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
52314 /// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
52320 assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode"); in combineTruncatedArithmetic()
52321 SDValue Src = N->getOperand(0); in combineTruncatedArithmetic()
52325 EVT VT = N->getValueType(0); in combineTruncatedArithmetic() local
52328 auto IsFreeTruncation = [VT](SDValue Op) { in combineTruncatedArithmetic()
52329 unsigned TruncSizeInBits = VT.getScalarSizeInBits(); in combineTruncatedArithmetic()
52348 SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0); in combineTruncatedArithmetic()
52349 SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1); in combineTruncatedArithmetic()
52350 return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1); in combineTruncatedArithmetic()
52359 if (!VT.isVector()) in combineTruncatedArithmetic()
52362   // In most cases it's only worth pre-truncating if we're only facing the cost in combineTruncatedArithmetic()
52367 // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its in combineTruncatedArithmetic()
52370 TLI.isOperationLegal(SrcOpcode, VT) && in combineTruncatedArithmetic()
52381 if (TLI.isOperationLegal(SrcOpcode, VT) && in combineTruncatedArithmetic()
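
The TRUNC(BINOP(X,Y)) --> BINOP(TRUNC(X),TRUNC(Y)) rewrite is only valid for operations that depend solely on the low bits of their inputs, which is why the combine restricts the source opcodes. A quick standalone check of the identity for add/sub/mul, plus a division counterexample:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x12345678u, y = 0xdeadbeefu;
  // add/sub/mul only depend on the low bits of their inputs, so truncating
  // before or after the operation gives the same low 16 bits.
  assert((uint16_t)(x + y) == (uint16_t)((uint16_t)x + (uint16_t)y));
  assert((uint16_t)(x - y) == (uint16_t)((uint16_t)x - (uint16_t)y));
  assert((uint16_t)(x * y) == (uint16_t)((uint16_t)x * (uint16_t)y));
  // Division looks at the whole value, so the rewrite would be wrong for it.
  uint32_t a = 0x00010000u, b = 2u;
  assert((uint16_t)(a / b) != (uint16_t)((uint16_t)a / (uint16_t)b));
}
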
52398 static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, in combinePMULH() argument
52408 // Only handle vXi16 types that are at least 128-bits unless they will be in combinePMULH()
52410 if (!VT.isVector() || VT.getVectorElementType() != MVT::i16) in combinePMULH()
52427 // Count leading sign/zero bits on both inputs - if there are enough then in combinePMULH()
52428 // truncation back to vXi16 will be cheap - either as a pack/shuffle in combinePMULH()
52457 !(Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.is256BitVector()) && in combinePMULH()
52463 return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getBitcast(InVT, Res)); in combinePMULH()
52467 LHS = DAG.getNode(ISD::TRUNCATE, DL, VT, LHS); in combinePMULH()
52468 RHS = DAG.getNode(ISD::TRUNCATE, DL, VT, RHS); in combinePMULH()
52471 return DAG.getNode(Opc, DL, VT, LHS, RHS); in combinePMULH()
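
combinePMULH turns a widen-multiply-then-take-the-high-half pattern into PMULHW/PMULHUW. A scalar reference for what one lane of those instructions computes (just a model, not the combine itself):

#include <cassert>
#include <cstdint>

// PMULHW: high 16 bits of the signed 16x16 -> 32-bit product.
int16_t pmulhw(int16_t a, int16_t b) {
  return (int16_t)(((int32_t)a * (int32_t)b) >> 16); // arithmetic shift
}
// PMULHUW: high 16 bits of the unsigned 16x16 -> 32-bit product.
uint16_t pmulhuw(uint16_t a, uint16_t b) {
  return (uint16_t)(((uint32_t)a * (uint32_t)b) >> 16);
}

int main() {
  assert(pmulhuw(0xFFFF, 0xFFFF) == 0xFFFE); // 65535*65535 = 0xFFFE0001
  assert(pmulhw(-32768, 2) == -1);           // -65536 >> 16
}
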
52476 // adjacent pairs of 16-bit products, and saturates the result before
52477 // truncating to 16-bits.
52482 static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG, in detectPMADDUBSW() argument
52485 if (!VT.isVector() || !Subtarget.hasSSSE3()) in detectPMADDUBSW()
52488 unsigned NumElems = VT.getVectorNumElements(); in detectPMADDUBSW()
52489 EVT ScalarVT = VT.getVectorElementType(); in detectPMADDUBSW()
52493 SDValue SSatVal = detectSSatPattern(In, VT); in detectPMADDUBSW()
52570 unsigned IdxN00 = ConstN00Elt->getZExtValue(); in detectPMADDUBSW()
52571 unsigned IdxN01 = ConstN01Elt->getZExtValue(); in detectPMADDUBSW()
52572 unsigned IdxN10 = ConstN10Elt->getZExtValue(); in detectPMADDUBSW()
52573 unsigned IdxN11 = ConstN11Elt->getZExtValue(); in detectPMADDUBSW()
52620 return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn }, in detectPMADDUBSW()
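
A standalone scalar reference for one output lane of PMADDUBSW, the instruction detectPMADDUBSW tries to form: unsigned bytes from one operand times signed bytes from the other, adjacent products summed with signed saturation to i16:

#include <algorithm>
#include <cassert>
#include <cstdint>

// One output element of PMADDUBSW: u8*s8 products of an adjacent pair,
// summed and signed-saturated to 16 bits.
int16_t pmaddubsw_lane(uint8_t a0, uint8_t a1, int8_t b0, int8_t b1) {
  int sum = (int)a0 * b0 + (int)a1 * b1;
  return (int16_t)std::clamp(sum, -32768, 32767);
}

int main() {
  assert(pmaddubsw_lane(255, 255, 127, 127) == 32767);   // 64770 saturates
  assert(pmaddubsw_lane(255, 255, -128, -128) == -32768); // -65280 saturates
  assert(pmaddubsw_lane(10, 20, 3, -4) == 10 * 3 + 20 * -4);
}
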
52626 EVT VT = N->getValueType(0); in combineTruncate() local
52627 SDValue Src = N->getOperand(0); in combineTruncate()
52630 // Attempt to pre-truncate inputs to arithmetic ops instead. in combineTruncate()
52635 if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL)) in combineTruncate()
52639 if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget)) in combineTruncate()
52643 if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget)) in combineTruncate()
52648 if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { in combineTruncate()
52655 if (Src.getOpcode() == ISD::LRINT && VT.getScalarType() == MVT::i32 && in combineTruncate()
52657 return DAG.getNode(ISD::LRINT, DL, VT, Src.getOperand(0)); in combineTruncate()
52664 EVT VT = N->getValueType(0); in combineVTRUNC() local
52665 SDValue In = N->getOperand(0); in combineVTRUNC()
52668 if (SDValue SSatVal = detectSSatPattern(In, VT)) in combineVTRUNC()
52669 return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); in combineVTRUNC()
52670 if (SDValue USatVal = detectUSatPattern(In, VT, DAG, DL)) in combineVTRUNC()
52671 return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); in combineVTRUNC()
52674 APInt DemandedMask(APInt::getAllOnes(VT.getScalarSizeInBits())); in combineVTRUNC()
52683 /// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000)
52691 if (N->getOpcode() == ISD::FNEG) in isFNEG()
52692 return N->getOperand(0); in isFNEG()
52698 unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits(); in isFNEG()
52701 EVT VT = Op->getValueType(0); in isFNEG() local
52704 if (VT.getScalarSizeInBits() != ScalarSize) in isFNEG()
52711 // of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here. in isFNEG()
52715 if (NegOp0.getValueType() == VT) // FIXME: Can we do better? in isFNEG()
52716 return DAG.getVectorShuffle(VT, SDLoc(Op), NegOp0, DAG.getUNDEF(VT), in isFNEG()
52717 cast<ShuffleVectorSDNode>(Op)->getMask()); in isFNEG()
52722 // -V, INDEX). in isFNEG()
52728 if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME in isFNEG()
52729 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector, in isFNEG()
52757 // Only allow bitcast from correctly-sized constant. in isFNEG()
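
Every form isFNEG recognizes boils down to flipping the IEEE-754 sign bit. A self-contained illustration that XOR of the f32 bit pattern with 0x80000000 is the same as negation:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// FXOR(x, 0x80000000) == FNEG(x) for f32: flipping the sign bit negates.
float fneg_via_xor(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits ^= 0x80000000u;
  std::memcpy(&x, &bits, sizeof(bits));
  return x;
}

int main() {
  assert(fneg_via_xor(1.5f) == -1.5f);
  assert(fneg_via_xor(-0.0f) == 0.0f && !std::signbit(fneg_via_xor(-0.0f)));
}
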
52773 // clang-format off in negateFMAOpcode()
52787 // clang-format on in negateFMAOpcode()
52793 // clang-format off in negateFMAOpcode()
52811 // clang-format on in negateFMAOpcode()
52818 // clang-format off in negateFMAOpcode()
52828 // clang-format on in negateFMAOpcode()
52835 /// Do target-specific dag combines on floating point negations.
52839 EVT OrigVT = N->getValueType(0); in combineFneg()
52845 EVT VT = Arg.getValueType(); in combineFneg() local
52846 EVT SVT = VT.getScalarType(); in combineFneg()
52850 if (!TLI.isTypeLegal(VT)) in combineFneg()
52854 // use of a constant by performing (-0 - A*B) instead. in combineFneg()
52857 Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) { in combineFneg()
52858 SDValue Zero = DAG.getConstantFP(0.0, DL, VT); in combineFneg()
52859 SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0), in combineFneg()
52884 EVT VT = Op.getValueType(); in getNegatedExpression() local
52885 EVT SVT = VT.getScalarType(); in getNegatedExpression()
52887 SDNodeFlags Flags = Op.getNode()->getFlags(); in getNegatedExpression()
52897 if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) || in getNegatedExpression()
52899 !isOperationLegal(ISD::FMA, VT)) in getNegatedExpression()
52922 // Fill in the non-negated ops with the original values. in getNegatedExpression()
52926 return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps); in getNegatedExpression()
52932 return DAG.getNode(Opc, SDLoc(Op), VT, NegOp0); in getNegatedExpression()
52942 MVT VT = N->getSimpleValueType(0); in lowerX86FPLogicOp() local
52944 if (!VT.isVector() || !Subtarget.hasSSE2()) in lowerX86FPLogicOp()
52949 unsigned IntBits = VT.getScalarSizeInBits(); in lowerX86FPLogicOp()
52951 MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits); in lowerX86FPLogicOp()
52953 SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0)); in lowerX86FPLogicOp()
52954 SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1)); in lowerX86FPLogicOp()
52956 switch (N->getOpcode()) { in lowerX86FPLogicOp()
52957 // clang-format off in lowerX86FPLogicOp()
52963 // clang-format on in lowerX86FPLogicOp()
52966 return DAG.getBitcast(VT, IntOp); in lowerX86FPLogicOp()
52970 /// Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val)
52972 if (N->getOpcode() != ISD::XOR) in foldXor1SetCC()
52975 SDValue LHS = N->getOperand(0); in foldXor1SetCC()
52976 if (!isOneConstant(N->getOperand(1)) || LHS->getOpcode() != X86ISD::SETCC) in foldXor1SetCC()
52980 X86::CondCode(LHS->getConstantOperandVal(0))); in foldXor1SetCC()
52982 return getSETCC(NewCC, LHS->getOperand(1), DL, DAG); in foldXor1SetCC()
52987 assert((N->getOpcode() == ISD::XOR || N->getOpcode() == ISD::SUB) && in combineXorSubCTLZ()
52992 EVT VT = N->getValueType(0); in combineXorSubCTLZ() local
52993 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32 && in combineXorSubCTLZ()
52994 (VT != MVT::i64 || !Subtarget.is64Bit())) in combineXorSubCTLZ()
52997 SDValue N0 = N->getOperand(0); in combineXorSubCTLZ()
52998 SDValue N1 = N->getOperand(1); in combineXorSubCTLZ()
53010 } else if (N->getOpcode() == ISD::SUB) { in combineXorSubCTLZ()
53023 if (C->getZExtValue() != uint64_t(OpCTLZ.getValueSizeInBits() - 1)) in combineXorSubCTLZ()
53025 EVT OpVT = VT; in combineXorSubCTLZ()
53027 if (VT == MVT::i8) { in combineXorSubCTLZ()
53035 if (VT == MVT::i8) in combineXorSubCTLZ()
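
combineXorSubCTLZ relies on (BitWidth-1) - ctlz(x) and (BitWidth-1) ^ ctlz(x) being the same value for non-zero x, namely the index of the highest set bit that BSR returns; subtracting from an all-ones value never borrows, so SUB and XOR agree. A standalone check using a portable clz helper (the helper exists only for this sketch):

#include <cassert>
#include <cstdint>

// Portable count-leading-zeros for non-zero 32-bit values.
unsigned clz32(uint32_t x) {
  unsigned n = 0;
  for (uint32_t m = 0x80000000u; (x & m) == 0; m >>= 1)
    ++n;
  return n;
}

int main() {
  for (uint32_t x : {1u, 2u, 3u, 0x80u, 0x12345678u, 0xFFFFFFFFu}) {
    unsigned c = clz32(x);
    // Both forms give the bit index of the most significant set bit (BSR).
    assert((31u - c) == (31u ^ c));
    assert((x >> (31u - c)) == 1u);
  }
}
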
53044 SDValue N0 = N->getOperand(0); in combineXor()
53045 SDValue N1 = N->getOperand(1); in combineXor()
53046 EVT VT = N->getValueType(0); in combineXor() local
53050 if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { in combineXor()
53087 // Fold not(iX bitcast(vXi1)) -> (iX bitcast(not(vec))) for legal boolvecs. in combineXor()
53094 VT, DAG.getNOT(DL, N0.getOperand(0), N0.getOperand(0).getValueType())); in combineXor()
53098 // Fold not(insert_subvector(undef,sub)) -> insert_subvector(undef,not(sub)) in combineXor()
53099 if (ISD::isBuildVectorAllOnes(N1.getNode()) && VT.isVector() && in combineXor()
53100 VT.getVectorElementType() == MVT::i1 && in combineXor()
53104 ISD::INSERT_SUBVECTOR, DL, VT, N0.getOperand(0), in combineXor()
53109 // Fold xor(zext(xor(x,c1)),c2) -> xor(zext(x),xor(zext(c1),c2)) in combineXor()
53110 // Fold xor(truncate(xor(x,c1)),c2) -> xor(truncate(x),xor(truncate(c1),c2)) in combineXor()
53113 N0.getOperand(0).getOpcode() == N->getOpcode()) { in combineXor()
53117 if (N1C && !N1C->isOpaque() && N001C && !N001C->isOpaque()) { in combineXor()
53118 SDValue LHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(0), DL, VT); in combineXor()
53119 SDValue RHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(1), DL, VT); in combineXor()
53120 return DAG.getNode(ISD::XOR, DL, VT, LHS, in combineXor()
53121 DAG.getNode(ISD::XOR, DL, VT, RHS, N1)); in combineXor()
53134 SDValue N0 = N->getOperand(0); in combineBITREVERSE()
53135 EVT VT = N->getValueType(0); in combineBITREVERSE() local
53137 // Convert a (iX bitreverse(bitcast(vXi1 X))) -> (iX bitcast(shuffle(X))) in combineBITREVERSE()
53138 if (VT.isInteger() && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) { in combineBITREVERSE()
53148 ReverseMask[I] = (NumElts - 1) - I; in combineBITREVERSE()
53151 return DAG.getBitcast(VT, Rev); in combineBITREVERSE()
53162 unsigned Opcode = N->getOpcode(); in combineAVG()
53163 SDValue N0 = N->getOperand(0); in combineAVG()
53164 SDValue N1 = N->getOperand(1); in combineAVG()
53165 EVT VT = N->getValueType(0); in combineAVG() local
53166 EVT SVT = VT.getScalarType(); in combineAVG()
53169 // avgceils(x,y) -> flipsign(avgceilu(flipsign(x),flipsign(y))) in combineAVG()
53171 if (Opcode == ISD::AVGCEILS && VT.isVector() && SVT == MVT::i8) { in combineAVG()
53172 APInt SignBit = APInt::getSignMask(VT.getScalarSizeInBits()); in combineAVG()
53173 SDValue SignMask = DAG.getConstant(SignBit, DL, VT); in combineAVG()
53174 N0 = DAG.getNode(ISD::XOR, DL, VT, N0, SignMask); in combineAVG()
53175 N1 = DAG.getNode(ISD::XOR, DL, VT, N1, SignMask); in combineAVG()
53176 return DAG.getNode(ISD::XOR, DL, VT, in combineAVG()
53177 DAG.getNode(ISD::AVGCEILU, DL, VT, N0, N1), SignMask); in combineAVG()
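
The avgceils -> flipsign(avgceilu(flipsign,flipsign)) rewrite above works because XOR with the sign bit is addition of 128 mod 256, which maps signed order onto unsigned order. An exhaustive byte-sized check of the identity (a standalone model, not the DAG form; arithmetic right shift and two's-complement wrap assumed):

#include <cassert>
#include <cstdint>

// Reference signed/unsigned ceiling averages for one byte lane.
int8_t avgceils_ref(int8_t x, int8_t y) {
  return (int8_t)(((int)x + (int)y + 1) >> 1); // arithmetic shift
}
uint8_t avgceilu_ref(uint8_t x, uint8_t y) {
  return (uint8_t)(((int)x + (int)y + 1) >> 1);
}

// avgceils via the unsigned average with the sign bits flipped.
int8_t avgceils_via_unsigned(int8_t x, int8_t y) {
  uint8_t ux = (uint8_t)x ^ 0x80, uy = (uint8_t)y ^ 0x80;
  return (int8_t)(avgceilu_ref(ux, uy) ^ 0x80);
}

int main() {
  for (int x = -128; x <= 127; ++x)
    for (int y = -128; y <= 127; ++y)
      assert(avgceils_via_unsigned((int8_t)x, (int8_t)y) ==
             avgceils_ref((int8_t)x, (int8_t)y));
}
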
53186 EVT VT = N->getValueType(0); in combineBEXTR() local
53187 unsigned NumBits = VT.getSizeInBits(); in combineBEXTR()
53189 // TODO - Constant Folding. in combineBEXTR()
53208 /// to be used as a replacement operand with operations (eg, bitwise-and) where
53223 SDValue N0 = N->getOperand(0); in combineFAndFNotToFAndn()
53224 SDValue N1 = N->getOperand(1); in combineFAndFNotToFAndn()
53225 EVT VT = N->getValueType(0); in combineFAndFNotToFAndn() local
53229 if (!((VT == MVT::f32 && Subtarget.hasSSE1()) || in combineFAndFNotToFAndn()
53230 (VT == MVT::f64 && Subtarget.hasSSE2()) || in combineFAndFNotToFAndn()
53231 (VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2()))) in combineFAndFNotToFAndn()
53238 return C && C->getConstantFPValue()->isAllOnesValue(); in combineFAndFNotToFAndn()
53241 // fand (fxor X, -1), Y --> fandn X, Y in combineFAndFNotToFAndn()
53243 return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1); in combineFAndFNotToFAndn()
53245 // fand X, (fxor Y, -1) --> fandn Y, X in combineFAndFNotToFAndn()
53247 return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0); in combineFAndFNotToFAndn()
53252 /// Do target-specific dag combines on X86ISD::FAND nodes.
53255 // FAND(0.0, x) -> 0.0 in combineFAnd()
53256 if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget)) in combineFAnd()
53259 // FAND(x, 0.0) -> 0.0 in combineFAnd()
53260 if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget)) in combineFAnd()
53269 /// Do target-specific dag combines on X86ISD::FANDN nodes.
53272 // FANDN(0.0, x) -> x in combineFAndn()
53273 if (isNullFPScalarOrVectorConst(N->getOperand(0))) in combineFAndn()
53274 return N->getOperand(1); in combineFAndn()
53276 // FANDN(x, 0.0) -> 0.0 in combineFAndn()
53277 if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget)) in combineFAndn()
53283 /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
53287 assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); in combineFOr()
53289 // F[X]OR(0.0, x) -> x in combineFOr()
53290 if (isNullFPScalarOrVectorConst(N->getOperand(0))) in combineFOr()
53291 return N->getOperand(1); in combineFOr()
53293 // F[X]OR(x, 0.0) -> x in combineFOr()
53294 if (isNullFPScalarOrVectorConst(N->getOperand(1))) in combineFOr()
53295 return N->getOperand(0); in combineFOr()
53303 /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
53305 assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX); in combineFMinFMax()
53312 // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes in combineFMinFMax()
53315 switch (N->getOpcode()) { in combineFMinFMax()
53321 return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), in combineFMinFMax()
53322 N->getOperand(0), N->getOperand(1)); in combineFMinFMax()
53327 EVT VT = N->getValueType(0); in combineFMinNumFMaxNum() local
53328 if (Subtarget.useSoftFloat() || isSoftF16(VT, Subtarget)) in combineFMinNumFMaxNum()
53333 if (!((Subtarget.hasSSE1() && VT == MVT::f32) || in combineFMinNumFMaxNum()
53334 (Subtarget.hasSSE2() && VT == MVT::f64) || in combineFMinNumFMaxNum()
53335 (Subtarget.hasFP16() && VT == MVT::f16) || in combineFMinNumFMaxNum()
53336 (VT.isVector() && TLI.isTypeLegal(VT)))) in combineFMinNumFMaxNum()
53339 SDValue Op0 = N->getOperand(0); in combineFMinNumFMaxNum()
53340 SDValue Op1 = N->getOperand(1); in combineFMinNumFMaxNum()
53342 auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN; in combineFMinNumFMaxNum()
53346 if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs()) in combineFMinNumFMaxNum()
53347 return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); in combineFMinNumFMaxNum()
53349 // If one of the operands is known non-NaN use the native min/max instructions in combineFMinNumFMaxNum()
53350 // with the non-NaN input as second operand. in combineFMinNumFMaxNum()
53352 return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); in combineFMinNumFMaxNum()
53354 return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags()); in combineFMinNumFMaxNum()
53358 if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize()) in combineFMinNumFMaxNum()
53362 VT); in combineFMinNumFMaxNum()
53368   // [ASCII table from the original comment elided] in combineFMinNumFMaxNum()
53383 SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0); in combineFMinNumFMaxNum()
53388 return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax); in combineFMinNumFMaxNum()
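
A standalone scalar model of how the code above builds IEEE maxnum from the native max: X86ISD::FMAX behaves like MAXSS and returns its second operand whenever the comparison is unordered, so swapping the operands plus one NaN test on Op0 gives maxnum semantics. This is only a sketch of that reasoning, assuming the MAXSS behavior the surrounding comments describe:

#include <cassert>
#include <cmath>

// Model of MAXSS: returns src (the 2nd operand) if the compare is unordered.
float x86_max(float dst, float src) { return dst > src ? dst : src; }

// maxnum(op0, op1): if one operand is NaN, return the other.
float maxnum_via_x86max(float op0, float op1) {
  float m = x86_max(op1, op0);      // a NaN op1 falls through to op0 here
  return std::isnan(op0) ? op1 : m; // a NaN op0 is handled by the select
}

int main() {
  float nan = std::nanf("");
  assert(maxnum_via_x86max(1.0f, 2.0f) == 2.0f);
  assert(maxnum_via_x86max(nan, 2.0f) == 2.0f);
  assert(maxnum_via_x86max(1.0f, nan) == 1.0f);
}
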
53393 EVT VT = N->getValueType(0); in combineX86INT_TO_FP() local
53396 APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); in combineX86INT_TO_FP()
53401 SDValue In = N->getOperand(0); in combineX86INT_TO_FP()
53403 if (VT.getVectorNumElements() < InVT.getVectorNumElements() && in combineX86INT_TO_FP()
53405 assert(InVT.is128BitVector() && "Expected 128-bit input vector"); in combineX86INT_TO_FP()
53406 LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0)); in combineX86INT_TO_FP()
53407 unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements(); in combineX86INT_TO_FP()
53412 SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT, in combineX86INT_TO_FP()
53426 bool IsStrict = N->isTargetStrictFPOpcode(); in combineCVTP2I_CVTTP2I()
53427 EVT VT = N->getValueType(0); in combineCVTP2I_CVTTP2I() local
53430 SDValue In = N->getOperand(IsStrict ? 1 : 0); in combineCVTP2I_CVTTP2I()
53432 if (VT.getVectorNumElements() < InVT.getVectorNumElements() && in combineCVTP2I_CVTTP2I()
53434 assert(InVT.is128BitVector() && "Expected 128-bit input vector"); in combineCVTP2I_CVTTP2I()
53436 unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements(); in combineCVTP2I_CVTTP2I()
53443 DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other}, in combineCVTP2I_CVTTP2I()
53444 {N->getOperand(0), DAG.getBitcast(InVT, VZLoad)}); in combineCVTP2I_CVTTP2I()
53448 DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad)); in combineCVTP2I_CVTTP2I()
53460 /// Do target-specific dag combines on X86ISD::ANDNP nodes.
53464 SDValue N0 = N->getOperand(0); in combineAndnp()
53465 SDValue N1 = N->getOperand(1); in combineAndnp()
53466 MVT VT = N->getSimpleValueType(0); in combineAndnp() local
53467 int NumElts = VT.getVectorNumElements(); in combineAndnp()
53468 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in combineAndnp()
53471 // ANDNP(undef, x) -> 0 in combineAndnp()
53472 // ANDNP(x, undef) -> 0 in combineAndnp()
53474 return DAG.getConstant(0, DL, VT); in combineAndnp()
53476 // ANDNP(0, x) -> x in combineAndnp()
53480 // ANDNP(x, 0) -> 0 in combineAndnp()
53482 return DAG.getConstant(0, DL, VT); in combineAndnp()
53484 // ANDNP(x, -1) -> NOT(x) -> XOR(x, -1) in combineAndnp()
53486 return DAG.getNOT(DL, N0, VT); in combineAndnp()
53490 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1); in combineAndnp()
53493 // ANDNP(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)). in combineAndnp()
53494 if (N1->hasOneUse()) in combineAndnp()
53497 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT); in combineAndnp()
53511 return getConstVector(ResultBits, VT, DAG, DL); in combineAndnp()
53517 if (N0->hasOneUse()) { in combineAndnp()
53522 SDValue Not = getConstVector(EltBits0, VT, DAG, DL); in combineAndnp()
53523 return DAG.getNode(ISD::AND, DL, VT, Not, N1); in combineAndnp()
53529 if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { in combineAndnp()
53547 // We can't assume an undef src element gives an undef dst - the in combineAndnp()
53570 if (N->getOpcode() != ISD::DELETED_NODE) in combineAndnp()
53581 SDValue N1 = N->getOperand(1); in combineBT()
53587 if (N->getOpcode() != ISD::DELETED_NODE) in combineBT()
53597 bool IsStrict = N->getOpcode() == X86ISD::STRICT_CVTPH2PS; in combineCVTPH2PS()
53598 SDValue Src = N->getOperand(IsStrict ? 1 : 0); in combineCVTPH2PS()
53600 if (N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) { in combineCVTPH2PS()
53604 if (N->getOpcode() != ISD::DELETED_NODE) in combineCVTPH2PS()
53611 LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(IsStrict ? 1 : 0)); in combineCVTPH2PS()
53616 N->getOpcode(), dl, {MVT::v4f32, MVT::Other}, in combineCVTPH2PS()
53617 {N->getOperand(0), DAG.getBitcast(MVT::v8i16, VZLoad)}); in combineCVTPH2PS()
53620 SDValue Convert = DAG.getNode(N->getOpcode(), dl, MVT::v4f32, in combineCVTPH2PS()
53637 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); in combineSextInRegCmov()
53639 EVT DstVT = N->getValueType(0); in combineSextInRegCmov()
53641 SDValue N0 = N->getOperand(0); in combineSextInRegCmov()
53642 SDValue N1 = N->getOperand(1); in combineSextInRegCmov()
53643 EVT ExtraVT = cast<VTSDNode>(N1)->getVT(); in combineSextInRegCmov()
53699 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); in combineSignExtendInReg()
53704 EVT VT = N->getValueType(0); in combineSignExtendInReg() local
53705 SDValue N0 = N->getOperand(0); in combineSignExtendInReg()
53706 SDValue N1 = N->getOperand(1); in combineSignExtendInReg()
53707 EVT ExtraVT = cast<VTSDNode>(N1)->getVT(); in combineSignExtendInReg()
53711 // both SSE and AVX2 since there is no sign-extended shift right in combineSignExtendInReg()
53712 // operation on a vector with 64-bit elements. in combineSignExtendInReg()
53713 //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) -> in combineSignExtendInReg()
53715 if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND || in combineSignExtendInReg()
53728 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Promote, N1); in combineSignExtendInReg()
53739 /// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
53740 /// zext(add_nuw(x, C)) --> add(zext(x), C_zext)
53746 if (Ext->getOpcode() != ISD::SIGN_EXTEND && in promoteExtBeforeAdd()
53747 Ext->getOpcode() != ISD::ZERO_EXTEND) in promoteExtBeforeAdd()
53751 EVT VT = Ext->getValueType(0); in promoteExtBeforeAdd() local
53752 if (VT != MVT::i64) in promoteExtBeforeAdd()
53755 SDValue Add = Ext->getOperand(0); in promoteExtBeforeAdd()
53761 bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND; in promoteExtBeforeAdd()
53762 bool NSW = Add->getFlags().hasNoSignedWrap(); in promoteExtBeforeAdd()
53763 bool NUW = Add->getFlags().hasNoUnsignedWrap(); in promoteExtBeforeAdd()
53785 for (auto *User : Ext->uses()) { in promoteExtBeforeAdd()
53786 if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) { in promoteExtBeforeAdd()
53795 int64_t AddC = Sext ? AddOp1C->getSExtValue() : AddOp1C->getZExtValue(); in promoteExtBeforeAdd()
53796 SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0); in promoteExtBeforeAdd()
53797 SDValue NewConstant = DAG.getConstant(AddC, SDLoc(Add), VT); in promoteExtBeforeAdd()
53800 // sign-extended. in promoteExtBeforeAdd()
53804 return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags); in promoteExtBeforeAdd()
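
promoteExtBeforeAdd depends on the identity sext(add_nsw(x, C)) == add(sext(x), C_sext): if the narrow add cannot wrap, widening before or after the add gives the same value. A small standalone check, including the wrapping case that shows why the nsw flag is required (two's-complement wrap assumed for the counterexample):

#include <cassert>
#include <cstdint>

int main() {
  // No signed overflow in i8: the two orders agree.
  int8_t x = 100, c = 20;
  int32_t narrow_then_widen = (int32_t)(int8_t)(x + c); // sext(add_nsw(x, C))
  int32_t widen_then_add = (int32_t)x + (int32_t)c;     // add(sext(x), C_sext)
  assert(narrow_then_widen == 120 && widen_then_add == 120);

  // With i8 overflow the narrow add wraps, so the identity breaks,
  // which is why the combine requires the no-signed-wrap flag.
  int8_t y = 100, d = 50;
  assert((int32_t)(int8_t)(y + d) == -106);
  assert((int32_t)y + (int32_t)d == 150);
}
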
53808 // operands and the result of CMOV is not used anywhere else - promote CMOV
53811 // (or more) pseudo-CMOVs only when they go one-after-another and
53815 // 3) 16-bit CMOV encoding is 4 bytes, 32-bit CMOV is 3-byte, so this
53816 // promotion is also good in terms of code-size.
53817 // (64-bit CMOV is 4-bytes, that's why we don't do 32-bit => 64-bit
53820 SDValue CMovN = Extend->getOperand(0); in combineToExtendCMOV()
53824 EVT TargetVT = Extend->getValueType(0); in combineToExtendCMOV()
53825 unsigned ExtendOpcode = Extend->getOpcode(); in combineToExtendCMOV()
53828 EVT VT = CMovN.getValueType(); in combineToExtendCMOV() local
53842 if (VT != MVT::i16 && !(ExtendOpcode == ISD::SIGN_EXTEND && VT == MVT::i32)) in combineToExtendCMOV()
53868 SDValue N0 = N->getOperand(0); in combineExtSetcc()
53869 EVT VT = N->getValueType(0); in combineExtSetcc() local
53873 if (!Subtarget.hasAVX512() || !VT.isVector() || N0.getOpcode() != ISD::SETCC) in combineExtSetcc()
53877 EVT SVT = VT.getVectorElementType(); in combineExtSetcc()
53886 unsigned Size = VT.getSizeInBits(); in combineExtSetcc()
53892 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); in combineExtSetcc()
53902 SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); in combineExtSetcc()
53904 if (N->getOpcode() == ISD::ZERO_EXTEND) in combineExtSetcc()
53913 SDValue N0 = N->getOperand(0); in combineSext()
53914 EVT VT = N->getValueType(0); in combineSext() local
53917 // (i32 (sext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry)) in combineSext()
53920 SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0), in combineSext()
53921 N0->getOperand(1)); in combineSext()
53943 if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), DL, VT, N0, in combineSext()
53947 if (VT.isVector()) { in combineSext()
53952 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0)); in combineSext()
53974 return Use->getOpcode() != ISD::FMA && Use->getOpcode() != ISD::STRICT_FMA; in getInvertedVectorForFMA()
53976 if (llvm::any_of(V->uses(), IsNotFMA)) in getInvertedVectorForFMA()
53980 EVT VT = V.getValueType(); in getInvertedVectorForFMA() local
53981 EVT EltVT = VT.getVectorElementType(); in getInvertedVectorForFMA()
53982 for (const SDValue &Op : V->op_values()) { in getInvertedVectorForFMA()
53984 Ops.push_back(DAG.getConstantFP(-Cst->getValueAPF(), SDLoc(Op), EltVT)); in getInvertedVectorForFMA()
53991 SDNode *NV = DAG.getNodeIfExists(ISD::BUILD_VECTOR, DAG.getVTList(VT), Ops); in getInvertedVectorForFMA()
53997 if (llvm::any_of(NV->uses(), IsNotFMA)) in getInvertedVectorForFMA()
54004 for (const SDValue &Op : V->op_values()) { in getInvertedVectorForFMA()
54006 if (Cst->isNegative()) in getInvertedVectorForFMA()
54018 EVT VT = N->getValueType(0); in combineFMA() local
54019 bool IsStrict = N->isStrictFPOpcode() || N->isTargetStrictFPOpcode(); in combineFMA()
54023 if (!TLI.isTypeLegal(VT)) in combineFMA()
54026 SDValue A = N->getOperand(IsStrict ? 1 : 0); in combineFMA()
54027 SDValue B = N->getOperand(IsStrict ? 2 : 1); in combineFMA()
54028 SDValue C = N->getOperand(IsStrict ? 3 : 2); in combineFMA()
54030 // If the operation allows fast-math and the target does not support FMA, in combineFMA()
54032 SDNodeFlags Flags = N->getFlags(); in combineFMA()
54034 TLI.isOperationExpand(ISD::FMA, VT)) { in combineFMA()
54035 SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags); in combineFMA()
54036 return DAG.getNode(ISD::FADD, dl, VT, Fmul, C, Flags); in combineFMA()
54039 EVT ScalarVT = VT.getScalarType(); in combineFMA()
54085 negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC, false); in combineFMA()
54087 // Propagate fast-math-flags to new FMA node. in combineFMA()
54090 assert(N->getNumOperands() == 4 && "Shouldn't be greater than 4"); in combineFMA()
54091 return DAG.getNode(NewOpcode, dl, {VT, MVT::Other}, in combineFMA()
54092 {N->getOperand(0), A, B, C}); in combineFMA()
54094 if (N->getNumOperands() == 4) in combineFMA()
54095 return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3)); in combineFMA()
54096 return DAG.getNode(NewOpcode, dl, VT, A, B, C); in combineFMA()
54100 // Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C)
54101 // Combine FMSUBADD(A, B, FNEG(C)) -> FMADDSUB(A, B, C)
54105 EVT VT = N->getValueType(0); in combineFMADDSUB() local
54110 SDValue N2 = N->getOperand(2); in combineFMADDSUB()
54116 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false); in combineFMADDSUB()
54118 if (N->getNumOperands() == 4) in combineFMADDSUB()
54119 return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), in combineFMADDSUB()
54120 NegN2, N->getOperand(3)); in combineFMADDSUB()
54121 return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), in combineFMADDSUB()
54129 SDValue N0 = N->getOperand(0); in combineZext()
54130 EVT VT = N->getValueType(0); in combineZext() local
54132 // (i32 (aext (i8 (x86isd::setcc_carry)))) -> (i32 (x86isd::setcc_carry)) in combineZext()
54134 if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND && in combineZext()
54136 SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0), in combineZext()
54137 N0->getOperand(1)); in combineZext()
54157 if (SDValue V = combineToExtendBoolVectorInReg(N->getOpcode(), dl, VT, N0, in combineZext()
54161 if (VT.isVector()) in combineZext()
54173 VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) { in combineZext()
54188 /// pre-promote its result type since vXi1 vectors don't get promoted
54190 static SDValue truncateAVX512SetCCNoBWI(EVT VT, EVT OpVT, SDValue LHS, in truncateAVX512SetCCNoBWI() argument
54194 if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() && in truncateAVX512SetCCNoBWI()
54195 VT.getVectorElementType() == MVT::i1 && in truncateAVX512SetCCNoBWI()
54199 return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc); in truncateAVX512SetCCNoBWI()
54207 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); in combineSetCC()
54208 const SDValue LHS = N->getOperand(0); in combineSetCC()
54209 const SDValue RHS = N->getOperand(1); in combineSetCC()
54210 EVT VT = N->getValueType(0); in combineSetCC() local
54215 if (SDValue V = combineVectorSizedSetCCEquality(VT, LHS, RHS, CC, DL, DAG, in combineSetCC()
54219 if (VT == MVT::i1) { in combineSetCC()
54223 return DAG.getNode(ISD::TRUNCATE, DL, VT, getSETCC(X86CC, V, DL, DAG)); in combineSetCC()
54227 // cmpeq(or(X,Y),X) --> cmpeq(and(~X,Y),0) in combineSetCC()
54228 // cmpne(or(X,Y),X) --> cmpne(and(~X,Y),0) in combineSetCC()
54230 if (N0.getOpcode() == ISD::OR && N0->hasOneUse()) { in combineSetCC()
54241 return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); in combineSetCC()
54243 return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); in combineSetCC()
54245 // cmpeq(and(X,Y),Y) --> cmpeq(and(~X,Y),0) in combineSetCC()
54246 // cmpne(and(X,Y),Y) --> cmpne(and(~X,Y),0) in combineSetCC()
54248 if (N0.getOpcode() == ISD::AND && N0->hasOneUse()) { in combineSetCC()
54259 return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); in combineSetCC()
54261 return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); in combineSetCC()
54263 // cmpeq(trunc(x),C) --> cmpeq(x,C) in combineSetCC()
54264 // cmpne(trunc(x),C) --> cmpne(x,C) in combineSetCC()
54275 return DAG.getSetCC(DL, VT, LHS.getOperand(0), in combineSetCC()
54280 // icmp eq Abs(X) C -> in combineSetCC()
54281 // (icmp eq A, C) | (icmp eq A, -C) in combineSetCC()
54282 // icmp ne Abs(X) C -> in combineSetCC()
54283 // (icmp ne A, C) & (icmp ne A, -C) in combineSetCC()
54289 const APInt &CInt = C->getAPIntValue(); in combineSetCC()
54293 SDValue SETCC0 = DAG.getSetCC(DL, VT, BaseOp, RHS, CC); in combineSetCC()
54295 DL, VT, BaseOp, DAG.getConstant(-CInt, DL, OpVT), CC); in combineSetCC()
54296 return DAG.getNode(CC == ISD::SETEQ ? ISD::OR : ISD::AND, DL, VT, in combineSetCC()
54304 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && in combineSetCC()
54323 assert(VT == Op0.getOperand(0).getValueType() && in combineSetCC()
54326 return DAG.getConstant(0, DL, VT); in combineSetCC()
54328 return DAG.getConstant(1, DL, VT); in combineSetCC()
54330 return DAG.getNOT(DL, Op0.getOperand(0), VT); in combineSetCC()
54342 if (VT.isVector() && OpVT.isVector() && OpVT.isInteger()) { in combineSetCC()
54399 if (SDValue R = truncateAVX512SetCCNoBWI(VT, OpVT, LHSOut, RHSOut, in combineSetCC()
54402 return DAG.getSetCC(DL, VT, LHSOut, RHSOut, NewCC); in combineSetCC()
54408 truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget)) in combineSetCC()
54413 // -> `(icmp ult (add x, -C), 2)` in combineSetCC()
54417 // in worse codegen. So, undo the middle-end transform and go back to `(or in combineSetCC()
54425 if (VT.isVector() && OpVT.isVector() && OpVT.isInteger() && in combineSetCC()
54440 // If we had `(add x, -1)` and can lower with `umin`, don't transform as in combineSetCC()
54457 else if ((CC == ISD::SETUGT && (-CmpC) == 3) || in combineSetCC()
54458 (CC == ISD::SETUGE && (-CmpC) == 2)) { in combineSetCC()
54466 DAG.getSetCC(DL, VT, LHS.getOperand(0), C0, ISD::SETEQ); in combineSetCC()
54468 DAG.getSetCC(DL, VT, LHS.getOperand(0), C1, ISD::SETEQ); in combineSetCC()
54469 return DAG.getNode(ISD::OR, DL, VT, NewLHS, NewRHS); in combineSetCC()
54474 // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early in combineSetCC()
54476 if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 && in combineSetCC()
54480 // X pred 0.0 --> X pred -X in combineSetCC()
54487 return DAG.getSetCC(DL, VT, LHS, SDValue(FNeg, 0), CC); in combineSetCC()
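
The icmp eq/ne Abs(X), C fold a few lines above expands to a comparison against C and one against -C. The scalar identity it is based on, checked standalone for small positive C values:

#include <cassert>
#include <cstdlib>

// |x| == c  <=>  x == c || x == -c   (for c > 0 and x != INT_MIN)
bool abs_eq_direct(int x, int c) { return std::abs(x) == c; }
bool abs_eq_folded(int x, int c) { return x == c || x == -c; }

int main() {
  for (int x : {-7, -5, 0, 3, 5, 7})
    for (int c : {3, 5, 7})
      assert(abs_eq_direct(x, c) == abs_eq_folded(x, c));
}
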
54496 SDValue Src = N->getOperand(0); in combineMOVMSK()
54498 MVT VT = N->getSimpleValueType(0); in combineMOVMSK() local
54499 unsigned NumBits = VT.getScalarSizeInBits(); in combineMOVMSK()
54502 assert(VT == MVT::i32 && NumElts <= NumBits && "Unexpected MOVMSK types"); in combineMOVMSK()
54515 return DAG.getConstant(Imm, SDLoc(N), VT); in combineMOVMSK()
54518 // Look through int->fp bitcasts that don't change the element width. in combineMOVMSK()
54522 return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0)); in combineMOVMSK()
54524 // Fold movmsk(not(x)) -> not(movmsk(x)) to improve folding of movmsk results in combineMOVMSK()
54530 return DAG.getNode(ISD::XOR, DL, VT, in combineMOVMSK()
54531 DAG.getNode(X86ISD::MOVMSK, DL, VT, NotSrc), in combineMOVMSK()
54532 DAG.getConstant(NotMask, DL, VT)); in combineMOVMSK()
54535 // Fold movmsk(icmp_sgt(x,-1)) -> not(movmsk(x)) to improve folding of movmsk in combineMOVMSK()
54541 return DAG.getNode(ISD::XOR, DL, VT, in combineMOVMSK()
54542 DAG.getNode(X86ISD::MOVMSK, DL, VT, Src.getOperand(0)), in combineMOVMSK()
54543 DAG.getConstant(NotMask, DL, VT)); in combineMOVMSK()
54546 // Fold movmsk(icmp_eq(and(x,c1),c1)) -> movmsk(shl(x,c2)) in combineMOVMSK()
54547 // Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2))) in combineMOVMSK()
54549 // Use KnownBits to determine if only a single bit is non-zero in combineMOVMSK()
54563 // vXi8 shifts - we only care about the signbit so can use PSLLW. in combineMOVMSK()
54575 return DAG.getNode(X86ISD::MOVMSK, DL, VT, DAG.getNOT(DL, Res, SrcVT)); in combineMOVMSK()
54579 // Fold movmsk(logic(X,C)) -> logic(movmsk(X),C) in combineMOVMSK()
54580 if (N->isOnlyUserOf(Src.getNode())) { in combineMOVMSK()
54594 SDValue NewMovMsk = DAG.getNode(X86ISD::MOVMSK, DL, VT, NewSrc); in combineMOVMSK()
54595 return DAG.getNode(SrcBC.getOpcode(), DL, VT, NewMovMsk, in combineMOVMSK()
54596 DAG.getConstant(Mask, DL, VT)); in combineMOVMSK()
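// Sketch of movmsk(logic(X,C)) -> logic(movmsk(X),Mask), with assumed 4-lane
// sign-bit masks movmsk(X) == 0b1010 and movmsk(C) == 0b1100: each lane's
// sign bit after AND/OR/XOR is the same bit-op applied to the two lanes'
// sign bits, so the logic op can be applied to the packed masks instead.
static_assert((0xAu & 0xCu) == 0x8u && (0xAu | 0xCu) == 0xEu &&
              (0xAu ^ 0xCu) == 0x6u,
              "per-lane sign bits commute with bitwise logic");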
54613 MVT VT = N->getSimpleValueType(0); in combineTESTP() local
54614 unsigned NumBits = VT.getScalarSizeInBits(); in combineTESTP()
54628 SDValue Mask = MemOp->getMask(); in combineX86GatherScatter()
54635 if (N->getOpcode() != ISD::DELETED_NODE) in combineX86GatherScatter()
54650 SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(), in rebuildGatherScatter()
54651 Gather->getMask(), Base, Index, Scale } ; in rebuildGatherScatter()
54652 return DAG.getMaskedGather(Gather->getVTList(), in rebuildGatherScatter()
54653 Gather->getMemoryVT(), DL, Ops, in rebuildGatherScatter()
54654 Gather->getMemOperand(), in rebuildGatherScatter()
54655 Gather->getIndexType(), in rebuildGatherScatter()
54656 Gather->getExtensionType()); in rebuildGatherScatter()
54659 SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(), in rebuildGatherScatter()
54660 Scatter->getMask(), Base, Index, Scale }; in rebuildGatherScatter()
54661 return DAG.getMaskedScatter(Scatter->getVTList(), in rebuildGatherScatter()
54662 Scatter->getMemoryVT(), DL, in rebuildGatherScatter()
54663 Ops, Scatter->getMemOperand(), in rebuildGatherScatter()
54664 Scatter->getIndexType(), in rebuildGatherScatter()
54665 Scatter->isTruncatingStore()); in rebuildGatherScatter()
54672 SDValue Index = GorS->getIndex(); in combineGatherScatter()
54673 SDValue Base = GorS->getBasePtr(); in combineGatherScatter()
54674 SDValue Scale = GorS->getScale(); in combineGatherScatter()
54680 // Shrink constant indices if they are larger than 32-bits. in combineGatherScatter()
54688 if (BV->isConstant() && IndexWidth > 32 && in combineGatherScatter()
54689 DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { in combineGatherScatter()
54703 DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { in combineGatherScatter()
54718 uint64_t ScaleAmt = Scale->getAsZExtVal(); in combineGatherScatter()
54721 if (ConstantSDNode *C = BV->getConstantSplatNode(&UndefElts)) { in combineGatherScatter()
54722 // FIXME: Allow non-constant? in combineGatherScatter()
54725 APInt Adder = C->getAPIntValue() * ScaleAmt; in combineGatherScatter()
54736 if (BV->isConstant() && isa<ConstantSDNode>(Base) && in combineGatherScatter()
54764 SDValue Mask = GorS->getMask(); in combineGatherScatter()
54768 if (N->getOpcode() != ISD::DELETED_NODE) in combineGatherScatter()
54781 X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); in combineX86SetCC()
54782 SDValue EFLAGS = N->getOperand(1); in combineX86SetCC()
54795 SDValue EFLAGS = N->getOperand(3); in combineBrCond()
54796 X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); in combineBrCond()
54803 return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0), in combineBrCond()
54804 N->getOperand(1), Cond, Flags); in combineBrCond()
54813 // Take advantage of vector comparisons (etc.) producing 0 or -1 in each lane in combineVectorCompareAndMaskUnaryOp()
54817 // UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> in combineVectorCompareAndMaskUnaryOp()
54824 EVT VT = N->getValueType(0); in combineVectorCompareAndMaskUnaryOp() local
54825 bool IsStrict = N->isStrictFPOpcode(); in combineVectorCompareAndMaskUnaryOp()
54826 unsigned NumEltBits = VT.getScalarSizeInBits(); in combineVectorCompareAndMaskUnaryOp()
54827 SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); in combineVectorCompareAndMaskUnaryOp()
54828 if (!VT.isVector() || Op0.getOpcode() != ISD::AND || in combineVectorCompareAndMaskUnaryOp()
54830 VT.getSizeInBits() != Op0.getValueSizeInBits()) in combineVectorCompareAndMaskUnaryOp()
54834 // make the transformation for non-constant splats as well, but it's unclear in combineVectorCompareAndMaskUnaryOp()
54839 if (!BV->isConstant()) in combineVectorCompareAndMaskUnaryOp()
54844 EVT IntVT = BV->getValueType(0); in combineVectorCompareAndMaskUnaryOp()
54849 SourceConst = DAG.getNode(N->getOpcode(), DL, {VT, MVT::Other}, in combineVectorCompareAndMaskUnaryOp()
54850 {N->getOperand(0), SDValue(BV, 0)}); in combineVectorCompareAndMaskUnaryOp()
54852 SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); in combineVectorCompareAndMaskUnaryOp()
54855 SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0), in combineVectorCompareAndMaskUnaryOp()
54857 SDValue Res = DAG.getBitcast(VT, NewAnd); in combineVectorCompareAndMaskUnaryOp()
54866 /// If we are converting a value to floating-point, try to replace scalar
54873 SDValue Trunc = N->getOperand(0); in combineToFPTruncExtElt()
54889 // inttofp (trunc (extelt X, 0)) --> inttofp (extelt (bitcast X), 0) in combineToFPTruncExtElt()
54898 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), NewExtElt); in combineToFPTruncExtElt()
54903 bool IsStrict = N->isStrictFPOpcode(); in combineUIntToFP()
54904 SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); in combineUIntToFP()
54905 EVT VT = N->getValueType(0); in combineUIntToFP() local
54911 // UINT_TO_FP(vXi1~15) -> SINT_TO_FP(ZEXT(vXi1~15 to vXi16)) in combineUIntToFP()
54912 // UINT_TO_FP(vXi17~31) -> SINT_TO_FP(ZEXT(vXi17~31 to vXi32)) in combineUIntToFP()
54914 // UINT_TO_FP(vXi1~31) -> SINT_TO_FP(ZEXT(vXi1~31 to vXi32)) in combineUIntToFP()
54915 // UINT_TO_FP(vXi33~63) -> SINT_TO_FP(ZEXT(vXi33~63 to vXi64)) in combineUIntToFP()
54916 if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) { in combineUIntToFP()
54930 return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, in combineUIntToFP()
54931 {N->getOperand(0), P}); in combineUIntToFP()
54932 return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); in combineUIntToFP()
54935 // UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32)) in combineUIntToFP()
54936 // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32)) in combineUIntToFP()
54937 // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32)) in combineUIntToFP()
54939 VT.getScalarType() != MVT::f16) { in combineUIntToFP()
54946 return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, in combineUIntToFP()
54947 {N->getOperand(0), P}); in combineUIntToFP()
54948 return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); in combineUIntToFP()
54954 SDNodeFlags Flags = N->getFlags(); in combineUIntToFP()
54957 return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other}, in combineUIntToFP()
54958 {N->getOperand(0), Op0}); in combineUIntToFP()
54959 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0); in combineUIntToFP()
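// Sketch of why the UINT_TO_FP -> SINT_TO_FP rewrites above are safe once the
// sign bit is known clear (e.g. after zero-extending a narrower element): the
// signed and unsigned interpretations then denote the same integer. The value
// below is an arbitrary example with bit 31 clear.
static_assert(static_cast<double>(0x7FFFFFFF) ==
                  static_cast<double>(0x7FFFFFFFu),
              "sint->fp matches uint->fp when the sign bit is zero");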
54970 bool IsStrict = N->isStrictFPOpcode(); in combineSIntToFP()
54975 SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); in combineSIntToFP()
54976 EVT VT = N->getValueType(0); in combineSIntToFP() local
54982 // SINT_TO_FP(vXi1~15) -> SINT_TO_FP(SEXT(vXi1~15 to vXi16)) in combineSIntToFP()
54983 // SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32)) in combineSIntToFP()
54985 // SINT_TO_FP(vXi1~31) -> SINT_TO_FP(ZEXT(vXi1~31 to vXi32)) in combineSIntToFP()
54986 // SINT_TO_FP(vXi33~63) -> SINT_TO_FP(SEXT(vXi33~63 to vXi64)) in combineSIntToFP()
54987 if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) { in combineSIntToFP()
55001 return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, in combineSIntToFP()
55002 {N->getOperand(0), P}); in combineSIntToFP()
55003 return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); in combineSIntToFP()
55006 // SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32)) in combineSIntToFP()
55007 // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32)) in combineSIntToFP()
55008 // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32)) in combineSIntToFP()
55010 VT.getScalarType() != MVT::f16) { in combineSIntToFP()
55015 return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, in combineSIntToFP()
55016 {N->getOperand(0), P}); in combineSIntToFP()
55017 return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); in combineSIntToFP()
55026 if (NumSignBits >= (BitWidth - 31)) { in combineSIntToFP()
55034 return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, in combineSIntToFP()
55035 {N->getOperand(0), Trunc}); in combineSIntToFP()
55036 return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); in combineSIntToFP()
55040 assert(InVT == MVT::v2i64 && "Unexpected VT!"); in combineSIntToFP()
55043 { 0, 2, -1, -1 }); in combineSIntToFP()
55045 return DAG.getNode(X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other}, in combineSIntToFP()
55046 {N->getOperand(0), Shuf}); in combineSIntToFP()
55047 return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf); in combineSIntToFP()
55052 // a 32-bit target where SSE doesn't support i64->FP operations. in combineSIntToFP()
55058 if (VT == MVT::f16 || VT == MVT::f128) in combineSIntToFP()
55062 // the VT is f80. in combineSIntToFP()
55063 if (Subtarget.hasDQI() && VT != MVT::f80) in combineSIntToFP()
55066 if (Ld->isSimple() && !VT.isVector() && ISD::isNormalLoad(Op0.getNode()) && in combineSIntToFP()
55069 Subtarget.getTargetLowering()->BuildFILD( in combineSIntToFP()
55070 VT, InVT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), in combineSIntToFP()
55071 Ld->getPointerInfo(), Ld->getOriginalAlign(), DAG); in combineSIntToFP()
55087 assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); in needCarryOrOverflowFlag()
55089 for (const SDNode *User : Flags->uses()) { in needCarryOrOverflowFlag()
55091 switch (User->getOpcode()) { in needCarryOrOverflowFlag()
55097 CC = (X86::CondCode)User->getConstantOperandVal(0); in needCarryOrOverflowFlag()
55101 CC = (X86::CondCode)User->getConstantOperandVal(2); in needCarryOrOverflowFlag()
55106 // clang-format off in needCarryOrOverflowFlag()
55114 // clang-format on in needCarryOrOverflowFlag()
55122 assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); in onlyZeroFlagUsed()
55124 for (const SDNode *User : Flags->uses()) { in onlyZeroFlagUsed()
55126 switch (User->getOpcode()) { in onlyZeroFlagUsed()
55140 X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo); in onlyZeroFlagUsed()
55152 if (!isNullConstant(N->getOperand(1))) in combineCMP()
55160 SDValue Op = N->getOperand(0); in combineCMP()
55161 EVT VT = Op.getValueType(); in combineCMP() local
55174 unsigned BitWidth = VT.getSizeInBits(); in combineCMP()
55177 unsigned MaskBits = BitWidth - ShAmt.getZExtValue(); in combineCMP()
55182 Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), in combineCMP()
55183 DAG.getConstant(Mask, dl, VT)); in combineCMP()
55185 DAG.getConstant(0, dl, VT)); in combineCMP()
55192 // (and (extract_elt (kshiftr vXi1, C), 0), 1) -> (and (bc vXi1), 1<<C) in combineCMP()
55220 // Peek through any zero-extend if we're only testing for a zero result. in combineCMP()
55238 // i32 truncated op to prevent partial-reg compares of promoted ops. in combineCMP()
55241 APInt::getBitsSetFrom(OpVT.getSizeInBits(), VT.getSizeInBits()); in combineCMP()
55279 SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0)); in combineCMP()
55280 SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1)); in combineCMP()
55283 SDVTList VTs = DAG.getVTList(VT, MVT::i32); in combineCMP()
55289 DAG.getConstant(0, dl, VT)); in combineCMP()
55298 assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) && in combineX86AddSub()
55302 SDValue LHS = N->getOperand(0); in combineX86AddSub()
55303 SDValue RHS = N->getOperand(1); in combineX86AddSub()
55304 MVT VT = LHS.getSimpleValueType(); in combineX86AddSub() local
55305 bool IsSub = X86ISD::SUB == N->getOpcode(); in combineX86AddSub()
55308 if (IsSub && isOneConstant(N->getOperand(1)) && !N->hasAnyUseOfValue(0)) in combineX86AddSub()
55313 if (!N->hasAnyUseOfValue(1)) { in combineX86AddSub()
55314 SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS); in combineX86AddSub()
55321 SDVTList VTs = DAG.getVTList(N->getValueType(0)); in combineX86AddSub()
55325 Op = DAG.getNegative(Op, DL, VT); in combineX86AddSub()
55330 MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode()); in combineX86AddSub()
55334 return combineAddOrSubToADCOrSBB(IsSub, DL, VT, LHS, RHS, DAG, in combineX86AddSub()
55339 SDValue LHS = N->getOperand(0); in combineSBB()
55340 SDValue RHS = N->getOperand(1); in combineSBB()
55341 SDValue BorrowIn = N->getOperand(2); in combineSBB()
55344 MVT VT = N->getSimpleValueType(0); in combineSBB() local
55345 SDVTList VTs = DAG.getVTList(VT, MVT::i32); in combineSBB()
55349 // Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry) in combineSBB()
55352 !N->hasAnyUseOfValue(1)) in combineSBB()
55353 return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), LHS.getOperand(0), in combineSBB()
55362 SDValue LHS = N->getOperand(0); in combineADC()
55363 SDValue RHS = N->getOperand(1); in combineADC()
55364 SDValue CarryIn = N->getOperand(2); in combineADC()
55370 return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), RHS, LHS, in combineADC()
55376 if (LHSC && RHSC && LHSC->isZero() && RHSC->isZero() && in combineADC()
55381 EVT VT = N->getValueType(0); in combineADC() local
55382 SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1)); in combineADC()
55384 ISD::AND, DL, VT, in combineADC()
55385 DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, in combineADC()
55387 DAG.getConstant(1, DL, VT)); in combineADC()
55391 // Fold ADC(C1,C2,Carry) -> ADC(0,C1+C2,Carry) in combineADC()
55394 if (LHSC && RHSC && !LHSC->isZero() && !N->hasAnyUseOfValue(1)) { in combineADC()
55396 APInt Sum = LHSC->getAPIntValue() + RHSC->getAPIntValue(); in combineADC()
55397 return DAG.getNode(X86ISD::ADC, DL, N->getVTList(), in combineADC()
55403 MVT VT = N->getSimpleValueType(0); in combineADC() local
55404 SDVTList VTs = DAG.getVTList(VT, MVT::i32); in combineADC()
55408 // Fold ADC(ADD(X,Y),0,Carry) -> ADC(X,Y,Carry) in combineADC()
55410 if (LHS.getOpcode() == ISD::ADD && RHSC && RHSC->isZero() && in combineADC()
55411 !N->hasAnyUseOfValue(1)) in combineADC()
55412 return DAG.getNode(X86ISD::ADC, SDLoc(N), N->getVTList(), LHS.getOperand(0), in combineADC()
55419 const SDLoc &DL, EVT VT, in matchPMADDWD() argument
55439 if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 || in matchPMADDWD()
55440 VT.getVectorNumElements() < 4 || in matchPMADDWD()
55441 !isPowerOf2_32(VT.getVectorNumElements())) in matchPMADDWD()
55456 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; i += 2) { in matchPMADDWD()
55457 SDValue Op0L = Op0->getOperand(i), Op1L = Op1->getOperand(i), in matchPMADDWD()
55458 Op0H = Op0->getOperand(i + 1), Op1H = Op1->getOperand(i + 1); in matchPMADDWD()
55465 auto *Const0L = dyn_cast<ConstantSDNode>(Op0L->getOperand(1)); in matchPMADDWD()
55466 auto *Const1L = dyn_cast<ConstantSDNode>(Op1L->getOperand(1)); in matchPMADDWD()
55467 auto *Const0H = dyn_cast<ConstantSDNode>(Op0H->getOperand(1)); in matchPMADDWD()
55468 auto *Const1H = dyn_cast<ConstantSDNode>(Op1H->getOperand(1)); in matchPMADDWD()
55471 unsigned Idx0L = Const0L->getZExtValue(), Idx1L = Const1L->getZExtValue(), in matchPMADDWD()
55472 Idx0H = Const0H->getZExtValue(), Idx1H = Const1H->getZExtValue(); in matchPMADDWD()
55490 Mul = Op0L->getOperand(0); in matchPMADDWD()
55491 if (Mul->getOpcode() != ISD::MUL || in matchPMADDWD()
55496 if (Mul != Op0L->getOperand(0) || Mul != Op1L->getOperand(0) || in matchPMADDWD()
55497 Mul != Op0H->getOperand(0) || Mul != Op1H->getOperand(0)) in matchPMADDWD()
55508 VT.getVectorNumElements() * 2); in matchPMADDWD()
55520 return SplitOpsAndApply(DAG, Subtarget, DL, VT, { N0, N1 }, PMADDBuilder); in matchPMADDWD()
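// Sketch of the per-lane arithmetic matchPMADDWD is looking for: PMADDWD
// builds each i32 result lane from an adjacent pair of signed i16 products,
// i.e. a[2i]*b[2i] + a[2i+1]*b[2i+1]. Arbitrary example pair (3,4) x (5,6):
static_assert(3 * 5 + 4 * 6 == 39, "adjacent-pair multiply-accumulate");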
55527 const SDLoc &DL, EVT VT, in matchPMADDWD_2() argument
55535 if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 || in matchPMADDWD_2()
55536 VT.getVectorNumElements() < 4 || in matchPMADDWD_2()
55537 !isPowerOf2_32(VT.getVectorNumElements())) in matchPMADDWD_2()
55596 unsigned IdxN00 = ConstN00Elt->getZExtValue(); in matchPMADDWD_2()
55597 unsigned IdxN01 = ConstN01Elt->getZExtValue(); in matchPMADDWD_2()
55598 unsigned IdxN10 = ConstN10Elt->getZExtValue(); in matchPMADDWD_2()
55599 unsigned IdxN11 = ConstN11Elt->getZExtValue(); in matchPMADDWD_2()
55621 if (In0.getValueSizeInBits() < VT.getSizeInBits() || in matchPMADDWD_2()
55622 In1.getValueSizeInBits() < VT.getSizeInBits()) in matchPMADDWD_2()
55646 // If the output is narrower than an input, extract the low part of the input in matchPMADDWD_2()
55649 VT.getVectorNumElements() * 2); in matchPMADDWD_2()
55658 return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 }, in matchPMADDWD_2()
55662 // ADD(VPMADDWD(X,Y),VPMADDWD(Z,W)) -> VPMADDWD(SHUFFLE(X,Z), SHUFFLE(Y,W))
55667 const SDLoc &DL, EVT VT) { in combineAddOfPMADDWD() argument
55671 // TODO: Add 256/512-bit support once VPMADDWD combines with shuffles. in combineAddOfPMADDWD()
55672 if (VT.getSizeInBits() > 128) in combineAddOfPMADDWD()
55675 unsigned NumElts = VT.getVectorNumElements(); in combineAddOfPMADDWD()
55703 return DAG.getNode(X86ISD::VPMADDWD, DL, VT, LHS, RHS); in combineAddOfPMADDWD()
55709 /// earlier folds that may be used to turn select-of-constants into logic hacks.
55713 // If an operand is zero, add-of-0 gets simplified away, so that's clearly in pushAddIntoCmovOfConsts()
55714 // better because we eliminate 1-2 instructions. This transform is still in pushAddIntoCmovOfConsts()
55717 // immediate asm operands (fit in 32-bits). in pushAddIntoCmovOfConsts()
55730 SDValue Cmov = N->getOperand(0); in pushAddIntoCmovOfConsts()
55731 SDValue OtherOp = N->getOperand(1); in pushAddIntoCmovOfConsts()
55742 EVT VT = N->getValueType(0); in pushAddIntoCmovOfConsts() local
55749 // a 3-operand LEA which is likely slower than a 2-operand LEA. in pushAddIntoCmovOfConsts()
55753 all_of(N->uses(), [&](SDNode *Use) { in pushAddIntoCmovOfConsts()
55755 return MemNode && MemNode->getBasePtr().getNode() == N; in pushAddIntoCmovOfConsts()
55757 // add (cmov C1, C2), add (X, Y) --> add (cmov (add X, C1), (add X, C2)), Y in pushAddIntoCmovOfConsts()
55761 FalseOp = DAG.getNode(ISD::ADD, DL, VT, X, FalseOp); in pushAddIntoCmovOfConsts()
55762 TrueOp = DAG.getNode(ISD::ADD, DL, VT, X, TrueOp); in pushAddIntoCmovOfConsts()
55763 Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, in pushAddIntoCmovOfConsts()
55765 return DAG.getNode(ISD::ADD, DL, VT, Cmov, Y); in pushAddIntoCmovOfConsts()
55768 // add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2) in pushAddIntoCmovOfConsts()
55769 FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp); in pushAddIntoCmovOfConsts()
55770 TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp); in pushAddIntoCmovOfConsts()
55771 return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2), in pushAddIntoCmovOfConsts()
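// Sketch of the identity behind pushing the add into the cmov above:
// (cond ? C1 : C2) + Y == (cond ? C1 + Y : C2 + Y) for either value of cond.
// Constants below are arbitrary (C1 == 10, C2 == 20, Y == 7).
static_assert((true ? 10 : 20) + 7 == (true ? 10 + 7 : 20 + 7) &&
              (false ? 10 : 20) + 7 == (false ? 10 + 7 : 20 + 7),
              "add distributes over a two-constant select");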
55778 EVT VT = N->getValueType(0); in combineAdd() local
55779 SDValue Op0 = N->getOperand(0); in combineAdd()
55780 SDValue Op1 = N->getOperand(1); in combineAdd()
55786 if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget)) in combineAdd()
55788 if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, DL, VT, Subtarget)) in combineAdd()
55790 if (SDValue MAdd = combineAddOfPMADDWD(DAG, Op0, Op1, DL, VT)) in combineAdd()
55797 // add(psadbw(X,0),psadbw(Y,0)) -> psadbw(add(X,Y),0) in combineAdd()
55806 return DAG.getNode(X86ISD::PSADBW, DL, VT, Sum, in combineAdd()
55813 // FIXME: We have the (sub Y, (zext (vXi1 X))) -> (add (sext (vXi1 X)), Y) in in combineAdd()
55816 if (VT.isVector()) { in combineAdd()
55821 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0)); in combineAdd()
55822 return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt); in combineAdd()
55828 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0)); in combineAdd()
55829 return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt); in combineAdd()
55833 // Fold ADD(ADC(Y,0,W),X) -> ADC(X,Y,W) in combineAdd()
55834 if (Op0.getOpcode() == X86ISD::ADC && Op0->hasOneUse() && in combineAdd()
55836 assert(!Op0->hasAnyUseOfValue(1) && "Overflow bit in use"); in combineAdd()
55837 return DAG.getNode(X86ISD::ADC, SDLoc(Op0), Op0->getVTList(), Op1, in combineAdd()
55844 // Try to fold (sub Y, cmovns X, -X) -> (add Y, cmovns -X, X) if the cmov
55845 // condition comes from the subtract node that produced -X. This matches the
55849 SDValue N0 = N->getOperand(0); in combineSubABS()
55850 SDValue N1 = N->getOperand(1); in combineSubABS()
55865 // Get the X and -X from the negate. in combineSubABS()
55878 MVT VT = N->getSimpleValueType(0); in combineSubABS() local
55879 SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, TrueOp, FalseOp, in combineSubABS()
55882 return DAG.getNode(ISD::ADD, DL, VT, N0, Cmov); in combineSubABS()
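// Sketch of the rewrite in combineSubABS: subtracting a cmov of {X, -X} is
// the same as adding the cmov with its arms swapped. Arbitrary example with
// Y == 10 and X == 3:
static_assert(10 - 3 == 10 + (-3) && 10 - (-3) == 10 + 3,
              "sub(Y, cmov(X, -X)) == add(Y, cmov(-X, X))");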
55886 SDValue Op0 = N->getOperand(0); in combineSubSetcc()
55887 SDValue Op1 = N->getOperand(1); in combineSubSetcc()
55891 // (add (zero_extend (setcc inverted) C-1)) if C is a nonzero immediate in combineSubSetcc()
55893 EVT VT = N->getValueType(0); in combineSubSetcc() local
55896 !Op0C->isZero() && Op1.getOperand(0).getOpcode() == X86ISD::SETCC && in combineSubSetcc()
55901 APInt NewImm = Op0C->getAPIntValue() - 1; in combineSubSetcc()
55904 NewSetCC = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NewSetCC); in combineSubSetcc()
55905 return DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(VT, VT), NewSetCC, in combineSubSetcc()
55906 DAG.getConstant(NewImm, DL, VT)); in combineSubSetcc()
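// Sketch of the combineSubSetcc rewrite: for a boolean b, C - b equals
// (C - 1) + !b, so the setcc condition is inverted and the immediate is
// decremented. Arbitrary immediate C == 7:
static_assert(7 - 1 == (7 - 1) + 0 && 7 - 0 == (7 - 1) + 1,
              "C - b == (C - 1) + !b for b in {0, 1}");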
55915 // -> in combineX86CloadCstore()
55917 if (N->getConstantOperandVal(3) != X86::COND_NE) in combineX86CloadCstore()
55920 SDValue Sub = N->getOperand(4); in combineX86CloadCstore()
55929 SmallVector<SDValue, 5> Ops(N->op_values()); in combineX86CloadCstore()
55933 return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops, in combineX86CloadCstore()
55934 cast<MemSDNode>(N)->getMemoryVT(), in combineX86CloadCstore()
55935 cast<MemSDNode>(N)->getMemOperand()); in combineX86CloadCstore()
55941 SDValue Op0 = N->getOperand(0); in combineSub()
55942 SDValue Op1 = N->getOperand(1); in combineSub()
55949 return !Cst->isOpaque(); in combineSub()
55959 // sub(C1, xor(X, C2)) -> add(xor(X, ~C2), C1+1) in combineSub()
55962 Op1->hasOneUse()) { in combineSub()
55963 EVT VT = Op0.getValueType(); in combineSub() local
55964 SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0), in combineSub()
55965 DAG.getNOT(SDLoc(Op1), Op1.getOperand(1), VT)); in combineSub()
55967 DAG.getNode(ISD::ADD, DL, VT, Op0, DAG.getConstant(1, DL, VT)); in combineSub()
55968 return DAG.getNode(ISD::ADD, DL, VT, NewXor, NewAdd); in combineSub()
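// Sketch of the sub(C1, xor(X, C2)) -> add(xor(X, ~C2), C1 + 1) fold: in
// two's complement -v == ~v + 1 and ~(x ^ c) == x ^ ~c. Arbitrary values
// X == 0x0F, C2 == 0x33, C1 == 100 (unsigned wrap is intentional):
static_assert((0x0Fu ^ ~0x33u) + 101u == 100u - (0x0Fu ^ 0x33u),
              "C1 - (X ^ C2) == (X ^ ~C2) + (C1 + 1)");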
55978 // Fold SUB(X,ADC(Y,0,W)) -> SBB(X,Y,W) in combineSub()
55979 if (Op1.getOpcode() == X86ISD::ADC && Op1->hasOneUse() && in combineSub()
55981 assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use"); in combineSub()
55982 return DAG.getNode(X86ISD::SBB, SDLoc(Op1), Op1->getVTList(), Op0, in combineSub()
55986 // Fold SUB(X,SBB(Y,Z,W)) -> SUB(ADC(X,Z,W),Y) in combineSub()
55988 if (Op1.getOpcode() == X86ISD::SBB && Op1->hasOneUse() && in combineSub()
55990 assert(!Op1->hasAnyUseOfValue(1) && "Overflow bit in use"); in combineSub()
55991 SDValue ADC = DAG.getNode(X86ISD::ADC, SDLoc(Op1), Op1->getVTList(), Op0, in combineSub()
56008 unsigned Opcode = N->getOpcode(); in combineVectorCompare()
56012 SDValue LHS = N->getOperand(0); in combineVectorCompare()
56013 SDValue RHS = N->getOperand(1); in combineVectorCompare()
56014 MVT VT = N->getSimpleValueType(0); in combineVectorCompare() local
56015 unsigned EltBits = VT.getScalarSizeInBits(); in combineVectorCompare()
56016 unsigned NumElts = VT.getVectorNumElements(); in combineVectorCompare()
56020 return (Opcode == X86ISD::PCMPEQ) ? DAG.getAllOnesConstant(DL, VT) in combineVectorCompare()
56021 : DAG.getConstant(0, DL, VT); in combineVectorCompare()
56024 // PCMPEQ(X,UNDEF) -> UNDEF in combineVectorCompare()
56025 // PCMPGT(X,UNDEF) -> 0 in combineVectorCompare()
56026 // PCMPGT(UNDEF,X) -> 0 in combineVectorCompare()
56043 return getConstVector(Results, LHSUndefs | RHSUndefs, VT, DAG, DL); in combineVectorCompare()
56044 return getConstVector(Results, VT, DAG, DL); in combineVectorCompare()
56053 CastIntSETCCtoFP(MVT VT, ISD::CondCode CC, unsigned NumSignificantBitsLHS, in CastIntSETCCtoFP() argument
56055 MVT SVT = VT.getScalarType(); in CastIntSETCCtoFP()
56075 static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, in combineConcatVectorOps() argument
56080 unsigned EltSizeInBits = VT.getScalarSizeInBits(); in combineConcatVectorOps()
56083 return DAG.getUNDEF(VT); in combineConcatVectorOps()
56088 return getZeroVector(VT, Subtarget, DAG, DL); in combineConcatVectorOps()
56098 (VT.is256BitVector() || (VT.is512BitVector() && Subtarget.hasAVX512()))) { in combineConcatVectorOps()
56101 return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0)); in combineConcatVectorOps()
56103 // concat_vectors(movddup(x),movddup(x)) -> broadcast(x) in combineConcatVectorOps()
56104 if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 && in combineConcatVectorOps()
56107 VT.getScalarType(), Subtarget))) in combineConcatVectorOps()
56108 return DAG.getNode(X86ISD::VBROADCAST, DL, VT, in combineConcatVectorOps()
56113 // concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x) in combineConcatVectorOps()
56118 Op0.getOperand(0).getValueType() == VT.getScalarType()) in combineConcatVectorOps()
56119 return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0)); in combineConcatVectorOps()
56122 // extract_subvector(broadcast(x))) -> broadcast(x) in combineConcatVectorOps()
56124 // extract_subvector(subv_broadcast(x))) -> subv_broadcast(x) in combineConcatVectorOps()
56126 Op0.getOperand(0).getValueType() == VT) { in combineConcatVectorOps()
56132 Op0.getValueType() == cast<MemSDNode>(SrcVec)->getMemoryVT()) in combineConcatVectorOps()
56136 // concat_vectors(permq(x),permq(x)) -> permq(concat_vectors(x,x)) in combineConcatVectorOps()
56139 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56140 DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, in combineConcatVectorOps()
56145 // concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128. in combineConcatVectorOps()
56148 if (VT.is256BitVector() && NumOps == 2) { in combineConcatVectorOps()
56160 return DAG.getNode(X86ISD::VPERM2X128, DL, VT, in combineConcatVectorOps()
56161 DAG.getBitcast(VT, Src0.getOperand(0)), in combineConcatVectorOps()
56162 DAG.getBitcast(VT, Src1.getOperand(0)), in combineConcatVectorOps()
56169 // TODO - combineX86ShufflesRecursively should handle shuffle concatenation in combineConcatVectorOps()
56174 auto ConcatSubOperand = [&](EVT VT, ArrayRef<SDValue> SubOps, unsigned I) { in combineConcatVectorOps() argument
56187 VT, DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Subs)); in combineConcatVectorOps()
56189 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs); in combineConcatVectorOps()
56191 auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) { in combineConcatVectorOps() argument
56201 Sub.getOperand(0).getValueType() == VT && in combineConcatVectorOps()
56212 if (VT == MVT::v4f64 || VT == MVT::v4i64) in combineConcatVectorOps()
56213 return DAG.getNode(X86ISD::UNPCKL, DL, VT, in combineConcatVectorOps()
56214 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56215 ConcatSubOperand(VT, Ops, 0)); in combineConcatVectorOps()
56217 if (VT == MVT::v8f32 || (VT == MVT::v8i32 && Subtarget.hasInt256())) in combineConcatVectorOps()
56218 return DAG.getNode(VT == MVT::v8f32 ? X86ISD::VPERMILPI in combineConcatVectorOps()
56220 DL, VT, ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56229 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56230 ConcatSubOperand(VT, Ops, 0)); in combineConcatVectorOps()
56235 if (!IsSplat && VT.getScalarType() == MVT::f32 && in combineConcatVectorOps()
56239 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56240 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56241 ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2)); in combineConcatVectorOps()
56249 ((VT.is256BitVector() && Subtarget.hasInt256()) || in combineConcatVectorOps()
56250 (VT.is512BitVector() && Subtarget.useAVX512Regs())) && in combineConcatVectorOps()
56257 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56258 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56259 ConcatSubOperand(VT, Ops, 1)); in combineConcatVectorOps()
56266 if (!IsSplat && NumOps == 2 && VT.is256BitVector() && in combineConcatVectorOps()
56268 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56269 ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1)); in combineConcatVectorOps()
56274 (VT.is256BitVector() || in combineConcatVectorOps()
56275 (VT.is512BitVector() && Subtarget.useAVX512Regs())) && in combineConcatVectorOps()
56279 MVT FloatVT = VT.changeVectorElementType(MVT::f32); in combineConcatVectorOps()
56280 SDValue Res = DAG.getBitcast(FloatVT, ConcatSubOperand(VT, Ops, 0)); in combineConcatVectorOps()
56283 return DAG.getBitcast(VT, Res); in combineConcatVectorOps()
56285 if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) { in combineConcatVectorOps()
56289 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56290 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56298 if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) || in combineConcatVectorOps()
56299 (VT.is512BitVector() && Subtarget.useBWIRegs()))) { in combineConcatVectorOps()
56303 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56310 (VT.is512BitVector() && Subtarget.useAVX512Regs())) { in combineConcatVectorOps()
56331 return DAG.getNode(X86ISD::VPERMV, DL, VT, Mask, Src); in combineConcatVectorOps()
56336 if (!IsSplat && NumOps == 2 && VT.is512BitVector()) { in combineConcatVectorOps()
56361 return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1); in combineConcatVectorOps()
56366 if (!IsSplat && VT.is512BitVector() && Subtarget.useAVX512Regs()) { in combineConcatVectorOps()
56374 MVT ShuffleVT = VT.isFloatingPoint() ? MVT::v8f64 : MVT::v8i64; in combineConcatVectorOps()
56383 return DAG.getBitcast(VT, Res); in combineConcatVectorOps()
56389 if (!IsSplat && NumOps == 2 && VT.is512BitVector()) { in combineConcatVectorOps()
56398 return DAG.getNode(X86ISD::SHUF128, DL, VT, LHS, RHS, in combineConcatVectorOps()
56404 if (!IsSplat && NumOps == 2 && VT.is256BitVector()) { in combineConcatVectorOps()
56412 return DAG.getNode(ISD::TRUNCATE, DL, VT, in combineConcatVectorOps()
56419 // Special case: SHL/SRL AVX1 V4i64 by 32-bits can lower as a shuffle. in combineConcatVectorOps()
56421 if (VT == MVT::v4i64 && !Subtarget.hasInt256() && in combineConcatVectorOps()
56425 SDValue Res = DAG.getBitcast(MVT::v8i32, ConcatSubOperand(VT, Ops, 0)); in combineConcatVectorOps()
56434 return DAG.getBitcast(VT, Res); in combineConcatVectorOps()
56441 if (((VT.is256BitVector() && Subtarget.hasInt256()) || in combineConcatVectorOps()
56442 (VT.is512BitVector() && Subtarget.useAVX512Regs() && in combineConcatVectorOps()
56447 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56448 ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1)); in combineConcatVectorOps()
56454 if (VT.is512BitVector() && Subtarget.useAVX512Regs() && in combineConcatVectorOps()
56458 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56459 ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1)); in combineConcatVectorOps()
56466 if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) || in combineConcatVectorOps()
56467 (VT.is512BitVector() && Subtarget.useAVX512Regs()))) { in combineConcatVectorOps()
56468 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56469 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56470 ConcatSubOperand(VT, Ops, 1)); in combineConcatVectorOps()
56475 if (!IsSplat && VT.is256BitVector() && in combineConcatVectorOps()
56476 (Subtarget.hasInt256() || VT == MVT::v8i32) && in combineConcatVectorOps()
56477 (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1))) { in combineConcatVectorOps()
56479 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56480 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56481 ConcatSubOperand(VT, Ops, 1)); in combineConcatVectorOps()
56503 MVT FpVT = VT.changeVectorElementType(FpSVT); in combineConcatVectorOps()
56507 SDValue LHS = ConcatSubOperand(VT, Ops, 0); in combineConcatVectorOps()
56508 SDValue RHS = ConcatSubOperand(VT, Ops, 1); in combineConcatVectorOps()
56516 VT, DAG.getNode(X86ISD::CMPP, DL, FpVT, LHS, RHS, in combineConcatVectorOps()
56526 if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) || in combineConcatVectorOps()
56527 (VT.is512BitVector() && Subtarget.useBWIRegs()))) { in combineConcatVectorOps()
56528 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56529 ConcatSubOperand(VT, Ops, 0)); in combineConcatVectorOps()
56534 (VT.is256BitVector() || in combineConcatVectorOps()
56535 (VT.is512BitVector() && Subtarget.useAVX512Regs())) && in combineConcatVectorOps()
56539 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56540 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56541 ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2)); in combineConcatVectorOps()
56547 if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) || in combineConcatVectorOps()
56548 (VT.is512BitVector() && Subtarget.useAVX512Regs() && in combineConcatVectorOps()
56550 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56551 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56552 ConcatSubOperand(VT, Ops, 1)); in combineConcatVectorOps()
56561 if (!IsSplat && (IsConcatFree(VT, Ops, 0) || IsConcatFree(VT, Ops, 1)) && in combineConcatVectorOps()
56562 (VT.is256BitVector() || in combineConcatVectorOps()
56563 (VT.is512BitVector() && Subtarget.useAVX512Regs()))) { in combineConcatVectorOps()
56564 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56565 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56566 ConcatSubOperand(VT, Ops, 1)); in combineConcatVectorOps()
56570 if (!IsSplat && (VT.is256BitVector() || in combineConcatVectorOps()
56571 (VT.is512BitVector() && Subtarget.useAVX512Regs()))) { in combineConcatVectorOps()
56572 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56573 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56574 ConcatSubOperand(VT, Ops, 1)); in combineConcatVectorOps()
56581 if (!IsSplat && VT.is256BitVector() && in combineConcatVectorOps()
56582 (VT.isFloatingPoint() || Subtarget.hasInt256())) { in combineConcatVectorOps()
56583 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56584 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56585 ConcatSubOperand(VT, Ops, 1)); in combineConcatVectorOps()
56590 if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) || in combineConcatVectorOps()
56591 (VT.is512BitVector() && Subtarget.useBWIRegs()))) { in combineConcatVectorOps()
56595 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56602 ((VT.is256BitVector() && Subtarget.hasInt256()) || in combineConcatVectorOps()
56603 (VT.is512BitVector() && Subtarget.useBWIRegs())) && in combineConcatVectorOps()
56607 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56608 ConcatSubOperand(VT, Ops, 0), in combineConcatVectorOps()
56609 ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2)); in combineConcatVectorOps()
56613 if (NumOps == 2 && VT.is512BitVector() && Subtarget.useBWIRegs()) { in combineConcatVectorOps()
56621 uint64_t Mask = (Mask1 << (VT.getVectorNumElements() / 2)) | Mask0; in combineConcatVectorOps()
56622 MVT MaskSVT = MVT::getIntegerVT(VT.getVectorNumElements()); in combineConcatVectorOps()
56623 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); in combineConcatVectorOps()
56626 return DAG.getSelect(DL, VT, Sel, ConcatSubOperand(VT, Ops, 1), in combineConcatVectorOps()
56627 ConcatSubOperand(VT, Ops, 0)); in combineConcatVectorOps()
56632 (VT.is256BitVector() || in combineConcatVectorOps()
56633 (VT.is512BitVector() && Subtarget.useAVX512Regs())) && in combineConcatVectorOps()
56640 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56642 ConcatSubOperand(VT, Ops, 1), in combineConcatVectorOps()
56643 ConcatSubOperand(VT, Ops, 2)); in combineConcatVectorOps()
56648 if (!IsSplat && VT.is256BitVector() && NumOps == 2 && in combineConcatVectorOps()
56650 IsConcatFree(VT, Ops, 1) && IsConcatFree(VT, Ops, 2)) { in combineConcatVectorOps()
56654 return DAG.getNode(Op0.getOpcode(), DL, VT, in combineConcatVectorOps()
56656 ConcatSubOperand(VT, Ops, 1), in combineConcatVectorOps()
56657 ConcatSubOperand(VT, Ops, 2)); in combineConcatVectorOps()
56668 if (TLI->allowsMemoryAccess(Ctx, DAG.getDataLayout(), VT, in combineConcatVectorOps()
56669 *FirstLd->getMemOperand(), &Fast) && in combineConcatVectorOps()
56672 EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false)) in combineConcatVectorOps()
56680 APInt UndefElts = APInt::getZero(VT.getVectorNumElements()); in combineConcatVectorOps()
56691 if (EltBits.size() == VT.getVectorNumElements()) { in combineConcatVectorOps()
56692 Constant *C = getConstantVector(VT, EltBits, UndefElts, Ctx); in combineConcatVectorOps()
56697 SDValue Ld = DAG.getLoad(VT, DL, DAG.getEntryNode(), CV, MPI); in combineConcatVectorOps()
56708 (VT.is256BitVector() || (VT.is512BitVector() && Subtarget.hasAVX512()))) { in combineConcatVectorOps()
56717 getBROADCAST_LOAD(Opc, DL, VT, Mem->getMemoryVT(), Mem, 0, DAG)) { in combineConcatVectorOps()
56726 // If we're splatting a 128-bit subvector to 512-bits, use SHUF128 directly. in combineConcatVectorOps()
56727 if (IsSplat && NumOps == 4 && VT.is512BitVector() && in combineConcatVectorOps()
56729 MVT ShuffleVT = VT.isFloatingPoint() ? MVT::v8f64 : MVT::v8i64; in combineConcatVectorOps()
56734 return DAG.getBitcast(VT, Res); in combineConcatVectorOps()
56743 EVT VT = N->getValueType(0); in combineCONCAT_VECTORS() local
56744 EVT SrcVT = N->getOperand(0).getValueType(); in combineCONCAT_VECTORS()
56746 SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); in combineCONCAT_VECTORS()
56748 if (VT.getVectorElementType() == MVT::i1) { in combineCONCAT_VECTORS()
56751 APInt Constant = APInt::getZero(VT.getSizeInBits()); in combineCONCAT_VECTORS()
56755 Constant.insertBits(C->getAPIntValue(), I * SubSizeInBits); in combineCONCAT_VECTORS()
56756 if (I == (E - 1)) { in combineCONCAT_VECTORS()
56757 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); in combineCONCAT_VECTORS()
56759 return DAG.getBitcast(VT, DAG.getConstant(Constant, SDLoc(N), IntVT)); in combineCONCAT_VECTORS()
56767 if (Subtarget.hasAVX() && TLI.isTypeLegal(VT) && TLI.isTypeLegal(SrcVT)) { in combineCONCAT_VECTORS()
56768 if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG, in combineCONCAT_VECTORS()
56782 MVT OpVT = N->getSimpleValueType(0); in combineINSERT_SUBVECTOR()
56787 SDValue Vec = N->getOperand(0); in combineINSERT_SUBVECTOR()
56788 SDValue SubVec = N->getOperand(1); in combineINSERT_SUBVECTOR()
56790 uint64_t IdxVal = N->getConstantOperandVal(2); in combineINSERT_SUBVECTOR()
56827 Ins.getOperand(1), N->getOperand(2)); in combineINSERT_SUBVECTOR()
56836 // insert_subvector X, (insert_subvector undef, Y, 0), Idx --> in combineINSERT_SUBVECTOR()
56843 SubVec.getOperand(1), N->getOperand(2)); in combineINSERT_SUBVECTOR()
56905 SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() }; in combineINSERT_SUBVECTOR()
56908 MemIntr->getMemoryVT(), in combineINSERT_SUBVECTOR()
56909 MemIntr->getMemOperand()); in combineINSERT_SUBVECTOR()
56932 /// is a common pattern for AVX1 integer code because 256-bit selects may be
56933 /// legal, but there is almost no integer math/logic available for 256-bit.
56938 SDValue Sel = Ext->getOperand(0); in narrowExtractedVectorSelect()
56945 // TODO: This can be extended to handle extraction to 256-bits. in narrowExtractedVectorSelect()
56946 MVT VT = Ext->getSimpleValueType(0); in narrowExtractedVectorSelect() local
56947 if (!VT.is128BitVector()) in narrowExtractedVectorSelect()
56954 MVT WideVT = Ext->getOperand(0).getSimpleValueType(); in narrowExtractedVectorSelect()
56961 unsigned ExtIdx = Ext->getConstantOperandVal(1); in narrowExtractedVectorSelect()
56977 unsigned NarrowingFactor = WideVT.getSizeInBits() / VT.getSizeInBits(); in narrowExtractedVectorSelect()
56984 return DAG.getBitcast(VT, NarrowSel); in narrowExtractedVectorSelect()
56990 // For AVX1 only, if we are extracting from a 256-bit and+not (which will in combineEXTRACT_SUBVECTOR()
56992 // split the 'and' into 128-bit ops to avoid the concatenate and extract. in combineEXTRACT_SUBVECTOR()
57000 if (!N->getValueType(0).isSimple()) in combineEXTRACT_SUBVECTOR()
57003 MVT VT = N->getSimpleValueType(0); in combineEXTRACT_SUBVECTOR() local
57004 SDValue InVec = N->getOperand(0); in combineEXTRACT_SUBVECTOR()
57005 unsigned IdxVal = N->getConstantOperandVal(1); in combineEXTRACT_SUBVECTOR()
57008 unsigned SizeInBits = VT.getSizeInBits(); in combineEXTRACT_SUBVECTOR()
57010 unsigned NumSubElts = VT.getVectorNumElements(); in combineEXTRACT_SUBVECTOR()
57021 SDValue NotOp = V->getOperand(0); in combineEXTRACT_SUBVECTOR()
57026 // extract (and v4i64 X, (not (concat Y1, Y2))), n -> andnp v2i64 X(n), Y1 in combineEXTRACT_SUBVECTOR()
57028 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, in combineEXTRACT_SUBVECTOR()
57029 DAG.getBitcast(InVecVT, Concat), N->getOperand(1)); in combineEXTRACT_SUBVECTOR()
57040 return getZeroVector(VT, Subtarget, DAG, DL); in combineEXTRACT_SUBVECTOR()
57043 if (VT.getScalarType() == MVT::i1) in combineEXTRACT_SUBVECTOR()
57044 return DAG.getConstant(1, DL, VT); in combineEXTRACT_SUBVECTOR()
57045 return getOnesVector(VT, DAG, DL); in combineEXTRACT_SUBVECTOR()
57049 return DAG.getBuildVector(VT, DL, InVec->ops().slice(IdxVal, NumSubElts)); in combineEXTRACT_SUBVECTOR()
57055 if (VT.getVectorElementType() != MVT::i1 && in combineEXTRACT_SUBVECTOR()
57059 SDValue NewExt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, in combineEXTRACT_SUBVECTOR()
57060 InVec.getOperand(0), N->getOperand(1)); in combineEXTRACT_SUBVECTOR()
57061 unsigned NewIdxVal = InVec.getConstantOperandVal(2) - IdxVal; in combineEXTRACT_SUBVECTOR()
57062 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, NewExt, in combineEXTRACT_SUBVECTOR()
57077 cast<MemIntrinsicSDNode>(InVec)->getMemoryVT() == VT) in combineEXTRACT_SUBVECTOR()
57091 return DAG.getUNDEF(VT); in combineEXTRACT_SUBVECTOR()
57093 return getZeroVector(VT, Subtarget, DAG, DL); in combineEXTRACT_SUBVECTOR()
57117 if (IdxVal == 0 && VT == MVT::v2f64 && InVecVT == MVT::v4f64) { in combineEXTRACT_SUBVECTOR()
57121 return DAG.getNode(X86ISD::CVTSI2P, DL, VT, InVec.getOperand(0)); in combineEXTRACT_SUBVECTOR()
57126 return DAG.getNode(X86ISD::CVTUI2P, DL, VT, InVec.getOperand(0)); in combineEXTRACT_SUBVECTOR()
57131 return DAG.getNode(X86ISD::VFPEXT, DL, VT, InVec.getOperand(0)); in combineEXTRACT_SUBVECTOR()
57135 if (InOpcode == ISD::FP_TO_SINT && VT == MVT::v4i32) { in combineEXTRACT_SUBVECTOR()
57138 return DAG.getNode(InOpcode, DL, VT, in combineEXTRACT_SUBVECTOR()
57149 return DAG.getNode(ExtOp, DL, VT, Ext); in combineEXTRACT_SUBVECTOR()
57158 return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2); in combineEXTRACT_SUBVECTOR()
57165 return DAG.getNode(InOpcode, DL, VT, Ext); in combineEXTRACT_SUBVECTOR()
57177 return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, InVec.getOperand(2)); in combineEXTRACT_SUBVECTOR()
57178 return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1); in combineEXTRACT_SUBVECTOR()
57184 return DAG.getNode(InOpcode, DL, VT, Ext0); in combineEXTRACT_SUBVECTOR()
57188 // Always split vXi64 logical shifts where we're extracting the upper 32-bits in combineEXTRACT_SUBVECTOR()
57195 return DAG.getNode(InOpcode, DL, VT, Ext, InVec.getOperand(1)); in combineEXTRACT_SUBVECTOR()
57202 EVT VT = N->getValueType(0); in combineScalarToVector() local
57203 SDValue Src = N->getOperand(0); in combineScalarToVector()
57210 if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse() && in combineScalarToVector()
57215 if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && in combineScalarToVector()
57219 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0), in combineScalarToVector()
57224 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Src.hasOneUse()) { in combineScalarToVector()
57234 if (Ld->getExtensionType() == Ext && in combineScalarToVector()
57235 Ld->getMemoryVT().getScalarSizeInBits() <= 32) in combineScalarToVector()
57247 VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32, in combineScalarToVector()
57252 VT, in combineScalarToVector()
57259 if (VT == MVT::v2i64 && Src.getOpcode() == ISD::BITCAST && in combineScalarToVector()
57261 return DAG.getNode(X86ISD::MOVQ2DQ, DL, VT, Src.getOperand(0)); in combineScalarToVector()
57265 if (VT.getScalarType() == Src.getValueType()) in combineScalarToVector()
57266 for (SDNode *User : Src->uses()) in combineScalarToVector()
57267 if (User->getOpcode() == X86ISD::VBROADCAST && in combineScalarToVector()
57268 Src == User->getOperand(0)) { in combineScalarToVector()
57269 unsigned SizeInBits = VT.getFixedSizeInBits(); in combineScalarToVector()
57271 User->getValueSizeInBits(0).getFixedValue(); in combineScalarToVector()
57287 SDValue LHS = N->getOperand(0); in combinePMULDQ()
57288 SDValue RHS = N->getOperand(1); in combinePMULDQ()
57293 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS); in combinePMULDQ()
57298 return DAG.getConstant(0, SDLoc(N), N->getValueType(0)); in combinePMULDQ()
57312 if (N->getValueType(0) == MVT::v2i64 && LHS.hasOneUse() && in combinePMULDQ()
57318 LHS.getOperand(0), { 0, -1, 1, -1 }); in combinePMULDQ()
57320 return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS); in combinePMULDQ()
57322 if (N->getValueType(0) == MVT::v2i64 && RHS.hasOneUse() && in combinePMULDQ()
57328 RHS.getOperand(0), { 0, -1, 1, -1 }); in combinePMULDQ()
57330 return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS); in combinePMULDQ()
57339 MVT VT = N->getSimpleValueType(0); in combineVPMADD() local
57340 SDValue LHS = N->getOperand(0); in combineVPMADD()
57341 SDValue RHS = N->getOperand(1); in combineVPMADD()
57342 unsigned Opc = N->getOpcode(); in combineVPMADD()
57351 return DAG.getConstant(0, SDLoc(N), VT); in combineVPMADD()
57357 unsigned DstEltBits = VT.getScalarSizeInBits(); in combineVPMADD()
57371 return getConstVector(Result, VT, DAG, SDLoc(N)); in combineVPMADD()
57375 APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); in combineVPMADD()
57385 EVT VT = N->getValueType(0); in combineEXTEND_VECTOR_INREG() local
57386 SDValue In = N->getOperand(0); in combineEXTEND_VECTOR_INREG()
57387 unsigned Opcode = N->getOpcode(); in combineEXTEND_VECTOR_INREG()
57396 if (Ld->isSimple()) { in combineEXTEND_VECTOR_INREG()
57401 EVT MemVT = VT.changeVectorElementType(SVT); in combineEXTEND_VECTOR_INREG()
57402 if (TLI.isLoadExtLegal(Ext, VT, MemVT)) { in combineEXTEND_VECTOR_INREG()
57404 Ext, DL, VT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), in combineEXTEND_VECTOR_INREG()
57405 MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags()); in combineEXTEND_VECTOR_INREG()
57412 // Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X). in combineEXTEND_VECTOR_INREG()
57414 return DAG.getNode(Opcode, DL, VT, In.getOperand(0)); in combineEXTEND_VECTOR_INREG()
57417 // -> EXTEND_VECTOR_INREG(X). in combineEXTEND_VECTOR_INREG()
57418 // TODO: Handle non-zero subvector indices. in combineEXTEND_VECTOR_INREG()
57423 return DAG.getNode(Opcode, DL, VT, In.getOperand(0).getOperand(0)); in combineEXTEND_VECTOR_INREG()
57425 // Fold EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?)) -> BUILD_VECTOR(X,0,Y,0). in combineEXTEND_VECTOR_INREG()
57429 In.getValueSizeInBits() == VT.getSizeInBits()) { in combineEXTEND_VECTOR_INREG()
57430 unsigned NumElts = VT.getVectorNumElements(); in combineEXTEND_VECTOR_INREG()
57431 unsigned Scale = VT.getScalarSizeInBits() / In.getScalarValueSizeInBits(); in combineEXTEND_VECTOR_INREG()
57436 return DAG.getBitcast(VT, DAG.getBuildVector(In.getValueType(), DL, Elts)); in combineEXTEND_VECTOR_INREG()
57442 if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType())) in combineEXTEND_VECTOR_INREG()
57452 EVT VT = N->getValueType(0); in combineKSHIFT() local
57454 if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) in combineKSHIFT()
57455 return DAG.getConstant(0, SDLoc(N), VT); in combineKSHIFT()
57458 APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); in combineKSHIFT()
57473 if (N->getOperand(0).getOpcode() != ISD::FP_TO_FP16) in combineFP16_TO_FP()
57476 if (N->getValueType(0) != MVT::f32 || in combineFP16_TO_FP()
57477 N->getOperand(0).getOperand(0).getValueType() != MVT::f32) in combineFP16_TO_FP()
57482 N->getOperand(0).getOperand(0)); in combineFP16_TO_FP()
57493 EVT VT = N->getValueType(0); in combineFP_EXTEND() local
57494 bool IsStrict = N->isStrictFPOpcode(); in combineFP_EXTEND()
57495 SDValue Src = N->getOperand(IsStrict ? 1 : 0); in combineFP_EXTEND()
57501 !IsStrict && Src.getOperand(0).getValueType() == VT) in combineFP_EXTEND()
57508 if (VT.getVectorElementType() == MVT::f64) { in combineFP_EXTEND()
57509 EVT TmpVT = VT.changeVectorElementType(MVT::f32); in combineFP_EXTEND()
57510 return DAG.getNode(ISD::FP_EXTEND, dl, VT, in combineFP_EXTEND()
57513 assert(VT.getVectorElementType() == MVT::f32 && "Unexpected fpext"); in combineFP_EXTEND()
57518 return DAG.getBitcast(VT, Src); in combineFP_EXTEND()
57530 if (VT.getVectorElementType() != MVT::f32 && in combineFP_EXTEND()
57531 VT.getVectorElementType() != MVT::f64) in combineFP_EXTEND()
57534 unsigned NumElts = VT.getVectorNumElements(); in combineFP_EXTEND()
57558 {N->getOperand(0), Src}); in combineFP_EXTEND()
57571 // Extend to the original VT if necessary. in combineFP_EXTEND()
57572 if (Cvt.getValueType() != VT) { in combineFP_EXTEND()
57573 Cvt = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {VT, MVT::Other}, in combineFP_EXTEND()
57580 // Extend to the original VT if necessary. in combineFP_EXTEND()
57581 return DAG.getNode(ISD::FP_EXTEND, dl, VT, Cvt); in combineFP_EXTEND()
57589 assert((N->getOpcode() == X86ISD::VBROADCAST_LOAD || in combineBROADCAST_LOAD()
57590 N->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) && in combineBROADCAST_LOAD()
57594 if (N->hasAnyUseOfValue(1)) in combineBROADCAST_LOAD()
57599 SDValue Ptr = MemIntrin->getBasePtr(); in combineBROADCAST_LOAD()
57600 SDValue Chain = MemIntrin->getChain(); in combineBROADCAST_LOAD()
57601 EVT VT = N->getSimpleValueType(0); in combineBROADCAST_LOAD() local
57602 EVT MemVT = MemIntrin->getMemoryVT(); in combineBROADCAST_LOAD()
57605 // The input chain and the size of the memory VT must match. in combineBROADCAST_LOAD()
57606 for (SDNode *User : Ptr->uses()) in combineBROADCAST_LOAD()
57607 if (User != N && User->getOpcode() == N->getOpcode() && in combineBROADCAST_LOAD()
57608 cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr && in combineBROADCAST_LOAD()
57609 cast<MemIntrinsicSDNode>(User)->getChain() == Chain && in combineBROADCAST_LOAD()
57610 cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() == in combineBROADCAST_LOAD()
57612 !User->hasAnyUseOfValue(1) && in combineBROADCAST_LOAD()
57613 User->getValueSizeInBits(0).getFixedValue() > VT.getFixedSizeInBits()) { in combineBROADCAST_LOAD()
57615 VT.getSizeInBits()); in combineBROADCAST_LOAD()
57616 Extract = DAG.getBitcast(VT, Extract); in combineBROADCAST_LOAD()
57628 bool IsStrict = N->isStrictFPOpcode(); in combineFP_ROUND()
57629 EVT VT = N->getValueType(0); in combineFP_ROUND() local
57630 SDValue Src = N->getOperand(IsStrict ? 1 : 0); in combineFP_ROUND()
57633 if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || in combineFP_ROUND()
57640 unsigned NumElts = VT.getVectorNumElements(); in combineFP_ROUND()
57651 bool IsOp0Strict = Op0->isStrictFPOpcode(); in combineFP_ROUND()
57693 {N->getOperand(0), Src, Rnd}); in combineFP_ROUND()
57701 EVT IntVT = VT.changeVectorElementTypeToInteger(); in combineFP_ROUND()
57706 Cvt = DAG.getBitcast(VT, Cvt); in combineFP_ROUND()
57715 SDValue Src = N->getOperand(0); in combineMOVDQ2Q()
57721 if (LN->isSimple()) { in combineMOVDQ2Q()
57722 SDValue NewLd = DAG.getLoad(MVT::x86mmx, SDLoc(N), LN->getChain(), in combineMOVDQ2Q()
57723 LN->getBasePtr(), in combineMOVDQ2Q()
57724 LN->getPointerInfo(), in combineMOVDQ2Q()
57725 LN->getOriginalAlign(), in combineMOVDQ2Q()
57726 LN->getMemOperand()->getFlags()); in combineMOVDQ2Q()
57737 unsigned NumBits = N->getSimpleValueType(0).getSizeInBits(); in combinePDEP()
57748 switch (N->getOpcode()) { in PerformDAGCombine()
57749 // clang-format off in PerformDAGCombine()
57935 // clang-format on in PerformDAGCombine()
57941 bool X86TargetLowering::preferABDSToABSWithNSW(EVT VT) const { in preferABDSToABSWithNSW()
57945 // Prefer (non-AVX512) vector TRUNCATE(SIGN_EXTEND_INREG(X)) to use of PACKSS.
57946 bool X86TargetLowering::preferSextInRegOfTruncate(EVT TruncVT, EVT VT, in preferSextInRegOfTruncate() argument
57948 return Subtarget.hasAVX512() || !VT.isVector(); in preferSextInRegOfTruncate()
57951 bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { in isTypeDesirableForOp()
57952 if (!isTypeLegal(VT)) in isTypeDesirableForOp()
57956 if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8) in isTypeDesirableForOp()
57959 // TODO: Almost no 8-bit ops are desirable because they have no actual in isTypeDesirableForOp()
57960 // size/speed advantages vs. 32-bit ops, but they do have a major in isTypeDesirableForOp()
57963 // 8-bit multiply/shl is probably not cheaper than 32-bit multiply/shl, and in isTypeDesirableForOp()
57964 // we have specializations to turn 32-bit multiply/shl into LEA or other ops. in isTypeDesirableForOp()
57965 // Also, see the comment in "IsDesirableToPromoteOp" - where we additionally in isTypeDesirableForOp()
57967 if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8) in isTypeDesirableForOp()
57972 if (VT == MVT::i16) { in isTypeDesirableForOp()
58006 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); in expandIndirectJTBranch()
58008 // In case control-flow branch protection is enabled, we need to add in expandIndirectJTBranch()
58023 EVT VT = LogicOp->getValueType(0); in isDesirableToCombineLogicOpOfSETCC() local
58024 EVT OpVT = SETCC0->getOperand(0).getValueType(); in isDesirableToCombineLogicOpOfSETCC()
58025 if (!VT.isInteger()) in isDesirableToCombineLogicOpOfSETCC()
58028 if (VT.isVector()) in isDesirableToCombineLogicOpOfSETCC()
58037 // TODO: Currently we lower (icmp eq/ne (and ~X, Y), 0) -> `test (not X), Y`, in isDesirableToCombineLogicOpOfSETCC()
58043 EVT VT = Op.getValueType(); in IsDesirableToPromoteOp() local
58044 bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL && in IsDesirableToPromoteOp()
58049 // 8-bit multiply-by-constant can usually be expanded to something cheaper in IsDesirableToPromoteOp()
58051 if (VT != MVT::i16 && !Is8BitMulByConstant) in IsDesirableToPromoteOp()
58057 SDNode *User = *Op->use_begin(); in IsDesirableToPromoteOp()
58062 return Ld->getBasePtr() == St->getBasePtr(); in IsDesirableToPromoteOp()
58070 SDNode *User = *Op->use_begin(); in IsDesirableToPromoteOp()
58071 if (User->getOpcode() != ISD::ATOMIC_STORE) in IsDesirableToPromoteOp()
58075 return Ld->getBasePtr() == St->getBasePtr(); in IsDesirableToPromoteOp()
58123 //===----------------------------------------------------------------------===//
58125 //===----------------------------------------------------------------------===//
58163 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand()); in ExpandInlineAsm()
58165 const std::string &AsmStr = IA->getAsmString(); in ExpandInlineAsm()
58167 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); in ExpandInlineAsm()
58168 if (!Ty || Ty->getBitWidth() % 16 != 0) in ExpandInlineAsm()
58171 // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" in ExpandInlineAsm()
58194 // rorw $$8, ${0:w} --> llvm.bswap.i16 in ExpandInlineAsm()
58195 if (CI->getType()->isIntegerTy(16) && in ExpandInlineAsm()
58196 IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && in ExpandInlineAsm()
58200 StringRef ConstraintsStr = IA->getConstraintString(); in ExpandInlineAsm()
58208 if (CI->getType()->isIntegerTy(32) && in ExpandInlineAsm()
58209 IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && in ExpandInlineAsm()
58214 StringRef ConstraintsStr = IA->getConstraintString(); in ExpandInlineAsm()
58221 if (CI->getType()->isIntegerTy(64)) { in ExpandInlineAsm()
58222 InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints(); in ExpandInlineAsm()
58226 // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 in ExpandInlineAsm()
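
The inline-asm patterns recognized here rest on two identities: rotating a 16-bit register by 8 swaps its two bytes (so `rorw $8, %ax` is a 16-bit bswap), and a 64-bit byte swap on a 32-bit target can be built from two 32-bit bswaps plus an exchange of the register pair, which is the `bswap %eax / bswap %edx / xchgl` sequence named above. A standalone check of both identities:

    // Standalone check (no LLVM dependencies) of the two byte-swap identities.
    #include <cassert>
    #include <cstdint>

    static std::uint16_t ror16(std::uint16_t X, unsigned N) {
      N &= 15;
      return static_cast<std::uint16_t>((X >> N) | (X << (16 - N)));
    }
    static std::uint16_t bswap16(std::uint16_t X) {
      return static_cast<std::uint16_t>((X >> 8) | (X << 8));
    }
    static std::uint32_t bswap32(std::uint32_t X) {
      return (X >> 24) | ((X >> 8) & 0x0000FF00u) | ((X << 8) & 0x00FF0000u) | (X << 24);
    }

    int main() {
      // rorw $8, %ax  ==  16-bit byte swap
      assert(ror16(0x1234, 8) == bswap16(0x1234));
      // bswap %eax / bswap %edx / xchgl  ==  64-bit byte swap on a register pair
      std::uint64_t V = 0x0102030405060708ULL;
      std::uint32_t Lo = static_cast<std::uint32_t>(V);        // %eax
      std::uint32_t Hi = static_cast<std::uint32_t>(V >> 32);  // %edx
      std::uint64_t Swapped =
          (static_cast<std::uint64_t>(bswap32(Lo)) << 32) | bswap32(Hi); // xchg halves
      assert(Swapped == 0x0807060504030201ULL);
    }
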
58361 Type *Ty = CallOperandVal->getType(); in getSingleConstraintMatchWeight()
58377 if (CallOperandVal->getType()->isIntegerTy()) in getSingleConstraintMatchWeight()
58383 if (Ty->isFloatingPointTy()) in getSingleConstraintMatchWeight()
58387 if (Ty->isX86_MMXTy() && Subtarget.hasMMX()) in getSingleConstraintMatchWeight()
58398 if (((Ty->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) || in getSingleConstraintMatchWeight()
58399 ((Ty->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) || in getSingleConstraintMatchWeight()
58400 ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())) in getSingleConstraintMatchWeight()
58405 if ((Ty->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512()) in getSingleConstraintMatchWeight()
58410 if (Ty->isX86_MMXTy() && Subtarget.hasMMX()) in getSingleConstraintMatchWeight()
58430 if (CallOperandVal->getType()->isIntegerTy()) in getSingleConstraintMatchWeight()
58436 if ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()) in getSingleConstraintMatchWeight()
58440 if (((Ty->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) || in getSingleConstraintMatchWeight()
58441 ((Ty->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX())) in getSingleConstraintMatchWeight()
58446 if ((Ty->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512()) in getSingleConstraintMatchWeight()
58451 if (C->getZExtValue() <= 31) in getSingleConstraintMatchWeight()
58456 if (C->getZExtValue() <= 63) in getSingleConstraintMatchWeight()
58461 if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f)) in getSingleConstraintMatchWeight()
58466 if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff)) in getSingleConstraintMatchWeight()
58471 if (C->getZExtValue() <= 3) in getSingleConstraintMatchWeight()
58476 if (C->getZExtValue() <= 0xff) in getSingleConstraintMatchWeight()
58486 if ((C->getSExtValue() >= -0x80000000LL) && in getSingleConstraintMatchWeight()
58487 (C->getSExtValue() <= 0x7fffffffLL)) in getSingleConstraintMatchWeight()
58492 if (C->getZExtValue() <= 0xffffffff) in getSingleConstraintMatchWeight()
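
The constant tests above and in LowerAsmOperandForConstraint below encode the x86 immediate-operand constraints as ranges: 0-31, 0-63, signed 8-bit, the masks 0xff/0xffff (plus 0xffffffff in 64-bit mode), 0-3, 0-255, 0-127, signed 32-bit, and unsigned 32-bit. The case labels are elided from this listing, so the constraint letters in the sketch below follow the usual GCC/LLVM x86 documentation (I, J, K, L, M, N, O, e, Z) rather than the excerpt itself:

    // Sketch of the immediate ranges checked above, keyed by the standard x86
    // inline-asm constraint letters (letter -> range mapping from documentation,
    // since the case labels do not appear in this listing).
    #include <cstdint>
    #include <cstdio>

    static bool fitsImmConstraint(char Letter, std::int64_t V, bool Is64Bit = false) {
      auto U = static_cast<std::uint64_t>(V);
      switch (Letter) {
      case 'I': return U <= 31;                              // 5-bit shift counts
      case 'J': return U <= 63;                              // 6-bit shift counts
      case 'K': return V >= -0x80 && V <= 0x7f;              // signed 8-bit immediate
      case 'L': return U == 0xff || U == 0xffff ||
                       (Is64Bit && U == 0xffffffff);         // zero-extending AND masks
      case 'M': return U <= 3;                               // 0-3
      case 'N': return U <= 0xff;                            // unsigned 8-bit
      case 'O': return U <= 0x7f;                            // 0-127
      case 'e': return V >= -0x80000000LL && V <= 0x7fffffffLL; // signed 32-bit
      case 'Z': return U <= 0xffffffff;                      // unsigned 32-bit
      default:  return false;
      }
    }

    int main() {
      std::printf("K -128:    %d\n", fitsImmConstraint('K', -128));            // 1
      std::printf("I 32:      %d\n", fitsImmConstraint('I', 32));              // 0
      std::printf("Z 2^32-1:  %d\n", fitsImmConstraint('Z', 0xffffffffLL));    // 1
    }
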
58534 // Extend to 32-bits in LowerAsmOutputForConstraint()
58552 if (C->getZExtValue() <= 31) { in LowerAsmOperandForConstraint()
58553 Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), in LowerAsmOperandForConstraint()
58561 if (C->getZExtValue() <= 63) { in LowerAsmOperandForConstraint()
58562 Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), in LowerAsmOperandForConstraint()
58570 if (isInt<8>(C->getSExtValue())) { in LowerAsmOperandForConstraint()
58571 Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), in LowerAsmOperandForConstraint()
58579 if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff || in LowerAsmOperandForConstraint()
58580 (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) { in LowerAsmOperandForConstraint()
58581 Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), in LowerAsmOperandForConstraint()
58589 if (C->getZExtValue() <= 3) { in LowerAsmOperandForConstraint()
58590 Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), in LowerAsmOperandForConstraint()
58598 if (C->getZExtValue() <= 255) { in LowerAsmOperandForConstraint()
58599 Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), in LowerAsmOperandForConstraint()
58607 if (C->getZExtValue() <= 127) { in LowerAsmOperandForConstraint()
58608 Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), in LowerAsmOperandForConstraint()
58615 // 32-bit signed value in LowerAsmOperandForConstraint()
58618 C->getSExtValue())) { in LowerAsmOperandForConstraint()
58620 Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64); in LowerAsmOperandForConstraint()
58633 Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(), in LowerAsmOperandForConstraint()
58634 BA->getValueType(0))); in LowerAsmOperandForConstraint()
58637 if (Op->getOpcode() == ISD::ADD && in LowerAsmOperandForConstraint()
58638 isa<ConstantSDNode>(Op->getOperand(1))) { in LowerAsmOperandForConstraint()
58639 Offset = cast<ConstantSDNode>(Op->getOperand(1))->getSExtValue(); in LowerAsmOperandForConstraint()
58640 Op = Op->getOperand(0); in LowerAsmOperandForConstraint()
58643 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), in LowerAsmOperandForConstraint()
58644 GA->getValueType(0), Offset)); in LowerAsmOperandForConstraint()
58649 // 32-bit unsigned value in LowerAsmOperandForConstraint()
58652 C->getZExtValue())) { in LowerAsmOperandForConstraint()
58653 Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), in LowerAsmOperandForConstraint()
58665 bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1; in LowerAsmOperandForConstraint()
58669 int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue() in LowerAsmOperandForConstraint()
58670 : CST->getSExtValue(); in LowerAsmOperandForConstraint()
58682 // If we are in non-pic codegen mode, we allow the address of a global (with in LowerAsmOperandForConstraint()
58688 Subtarget.classifyGlobalReference(GA->getGlobal()))) in LowerAsmOperandForConstraint()
58741 MVT VT) const { in getRegForInlineAsmConstraint()
58761 if (VT == MVT::v1i1 || VT == MVT::i1) in getRegForInlineAsmConstraint()
58763 if (VT == MVT::v8i1 || VT == MVT::i8) in getRegForInlineAsmConstraint()
58765 if (VT == MVT::v16i1 || VT == MVT::i16) in getRegForInlineAsmConstraint()
58769 if (VT == MVT::v32i1 || VT == MVT::i32) in getRegForInlineAsmConstraint()
58771 if (VT == MVT::v64i1 || VT == MVT::i64) in getRegForInlineAsmConstraint()
58775 case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. in getRegForInlineAsmConstraint()
58777 if (VT == MVT::i8 || VT == MVT::i1) in getRegForInlineAsmConstraint()
58781 if (VT == MVT::i16) in getRegForInlineAsmConstraint()
58785 if (VT == MVT::i32 || VT == MVT::f32) in getRegForInlineAsmConstraint()
58789 if (VT != MVT::f80 && !VT.isVector()) in getRegForInlineAsmConstraint()
58796 // 32-bit fallthrough in getRegForInlineAsmConstraint()
58798 if (VT == MVT::i8 || VT == MVT::i1) in getRegForInlineAsmConstraint()
58800 if (VT == MVT::i16) in getRegForInlineAsmConstraint()
58802 if (VT == MVT::i32 || VT == MVT::f32 || in getRegForInlineAsmConstraint()
58803 (!VT.isVector() && !Subtarget.is64Bit())) in getRegForInlineAsmConstraint()
58805 if (VT != MVT::f80 && !VT.isVector()) in getRegForInlineAsmConstraint()
58810 if (VT == MVT::i8 || VT == MVT::i1) in getRegForInlineAsmConstraint()
58814 if (VT == MVT::i16) in getRegForInlineAsmConstraint()
58818 if (VT == MVT::i32 || VT == MVT::f32 || in getRegForInlineAsmConstraint()
58819 (!VT.isVector() && !Subtarget.is64Bit())) in getRegForInlineAsmConstraint()
58823 if (VT != MVT::f80 && !VT.isVector()) in getRegForInlineAsmConstraint()
58829 if (VT == MVT::i8 || VT == MVT::i1) in getRegForInlineAsmConstraint()
58831 if (VT == MVT::i16) in getRegForInlineAsmConstraint()
58833 if (VT == MVT::i32 || VT == MVT::f32 || in getRegForInlineAsmConstraint()
58834 (!VT.isVector() && !Subtarget.is64Bit())) in getRegForInlineAsmConstraint()
58836 if (VT != MVT::f80 && !VT.isVector()) in getRegForInlineAsmConstraint()
58840 // If SSE is enabled for this VT, use f80 to ensure the isel moves the in getRegForInlineAsmConstraint()
58842 if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT)) in getRegForInlineAsmConstraint()
58844 if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT)) in getRegForInlineAsmConstraint()
58846 if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) in getRegForInlineAsmConstraint()
58857 switch (VT.SimpleTy) { in getRegForInlineAsmConstraint()
58960 return getRegForInlineAsmConstraint(TRI, "x", VT); in getRegForInlineAsmConstraint()
58966 switch (VT.SimpleTy) { in getRegForInlineAsmConstraint()
59035 if (VT == MVT::v1i1 || VT == MVT::i1) in getRegForInlineAsmConstraint()
59037 if (VT == MVT::v8i1 || VT == MVT::i8) in getRegForInlineAsmConstraint()
59039 if (VT == MVT::v16i1 || VT == MVT::i16) in getRegForInlineAsmConstraint()
59043 if (VT == MVT::v32i1 || VT == MVT::i32) in getRegForInlineAsmConstraint()
59045 if (VT == MVT::v64i1 || VT == MVT::i64) in getRegForInlineAsmConstraint()
59055 if (VT == MVT::i8 || VT == MVT::i1) in getRegForInlineAsmConstraint()
59057 if (VT == MVT::i16) in getRegForInlineAsmConstraint()
59059 if (VT == MVT::i32 || VT == MVT::f32) in getRegForInlineAsmConstraint()
59061 if (VT != MVT::f80 && !VT.isVector()) in getRegForInlineAsmConstraint()
59065 if (VT == MVT::i8 || VT == MVT::i1) in getRegForInlineAsmConstraint()
59067 if (VT == MVT::i16) in getRegForInlineAsmConstraint()
59069 if (VT == MVT::i32 || VT == MVT::f32) in getRegForInlineAsmConstraint()
59071 if (VT != MVT::f80 && !VT.isVector()) in getRegForInlineAsmConstraint()
59083 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); in getRegForInlineAsmConstraint()
59087 // Only match x87 registers if the VT is one SelectionDAGBuilder can convert in getRegForInlineAsmConstraint()
59089 if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) { in getRegForInlineAsmConstraint()
59090 // Map st(0) -> st(7) -> ST0 in getRegForInlineAsmConstraint()
59100 return std::make_pair(X86::FP0 + Constraint[4] - '0', in getRegForInlineAsmConstraint()
59109 // flags -> EFLAGS in getRegForInlineAsmConstraint()
59113 // dirflag -> DF in getRegForInlineAsmConstraint()
59116 VT == MVT::Other) in getRegForInlineAsmConstraint()
59119 // fpsr -> FPSW in getRegForInlineAsmConstraint()
59121 if (StringRef("{fpsr}").equals_insensitive(Constraint) && VT == MVT::Other) in getRegForInlineAsmConstraint()
59127 // Make sure it isn't a register that requires 64-bit mode. in getRegForInlineAsmConstraint()
59130 TRI->getEncodingValue(Res.first) >= 8) { in getRegForInlineAsmConstraint()
59131 // Register requires REX prefix, but we're in 32-bit mode. in getRegForInlineAsmConstraint()
59137 TRI->getEncodingValue(Res.first) & 0x10) { in getRegForInlineAsmConstraint()
59143 // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to in getRegForInlineAsmConstraint()
59146 if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other) in getRegForInlineAsmConstraint()
59158 unsigned Size = VT.getSizeInBits(); in getRegForInlineAsmConstraint()
59171 // Model GCC's behavior here and select a fixed pair of 32-bit in getRegForInlineAsmConstraint()
59192 if (RC && RC->contains(DestReg)) in getRegForInlineAsmConstraint()
59205 if (VT == MVT::f16) in getRegForInlineAsmConstraint()
59207 else if (VT == MVT::f32 || VT == MVT::i32) in getRegForInlineAsmConstraint()
59209 else if (VT == MVT::f64 || VT == MVT::i64) in getRegForInlineAsmConstraint()
59211 else if (TRI->isTypeLegalForClass(X86::VR128XRegClass, VT)) in getRegForInlineAsmConstraint()
59213 else if (TRI->isTypeLegalForClass(X86::VR256XRegClass, VT)) in getRegForInlineAsmConstraint()
59215 else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT)) in getRegForInlineAsmConstraint()
59223 if (VT == MVT::v1i1 || VT == MVT::i1) in getRegForInlineAsmConstraint()
59225 else if (VT == MVT::v8i1 || VT == MVT::i8) in getRegForInlineAsmConstraint()
59227 else if (VT == MVT::v16i1 || VT == MVT::i16) in getRegForInlineAsmConstraint()
59229 else if (VT == MVT::v32i1 || VT == MVT::i32) in getRegForInlineAsmConstraint()
59231 else if (VT == MVT::v64i1 || VT == MVT::i64) in getRegForInlineAsmConstraint()
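
Both places that handle an AVX-512 mask-register constraint pick the register class purely from the operand's bit width, pairing v1i1/i1 through v64i1/i64 with mask classes of matching size. The concrete X86::VK*RegClass returns are elided from this listing; the sketch below just restates the width-to-class grouping with illustrative string labels:

    // Width -> mask-register-class grouping; the strings are illustrative labels,
    // not the actual X86::VK*RegClass objects returned by the hook above.
    #include <cstdio>

    static const char *maskClassForBits(unsigned Bits) {
      switch (Bits) {
      case 1:  return "VK1";   // v1i1 / i1
      case 8:  return "VK8";   // v8i1 / i8
      case 16: return "VK16";  // v16i1 / i16
      case 32: return "VK32";  // v32i1 / i32
      case 64: return "VK64";  // v64i1 / i64
      default: return nullptr; // no AVX-512 mask class of this width
      }
    }

    int main() {
      std::printf("%s %s\n", maskClassForBits(16), maskClassForBits(64)); // VK16 VK64
    }
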
59243 bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { in isIntDivCheap() argument
59248 // integer division, leaving the division as-is is a loss even in terms of in isIntDivCheap()
59252 return OptSize && !VT.isVector(); in isIntDivCheap()
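
isIntDivCheap answers true (keep the div) only when OptSize is set and the type is scalar; as the comment fragment above notes, a vector division would have to be scalarized anyway, so the expanded sequence wins even on size. The standalone example below shows what the expansion trades off for scalars: the well-known unsigned divide-by-10 rewrite replaces one div with a widening multiply by a magic constant plus a shift, which is usually faster but costs more bytes of code.

    // Standalone illustration (not LLVM code) of reciprocal-multiplication division.
    #include <cassert>
    #include <cstdint>

    static std::uint32_t div10_magic(std::uint32_t X) {
      // floor(X * 0xCCCCCCCD / 2^35) == X / 10 for all 32-bit X; this is the
      // kind of sequence compilers emit when expanding division by a constant.
      return static_cast<std::uint32_t>((static_cast<std::uint64_t>(X) * 0xCCCCCCCDULL) >> 35);
    }

    int main() {
      for (std::uint64_t X = 0; X <= 0xFFFFFFFFULL; X += 65537)   // sparse sweep
        assert(div10_magic(static_cast<std::uint32_t>(X)) == X / 10);
      assert(div10_magic(0xFFFFFFFFu) == 0xFFFFFFFFu / 10);        // edge case
    }
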
59261 Entry->getParent()->getInfo<X86MachineFunctionInfo>(); in initializeSplitCSR()
59262 AFI->setIsSplitCSR(true); in initializeSplitCSR()
59269 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); in insertCopiesSplitCSR()
59274 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); in insertCopiesSplitCSR()
59275 MachineBasicBlock::iterator MBBI = Entry->begin(); in insertCopiesSplitCSR()
59283 Register NewVR = MRI->createVirtualRegister(RC); in insertCopiesSplitCSR()
59285 // FIXME: this currently does not emit CFI pseudo-instructions, it works in insertCopiesSplitCSR()
59286 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be in insertCopiesSplitCSR()
59288 // CFI pseudo-instructions. in insertCopiesSplitCSR()
59290 Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) && in insertCopiesSplitCSR()
59292 Entry->addLiveIn(*I); in insertCopiesSplitCSR()
59293 BuildMI(*Entry, MBBI, MIMetadata(), TII->get(TargetOpcode::COPY), NewVR) in insertCopiesSplitCSR()
59296 // Insert the copy-back instructions right before the terminator. in insertCopiesSplitCSR()
59298 BuildMI(*Exit, Exit->getFirstTerminator(), MIMetadata(), in insertCopiesSplitCSR()
59299 TII->get(TargetOpcode::COPY), *I) in insertCopiesSplitCSR()
59312 assert(MBBI->isCall() && MBBI->getCFIType() && in EmitKCFICheck()
59318 switch (MBBI->getOpcode()) { in EmitKCFICheck()
59325 if (!TII->unfoldMemoryOperand(MF, *OrigCall, X86::R11, /*UnfoldLoad=*/true, in EmitKCFICheck()
59330 assert(MBBI->isCall() && in EmitKCFICheck()
59332 if (OrigCall->shouldUpdateCallSiteInfo()) in EmitKCFICheck()
59334 MBBI->setCFIType(MF, OrigCall->getCFIType()); in EmitKCFICheck()
59335 OrigCall->eraseFromParent(); in EmitKCFICheck()
59342 MachineOperand &Target = MBBI->getOperand(0); in EmitKCFICheck()
59344 switch (MBBI->getOpcode()) { in EmitKCFICheck()
59357 // 64-bit indirect thunk calls. in EmitKCFICheck()
59367 return BuildMI(MBB, MBBI, MIMetadata(*MBBI), TII->get(X86::KCFI_CHECK)) in EmitKCFICheck()
59369 .addImm(MBBI->getCFIType()) in EmitKCFICheck()
59383 MF.getFunction().hasFnAttribute("no-stack-arg-probe")) in hasInlineStackProbe()
59387 if (MF.getFunction().hasFnAttribute("probe-stack")) in hasInlineStackProbe()
59388 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == in hasInlineStackProbe()
59389 "inline-asm"; in hasInlineStackProbe()
59403 if (MF.getFunction().hasFnAttribute("probe-stack")) in getStackProbeSymbolName()
59404 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString(); in getStackProbeSymbolName()
59409 MF.getFunction().hasFnAttribute("no-stack-arg-probe")) in getStackProbeSymbolName()
59423 return MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", in getStackProbeSize()
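
The stack-probe hooks above are driven by string function attributes in the IR: "probe-stack" (where the value "inline-asm" selects inline probing), "stack-probe-size", and "no-stack-arg-probe". A minimal sketch, assuming an LLVM development setup to compile and link against, of attaching those attributes to a function; the function name "big_frame" and the 8192-byte probe size are arbitrary example values:

    // Minimal sketch (requires linking against LLVM libraries): attach the
    // string attributes that the stack-probe hooks above read.
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("probe-demo", Ctx);
      FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
      Function *F = Function::Create(FTy, Function::ExternalLinkage, "big_frame", &M);
      F->addFnAttr("probe-stack", "inline-asm");  // request inline probing (no libcall)
      F->addFnAttr("stack-probe-size", "8192");   // override the default probe interval
      M.print(outs(), /*AssemblyAnnotationWriter=*/nullptr);
      return 0;
    }
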
59428 if (ML && ML->isInnermost() && in getPrefLoopAlignment()