Lines Matching +full:fsin +full:- +full:output
1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
109 #define DEBUG_TYPE "aarch64-lower"
119 "aarch64-elf-ldtls-generation", cl::Hidden,
124 EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
134 EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
139 static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
146 static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
154 "aarch64-enable-gisel-sve", cl::Hidden,
349 // Otherwise, it's either a constant discriminator, or a non-blended in extractPtrauthBlendDiscriminators()
351 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN && in extractPtrauthBlendDiscriminators()
352 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) { in extractPtrauthBlendDiscriminators()
353 AddrDisc = Disc->getOperand(1); in extractPtrauthBlendDiscriminators()
354 ConstDisc = Disc->getOperand(2); in extractPtrauthBlendDiscriminators()
360 // discriminator value) isn't a 16-bit constant, bail out, and let the in extractPtrauthBlendDiscriminators()
363 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue())) in extractPtrauthBlendDiscriminators()
364 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc); in extractPtrauthBlendDiscriminators()
369 AddrDisc = DAG->getRegister(AArch64::NoRegister, MVT::i64); in extractPtrauthBlendDiscriminators()
372 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64), in extractPtrauthBlendDiscriminators()
383 // vector to all-one or all-zero. in AArch64TargetLowering()
390 if (Subtarget->hasLS64()) { in AArch64TargetLowering()
396 if (Subtarget->hasFPARMv8()) { in AArch64TargetLowering()
404 if (Subtarget->hasNEON()) { in AArch64TargetLowering()
427 if (Subtarget->isSVEorStreamingSVEAvailable()) { in AArch64TargetLowering()
452 if (Subtarget->useSVEForFixedLengthVectors()) { in AArch64TargetLowering()
463 if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) { in AArch64TargetLowering()
473 computeRegisterProperties(Subtarget->getRegisterInfo()); in AArch64TargetLowering()
546 setOperationAction(ISD::FSIN, MVT::f128, Expand); in AArch64TargetLowering()
562 // Lowering for many of the conversions is actually specified by the non-f128 in AArch64TargetLowering()
588 if (Subtarget->hasFPARMv8()) { in AArch64TargetLowering()
594 if (Subtarget->hasFPARMv8()) { in AArch64TargetLowering()
612 // Variable-sized objects. in AArch64TargetLowering()
630 // AArch64 lacks both left-rotate and popcount instructions. in AArch64TargetLowering()
648 if (Subtarget->hasCSSC()) { in AArch64TargetLowering()
719 setOperationAction(ISD::FSIN, MVT::f32, Expand); in AArch64TargetLowering()
720 setOperationAction(ISD::FSIN, MVT::f64, Expand); in AArch64TargetLowering()
727 if (Subtarget->hasFullFP16()) { in AArch64TargetLowering()
736 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, in AArch64TargetLowering()
800 // Round-to-integer operations need custom lowering for fp16, as Promote doesn't work in AArch64TargetLowering()
855 if (!Subtarget->hasFullFP16()) { in AArch64TargetLowering()
862 // AArch64 has implementations of a lot of rounding-like FP operations. in AArch64TargetLowering()
876 if (Subtarget->hasFullFP16()) in AArch64TargetLowering()
885 if (Subtarget->hasFullFP16()) in AArch64TargetLowering()
902 if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) { in AArch64TargetLowering()
914 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) { in AArch64TargetLowering()
963 if (Subtarget->hasLSE128()) { in AArch64TargetLowering()
971 // 128-bit loads and stores can be done without expanding in AArch64TargetLowering()
975 // Aligned 128-bit loads and stores are single-copy atomic according to the in AArch64TargetLowering()
976 // v8.4a spec. LRCPC3 introduces 128-bit STILP/LDIAPP but still requires LSE2. in AArch64TargetLowering()
977 if (Subtarget->hasLSE2()) { in AArch64TargetLowering()
982 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the in AArch64TargetLowering()
983 // custom lowering, as there are no un-paired non-temporal stores and in AArch64TargetLowering()
994 // 256 bit non-temporal loads can be lowered to LDNP. This is done using in AArch64TargetLowering()
995 // custom lowering, as there are no un-paired non-temporal loads and legalization in AArch64TargetLowering()
1019 // Make floating-point constants legal for the large code model, so they don't in AArch64TargetLowering()
1021 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { in AArch64TargetLowering()
1026 // AArch64 does not have floating-point extending loads, i1 sign-extending in AArch64TargetLowering()
1027 // load, floating-point truncating stores, or v2i32->v2i16 truncating store. in AArch64TargetLowering()
1046 if (Subtarget->hasFPARMv8()) { in AArch64TargetLowering()
1087 // Vector add and sub nodes may conceal a high-half opportunity. in AArch64TargetLowering()
1134 Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32; in AArch64TargetLowering()
1139 Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16; in AArch64TargetLowering()
1146 Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8; in AArch64TargetLowering()
1161 if (!Subtarget->isTargetWindows()) in AArch64TargetLowering()
1178 if (Subtarget->isNeonAvailable()) { in AArch64TargetLowering()
1181 // clang-format off in AArch64TargetLowering()
1188 ISD::FSIN, ISD::FCOS, ISD::FTAN, in AArch64TargetLowering()
1203 // clang-format on in AArch64TargetLowering()
1211 // AArch64 doesn't have a direct vector ->f32 conversion instructions for in AArch64TargetLowering()
1216 // Similarly, there is no direct i32 -> f64 vector conversion instruction. in AArch64TargetLowering()
1217 // Or, direct i32 -> f16 vector conversion. Set it to custom, so the in AArch64TargetLowering()
1218 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16 in AArch64TargetLowering()
1224 if (Subtarget->hasFullFP16()) { in AArch64TargetLowering()
1264 // Custom handling for some quad-vector types to detect MULL. in AArch64TargetLowering()
1294 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) { in AArch64TargetLowering()
1347 // AArch64 has implementations of a lot of rounding-like FP operations. in AArch64TargetLowering()
1355 if (Subtarget->hasFullFP16()) in AArch64TargetLowering()
1364 if (Subtarget->hasFullFP16()) in AArch64TargetLowering()
1402 Subtarget->isLittleEndian() ? Legal : Expand); in AArch64TargetLowering()
1413 if (Subtarget->hasSME()) { in AArch64TargetLowering()
1419 if (Subtarget->isSVEorStreamingSVEAvailable()) { in AArch64TargetLowering()
1429 if (Subtarget->isSVEorStreamingSVEAvailable()) { in AArch64TargetLowering()
1492 if (!Subtarget->isLittleEndian()) in AArch64TargetLowering()
1495 if (Subtarget->hasSVE2() || in AArch64TargetLowering()
1496 (Subtarget->hasSME() && Subtarget->isStreaming())) in AArch64TargetLowering()
1574 // SVE supports truncating stores of 64 and 128-bit vectors in AArch64TargetLowering()
1627 setOperationAction(ISD::FSIN, VT, Expand); in AArch64TargetLowering()
1655 if (!Subtarget->isLittleEndian()) in AArch64TargetLowering()
1666 if (!Subtarget->isLittleEndian()) in AArch64TargetLowering()
1680 // NEON doesn't support 64-bit vector integer muls, but SVE does. in AArch64TargetLowering()
1686 if (Subtarget->useSVEForFixedLengthVectors()) { in AArch64TargetLowering()
1689 VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable())) in AArch64TargetLowering()
1694 VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable())) in AArch64TargetLowering()
1759 // Handle operations that are only available in non-streaming SVE mode. in AArch64TargetLowering()
1760 if (Subtarget->isSVEAvailable()) { in AArch64TargetLowering()
1778 if (Subtarget->hasSVE2()) in AArch64TargetLowering()
1784 if (Subtarget->hasMOPS() && Subtarget->hasMTE()) { in AArch64TargetLowering()
1791 if (Subtarget->hasSVE()) { in AArch64TargetLowering()
1798 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); in AArch64TargetLowering()
1803 // On MSVC, both 32-bit and 64-bit, ldexpf(f32) is not defined. MinGW has in AArch64TargetLowering()
1805 if (Subtarget->isTargetWindows()) { in AArch64TargetLowering()
1817 if (Subtarget->isWindowsArm64EC()) { in AArch64TargetLowering()
1840 setOperationAction(ISD::FSIN, VT, Expand); in addTypeForNEON()
1858 // But we do support custom-lowering for FCOPYSIGN. in addTypeForNEON()
1862 Subtarget->hasFullFP16())) in addTypeForNEON()
1911 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16())) in addTypeForNEON()
1934 // * The lowering of the non-strict versions involves target-specific ISD in addTypeForNEON()
1940 if (Subtarget->isLittleEndian()) { in addTypeForNEON()
1948 if (Subtarget->hasD128()) { in addTypeForNEON()
1956 // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo). in shouldExpandGetActiveLaneMask()
1957 if (!Subtarget->hasSVE()) in shouldExpandGetActiveLaneMask()
1961 // whilelo instruction for generating fixed-width predicates too. in shouldExpandGetActiveLaneMask()
1975 if (!Subtarget->isSVEorStreamingSVEAvailable()) in shouldExpandCttzElements()
1979 // also support fixed-width predicates. in shouldExpandCttzElements()
2020 // Mark floating-point truncating stores/extending loads as having custom in addTypeForFixedLengthSVE()
2033 bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable(); in addTypeForFixedLengthSVE()
2129 if (Subtarget->isNeonAvailable()) in addDRType()
2135 if (Subtarget->isNeonAvailable()) in addQRType()
2148 // isIntImmediate - This method tests to see if the node is a constant
2152 Imm = C->getZExtValue(); in isIntImmediate()
2158 // isOpcWithIntImmediate - This method tests to see if the node is a specific
2163 return N->getOpcode() == Opc && in isOpcWithIntImmediate()
2164 isIntImmediate(N->getOperand(1).getNode(), Imm); in isOpcWithIntImmediate()
2172 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask; in optimizeLogicalImm()
2187 // The goal here is to set the non-demanded bits in a way that minimizes in optimizeLogicalImm()
2189 // we set the non-demanded bits to the value of the preceding demanded bits. in optimizeLogicalImm()
2191 // non-demanded bit), we copy bit0 (1) to the least significant 'x', in optimizeLogicalImm()
2197 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & in optimizeLogicalImm()
2200 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1)); in optimizeLogicalImm()
2205 // or all-ones or all-zeros, in which case we can stop searching. Otherwise, in optimizeLogicalImm()
2210 // We cannot shrink the element size any further if it is 2-bits. in optimizeLogicalImm()
2245 // If the new constant immediate is all-zeros or all-ones, let the target in optimizeLogicalImm()
2301 uint64_t Imm = C->getZExtValue(); in targetShrinkDemandedConstant()
2305 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
2325 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); in computeKnownBitsForTargetNode()
2326 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1); in computeKnownBitsForTargetNode()
2333 ~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2)); in computeKnownBitsForTargetNode()
2334 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); in computeKnownBitsForTargetNode()
2340 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); in computeKnownBitsForTargetNode()
2341 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1); in computeKnownBitsForTargetNode()
2347 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); in computeKnownBitsForTargetNode()
2348 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1); in computeKnownBitsForTargetNode()
2354 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); in computeKnownBitsForTargetNode()
2355 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1); in computeKnownBitsForTargetNode()
2361 APInt(Known.getBitWidth(), Op->getConstantOperandVal(0))); in computeKnownBitsForTargetNode()
2366 if (!Subtarget->isTargetILP32()) in computeKnownBitsForTargetNode()
2368 // In ILP32 mode all valid pointers are in the low 4GB of the address-space. in computeKnownBitsForTargetNode()
2373 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1); in computeKnownBitsForTargetNode()
2379 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1)); in computeKnownBitsForTargetNode()
2385 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); in computeKnownBitsForTargetNode()
2387 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); in computeKnownBitsForTargetNode()
2405 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - Bound); in computeKnownBitsForTargetNode()
2414 // bits larger than the element datatype. 32-bit or larger doesn't need in computeKnownBitsForTargetNode()
2420 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); in computeKnownBitsForTargetNode()
2424 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); in computeKnownBitsForTargetNode()
2459 // Compares return either 0 or all-ones in ComputeNumSignBitsForTargetNode()
2474 if (Subtarget->requiresStrictAlign()) in allowsMisalignedMemoryAccesses()
2478 // Some CPUs are fine with unaligned stores except for 128-bit ones. in allowsMisalignedMemoryAccesses()
2479 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 || in allowsMisalignedMemoryAccesses()
2489 // them regresses performance on micro-benchmarks and olden/bh. in allowsMisalignedMemoryAccesses()
2499 if (Subtarget->requiresStrictAlign()) in allowsMisalignedMemoryAccesses()
2503 // Some CPUs are fine with unaligned stores except for 128-bit ones. in allowsMisalignedMemoryAccesses()
2504 *Fast = !Subtarget->isMisaligned128StoreSlow() || in allowsMisalignedMemoryAccesses()
2515 // them regresses performance on micro-benchmarks and olden/bh. in allowsMisalignedMemoryAccesses()
2865 // We materialise the F128CSEL pseudo-instruction as some control flow and a in EmitF128CSEL()
2877 MachineFunction *MF = MBB->getParent(); in EmitF128CSEL()
2878 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); in EmitF128CSEL()
2879 const BasicBlock *LLVM_BB = MBB->getBasicBlock(); in EmitF128CSEL()
2881 MachineFunction::iterator It = ++MBB->getIterator(); in EmitF128CSEL()
2889 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitF128CSEL()
2890 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB); in EmitF128CSEL()
2891 MF->insert(It, TrueBB); in EmitF128CSEL()
2892 MF->insert(It, EndBB); in EmitF128CSEL()
2894 // Transfer rest of current basic-block to EndBB in EmitF128CSEL()
2895 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), in EmitF128CSEL()
2896 MBB->end()); in EmitF128CSEL()
2897 EndBB->transferSuccessorsAndUpdatePHIs(MBB); in EmitF128CSEL()
2899 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB); in EmitF128CSEL()
2900 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB); in EmitF128CSEL()
2901 MBB->addSuccessor(TrueBB); in EmitF128CSEL()
2902 MBB->addSuccessor(EndBB); in EmitF128CSEL()
2905 TrueBB->addSuccessor(EndBB); in EmitF128CSEL()
2908 TrueBB->addLiveIn(AArch64::NZCV); in EmitF128CSEL()
2909 EndBB->addLiveIn(AArch64::NZCV); in EmitF128CSEL()
2912 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg) in EmitF128CSEL()
2925 BB->getParent()->getFunction().getPersonalityFn())) && in EmitLoweredCatchRet()
2933 MachineFunction &MF = *MBB->getParent(); in EmitDynamicProbedAlloc()
2935 DebugLoc DL = MBB->findDebugLoc(MBBI); in EmitDynamicProbedAlloc()
2943 return NextInst->getParent(); in EmitDynamicProbedAlloc()
2950 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); in EmitTileLoad()
2951 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); in EmitTileLoad()
2966 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); in EmitFill()
2968 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA)); in EmitFill()
2984 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); in EmitZTInstr()
2987 MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode)) in EmitZTInstr()
3000 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); in EmitZAInstr()
3001 MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); in EmitZAInstr()
3007 MIB.add(MI.getOperand(StartIdx)); // Output ZPR in EmitZAInstr()
3012 RegState::Define); // Output ZA Tile in EmitZAInstr()
3018 MIB.add(MI.getOperand(StartIdx)); // Output ZPR in EmitZAInstr()
3032 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); in EmitZero()
3034 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M)); in EmitZero()
3050 MachineFunction *MF = BB->getParent(); in EmitInitTPIDR2Object()
3051 MachineFrameInfo &MFI = MF->getFrameInfo(); in EmitInitTPIDR2Object()
3052 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>(); in EmitInitTPIDR2Object()
3053 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj(); in EmitInitTPIDR2Object()
3055 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); in EmitInitTPIDR2Object()
3057 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui)) in EmitInitTPIDR2Object()
3061 // Set the reserved bytes (10-15) to zero in EmitInitTPIDR2Object()
3062 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRHHui)) in EmitInitTPIDR2Object()
3066 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRWui)) in EmitInitTPIDR2Object()
3073 BB->remove_instr(&MI); in EmitInitTPIDR2Object()
3080 MachineFunction *MF = BB->getParent(); in EmitAllocateZABuffer()
3081 MachineFrameInfo &MFI = MF->getFrameInfo(); in EmitAllocateZABuffer()
3082 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>(); in EmitAllocateZABuffer()
3087 assert(!MF->getSubtarget<AArch64Subtarget>().isTargetWindows() && in EmitAllocateZABuffer()
3090 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj(); in EmitAllocateZABuffer()
3093 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); in EmitAllocateZABuffer()
3094 MachineRegisterInfo &MRI = MF->getRegInfo(); in EmitAllocateZABuffer()
3099 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP) in EmitAllocateZABuffer()
3102 // Allocate a lazy-save buffer object of the size given, normally SVL * SVL in EmitAllocateZABuffer()
3105 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), Dest) in EmitAllocateZABuffer()
3109 BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), in EmitAllocateZABuffer()
3117 BB->remove_instr(&MI); in EmitAllocateZABuffer()
3125 if (SMEOrigInstr != -1) { in EmitInstrWithCustomInserter()
3126 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); in EmitInstrWithCustomInserter()
3128 TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask; in EmitInstrWithCustomInserter()
3160 // has implicit def. This def is early-clobber as it will be set at in EmitInstrWithCustomInserter()
3216 //===----------------------------------------------------------------------===//
3218 //===----------------------------------------------------------------------===//
3220 //===----------------------------------------------------------------------===//
3222 //===----------------------------------------------------------------------===//
3234 /// isZerosVector - Check whether SDNode N is a zero-filled vector.
3237 while (N->getOpcode() == ISD::BITCAST) in isZerosVector()
3238 N = N->getOperand(0).getNode(); in isZerosVector()
3243 if (N->getOpcode() != AArch64ISD::DUP) in isZerosVector()
3246 auto Opnd0 = N->getOperand(0); in isZerosVector()
3250 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
3279 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
3369 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
3395 // All of the compare-mask comparisons are ordered, but we can switch in changeVectorFPCCToAArch64CC()
3418 // the grounds that "op1 - (-op2) == op1 + op2"? Not always, the C and V flags
3424 // So, finally, the only LLVM-native comparisons that don't mention C or V
3523 /// - We can implement (NEG SETCC) i.e. negating a single comparison by
3525 /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
3528 /// - Note that we can only ever negate all previously processed results.
3530 /// of two sub-trees (because the negation affects all sub-trees emitted so
3531 /// far, so the 2nd sub-tree we emit would also affect the first).
3533 /// - (OR (SETCC A) (SETCC B)) can be implemented via:
3535 /// - After transforming OR to NEG/AND combinations we may be able to use NEG
3578 APInt Imm = Const->getAPIntValue(); in emitConditionalComparison()
3579 if (Imm.isNegative() && Imm.sgt(-32)) { in emitConditionalComparison()
3581 RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0)); in emitConditionalComparison()
3605 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
3608 /// Negate==true on this sub-tree)
3622 unsigned Opcode = Val->getOpcode(); in canEmitConjunction()
3624 if (Val->getOperand(0).getValueType() == MVT::f128) in canEmitConjunction()
3635 SDValue O0 = Val->getOperand(0); in canEmitConjunction()
3636 SDValue O1 = Val->getOperand(1); in canEmitConjunction()
3655 // the leafs, then this sub-tree as a whole negates naturally. in canEmitConjunction()
3657 // If we cannot naturally negate the whole sub-tree, then this must be in canEmitConjunction()
3677 /// \p Negate is true if we want this sub-tree being negated just by changing
3683 unsigned Opcode = Val->getOpcode(); in emitConjunctionRec()
3685 SDValue LHS = Val->getOperand(0); in emitConjunctionRec()
3686 SDValue RHS = Val->getOperand(1); in emitConjunctionRec()
3687 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get(); in emitConjunctionRec()
3720 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree"); in emitConjunctionRec()
3724 SDValue LHS = Val->getOperand(0); in emitConjunctionRec()
3731 SDValue RHS = Val->getOperand(1); in emitConjunctionRec()
3738 // Swap sub-tree that must come first to the right side. in emitConjunctionRec()
3751 // Swap the sub-tree that we can negate naturally to the left. in emitConjunctionRec()
3760 // Negate the left sub-tree if possible, otherwise negate the result. in emitConjunctionRec()
3776 // Emit sub-trees. in emitConjunctionRec()
3812 uint64_t Mask = MaskCst->getZExtValue(); in getCmpOperandFoldingProfit()
3828 uint64_t Shift = ShiftCst->getZExtValue(); in getCmpOperandFoldingProfit()
3844 uint64_t C = RHSC->getZExtValue(); in getAArch64Cmp()
3853 isLegalArithImmed((uint32_t)(C - 1))) || in getAArch64Cmp()
3855 isLegalArithImmed(C - 1ULL))) { in getAArch64Cmp()
3857 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; in getAArch64Cmp()
3864 isLegalArithImmed((uint32_t)(C - 1))) || in getAArch64Cmp()
3865 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) { in getAArch64Cmp()
3867 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; in getAArch64Cmp()
3908 !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) { in getAArch64Cmp()
3931 // -1 constant. For example, in getAArch64Cmp()
3942 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) && in getAArch64Cmp()
3943 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD && in getAArch64Cmp()
3944 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 && in getAArch64Cmp()
3945 LHS.getNode()->hasNUsesOfValue(1, 0)) { in getAArch64Cmp()
3946 int16_t ValueofRHS = RHS->getAsZExtVal(); in getAArch64Cmp()
3947 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { in getAArch64Cmp()
3958 if (!Cmp && (RHSC->isZero() || RHSC->isOne())) { in getAArch64Cmp()
3960 if ((CC == ISD::SETNE) ^ RHSC->isZero()) in getAArch64Cmp()
4008 // Extend to 64-bits, then perform a 64-bit multiply. in getAArch64XALUOOp()
4015 // Check that the result fits into a 32-bit integer. in getAArch64XALUOOp()
4055 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32); in getAArch64XALUOOp()
4066 !Subtarget->isNeonAvailable())) in LowerXOR()
4076 // --> in LowerXOR()
4082 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0))) in LowerXOR()
4101 // (xor x, (select_cc a, b, cc, 0, -1) ) in LowerXOR()
4102 // --> in LowerXOR()
4103 // (csel x, (xor x, -1), cc ...) in LowerXOR()
4107 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get(); in LowerXOR()
4113 // FIXME: This could be generalized to non-integer comparisons. in LowerXOR()
4126 if (CTVal->isAllOnes() && CFVal->isZero()) { in LowerXOR()
4133 if (CTVal->isZero() && CFVal->isAllOnes()) { in LowerXOR()
4139 DAG.getConstant(-1ULL, dl, Other.getValueType())); in LowerXOR()
4252 // The front-end should have filtered out the out-of-range values in LowerPREFETCH()
4253 assert(Locality <= 3 && "Prefetch locality out-of-range"); in LowerPREFETCH()
4257 Locality = 3 - Locality; in LowerPREFETCH()
4276 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) in LowerFP_EXTEND()
4289 bool IsStrict = Op->isStrictFPOpcode(); in LowerFP_ROUND()
4294 if (useSVEForFixedLengthVectorVT(SrcVT, !Subtarget->isNeonAvailable())) in LowerFP_ROUND()
4300 !((Subtarget->hasNEON() || Subtarget->hasSME()) && in LowerFP_ROUND()
4301 Subtarget->hasBF16())) { in LowerFP_ROUND()
4371 bool IsStrict = Op->isStrictFPOpcode(); in LowerVectorFP_TO_INT()
4382 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) || in LowerVectorFP_TO_INT()
4383 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) in LowerVectorFP_TO_INT()
4389 if ((InVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16()) || in LowerVectorFP_TO_INT()
4436 // Use a scalar operation for conversions between single-element vectors of in LowerVectorFP_TO_INT()
4456 bool IsStrict = Op->isStrictFPOpcode(); in LowerFP_TO_INT()
4463 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) || in LowerFP_TO_INT()
4489 // AArch64 FP-to-int conversions saturate to the destination element size, so in LowerVectorFP_TO_INT_SAT()
4494 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); in LowerVectorFP_TO_INT_SAT()
4512 (!Subtarget->hasFullFP16() || DstElementWidth > 16)) || in LowerVectorFP_TO_INT_SAT()
4567 // AArch64 FP-to-int conversions saturate to the destination register size, so in LowerFP_TO_INT_SAT()
4576 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); in LowerFP_TO_INT_SAT()
4582 if ((SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) || SrcVT == MVT::bf16) { in LowerFP_TO_INT_SAT()
4592 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) && in LowerFP_TO_INT_SAT()
4633 // Round the floating-point value into a floating-point register with the in LowerVectorXRINT()
4647 bool IsStrict = Op->isStrictFPOpcode(); in LowerVectorINT_TO_FP()
4669 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) || in LowerVectorINT_TO_FP()
4670 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) in LowerVectorINT_TO_FP()
4715 // Use a scalar operation for conversions between single-element vectors of in LowerVectorINT_TO_FP()
4736 bool IsStrict = Op->isStrictFPOpcode(); in LowerINT_TO_FP()
4739 bool IsSigned = Op->getOpcode() == ISD::STRICT_SINT_TO_FP || in LowerINT_TO_FP()
4740 Op->getOpcode() == ISD::SINT_TO_FP; in LowerINT_TO_FP()
4770 // We need to be careful about i64 -> bf16. in LowerINT_TO_FP()
4793 // double-precision value or it is too big. If it is sufficiently small, in LowerINT_TO_FP()
4794 // we should just go u64 -> double -> bf16 in a naive way. Otherwise, we in LowerINT_TO_FP()
4795 // ensure that u64 -> double has no rounding error by only using the 52 in LowerINT_TO_FP()
4855 if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) { in LowerINT_TO_FP()
4863 // Other conversions are legal, unless it's to the completely software-based in LowerINT_TO_FP()
4925 "Expected int->fp bitcast!"); in LowerBITCAST()
4971 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than in addRequiredExtensionForVectorMULL()
4972 // 64-bits we need to insert a new extension so that it will be 64-bits. in addRequiredExtensionForVectorMULL()
4983 // Returns lane if Op extracts from a two-element vector and lane is constant
4988 if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT) in getConstantLaneNumOfExtractHalfOperand()
4991 EVT VT = OpNode->getOperand(0).getValueType(); in getConstantLaneNumOfExtractHalfOperand()
4992 ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1)); in getConstantLaneNumOfExtractHalfOperand()
4996 return C->getZExtValue(); in getConstantLaneNumOfExtractHalfOperand()
5006 for (const SDValue &Elt : N->op_values()) { in isExtendedBUILD_VECTOR()
5011 if (!isIntN(HalfSize, C->getSExtValue())) in isExtendedBUILD_VECTOR()
5014 if (!isUIntN(HalfSize, C->getZExtValue())) in isExtendedBUILD_VECTOR()
5072 return N0->hasOneUse() && N1->hasOneUse() && in isAddSubSExt()
5083 return N0->hasOneUse() && N1->hasOneUse() && in isAddSubZExt()
5092 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 in LowerGET_ROUNDING()
5115 SDValue Chain = Op->getOperand(0); in LowerSET_ROUNDING()
5116 SDValue RMValue = Op->getOperand(1); in LowerSET_ROUNDING()
5120 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is in LowerSET_ROUNDING()
5121 // (((arg - 1) & 3) << 22). in LowerSET_ROUNDING()
5159 SDValue Chain = Op->getOperand(0); in LowerGET_FPMODE()
5178 SDValue Chain = Op->getOperand(0); in LowerSET_FPMODE()
5179 SDValue Mode = Op->getOperand(1); in LowerSET_FPMODE()
5193 SDValue Chain = Op->getOperand(0); in LowerRESET_FPMODE()
5281 bool OverrideNEON = !Subtarget->isNeonAvailable(); in LowerMUL()
5285 // Multiplications are only custom-lowered for 128-bit and 64-bit vectors so in LowerMUL()
5288 "unexpected type for custom-lowering ISD::MUL"); in LowerMUL()
5303 if (Subtarget->hasSVE()) in LowerMUL()
5320 if (Subtarget->hasSVE()) in LowerMUL()
5342 // isel lowering to take advantage of no-stall back to back s/umul + s/umla. in LowerMUL()
5343 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57 in LowerMUL()
5409 "Expected a predicate-to-predicate bitcast"); in getSVEPredicateBitCast()
5416 // e.g. <n x 16 x i1> -> <n x 16 x i1> in getSVEPredicateBitCast()
5424 // case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then in getSVEPredicateBitCast()
5461 // ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11]
5465 // ->
5477 // ->
5483 // Case 4: If the vecnum is an add of an immediate, then the non-immediate
5487 // ->
5498 SDValue TileSlice = N->getOperand(2); in LowerSMELdrStr()
5499 SDValue Base = N->getOperand(3); in LowerSMELdrStr()
5500 SDValue VecNum = N->getOperand(4); in LowerSMELdrStr()
5507 ConstAddend = cast<ConstantSDNode>(VecNum.getOperand(1))->getSExtValue(); in LowerSMELdrStr()
5510 ConstAddend = ImmNode->getSExtValue(); in LowerSMELdrStr()
5515 if (int32_t C = (ConstAddend - ImmAddend)) { in LowerSMELdrStr()
5572 Op->getOperand(0), // Chain in LowerINTRINSIC_VOID()
5578 Op->getOperand(0), // Chain in LowerINTRINSIC_VOID()
5593 SDValue Chain = Node->getChain(); in LowerINTRINSIC_W_CHAIN()
5598 auto Alignment = Node->getMemOperand()->getAlign(); in LowerINTRINSIC_W_CHAIN()
5599 bool IsVol = Node->isVolatile(); in LowerINTRINSIC_W_CHAIN()
5600 auto DstPtrInfo = Node->getPointerInfo(); in LowerINTRINSIC_W_CHAIN()
5659 SelectionDAG &DAG) -> SDValue { in LowerINTRINSIC_WO_CHAIN()
5662 // re-use the dag-combiner function with aarch64_neon_{pmull,smull,umull}. in LowerINTRINSIC_WO_CHAIN()
5687 // the non-high version of PMULL instruction. Use v1i64 to represent i64. in LowerINTRINSIC_WO_CHAIN()
5958 const auto *RegInfo = Subtarget->getRegisterInfo(); in LowerINTRINSIC_WO_CHAIN()
5959 unsigned Reg = RegInfo->getLocalAddressRegister(MF); in LowerINTRINSIC_WO_CHAIN()
5971 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr); in LowerINTRINSIC_WO_CHAIN()
6039 // then extracting a fixed-width subvector from the scalable vector. in LowerINTRINSIC_WO_CHAIN()
6072 // an SVE predicate register mask from the fixed-width vector. in LowerINTRINSIC_WO_CHAIN()
6097 // SVE only supports implicit extension of 32-bit indices. in shouldRemoveExtendFromGSIndex()
6098 if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32) in shouldRemoveExtendFromGSIndex()
6105 // Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit in shouldRemoveExtendFromGSIndex()
6112 if (!ExtVT.isScalableVector() && !Subtarget->useSVEForFixedLengthVectors()) in isVectorLoadExtDesirable()
6120 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal->getOperand(0))) { in isVectorLoadExtDesirable()
6121 if (!isLoadExtLegalOrCustom(ISD::ZEXTLOAD, ExtVT, Ld->getValueType(0))) { in isVectorLoadExtDesirable()
6122 // Disable extending masked loads for fixed-width for now, since the code in isVectorLoadExtDesirable()
6128 for (auto *U : Ld->getMask()->uses()) in isVectorLoadExtDesirable()
6160 return AddrModes.find(Key)->second; in getGatherVecOpcode()
6190 SDValue Chain = MGT->getChain(); in LowerMGATHER()
6191 SDValue PassThru = MGT->getPassThru(); in LowerMGATHER()
6192 SDValue Mask = MGT->getMask(); in LowerMGATHER()
6193 SDValue BasePtr = MGT->getBasePtr(); in LowerMGATHER()
6194 SDValue Index = MGT->getIndex(); in LowerMGATHER()
6195 SDValue Scale = MGT->getScale(); in LowerMGATHER()
6197 EVT MemVT = MGT->getMemoryVT(); in LowerMGATHER()
6198 ISD::LoadExtType ExtType = MGT->getExtensionType(); in LowerMGATHER()
6199 ISD::MemIndexType IndexType = MGT->getIndexType(); in LowerMGATHER()
6202 // must be handled manually by an explicit select on the load's output. in LowerMGATHER()
6203 if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) { in LowerMGATHER()
6206 DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops, in LowerMGATHER()
6207 MGT->getMemOperand(), IndexType, ExtType); in LowerMGATHER()
6212 bool IsScaled = MGT->isIndexScaled(); in LowerMGATHER()
6213 bool IsSigned = MGT->isIndexSigned(); in LowerMGATHER()
6217 uint64_t ScaleVal = Scale->getAsZExtVal(); in LowerMGATHER()
6219 assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types"); in LowerMGATHER()
6226 return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops, in LowerMGATHER()
6227 MGT->getMemOperand(), IndexType, ExtType); in LowerMGATHER()
6232 assert(Subtarget->useSVEForFixedLengthVectors() && in LowerMGATHER()
6235 // NOTE: Handle floating-point as if integer then bitcast the result. in LowerMGATHER()
6262 PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT) in LowerMGATHER()
6269 Ops, MGT->getMemOperand(), IndexType, ExtType); in LowerMGATHER()
6289 SDValue Chain = MSC->getChain(); in LowerMSCATTER()
6290 SDValue StoreVal = MSC->getValue(); in LowerMSCATTER()
6291 SDValue Mask = MSC->getMask(); in LowerMSCATTER()
6292 SDValue BasePtr = MSC->getBasePtr(); in LowerMSCATTER()
6293 SDValue Index = MSC->getIndex(); in LowerMSCATTER()
6294 SDValue Scale = MSC->getScale(); in LowerMSCATTER()
6296 EVT MemVT = MSC->getMemoryVT(); in LowerMSCATTER()
6297 ISD::MemIndexType IndexType = MSC->getIndexType(); in LowerMSCATTER()
6298 bool Truncating = MSC->isTruncatingStore(); in LowerMSCATTER()
6300 bool IsScaled = MSC->isIndexScaled(); in LowerMSCATTER()
6301 bool IsSigned = MSC->isIndexSigned(); in LowerMSCATTER()
6305 uint64_t ScaleVal = Scale->getAsZExtVal(); in LowerMSCATTER()
6307 assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types"); in LowerMSCATTER()
6314 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops, in LowerMSCATTER()
6315 MSC->getMemOperand(), IndexType, Truncating); in LowerMSCATTER()
6320 assert(Subtarget->useSVEForFixedLengthVectors() && in LowerMSCATTER()
6323 // Once bitcast we treat floating-point scatters as if integer. in LowerMSCATTER()
6357 return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops, in LowerMSCATTER()
6358 MSC->getMemOperand(), IndexType, Truncating); in LowerMSCATTER()
6369 EVT VT = Op->getValueType(0); in LowerMLOAD()
6374 SDValue PassThru = LoadNode->getPassThru(); in LowerMLOAD()
6375 SDValue Mask = LoadNode->getMask(); in LowerMLOAD()
6377 if (PassThru->isUndef() || isZerosVector(PassThru.getNode())) in LowerMLOAD()
6381 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(), in LowerMLOAD()
6382 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(), in LowerMLOAD()
6383 LoadNode->getMemOperand(), LoadNode->getAddressingMode(), in LowerMLOAD()
6384 LoadNode->getExtensionType()); in LowerMLOAD()
6398 SDValue Value = ST->getValue(); in LowerTruncateVectorStore()
6419 return DAG.getStore(ST->getChain(), DL, ExtractTrunc, in LowerTruncateVectorStore()
6420 ST->getBasePtr(), ST->getMemOperand()); in LowerTruncateVectorStore()
6432 SDValue Value = StoreNode->getValue(); in LowerSTORE()
6435 EVT MemVT = StoreNode->getMemoryVT(); in LowerSTORE()
6440 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) in LowerSTORE()
6443 unsigned AS = StoreNode->getAddressSpace(); in LowerSTORE()
6444 Align Alignment = StoreNode->getAlign(); in LowerSTORE()
6447 StoreNode->getMemOperand()->getFlags(), in LowerSTORE()
6452 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 && in LowerSTORE()
6456 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of in LowerSTORE()
6457 // the custom lowering, as there are no un-paired non-temporal stores and in LowerSTORE()
6460 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u && in LowerSTORE()
6469 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64)); in LowerSTORE()
6473 StoreNode->getValue(), in LowerSTORE()
6477 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()}, in LowerSTORE()
6478 StoreNode->getMemoryVT(), StoreNode->getMemOperand()); in LowerSTORE()
6481 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) { in LowerSTORE()
6484 SDValue Value = StoreNode->getValue(); in LowerSTORE()
6485 assert(Value->getValueType(0) == MVT::i64x8); in LowerSTORE()
6486 SDValue Chain = StoreNode->getChain(); in LowerSTORE()
6487 SDValue Base = StoreNode->getBasePtr(); in LowerSTORE()
6494 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(), in LowerSTORE()
6495 StoreNode->getOriginalAlign()); in LowerSTORE()
6503 /// Lower atomic or volatile 128-bit stores to a single STP instruction.
6507 assert(StoreNode->getMemoryVT() == MVT::i128); in LowerStore128()
6508 assert(StoreNode->isVolatile() || StoreNode->isAtomic()); in LowerStore128()
6511 StoreNode->getMergedOrdering() == AtomicOrdering::Release; in LowerStore128()
6512 if (StoreNode->isAtomic()) in LowerStore128()
6513 assert((Subtarget->hasFeature(AArch64::FeatureLSE2) && in LowerStore128()
6514 Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) || in LowerStore128()
6515 StoreNode->getMergedOrdering() == AtomicOrdering::Unordered || in LowerStore128()
6516 StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic); in LowerStore128()
6518 SDValue Value = (StoreNode->getOpcode() == ISD::STORE || in LowerStore128()
6519 StoreNode->getOpcode() == ISD::ATOMIC_STORE) in LowerStore128()
6520 ? StoreNode->getOperand(1) in LowerStore128()
6521 : StoreNode->getOperand(2); in LowerStore128()
6529 {StoreNode->getChain(), StoreValue.first, StoreValue.second, in LowerStore128()
6530 StoreNode->getBasePtr()}, in LowerStore128()
6531 StoreNode->getMemoryVT(), StoreNode->getMemOperand()); in LowerStore128()
6541 if (LoadNode->getMemoryVT() == MVT::i64x8) { in LowerLOAD()
6543 SDValue Base = LoadNode->getBasePtr(); in LowerLOAD()
6544 SDValue Chain = LoadNode->getChain(); in LowerLOAD()
6550 LoadNode->getPointerInfo(), in LowerLOAD()
6551 LoadNode->getOriginalAlign()); in LowerLOAD()
6560 EVT VT = Op->getValueType(0); in LowerLOAD()
6563 if (LoadNode->getMemoryVT() != MVT::v4i8) in LowerLOAD()
6567 if (Subtarget->requiresStrictAlign() && LoadNode->getAlign() < Align(4)) in LowerLOAD()
6571 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) in LowerLOAD()
6573 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD || in LowerLOAD()
6574 LoadNode->getExtensionType() == ISD::EXTLOAD) in LowerLOAD()
6579 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(), in LowerLOAD()
6580 LoadNode->getBasePtr(), MachinePointerInfo()); in LowerLOAD()
6639 VT.getFixedSizeInBits() - ShiftNo->getZExtValue(); in LowerFunnelShift()
6698 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) in LowerADJUST_TRAMPOLINE()
6709 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) in LowerINIT_TRAMPOLINE()
6937 if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) { in LowerOperation()
6938 assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3()); in LowerOperation()
6978 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); in LowerOperation()
6993 !Subtarget->isNeonAvailable())) in LowerOperation()
7067 "WRITE_REGISTER custom lowering is only for 128-bit sysregs"); in LowerOperation()
7092 return !Subtarget->useSVEForFixedLengthVectors(); in mergeStoresAfterLegalization()
7117 // NEON-sized vectors can be emulated using SVE instructions. in useSVEForFixedLengthVectorVT()
7119 return Subtarget->isSVEorStreamingSVEAvailable(); in useSVEForFixedLengthVectorVT()
7126 if (!Subtarget->useSVEForFixedLengthVectors()) in useSVEForFixedLengthVectorVT()
7130 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits()) in useSVEForFixedLengthVectorVT()
7141 //===----------------------------------------------------------------------===//
7143 //===----------------------------------------------------------------------===//
7146 unsigned Opcode = N->getOpcode(); in getIntrinsicID()
7151 unsigned IID = N->getConstantOperandVal(0); in getIntrinsicID()
7187 // The non-vararg case is handled in the CC function itself. in CCAssignFnForCall()
7200 if (Subtarget->isTargetWindows()) { in CCAssignFnForCall()
7202 if (Subtarget->isWindowsArm64EC()) in CCAssignFnForCall()
7208 if (!Subtarget->isTargetDarwin()) in CCAssignFnForCall()
7212 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg in CCAssignFnForCall()
7216 if (Subtarget->isWindowsArm64EC()) in CCAssignFnForCall()
7222 if (Subtarget->isWindowsArm64EC()) in CCAssignFnForCall()
7245 if (Subtarget->isWindowsArm64EC()) in CCAssignFnForReturn()
7264 Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()); in LowerFormalArguments()
7266 (isVarArg && Subtarget->isWindowsArm64EC()); in LowerFormalArguments()
7273 FuncInfo->setIsSVECC(true); in LowerFormalArguments()
7292 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx); in LowerFormalArguments()
7296 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(), in LowerFormalArguments()
7324 CCValAssign &VA = ArgLocs[i - ExtraArgLocs]; in LowerFormalArguments()
7328 // non-compliant manner for larger structs. in LowerFormalArguments()
7333 // FIXME: This works on big-endian for composite byvals, which are the common in LowerFormalArguments()
7344 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); in LowerFormalArguments()
7366 FuncInfo->setIsSVECC(true); in LowerFormalArguments()
7369 FuncInfo->setIsSVECC(true); in LowerFormalArguments()
7372 FuncInfo->setIsSVECC(true); in LowerFormalArguments()
7387 // tn: res,ch,glue = CopyFromReg t(n-1), .. in LowerFormalArguments()
7403 // If this is an 8, 16 or 32-bit value, it is really passed promoted in LowerFormalArguments()
7413 (VA.getValVT().isScalableVT() || Subtarget->isWindowsArm64EC()) && in LowerFormalArguments()
7437 if (!Subtarget->isLittleEndian() && ArgSize < 8 && in LowerFormalArguments()
7439 BEAlign = 8 - ArgSize; in LowerFormalArguments()
7477 Subtarget->isWindowsArm64EC()) && in LowerFormalArguments()
7498 Subtarget->isWindowsArm64EC()) && in LowerFormalArguments()
7504 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast()) in LowerFormalArguments()
7516 NumParts--; in LowerFormalArguments()
7537 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer()) in LowerFormalArguments()
7541 // i1 arguments are zero-extended to i8 by the caller. Emit a in LowerFormalArguments()
7545 if (OrigArg->getType()->isIntegerTy(1)) { in LowerFormalArguments()
7566 FuncInfo->setPStateSMReg(Reg); in LowerFormalArguments()
7587 if (!Subtarget->isTargetDarwin() || IsWin64) { in LowerFormalArguments()
7588 // The AAPCS variadic function ABI is identical to the non-variadic in LowerFormalArguments()
7598 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32 in LowerFormalArguments()
7599 VarArgsOffset = alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8); in LowerFormalArguments()
7600 FuncInfo->setVarArgsStackOffset(VarArgsOffset); in LowerFormalArguments()
7601 FuncInfo->setVarArgsStackIndex( in LowerFormalArguments()
7610 FuncInfo->getForwardedMustTailRegParms(); in LowerFormalArguments()
7630 assert(!FuncInfo->getSRetReturnReg()); in LowerFormalArguments()
7635 FuncInfo->setSRetReturnReg(Reg); in LowerFormalArguments()
7647 // This is a non-standard ABI so by fiat I say we're allowed to make full in LowerFormalArguments()
7654 FuncInfo->setArgumentStackToRestore(StackArgSize); in LowerFormalArguments()
7662 FuncInfo->setBytesInStackArgArea(StackArgSize); in LowerFormalArguments()
7664 if (Subtarget->hasCustomCallingConv()) in LowerFormalArguments()
7665 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF); in LowerFormalArguments()
7670 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj(); in LowerFormalArguments()
7676 if (!Subtarget->isTargetWindows() && !hasInlineStackProbe(MF)) { in LowerFormalArguments()
7696 DAG.getContext()->diagnose(DiagnosticInfoUnsupported( in LowerFormalArguments()
7718 Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()); in saveVarArgRegisters()
7724 if (Subtarget->isWindowsArm64EC()) { in saveVarArgRegisters()
7725 // In the ARM64EC ABI, only x0-x3 are used to pass arguments to varargs in saveVarArgRegisters()
7731 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); in saveVarArgRegisters()
7735 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false); in saveVarArgRegisters()
7738 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false); in saveVarArgRegisters()
7743 if (Subtarget->isWindowsArm64EC()) { in saveVarArgRegisters()
7745 // compute its address relative to x4. For a normal AArch64->AArch64 in saveVarArgRegisters()
7762 MF, GPRIdx, (i - FirstVariadicGPR) * 8) in saveVarArgRegisters()
7769 FuncInfo->setVarArgsGPRIndex(GPRIdx); in saveVarArgRegisters()
7770 FuncInfo->setVarArgsGPRSize(GPRSaveSize); in saveVarArgRegisters()
7772 if (Subtarget->hasFPARMv8() && !IsWin64) { in saveVarArgRegisters()
7777 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); in saveVarArgRegisters()
7795 FuncInfo->setVarArgsFPRIndex(FPRIdx); in saveVarArgRegisters()
7796 FuncInfo->setVarArgsFPRSize(FPRSaveSize); in saveVarArgRegisters()
7804 /// LowerCallResult - Lower the result values of a call into the
7912 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC, IsVarArg); in analyzeCallOperands()
7982 // The check for matching callee-saved regs will determine whether it is in isEligibleForTailCallOptimization()
7985 MF.getInfo<AArch64FunctionInfo>()->isSVECC()) in isEligibleForTailCallOptimization()
7990 // When using the Windows calling convention on a non-windows OS, we want in isEligibleForTailCallOptimization()
7993 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() && in isEligibleForTailCallOptimization()
8003 if (i->hasByValAttr()) in isEligibleForTailCallOptimization()
8006 // On Windows, "inreg" attributes signify non-aggregate indirect returns. in isEligibleForTailCallOptimization()
8012 if (i->hasInRegAttr()) in isEligibleForTailCallOptimization()
8019 // Externally-defined functions with weak linkage should not be in isEligibleForTailCallOptimization()
8020 // tail-called on AArch64 when the OS does not support dynamic in isEligibleForTailCallOptimization()
8021 // pre-emption of symbols, as the AAELF spec requires normal calls in isEligibleForTailCallOptimization()
8024 // situation (as used for tail calls) is implementation-defined, so we in isEligibleForTailCallOptimization()
8027 const GlobalValue *GV = G->getGlobal(); in isEligibleForTailCallOptimization()
8029 if (GV->hasExternalWeakLinkage() && in isEligibleForTailCallOptimization()
8050 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); in isEligibleForTailCallOptimization()
8051 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); in isEligibleForTailCallOptimization()
8053 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); in isEligibleForTailCallOptimization()
8054 if (Subtarget->hasCustomCallingConv()) { in isEligibleForTailCallOptimization()
8055 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved); in isEligibleForTailCallOptimization()
8056 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved); in isEligibleForTailCallOptimization()
8058 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) in isEligibleForTailCallOptimization()
8071 if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) { in isEligibleForTailCallOptimization()
8093 Subtarget->isWindowsArm64EC()) && in isEligibleForTailCallOptimization()
8101 if (CCInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) in isEligibleForTailCallOptimization()
8117 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1; in addTokenForArgument()
8125 for (SDNode *U : DAG.getEntryNode().getNode()->uses()) in addTokenForArgument()
8127 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) in addTokenForArgument()
8128 if (FI->getIndex() < 0) { in addTokenForArgument()
8129 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex()); in addTokenForArgument()
8131 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1; in addTokenForArgument()
8148 // Check if the value is zero-extended from i1 to i8
8162 // Live-in physreg copies that are glued to SMSTART are applied as in AdjustInstrPostInstrSelection()
8163 // implicit-def's in the InstrEmitter. Here we remove them, allowing the in AdjustInstrPostInstrSelection()
8165 // copies to avoid these fake clobbers of actually-preserved GPRs. in AdjustInstrPostInstrSelection()
8168 for (unsigned I = MI.getNumOperands() - 1; I > 0; --I) in AdjustInstrPostInstrSelection()
8187 // frame-address. If they contain a frame-index to a scalable vector, this in AdjustInstrPostInstrSelection()
8191 if (MF.getInfo<AArch64FunctionInfo>()->hasStreamingModeChanges() && in AdjustInstrPostInstrSelection()
8209 FuncInfo->setHasStreamingModeChanges(true); in changeStreamingMode()
8211 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); in changeStreamingMode()
8212 SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()); in changeStreamingMode()
8243 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
8244 /// and add input and output parameter nodes.
8265 bool IsCFICall = CLI.CB && CLI.CB->isIndirectCall() && CLI.CFIType; in LowerCall()
8269 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && in LowerCall()
8270 !Subtarget->noBTIAtReturnTwice()) { in LowerCall()
8271 GuardWithBTI = FuncInfo->branchTargetEnforcement(); in LowerCall()
8324 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) in LowerCall()
8341 // arguments to begin at SP+0. Completely unused for non-tail calls. in LowerCall()
8345 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); in LowerCall()
8348 // popped size 16-byte aligned. in LowerCall()
8354 FPDiff = NumReusableBytes - NumBytes; in LowerCall()
8358 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff) in LowerCall()
8359 FuncInfo->setTailCallReservedStack(-FPDiff); in LowerCall()
8361 // The stack pointer must be 16-byte aligned at all times it's used for a in LowerCall()
8364 // a 16-byte aligned SP and the delta applied for the tail call should in LowerCall()
8374 CalleeAttrs = SMEAttrs(ES->getSymbol()); in LowerCall()
8377 [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & { in LowerCall()
8380 R << ore::NV("Callee", ES->getSymbol()); in LowerCall()
8381 else if (CLI.CB && CLI.CB->getCalledFunction()) in LowerCall()
8382 R << ore::NV("Callee", CLI.CB->getCalledFunction()->getName()); in LowerCall()
8391 const TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj(); in LowerCall()
8455 // PSTATE.ZA before the call if there is no lazy-save active. in LowerCall()
8458 "Lazy-save should have PSTATE.SM=1 on entry to the function"); in LowerCall()
8479 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) { in LowerCall()
8480 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); in LowerCall()
8490 CCValAssign &VA = ArgLocs[i - ExtraArgLocs]; in LowerCall()
8508 // AAPCS requires i1 to be zero-extended to 8-bits by the caller. in LowerCall()
8511 // already be zero-extended. in LowerCall()
8513 // We cannot just emit a (zext i8 (trunc (assert-zext i8))) in LowerCall()
8517 // (ext (zext x)) -> (zext x) in LowerCall()
8529 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); in LowerCall()
8545 assert((isScalable || Subtarget->isWindowsArm64EC()) && in LowerCall()
8552 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast()) in LowerCall()
8575 NumParts--; in LowerCall()
8613 // parts of an [N x i32] into an X-register. The extension type will in LowerCall()
8621 ->second; in LowerCall()
8649 // FIXME: This works on big-endian for composite byvals, which are the in LowerCall()
8660 if (!Subtarget->isLittleEndian() && !Flags.isByVal() && in LowerCall()
8663 BEAlign = 8 - OpSize; in LowerCall()
8712 if (IsVarArg && Subtarget->isWindowsArm64EC()) { in LowerCall()
8735 if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { in LowerCall()
8748 // Build a sequence of copy-to-reg nodes chained together with token chain in LowerCall()
8760 auto GV = G->getGlobal(); in LowerCall()
8762 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()); in LowerCall()
8767 const GlobalValue *GV = G->getGlobal(); in LowerCall()
8772 Subtarget->isTargetMachO()) || in LowerCall()
8773 MF.getFunction().getParent()->getRtLibUseGOT(); in LowerCall()
8774 const char *Sym = S->getSymbol(); in LowerCall()
8783 // We don't usually want to end the call-sequence here because we would tidy in LowerCall()
8784 // the frame up *after* the call, however in the ABI-changing tail-call case in LowerCall()
8825 const uint64_t Key = CLI.PAI->Key; in LowerCall()
8832 extractPtrauthBlendDiscriminators(CLI.PAI->Discriminator, &DAG); in LowerCall()
8849 // Add a register mask operand representing the call-preserved registers. in LowerCall()
8851 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); in LowerCall()
8853 // For 'this' returns, use the X0-preserving mask if applicable in LowerCall()
8854 Mask = TRI->getThisReturnPreservedMask(MF, CallConv); in LowerCall()
8857 Mask = TRI->getCallPreservedMask(MF, CallConv); in LowerCall()
8860 Mask = TRI->getCallPreservedMask(MF, CallConv); in LowerCall()
8862 if (Subtarget->hasCustomCallingConv()) in LowerCall()
8863 TRI->UpdateCustomCallPreservedMask(MF, &Mask); in LowerCall()
8865 if (TRI->isAnyArgRegReserved(MF)) in LowerCall()
8866 TRI->emitReservedArgRegCallError(MF); in LowerCall()
8882 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue()); in LowerCall()
8892 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); in LowerCall()
8911 InGlue = Result.getValue(Result->getNumValues() - 1); in LowerCall()
8919 if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { in LowerCall()
8941 TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj(); in LowerCall()
8943 TRI->SMEABISupportRoutinesCallPreservedMaskFromX0()); in LowerCall()
8976 // a vreg -> vreg copy. in LowerCall()
8990 DAG.getContext()->diagnose(DiagnosticInfoUnsupported( in LowerCall()
9025 // Copy the result values into the output registers. in LowerReturn()
9040 // AAPCS requires i1 to be zero-extended to i8 by the producer of the in LowerReturn()
9055 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); in LowerReturn()
9066 })->second; in LowerReturn()
9074 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); in LowerReturn()
9080 Register Reg = FuncInfo->getPStateSMReg(); in LowerReturn()
9108 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { in LowerReturn()
9122 const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF); in LowerReturn()
9158 //===----------------------------------------------------------------------===//
9160 //===----------------------------------------------------------------------===//
9165 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, in getTargetNode()
9166 N->getOffset(), Flag); in getTargetNode()
9172 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); in getTargetNode()
9178 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), in getTargetNode()
9179 N->getOffset(), Flag); in getTargetNode()
9185 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); in getTargetNode()
9191 return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); in getTargetNode()
9251 const GlobalValue *GV = GN->getGlobal(); in LowerGlobalAddress()
9252 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); in LowerGlobalAddress()
9255 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 && in LowerGlobalAddress()
9312 assert(Subtarget->isTargetDarwin() && in LowerDarwinGlobalTLSAddress()
9318 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); in LowerDarwinGlobalTLSAddress()
9343 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); in LowerDarwinGlobalTLSAddress()
9344 const uint32_t *Mask = TRI->getTLSCallPreservedMask(); in LowerDarwinGlobalTLSAddress()
9345 if (Subtarget->hasCustomCallingConv()) in LowerDarwinGlobalTLSAddress()
9346 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask); in LowerDarwinGlobalTLSAddress()
9358 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0). in LowerDarwinGlobalTLSAddress()
9359 if (DAG.getMachineFunction().getFunction().hasFnAttribute("ptrauth-calls")) { in LowerDarwinGlobalTLSAddress()
9373 /// Convert a thread-local variable reference into a sequence of instructions to
9464 /// When accessing thread-local variables under either the general-dynamic or
9465 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
9466 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
9479 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
9500 assert(Subtarget->isTargetELF() && "This function expects an ELF target"); in LowerELFGlobalTLSAddress()
9504 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); in LowerELFGlobalTLSAddress()
9525 const GlobalValue *GV = GA->getGlobal(); in LowerELFGlobalTLSAddress()
9535 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS in LowerELFGlobalTLSAddress()
9543 MFI->incNumLocalDynamicTLSAccesses(); in LowerELFGlobalTLSAddress()
9552 // thread-local area. in LowerELFGlobalTLSAddress()
9556 // in its thread-storage area. in LowerELFGlobalTLSAddress()
9587 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering"); in LowerWindowsGlobalTLSAddress()
9627 const GlobalValue *GV = GA->getGlobal(); in LowerWindowsGlobalTLSAddress()
9649 if (Subtarget->isTargetDarwin()) in LowerGlobalTLSAddress()
9651 if (Subtarget->isTargetELF()) in LowerGlobalTLSAddress()
9653 if (Subtarget->isTargetWindows()) in LowerGlobalTLSAddress()
9659 //===----------------------------------------------------------------------===//
9663 // - MOVaddrPAC: similar to MOVaddr, with added PAC.
9667 // - LOADgotPAC: similar to LOADgot, with added PAC.
9670 // section is assumed to be read-only (for example, via relro mechanism). See
9673 // - LOADauthptrstatic: similar to LOADgot, but use a
9681 // provide integrity guarantees on the to-be-signed intermediate values.
9684 // with often similarly-signed pointers, making it a good harvesting target.
9692 assert(TGN->getGlobal()->hasExternalWeakLinkage()); in LowerPtrAuthGlobalAddressStatically()
9698 if (TGN->getOffset() != 0) in LowerPtrAuthGlobalAddressStatically()
9700 "unsupported non-zero offset in weak ptrauth global reference"); in LowerPtrAuthGlobalAddressStatically()
9703 report_fatal_error("unsupported weak addr-div ptrauth global"); in LowerPtrAuthGlobalAddressStatically()
9725 // Blend only works if the integer discriminator is 16-bit wide. in LowerPtrAuthGlobalAddress()
9730 // Choosing between 3 lowering alternatives is target-specific. in LowerPtrAuthGlobalAddress()
9731 if (!Subtarget->isTargetELF() && !Subtarget->isTargetMachO()) in LowerPtrAuthGlobalAddress()
9740 const GlobalValue *PtrGV = PtrN->getGlobal(); in LowerPtrAuthGlobalAddress()
9744 Subtarget->ClassifyGlobalReference(PtrGV, getTargetMachine()); in LowerPtrAuthGlobalAddress()
9747 "unsupported non-GOT op flags on ptrauth global reference"); in LowerPtrAuthGlobalAddress()
9750 PtrOffsetC += PtrN->getOffset(); in LowerPtrAuthGlobalAddress()
9753 assert(PtrN->getTargetFlags() == 0 && in LowerPtrAuthGlobalAddress()
9762 // No GOT load needed -> MOVaddrPAC in LowerPtrAuthGlobalAddress()
9764 assert(!PtrGV->hasExternalWeakLinkage() && "extern_weak should use GOT"); in LowerPtrAuthGlobalAddress()
9771 // GOT load -> LOADgotPAC in LowerPtrAuthGlobalAddress()
9773 if (!PtrGV->hasExternalWeakLinkage()) in LowerPtrAuthGlobalAddress()
9779 // extern_weak ref -> LOADauthptrstatic in LowerPtrAuthGlobalAddress()
9791 cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() - in lookThroughSignExtension()
9796 Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1}; in lookThroughSignExtension()
9798 return {Val, Val.getValueSizeInBits() - 1}; in lookThroughSignExtension()
9803 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); in LowerBR_CC()
9835 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) in LowerBR_CC()
9858 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) { in LowerBR_CC()
9862 // out of bounds, a late MI-layer pass rewrites branches. in LowerBR_CC()
9878 // out of bounds, a late MI-layer pass rewrites branches. in LowerBR_CC()
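// Editorial note (a worked restatement, not part of the source): the
// compare-with-zero and compare-with-minus-one cases handled in this block
// reduce to inspecting the sign bit, which is the bit index
// lookThroughSignExtension returns above and which a TBZ/TBNZ-style branch
// can test directly. For an operand of width Bits:
//   x > -1 (i.e. x >= 0)  <=>  ((x >> (Bits - 1)) & 1) == 0
//   x <  0                <=>  ((x >> (Bits - 1)) & 1) == 1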
9901 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT && in LowerBR_CC()
9940 if (!Subtarget->isNeonAvailable() && in LowerFCOPYSIGN()
9941 !Subtarget->useSVEForFixedLengthVectors()) in LowerFCOPYSIGN()
9960 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) { in LowerFCOPYSIGN()
9979 auto SetVecVal = [&](int Idx = -1) { in LowerFCOPYSIGN()
10011 // 64-bit elements. Instead, materialize all bits set and then negate that. in LowerFCOPYSIGN()
10039 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) in LowerCTPOP_PARITY()
10042 if (!Subtarget->isNeonAvailable()) in LowerCTPOP_PARITY()
10058 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd in LowerCTPOP_PARITY()
10059 // CNT V0.8B, V0.8B // 8xbyte pop-counts in LowerCTPOP_PARITY()
10060 // ADDV B0, V0.8B // sum 8xbyte pop-counts in LowerCTPOP_PARITY()
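// Editorial sketch of the scalar-popcount sequence described above, using
// ACLE NEON intrinsics (hypothetical helper, not part of this file; assumes
// <arm_neon.h> and <cstdint> are included and an AArch64 target):
static unsigned Popcount64ViaNeon(uint64_t X) {
  uint8x8_t Bytes  = vcreate_u8(X);   // FMOV D0, X0   (high bits zeroed)
  uint8x8_t Counts = vcnt_u8(Bytes);  // CNT  V0.8B, V0.8B  per-byte popcounts
  return vaddv_u8(Counts);            // ADDV B0, V0.8B     sum the 8 counts
}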
10104 if (Subtarget->hasDotProd() && VT.getScalarSizeInBits() != 16 && in LowerCTPOP_PARITY()
10141 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())); in LowerCTTZ()
10174 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) { in LowerMinMax()
10201 VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) in LowerBitreverse()
10252 // Skip the one-use zext in isOrXorChain()
10253 if (N->getOpcode() == ISD::ZERO_EXTEND && N->hasOneUse()) in isOrXorChain()
10254 N = N->getOperand(0); in isOrXorChain()
10257 if (N->getOpcode() == ISD::XOR) { in isOrXorChain()
10258 WorkList.push_back(std::make_pair(N->getOperand(0), N->getOperand(1))); in isOrXorChain()
10263 // All the non-leaf nodes must be OR. in isOrXorChain()
10264 if (N->getOpcode() != ISD::OR || !N->hasOneUse()) in isOrXorChain()
10267 if (isOrXorChain(N->getOperand(0), Num, WorkList) && in isOrXorChain()
10268 isOrXorChain(N->getOperand(1), Num, WorkList)) in isOrXorChain()
10275 SDValue LHS = N->getOperand(0); in performOrXorChainCombine()
10276 SDValue RHS = N->getOperand(1); in performOrXorChainCombine()
10278 EVT VT = N->getValueType(0); in performOrXorChainCombine()
10282 if (N->getOpcode() != ISD::SETCC) in performOrXorChainCombine()
10285 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); in performOrXorChainCombine()
10290 LHS->getOpcode() == ISD::OR && LHS->hasOneUse() && in performOrXorChainCombine()
10314 bool IsStrict = Op->isStrictFPOpcode(); in LowerSETCC()
10322 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get(); in LowerSETCC()
10383 // this case, we emit the first CSEL and then emit a second using the output in LowerSETCC()
10417 ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(3))->get(); in LowerSETCCCARRY()
10445 if ((LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) || in LowerSELECT_CC()
10459 // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform in LowerSELECT_CC()
10460 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the in LowerSELECT_CC()
10462 if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal && in LowerSELECT_CC()
10463 CTVal->isOne() && CFVal->isAllOnes() && in LowerSELECT_CC()
10468 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT)); in LowerSELECT_CC()
10473 // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1)) in LowerSELECT_CC()
10474 // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1)) in LowerSELECT_CC()
10477 RHSC && RHSC->isZero() && CFVal && CFVal->isZero() && in LowerSELECT_CC()
10482 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT)); in LowerSELECT_CC()
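// Editorial sketch of the identities described in the comments above,
// specialized to i32 so the shift amount N-1 is 31 (hypothetical helpers,
// not part of this file; assumes arithmetic right shift of signed int):
static int SelGtMinusOne(int X) { return (X >> 31) | 1; }  // x > -1 ? 1 : -1
static int SelGtZero(int X)     { return X & ~(X >> 31); } // x >  0 ? x : 0
static int SelLtZero(int X)     { return X &  (X >> 31); } // x <  0 ? x : 0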
10494 if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) { in LowerSELECT_CC()
10498 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) { in LowerSELECT_CC()
10519 const int64_t TrueVal = CTVal->getSExtValue(); in LowerSELECT_CC()
10520 const int64_t FalseVal = CFVal->getSExtValue(); in LowerSELECT_CC()
10529 TrueVal == -FalseVal) { in LowerSELECT_CC()
10532 // If our operands are only 32-bit wide, make sure we use 32-bit in LowerSELECT_CC()
10536 // 64-bit arithmetic). in LowerSELECT_CC()
10537 const uint32_t TrueVal32 = CTVal->getZExtValue(); in LowerSELECT_CC()
10538 const uint32_t FalseVal32 = CFVal->getZExtValue(); in LowerSELECT_CC()
10548 // 64-bit check whether we can use CSINC. in LowerSELECT_CC()
10581 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() && in LowerSELECT_CC()
10582 !RHSVal->isZero() && !RHSVal->isAllOnes()) { in LowerSELECT_CC()
10590 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) { in LowerSELECT_CC()
10592 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to in LowerSELECT_CC()
10624 if (RHSVal && RHSVal->isZero()) { in LowerSELECT_CC()
10629 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType()) in LowerSELECT_CC()
10632 CFVal && CFVal->isZero() && in LowerSELECT_CC()
10642 // If we need a second CSEL, emit it, using the output of the first as the in LowerSELECT_CC()
10649 // Otherwise, return the output of the first CSEL. in LowerSELECT_CC()
10664 // -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the in LowerVECTOR_SPLICE()
10674 // Create a predicate where all but the last -IdxVal elements are false. in LowerVECTOR_SPLICE()
10694 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); in LowerSELECT_CC()
10705 SDValue CCVal = Op->getOperand(0); in LowerSELECT()
10706 SDValue TVal = Op->getOperand(1); in LowerSELECT()
10707 SDValue FVal = Op->getOperand(2); in LowerSELECT()
10725 if (useSVEForFixedLengthVectorVT(Ty, !Subtarget->isNeonAvailable())) { in LowerSELECT()
10740 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0))) in LowerSELECT()
10758 CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get(); in LowerSELECT()
10767 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) { in LowerSELECT()
10776 if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) { in LowerSELECT()
10791 !Subtarget->isTargetMachO()) in LowerJumpTable()
10805 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex(); in LowerBR_JT()
10808 AFI->setJumpTableEntryInfo(JTI, 4, nullptr); in LowerBR_JT()
10810 // With aarch64-jump-table-hardening, we only expand the jump table dispatch in LowerBR_JT()
10813 "aarch64-jump-table-hardening")) { in LowerBR_JT()
10815 if (Subtarget->isTargetMachO()) { in LowerBR_JT()
10817 report_fatal_error("Unsupported code-model for hardened jump-table"); in LowerBR_JT()
10820 assert(Subtarget->isTargetELF() && in LowerBR_JT()
10823 report_fatal_error("Unsupported code-model for hardened jump-table"); in LowerBR_JT()
10846 // Skip over the jump-table BRINDs, where the destination is JumpTableDest32. in LowerBRIND()
10847 if (Dest->isMachineOpcode() && in LowerBRIND()
10848 Dest->getMachineOpcode() == AArch64::JumpTableDest32) in LowerBRIND()
10853 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled(MF.getFunction()); in LowerBRIND()
10874 if (Subtarget->isTargetMachO()) { in LowerConstantPool()
10888 const BlockAddress *BA = BAN->getBlockAddress(); in LowerBlockAddress()
10891 Subtarget->getPtrAuthBlockAddressDiscriminatorIfEnabled( in LowerBlockAddress()
10892 *BA->getFunction())) { in LowerBlockAddress()
10896 SDValue TargetBA = DAG.getTargetBlockAddress(BA, BAN->getValueType(0)); in LowerBlockAddress()
10911 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) { in LowerBlockAddress()
10926 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), in LowerDarwin_VASTART()
10929 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerDarwin_VASTART()
10941 if (Subtarget->isWindowsArm64EC()) { in LowerWin64_VASTART()
10943 // relative to x4. For a normal AArch64->AArch64 call, x4 == sp on entry, in LowerWin64_VASTART()
10948 if (FuncInfo->getVarArgsGPRSize() > 0) in LowerWin64_VASTART()
10949 StackOffset = -(uint64_t)FuncInfo->getVarArgsGPRSize(); in LowerWin64_VASTART()
10951 StackOffset = FuncInfo->getVarArgsStackOffset(); in LowerWin64_VASTART()
10955 FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0 in LowerWin64_VASTART()
10956 ? FuncInfo->getVarArgsGPRIndex() in LowerWin64_VASTART()
10957 : FuncInfo->getVarArgsStackIndex(), in LowerWin64_VASTART()
10960 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerWin64_VASTART()
10971 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; in LowerAAPCS_VASTART()
10978 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerAAPCS_VASTART()
10983 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT); in LowerAAPCS_VASTART()
10990 int GPRSize = FuncInfo->getVarArgsGPRSize(); in LowerAAPCS_VASTART()
10997 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT); in LowerAAPCS_VASTART()
11009 int FPRSize = FuncInfo->getVarArgsFPRSize(); in LowerAAPCS_VASTART()
11015 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT); in LowerAAPCS_VASTART()
11030 DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), in LowerAAPCS_VASTART()
11038 DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), in LowerAAPCS_VASTART()
11049 if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg())) in LowerVASTART()
11051 else if (Subtarget->isTargetDarwin()) in LowerVASTART()
11062 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; in LowerVACOPY()
11064 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) in LowerVACOPY()
11066 : Subtarget->isTargetILP32() ? 20 : 32; in LowerVACOPY()
11067 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); in LowerVACOPY()
11068 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); in LowerVACOPY()
11078 assert(Subtarget->isTargetDarwin() && in LowerVAARG()
11081 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerVAARG()
11087 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8; in LowerVAARG()
11101 DAG.getConstant(Align->value() - 1, DL, PtrVT)); in LowerVAARG()
11103 DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT)); in LowerVAARG()
11140 // Merge the rounded value with the chain output of the load. in LowerVAARG()
11157 while (Depth--) in LowerFRAMEADDR()
11161 if (Subtarget->isTargetILP32()) in LowerFRAMEADDR()
11187 const AArch64RegisterInfo *MRI = Subtarget->getRegisterInfo(); in getRegisterByName()
11188 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false); in getRegisterByName()
11189 if (!Subtarget->isXRegisterReserved(DwarfRegNum) && in getRegisterByName()
11190 !MRI->isReservedReg(MF, Reg)) in getRegisterByName()
11231 // live-in. in LowerRETURNADDR()
11236 // The XPACLRI instruction assembles to a hint-space instruction before in LowerRETURNADDR()
11237 // Armv8.3-A, so it can be safely used on any pre-Armv8.3-A in LowerRETURNADDR()
11238 // architecture. On Armv8.3-A and onwards, XPACI is available, so use in LowerRETURNADDR()
11241 if (Subtarget->hasPAuth()) { in LowerRETURNADDR()
11252 /// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which returns two
11271 // We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases, and in isFPImmLegal()
11272 // the 16-bit case when the target has full fp16 support. in isFPImmLegal()
11281 IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero(); in isFPImmLegal()
11283 IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero(); in isFPImmLegal()
11286 (Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) || in isFPImmLegal()
11302 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2)); in isFPImmLegal()
11311 //===----------------------------------------------------------------------===//
11313 //===----------------------------------------------------------------------===//
11319 if ((ST->hasNEON() && in getEstimate()
11323 (ST->hasSVE() && in getEstimate()
11328 // the initial estimate is 2^-8. Thus the number of extra steps to refine in getEstimate()
11336 : Log2_64_Ceil(DesiredBits) - Log2_64_Ceil(AccurateBits); in getEstimate()
11367 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt())) in getSqrtEstimate()
11376 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2) in getSqrtEstimate()
11377 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N) in getSqrtEstimate()
11378 for (int i = ExtraSteps; i > 0; --i) { in getSqrtEstimate()
11406 // Newton reciprocal iteration: E * (2 - X * E) in getRecipEstimate()
11407 // AArch64 reciprocal iteration instruction: (2 - M * N) in getRecipEstimate()
11408 for (int i = ExtraSteps; i > 0; --i) { in getRecipEstimate()
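// Editorial sketch of the two refinement recurrences named above, in scalar
// float form (hypothetical helpers, not part of this file). Each Newton step
// roughly doubles the number of accurate bits, which is why the step count
// above uses Log2_64_Ceil(DesiredBits) - Log2_64_Ceil(AccurateBits).
static float RefineRSqrt(float X, float E, int Steps) {
  for (int I = 0; I < Steps; ++I)
    E = E * 0.5f * (3.0f - X * E * E); // E * 0.5 * (3 - X * E^2)
  return E;
}
static float RefineRecip(float X, float E, int Steps) {
  for (int I = 0; I < Steps; ++I)
    E = E * (2.0f - X * E); // E * (2 - X * E)
  return E;
}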
11421 //===----------------------------------------------------------------------===//
11423 //===----------------------------------------------------------------------===//
11429 // r - A general register
11430 // w - An FP/SIMD register of some size in the range v0-v31
11431 // x - An FP/SIMD register of some size in the range v0-v15
11432 // I - Constant that can be used with an ADD instruction
11433 // J - Constant that can be used with a SUB instruction
11434 // K - Constant that can be used with a 32-bit logical instruction
11435 // L - Constant that can be used with a 64-bit logical instruction
11436 // M - Constant that can be used as a 32-bit MOV immediate
11437 // N - Constant that can be used as a 64-bit MOV immediate
11438 // Q - A memory reference with base register and no offset
11439 // S - A symbolic address
11440 // Y - Floating point constant zero
11441 // Z - Integer constant zero
11443 // Note that general register operands will be output using their 64-bit x
11445 // is prefixed by the %w modifier. Floating-point and SIMD register operands
11446 // will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
11456 if (!Subtarget->hasFPARMv8()) in LowerXConstraint()
11528 // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands
11561 // Lower @cc flag output via getSETCC.
11568 // The output variable should be a scalar integer. in LowerAsmOutputForConstraint()
11571 report_fatal_error("Flag output operand is of invalid type"); in LowerAsmOutputForConstraint()
11593 /// getConstraintType - Given a constraint letter, return the type of
11643 Type *type = CallOperandVal->getType(); in getSingleConstraintMatchWeight()
11652 if (type->isFloatingPointTy() || type->isVectorTy()) in getSingleConstraintMatchWeight()
11675 if (Subtarget->hasLS64() && VT.getSizeInBits() == 512) in getRegForInlineAsmConstraint()
11681 if (!Subtarget->hasFPARMv8()) in getRegForInlineAsmConstraint()
11702 // only take 128-bit registers so just use that regclass. in getRegForInlineAsmConstraint()
11704 if (!Subtarget->hasFPARMv8()) in getRegForInlineAsmConstraint()
11712 if (!Subtarget->hasFPARMv8()) in getRegForInlineAsmConstraint()
11748 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') { in getRegForInlineAsmConstraint()
11750 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo); in getRegForInlineAsmConstraint()
11752 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size. in getRegForInlineAsmConstraint()
11753 // By default we'll emit v0-v31 for this unless there's a modifier where in getRegForInlineAsmConstraint()
11766 if (Res.second && !Subtarget->hasFPARMv8() && in getRegForInlineAsmConstraint()
11777 if (Subtarget->hasLS64() && Ty->isIntegerTy(512)) in getAsmOperandValueType()
11783 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
11830 uint64_t CVal = C->getZExtValue(); in LowerAsmOperandForConstraint()
11835 // valid when negated, i.e. if [an add pattern] were to be output as a SUB in LowerAsmOperandForConstraint()
11836 // instruction [or vice versa], in other words -1 to -4095 with optional in LowerAsmOperandForConstraint()
11843 uint64_t NVal = -C->getSExtValue(); in LowerAsmOperandForConstraint()
11845 CVal = C->getSExtValue(); in LowerAsmOperandForConstraint()
11853 // distinguish between bit patterns that are valid 32-bit or 64-bit in LowerAsmOperandForConstraint()
11867 // also match 32 or 64-bit immediates that can be loaded either using a in LowerAsmOperandForConstraint()
11868 // *single* MOVZ or MOVN , such as 32-bit 0x12340000, 0x00001234, 0xffffedca in LowerAsmOperandForConstraint()
11869 // (M) or 64-bit 0x1234000000000000 (N) etc. in LowerAsmOperandForConstraint()
11913 // All assembler immediates are 64-bit integers. in LowerAsmOperandForConstraint()
11926 //===----------------------------------------------------------------------===//
11928 //===----------------------------------------------------------------------===//
11930 /// WidenVector - Given a value in the V64 register class, produce the
11943 /// getExtFactor - Determine the adjustment factor for the position when
11993 MaskSource = MaskSource->getOperand(0); in ReconstructShuffleWithRuntimeMask()
12009 !cast<ConstantSDNode>(MaskIdx)->getConstantIntValue()->equalsInt(i)) in ReconstructShuffleWithRuntimeMask()
12015 MaskSourceVec = MaskSource->getOperand(0); in ReconstructShuffleWithRuntimeMask()
12018 } else if (MaskSourceVec != MaskSource->getOperand(0)) { in ReconstructShuffleWithRuntimeMask()
12026 // of elements in the source, or we would have an out-of-bounds access. in ReconstructShuffleWithRuntimeMask()
12089 "various elements of other fixed-width vectors, provided " in ReconstructShuffle()
12102 Source->MinElt = std::min(Source->MinElt, EltNo); in ReconstructShuffle()
12103 Source->MaxElt = std::max(Source->MaxElt, EltNo); in ReconstructShuffle()
12117 Mask.push_back(-1); in ReconstructShuffle()
12120 // Set the Mask lanes adjusted for the size of the input and output in ReconstructShuffle()
12122 // output element, adjusted in their positions per input and output types. in ReconstructShuffle()
12201 // the original, but with a total width matching the BUILD_VECTOR output. in ReconstructShuffle()
12222 if (Src.MaxElt - Src.MinElt >= NumSrcElts) { in ReconstructShuffle()
12233 Src.WindowBase = -NumSrcElts; in ReconstructShuffle()
12259 Src.WindowBase = -Src.MinElt; in ReconstructShuffle()
12288 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1); in ReconstructShuffle()
12296 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue(); in ReconstructShuffle()
12310 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase; in ReconstructShuffle()
12311 ExtractBase += NumElts * (Src - Sources.begin()); in ReconstructShuffle()
12447 SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1); in isWideDUPMask()
12464 // NumEltsPerBlock with some values possibly replaced by undef-s. in isWideDUPMask()
12466 // Find first non-undef element in isWideDUPMask()
12469 "Shuffle with all-undefs must have been caught by previous cases, " in isWideDUPMask()
12477 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin(); in isWideDUPMask()
12482 size_t Elt0 = *FirstRealEltIter - FirstRealIndex; in isWideDUPMask()
12501 // Look for the first non-undef element. in isEXTMask()
12511 return Elt != ExpectedElt++ && Elt != -1; in isEXTMask()
12519 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. in isEXTMask()
12520 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. in isEXTMask()
12526 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>) in isEXTMask()
12527 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>) in isEXTMask()
12533 Imm -= NumElts; in isEXTMask()
12538 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
12557 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
12576 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
12598 int LastLHSMismatch = -1, LastRHSMismatch = -1; in isINSMask()
12601 if (M[i] == -1) { in isINSMask()
12618 if (NumLHSMatch == NumInputElements - 1) { in isINSMask()
12622 } else if (NumRHSMatch == NumInputElements - 1) { in isINSMask()
12656 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask(); in tryFormConcatFromShuffle()
12679 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
12680 /// the specified operations to build the shuffle. ID is the perfect-shuffle
12689 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1); in GeneratePerfectShuffle()
12690 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1); in GeneratePerfectShuffle()
12720 auto getPFIDLane = [](unsigned ID, int Elt) -> int { in GeneratePerfectShuffle()
12722 Elt = 3 - Elt; in GeneratePerfectShuffle()
12725 Elt--; in GeneratePerfectShuffle()
12727 return (ID % 9 == 8) ? -1 : ID % 9; in GeneratePerfectShuffle()
12744 if (MaskElt == -1) in GeneratePerfectShuffle()
12745 MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1; in GeneratePerfectShuffle()
12747 ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2); in GeneratePerfectShuffle()
12761 ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4); in GeneratePerfectShuffle()
12793 // vrev <4 x i16> -> REV32 in GeneratePerfectShuffle()
12798 // vrev <4 x i8> -> REV16 in GeneratePerfectShuffle()
12820 SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64); in GeneratePerfectShuffle()
12826 unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); in GeneratePerfectShuffle()
12862 // out of range values with 0s. We do need to make sure that any out-of-range in GenerateTBL()
12863 // values are really out-of-range for a v16i8 vector. in GenerateTBL()
12877 Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen; in GenerateTBL()
12951 // Can't handle cases where vector size is not 128-bit in constructDup()
12959 // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC' in constructDup()
12961 // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3 in constructDup()
12962 // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5 in constructDup()
12975 // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3 in constructDup()
12980 // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1 in constructDup()
12982 Lane -= Idx * VT.getVectorNumElements() / 2; in constructDup()
12985 // Widen the operand to 128-bit register with undef. in constructDup()
13006 if (M0 == -1 && M1 == -1) { in isWideTypeMask()
13007 NewMask.push_back(-1); in isWideTypeMask()
13011 if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) { in isWideTypeMask()
13016 if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) { in isWideTypeMask()
13046 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask(); in tryWidenMaskForShuffle()
13048 // If combining adjacent elements, like two i16's -> i32, two i32's -> i64 ... in tryWidenMaskForShuffle()
13075 SDValue Tbl1 = Op->getOperand(0); in tryToConvertShuffleOfTbl2ToTbl4()
13076 SDValue Tbl2 = Op->getOperand(1); in tryToConvertShuffleOfTbl2ToTbl4()
13082 if (Tbl1->getOpcode() != ISD::INTRINSIC_WO_CHAIN || in tryToConvertShuffleOfTbl2ToTbl4()
13083 Tbl1->getOperand(0) != Tbl2ID || in tryToConvertShuffleOfTbl2ToTbl4()
13084 Tbl2->getOpcode() != ISD::INTRINSIC_WO_CHAIN || in tryToConvertShuffleOfTbl2ToTbl4()
13085 Tbl2->getOperand(0) != Tbl2ID) in tryToConvertShuffleOfTbl2ToTbl4()
13088 if (Tbl1->getValueType(0) != MVT::v16i8 || in tryToConvertShuffleOfTbl2ToTbl4()
13089 Tbl2->getValueType(0) != MVT::v16i8) in tryToConvertShuffleOfTbl2ToTbl4()
13092 SDValue Mask1 = Tbl1->getOperand(3); in tryToConvertShuffleOfTbl2ToTbl4()
13093 SDValue Mask2 = Tbl2->getOperand(3); in tryToConvertShuffleOfTbl2ToTbl4()
13097 TBLMaskParts[I] = Mask1->getOperand(ShuffleMask[I]); in tryToConvertShuffleOfTbl2ToTbl4()
13100 dyn_cast<ConstantSDNode>(Mask2->getOperand(ShuffleMask[I] - 16)); in tryToConvertShuffleOfTbl2ToTbl4()
13103 TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32); in tryToConvertShuffleOfTbl2ToTbl4()
13112 {ID, Tbl1->getOperand(1), Tbl1->getOperand(2), in tryToConvertShuffleOfTbl2ToTbl4()
13113 Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask}); in tryToConvertShuffleOfTbl2ToTbl4()
13116 // Baseline legalization for ZERO_EXTEND_VECTOR_INREG will blend-in zeros,
13118 // so custom-lower it as ZIP1-with-zeros.
13129 // FIXME: support multi-step zipping? in LowerZERO_EXTEND_VECTOR_INREG()
13144 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) in LowerVECTOR_SHUFFLE()
13147 // Convert shuffles that are directly supported on NEON to target-specific in LowerVECTOR_SHUFFLE()
13151 ArrayRef<int> ShuffleMask = SVN->getMask(); in LowerVECTOR_SHUFFLE()
13163 if (SVN->isSplat()) { in LowerVECTOR_SHUFFLE()
13164 int Lane = SVN->getSplatIndex(); in LowerVECTOR_SHUFFLE()
13166 if (Lane == -1) in LowerVECTOR_SHUFFLE()
13172 // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non- in LowerVECTOR_SHUFFLE()
13226 } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) { in LowerVECTOR_SHUFFLE()
13273 SrcLane -= NumElts; in LowerVECTOR_SHUFFLE()
13292 // the PerfectShuffle-generated table to synthesize it from other shuffles. in LowerVECTOR_SHUFFLE()
13317 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) in LowerSPLAT_VECTOR()
13352 // Current lowering only supports the SVE-ACLE types. in LowerDUPQLane()
13361 if (CIdx && (CIdx->getZExtValue() <= 3)) { in LowerDUPQLane()
13362 SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64); in LowerDUPQLane()
13392 EVT VT = BVN->getValueType(0); in resolveBuildVector()
13396 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { in resolveBuildVector()
13412 // Try 64-bit splatted SIMD immediate.
13433 // Try 32-bit splatted SIMD immediate.
13486 // Try 16-bit splatted SIMD immediate.
13531 // Try 32-bit splatted SIMD immediate with shifted ones.
13562 // Try 8-bit splatted SIMD immediate.
13622 ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0)); in isAllConstantBuildVector()
13625 EVT VT = Bvec->getValueType(0); in isAllConstantBuildVector()
13628 if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt) in isAllConstantBuildVector()
13630 ConstVal = FirstElt->getZExtValue(); in isAllConstantBuildVector()
13664 // If we're compiling for a specific vector-length, we can check if the in isAllActivePredicate()
13684 // - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
13685 // - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
13688 EVT VT = N->getValueType(0); in tryLowerToSLI()
13698 SDValue FirstOp = N->getOperand(0); in tryLowerToSLI()
13700 SDValue SecondOp = N->getOperand(1); in tryLowerToSLI()
13741 C2 = C2node->getZExtValue(); in tryLowerToSLI()
13756 C1AsAPInt = ~(C1nodeImm->getAPIntValue() << C1nodeShift->getAPIntValue()); in tryLowerToSLI()
13779 LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n"); in tryLowerToSLI()
13780 LLVM_DEBUG(N->dump(&DAG)); in tryLowerToSLI()
13782 LLVM_DEBUG(ResultSLI->dump(&DAG)); in tryLowerToSLI()
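// Editorial sketch of the two mask conditions quoted near the top of this
// function, restricted to the element width (hypothetical helpers, not part
// of this file; assumes <cstdint>, Bits <= 64 and 0 < C2 < Bits): the SLI
// form keeps only the low C2 bits of the first operand, the SRI form keeps
// only the high C2 bits.
static uint64_t ElemOnes(unsigned Bits) {
  return Bits == 64 ? ~0ULL : (1ULL << Bits) - 1;
}
static bool IsSLIMaskC1(uint64_t C1, unsigned Bits, unsigned C2) {
  return C1 == (~(ElemOnes(Bits) << C2) & ElemOnes(Bits));
}
static bool IsSRIMaskC1(uint64_t C1, unsigned Bits, unsigned C2) {
  return C1 == (~(ElemOnes(Bits) >> C2) & ElemOnes(Bits));
}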
13791 !Subtarget->isNeonAvailable())) in LowerVectorOR()
13831 // We can always fall back to a non-immediate OR. in LowerVectorOR()
13848 for (SDValue Lane : Op->ops()) { in NormalizeBuildVector()
13850 // operands already. Otherwise, if Op is a floating-point splat in NormalizeBuildVector()
13855 CstLane->getZExtValue()); in NormalizeBuildVector()
13857 } else if (Lane.getNode()->isUndef()) { in NormalizeBuildVector()
13909 // FNegate each sub-element of the constant in ConstantBuildVector()
13934 (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16)))) in ConstantBuildVector()
13945 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) { in LowerBUILD_VECTOR()
13946 if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) { in LowerBUILD_VECTOR()
13949 SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT); in LowerBUILD_VECTOR()
13950 SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second); in LowerBUILD_VECTOR()
13961 // Though this might return a non-BUILD_VECTOR (e.g. CONCAT_VECTORS); if so, in LowerBUILD_VECTOR()
13971 if (BVN->isConstant()) { in LowerBUILD_VECTOR()
13972 if (ConstantSDNode *Const = BVN->getConstantSplatNode()) { in LowerBUILD_VECTOR()
13975 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue()); in LowerBUILD_VECTOR()
13979 if (ConstantFPSDNode *Const = BVN->getConstantFPSplatNode()) in LowerBUILD_VECTOR()
13980 if (Const->isZero() && !Const->isNegative()) in LowerBUILD_VECTOR()
13991 // 3) if only one constant value is used (w/ some non-constant lanes), in LowerBUILD_VECTOR()
13993 // in the non-constant lanes. in LowerBUILD_VECTOR()
13995 // select the values we'll be overwriting for the non-constant in LowerBUILD_VECTOR()
14053 // ------------------------------------------------------------------ in LowerBUILD_VECTOR()
14068 // SCALAR_TO_VECTOR, except for when we have a single-element constant vector in LowerBUILD_VECTOR()
14085 if (!isa<ConstantSDNode>(N->getOperand(1))) { in LowerBUILD_VECTOR()
14090 SDValue N0 = N->getOperand(0); in LowerBUILD_VECTOR()
14108 uint64_t Val = N->getConstantOperandVal(1); in LowerBUILD_VECTOR()
14113 if (Val - 1 == 2 * i) { in LowerBUILD_VECTOR()
14138 // Use DUP for non-constant splats. For f32 constant splats, reduce to in LowerBUILD_VECTOR()
14145 dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n"); in LowerBUILD_VECTOR()
14155 dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, " in LowerBUILD_VECTOR()
14168 EltTy == MVT::f64) && "Unsupported floating-point vector type"); in LowerBUILD_VECTOR()
14185 // If we need to insert a small number of different non-constant elements and in LowerBUILD_VECTOR()
14191 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) && in LowerBUILD_VECTOR()
14195 // start by splatting that value, then replace the non-constant lanes. This in LowerBUILD_VECTOR()
14204 ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize); in LowerBUILD_VECTOR()
14213 // Now insert the non-constant lanes. in LowerBUILD_VECTOR()
14340 // worse. For a vector with one or two non-undef values, that's in LowerBUILD_VECTOR()
14360 // vector element types. After type-legalization the inserted value is in LowerBUILD_VECTOR()
14389 !Subtarget->isNeonAvailable())) in LowerCONCAT_VECTORS()
14397 unsigned NumOperands = Op->getNumOperands(); in LowerCONCAT_VECTORS()
14405 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end()); in LowerCONCAT_VECTORS()
14428 !Subtarget->isNeonAvailable())) in LowerINSERT_VECTOR_ELT()
14449 // Check for non-constant or out of range lane. in LowerINSERT_VECTOR_ELT()
14451 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements()) in LowerINSERT_VECTOR_ELT()
14476 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) in LowerEXTRACT_VECTOR_ELT()
14479 // Check for non-constant or out of range lane. in LowerEXTRACT_VECTOR_ELT()
14481 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements()) in LowerEXTRACT_VECTOR_ELT()
14529 // If this is extracting the upper 64-bits of a 128-bit vector, we match in LowerEXTRACT_SUBVECTOR()
14531 if (Idx * InVT.getScalarSizeInBits() == 64 && Subtarget->isNeonAvailable()) in LowerEXTRACT_SUBVECTOR()
14536 useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) { in LowerEXTRACT_SUBVECTOR()
14595 DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL)); in LowerINSERT_SUBVECTOR()
14663 !isa<ConstantSDNode>(Op->getOperand(0))) in isPow2Splat()
14666 SplatVal = Op->getConstantOperandVal(0); in isPow2Splat()
14675 if (isPowerOf2_64(-SplatVal)) { in isPow2Splat()
14676 SplatVal = -SplatVal; in isPow2Splat()
14700 DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0), in LowerDIV()
14711 // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit in LowerDIV()
14734 if (!Subtarget->isNeonAvailable()) in shouldExpandBuildVectorWithShuffles()
14741 if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) in isShuffleMaskLegal()
14778 /// getVShiftImm - Check if this is a valid build_vector for the immediate
14789 if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, in getVShiftImm()
14797 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
14806 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits); in isVShiftLImm()
14809 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
14838 !Subtarget->isNeonAvailable())) in LowerTRUNCATE()
14851 if (Shift->getOpcode() != ISD::SRL) in canLowerSRLToRoundingShiftForVT()
14858 dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Shift->getOperand(1))); in canLowerSRLToRoundingShiftForVT()
14862 ShiftValue = ShiftOp1->getZExtValue(); in canLowerSRLToRoundingShiftForVT()
14866 SDValue Add = Shift->getOperand(0); in canLowerSRLToRoundingShiftForVT()
14867 if (Add->getOpcode() != ISD::ADD || !Add->hasOneUse()) in canLowerSRLToRoundingShiftForVT()
14873 uint64_t ExtraBits = VT.getScalarSizeInBits() - ResVT.getScalarSizeInBits(); in canLowerSRLToRoundingShiftForVT()
14874 if (ShiftValue > ExtraBits && !Add->getFlags().hasNoUnsignedWrap()) in canLowerSRLToRoundingShiftForVT()
14878 dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(Add->getOperand(1))); in canLowerSRLToRoundingShiftForVT()
14881 uint64_t AddValue = AddOp1->getZExtValue(); in canLowerSRLToRoundingShiftForVT()
14882 if (AddValue != 1ULL << (ShiftValue - 1)) in canLowerSRLToRoundingShiftForVT()
14885 RShOperand = Add->getOperand(0); in canLowerSRLToRoundingShiftForVT()
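// Editorial sketch of the pattern this helper recognizes (hypothetical
// helper, not part of this file; assumes <cstdint>, 0 < S < 64, and that the
// add does not overflow, matching the nuw check above): adding half of 2^S
// before the unsigned shift turns plain truncation into round-to-nearest,
// which is what a rounding shift right computes.
static uint64_t RoundingShiftRight(uint64_t X, unsigned S) {
  return (X + (1ULL << (S - 1))) >> S;
}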
14902 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) in LowerVectorSRA_SRL_SHL()
14915 (Subtarget->hasSVE2() || in LowerVectorSRA_SRL_SHL()
14916 (Subtarget->hasSME() && Subtarget->isStreaming()))) { in LowerVectorSRA_SRL_SHL()
14926 useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) { in LowerVectorSRA_SRL_SHL()
14937 DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags()); in LowerVectorSRA_SRL_SHL()
14971 bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef, in EmitVectorComparison()
15076 !Subtarget->isNeonAvailable())) in LowerVSETCC()
15079 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); in LowerVSETCC()
15093 // Lower isnan(x) | isnan(never-nan) to x != x. in LowerVSETCC()
15094 // Lower !isnan(x) & !isnan(never-nan) to x == x. in LowerVSETCC()
15137 bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs(); in LowerVSETCC()
15188 "Expected power-of-2 length vector"); in getVectorBitwiseReduce()
15274 bool OverrideNEON = !Subtarget->isNeonAvailable() || in LowerVECREDUCE()
15283 SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) { in LowerVECREDUCE()
15350 // LSE has an atomic load-clear instruction, but not a load-and. in LowerATOMIC_LOAD_AND()
15356 RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS); in LowerATOMIC_LOAD_AND()
15357 return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(), in LowerATOMIC_LOAD_AND()
15359 AN->getMemOperand()); in LowerATOMIC_LOAD_AND()
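// Editorial restatement of the rewrite above (hypothetical helper, not part
// of this file; assumes <cstdint>): AND-ing with a mask is the same as
// clearing the complement of that mask, so the operation can be expressed as
// a load-clear on the inverted operand.
static uint64_t AndViaClear(uint64_t X, uint64_t Mask) {
  uint64_t ClearBits = ~Mask;   // matches the XOR with -1 above
  return X & ~ClearBits;        // what a load-clear applies; equals X & Mask
}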
15372 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue(); in LowerWindowsDYNAMIC_STACKALLOC()
15373 EVT VT = Node->getValueType(0); in LowerWindowsDYNAMIC_STACKALLOC()
15376 "no-stack-arg-probe")) { in LowerWindowsDYNAMIC_STACKALLOC()
15382 DAG.getConstant(-(uint64_t)Align->value(), dl, VT)); in LowerWindowsDYNAMIC_STACKALLOC()
15391 SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(), in LowerWindowsDYNAMIC_STACKALLOC()
15394 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); in LowerWindowsDYNAMIC_STACKALLOC()
15395 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask(); in LowerWindowsDYNAMIC_STACKALLOC()
15396 if (Subtarget->hasCustomCallingConv()) in LowerWindowsDYNAMIC_STACKALLOC()
15397 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask); in LowerWindowsDYNAMIC_STACKALLOC()
15406 // To match the actual intent better, we should read the output from X15 here in LowerWindowsDYNAMIC_STACKALLOC()
15408 // from X15 here doesn't work at -O0, since it thinks that X15 is undefined in LowerWindowsDYNAMIC_STACKALLOC()
15419 DAG.getConstant(-(uint64_t)Align->value(), dl, VT)); in LowerWindowsDYNAMIC_STACKALLOC()
15437 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue(); in LowerInlineDYNAMIC_STACKALLOC()
15439 EVT VT = Node->getValueType(0); in LowerInlineDYNAMIC_STACKALLOC()
15447 DAG.getConstant(-(uint64_t)Align->value(), dl, VT)); in LowerInlineDYNAMIC_STACKALLOC()
15460 if (Subtarget->isTargetWindows()) in LowerDYNAMIC_STACKALLOC()
15470 if (Subtarget->hasSVE2()) in LowerAVG()
15495 const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType()); in setInfoSVEStN()
15500 assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) && in setInfoSVEStN()
15504 Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(), in setInfoSVEStN()
15506 Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1); in setInfoSVEStN()
15513 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
15536 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); in getTgtMemIntrinsic()
15537 Info.ptrVal = I.getArgOperand(I.arg_size() - 1); in getTgtMemIntrinsic()
15554 unsigned NumElts = StructTy->getNumElements(); in getTgtMemIntrinsic()
15555 Type *VecTy = StructTy->getElementType(0); in getTgtMemIntrinsic()
15557 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts); in getTgtMemIntrinsic()
15558 Info.ptrVal = I.getArgOperand(I.arg_size() - 1); in getTgtMemIntrinsic()
15574 Type *ArgTy = Arg->getType(); in getTgtMemIntrinsic()
15575 if (!ArgTy->isVectorTy()) in getTgtMemIntrinsic()
15579 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); in getTgtMemIntrinsic()
15580 Info.ptrVal = I.getArgOperand(I.arg_size() - 1); in getTgtMemIntrinsic()
15593 Type *VecTy = I.getArgOperand(0)->getType(); in getTgtMemIntrinsic()
15597 Type *ArgTy = Arg->getType(); in getTgtMemIntrinsic()
15598 if (!ArgTy->isVectorTy()) in getTgtMemIntrinsic()
15603 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts); in getTgtMemIntrinsic()
15604 Info.ptrVal = I.getArgOperand(I.arg_size() - 1); in getTgtMemIntrinsic()
15652 Type *ElTy = cast<VectorType>(I.getType())->getElementType(); in getTgtMemIntrinsic()
15663 cast<VectorType>(I.getArgOperand(0)->getType())->getElementType(); in getTgtMemIntrinsic()
15665 Info.memVT = MVT::getVT(I.getOperand(0)->getType()); in getTgtMemIntrinsic()
15676 Info.memVT = MVT::getVT(Val->getType()); in getTgtMemIntrinsic()
15707 const SDValue &Base = Mem->getBasePtr(); in shouldReduceLoadWidth()
15712 // It's unknown whether a scalable vector has a power-of-2 bitwidth. in shouldReduceLoadWidth()
15713 if (Mem->getMemoryVT().isScalableVector()) in shouldReduceLoadWidth()
15718 uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8; in shouldReduceLoadWidth()
15729 if ((VT == MVT::i64 || VT == MVT::i32) && Extend->use_size()) { in shouldRemoveRedundantExtend()
15742 // Truncations from a 64-bit GPR to a 32-bit GPR are free.
15744 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) in isTruncateFree()
15746 uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedValue(); in isTruncateFree()
15747 uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedValue(); in isTruncateFree()
15762 if (I->getOpcode() != Instruction::FMul) in isProfitableToHoist()
15765 if (!I->hasOneUse()) in isProfitableToHoist()
15768 Instruction *User = I->user_back(); in isProfitableToHoist()
15770 if (!(User->getOpcode() == Instruction::FSub || in isProfitableToHoist()
15771 User->getOpcode() == Instruction::FAdd)) in isProfitableToHoist()
15775 const Function *F = I->getFunction(); in isProfitableToHoist()
15776 const DataLayout &DL = F->getDataLayout(); in isProfitableToHoist()
15777 Type *Ty = User->getOperand(0)->getType(); in isProfitableToHoist()
15785 // All 32-bit GPR operations implicitly zero the high-half of the corresponding
15786 // 64-bit GPR.
15788 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) in isZExtFree()
15790 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); in isZExtFree()
15791 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); in isZExtFree()
15811 // 8-, 16-, and 32-bit integer loads all implicitly zero-extend. in isZExtFree()
15822 if (Ext->getType()->isVectorTy()) in isExtFreeImpl()
15825 for (const Use &U : Ext->uses()) { in isExtFreeImpl()
15833 switch (Instr->getOpcode()) { in isExtFreeImpl()
15835 if (!isa<ConstantInt>(Instr->getOperand(1))) in isExtFreeImpl()
15840 auto &DL = Ext->getDataLayout(); in isExtFreeImpl()
15841 std::advance(GTI, U.getOperandNo()-1); in isExtFreeImpl()
15844 // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0. in isExtFreeImpl()
15846 // log2(sizeof(IdxTy)) - log2(8). in isExtFreeImpl()
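// Editorial worked example of the formula above (not part of the source):
// for an index over i64 elements the store size is 64 bits, so the shift
// amount is countr_zero(64) - 3 == 6 - 3 == 3, i.e. a scale of 8 bytes; for
// i8 elements it is countr_zero(8) - 3 == 0, matching the comment's
// "scaling factor of 1, thus a shift amount of 0".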
15847 if (IdxTy->isScalableTy()) in isExtFreeImpl()
15850 llvm::countr_zero(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) - in isExtFreeImpl()
15861 if (Instr->getType() == Ext->getOperand(0)->getType()) in isExtFreeImpl()
15876 return all_equal(Shuf->getShuffleMask()); in isSplatShuffle()
15885 auto *FullTy = FullV->getType(); in areExtractShuffleVectors()
15886 auto *HalfTy = HalfV->getType(); in areExtractShuffleVectors()
15887 return FullTy->getPrimitiveSizeInBits().getFixedValue() == in areExtractShuffleVectors()
15888 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue(); in areExtractShuffleVectors()
15892 auto *FullVT = cast<FixedVectorType>(FullV->getType()); in areExtractShuffleVectors()
15893 auto *HalfVT = cast<FixedVectorType>(HalfV->getType()); in areExtractShuffleVectors()
15894 return FullVT->getNumElements() == 2 * HalfVT->getNumElements(); in areExtractShuffleVectors()
15920 int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2; in areExtractShuffleVectors()
15940 return Ext->getType()->getScalarSizeInBits() == in areExtractExts()
15941 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits(); in areExtractExts()
15959 ElementIndex->getValue() == 1 && in isOperandOfVmullHighP64()
15960 isa<FixedVectorType>(VectorOperand->getType()) && in isOperandOfVmullHighP64()
15961 cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2; in isOperandOfVmullHighP64()
15972 if (!GEP || GEP->getNumOperands() != 2) in shouldSinkVectorOfPtrs()
15975 Value *Base = GEP->getOperand(0); in shouldSinkVectorOfPtrs()
15976 Value *Offsets = GEP->getOperand(1); in shouldSinkVectorOfPtrs()
15979 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy()) in shouldSinkVectorOfPtrs()
15982 // Sink extends that would allow us to use 32-bit offset vectors. in shouldSinkVectorOfPtrs()
15985 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 && in shouldSinkVectorOfPtrs()
15986 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32) in shouldSinkVectorOfPtrs()
15987 Ops.push_back(&GEP->getOperandUse(1)); in shouldSinkVectorOfPtrs()
16002 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0)); in shouldSinkVScale()
16007 Value *ZExtOp = cast<Instruction>(Op)->getOperand(0); in shouldSinkVScale()
16008 Ops.push_back(&cast<Instruction>(ZExtOp)->getOperandUse(0)); in shouldSinkVScale()
16009 Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0)); in shouldSinkVScale()
16021 switch (II->getIntrinsicID()) { in shouldSinkOperands()
16024 if (areExtractShuffleVectors(II->getOperand(0), II->getOperand(1), in shouldSinkOperands()
16026 Ops.push_back(&II->getOperandUse(0)); in shouldSinkOperands()
16027 Ops.push_back(&II->getOperandUse(1)); in shouldSinkOperands()
16033 if (isa<VectorType>(I->getType()) && in shouldSinkOperands()
16034 cast<VectorType>(I->getType())->getElementType()->isHalfTy() && in shouldSinkOperands()
16035 !Subtarget->hasFullFP16()) in shouldSinkOperands()
16042 if (isSplatShuffle(II->getOperand(0))) in shouldSinkOperands()
16043 Ops.push_back(&II->getOperandUse(0)); in shouldSinkOperands()
16044 if (isSplatShuffle(II->getOperand(1))) in shouldSinkOperands()
16045 Ops.push_back(&II->getOperandUse(1)); in shouldSinkOperands()
16052 if (isSplatShuffle(II->getOperand(1))) in shouldSinkOperands()
16053 Ops.push_back(&II->getOperandUse(1)); in shouldSinkOperands()
16054 if (isSplatShuffle(II->getOperand(2))) in shouldSinkOperands()
16055 Ops.push_back(&II->getOperandUse(2)); in shouldSinkOperands()
16059 if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0))) in shouldSinkOperands()
16060 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue) in shouldSinkOperands()
16061 Ops.push_back(&II->getOperandUse(0)); in shouldSinkOperands()
16067 auto *Idx = dyn_cast<Instruction>(II->getOperand(1)); in shouldSinkOperands()
16068 if (!Idx || Idx->getOpcode() != Instruction::Add) in shouldSinkOperands()
16070 Ops.push_back(&II->getOperandUse(1)); in shouldSinkOperands()
16097 auto *Idx = dyn_cast<Instruction>(II->getOperand(3)); in shouldSinkOperands()
16098 if (!Idx || Idx->getOpcode() != Instruction::Add) in shouldSinkOperands()
16100 Ops.push_back(&II->getOperandUse(3)); in shouldSinkOperands()
16104 if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1))) in shouldSinkOperands()
16106 Ops.push_back(&II->getOperandUse(0)); in shouldSinkOperands()
16107 Ops.push_back(&II->getOperandUse(1)); in shouldSinkOperands()
16110 if (!areOperandsOfVmullHighP64(II->getArgOperand(0), in shouldSinkOperands()
16111 II->getArgOperand(1))) in shouldSinkOperands()
16113 Ops.push_back(&II->getArgOperandUse(0)); in shouldSinkOperands()
16114 Ops.push_back(&II->getArgOperandUse(1)); in shouldSinkOperands()
16117 if (!shouldSinkVectorOfPtrs(II->getArgOperand(0), Ops)) in shouldSinkOperands()
16119 Ops.push_back(&II->getArgOperandUse(0)); in shouldSinkOperands()
16122 if (!shouldSinkVectorOfPtrs(II->getArgOperand(1), Ops)) in shouldSinkOperands()
16124 Ops.push_back(&II->getArgOperandUse(1)); in shouldSinkOperands()
16132 switch (I->getOpcode()) { in shouldSinkOperands()
16136 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) { in shouldSinkOperands()
16137 if (shouldSinkVScale(I->getOperand(Op), Ops)) { in shouldSinkOperands()
16138 Ops.push_back(&I->getOperandUse(Op)); in shouldSinkOperands()
16147 if (!I->getType()->isVectorTy()) in shouldSinkOperands()
16150 switch (I->getOpcode()) { in shouldSinkOperands()
16153 if (!areExtractExts(I->getOperand(0), I->getOperand(1))) in shouldSinkOperands()
16158 auto Ext1 = cast<Instruction>(I->getOperand(0)); in shouldSinkOperands()
16159 auto Ext2 = cast<Instruction>(I->getOperand(1)); in shouldSinkOperands()
16160 if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) { in shouldSinkOperands()
16161 Ops.push_back(&Ext1->getOperandUse(0)); in shouldSinkOperands()
16162 Ops.push_back(&Ext2->getOperandUse(0)); in shouldSinkOperands()
16165 Ops.push_back(&I->getOperandUse(0)); in shouldSinkOperands()
16166 Ops.push_back(&I->getOperandUse(1)); in shouldSinkOperands()
16171 // Pattern: Or(And(MaskValue, A), And(Not(MaskValue), B)) -> in shouldSinkOperands()
16172 // bitselect(MaskValue, A, B) where Not(MaskValue) = Xor(MaskValue, -1) in shouldSinkOperands()
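// Editorial scalar sketch of the pattern named above (hypothetical helper,
// not part of this file; assumes <cstdint>): with M as a bitmask,
// (M & A) | (~M & B) takes the bits of A where M is set and the bits of B
// elsewhere, which is what a vector bitselect (BSL) computes.
static uint64_t BitSelect(uint64_t M, uint64_t A, uint64_t B) {
  return (M & A) | (~M & B);
}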
16173 if (Subtarget->hasNEON()) { in shouldSinkOperands()
16182 Instruction *MainAnd = I->getOperand(0) == OtherAnd in shouldSinkOperands()
16183 ? cast<Instruction>(I->getOperand(1)) in shouldSinkOperands()
16184 : cast<Instruction>(I->getOperand(0)); in shouldSinkOperands()
16187 if (I->getParent() != MainAnd->getParent() || in shouldSinkOperands()
16188 I->getParent() != OtherAnd->getParent()) in shouldSinkOperands()
16191 // Non-mask operands of both Ands should also be in the same basic block in shouldSinkOperands()
16192 if (I->getParent() != IA->getParent() || in shouldSinkOperands()
16193 I->getParent() != IB->getParent()) in shouldSinkOperands()
16196 Ops.push_back(&MainAnd->getOperandUse(MainAnd->getOperand(0) == IA ? 1 : 0)); in shouldSinkOperands()
16197 Ops.push_back(&I->getOperandUse(0)); in shouldSinkOperands()
16198 Ops.push_back(&I->getOperandUse(1)); in shouldSinkOperands()
16209 for (auto &Op : I->operands()) { in shouldSinkOperands()
16211 if (any_of(Ops, [&](Use *U) { return U->get() == Op; })) in shouldSinkOperands()
16228 match(Shuffle->getOperand(0), m_ZExtOrSExt(m_Value()))) { in shouldSinkOperands()
16229 Ops.push_back(&Shuffle->getOperandUse(0)); in shouldSinkOperands()
16231 if (match(Shuffle->getOperand(0), m_SExt(m_Value()))) in shouldSinkOperands()
16241 Value *ShuffleOperand = Shuffle->getOperand(0); in shouldSinkOperands()
16246 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1)); in shouldSinkOperands()
16251 dyn_cast<ConstantInt>(Insert->getOperand(2)); in shouldSinkOperands()
16253 if (!ElementConstant || !ElementConstant->isZero()) in shouldSinkOperands()
16256 unsigned Opcode = OperandInstr->getOpcode(); in shouldSinkOperands()
16264 unsigned Bitwidth = I->getType()->getScalarSizeInBits(); in shouldSinkOperands()
16266 const DataLayout &DL = I->getDataLayout(); in shouldSinkOperands()
16272 Ops.push_back(&Shuffle->getOperandUse(0)); in shouldSinkOperands()
16302 for (unsigned I = IsLittleEndian ? 0 : Factor - 1; I < MaskLen; I += Factor) in createTblShuffleMask()
16312 auto *SrcTy = cast<FixedVectorType>(Op->getType()); in createTblShuffleForZExt()
16313 unsigned NumElts = SrcTy->getNumElements(); in createTblShuffleForZExt()
16314 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth(); in createTblShuffleForZExt()
16315 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth(); in createTblShuffleForZExt()
16333 auto *SrcTy = cast<FixedVectorType>(Op->getType()); in createTblShuffleForSExt()
16334 auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth(); in createTblShuffleForSExt()
16335 auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth(); in createTblShuffleForSExt()
16338 if (!createTblShuffleMask(SrcWidth, DstWidth, SrcTy->getNumElements(), in createTblShuffleForSExt()
16351 int NumElements = cast<FixedVectorType>(TI->getType())->getNumElements(); in createTblForTrunc()
16352 auto *SrcTy = cast<FixedVectorType>(TI->getOperand(0)->getType()); in createTblForTrunc()
16353 auto *DstTy = cast<FixedVectorType>(TI->getType()); in createTblForTrunc()
16354 assert(SrcTy->getElementType()->isIntegerTy() && in createTblForTrunc()
16355 "Non-integer type source vector element is not supported"); in createTblForTrunc()
16356 assert(DstTy->getElementType()->isIntegerTy(8) && in createTblForTrunc()
16359 cast<IntegerType>(SrcTy->getElementType())->getBitWidth(); in createTblForTrunc()
16361 cast<IntegerType>(DstTy->getElementType())->getBitWidth(); in createTblForTrunc()
16373 // 0,8,16,..Y*8th bytes for the little-endian format in createTblForTrunc()
16379 : Itr * TruncFactor + (TruncFactor - 1))); in createTblForTrunc()
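// Editorial worked example of the index formula above (hypothetical helper,
// not part of this file; TruncFactor is assumed to be the ratio of source to
// destination element sizes): truncating i32 lanes to i8 (TruncFactor == 4)
// picks bytes 0,4,8,12,... on little-endian and 3,7,11,15,... on big-endian;
// i64 lanes (TruncFactor == 8) give the 0,8,16,... pattern mentioned above.
static unsigned TblByteIndex(unsigned Lane, unsigned TruncFactor,
                             bool IsLittleEndian) {
  return IsLittleEndian ? Lane * TruncFactor
                        : Lane * TruncFactor + (TruncFactor - 1);
}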
16402 Builder.CreateShuffleVector(TI->getOperand(0), ShuffleLanes), VecTy)); in createTblForTrunc()
16405 auto *F = Intrinsic::getDeclaration(TI->getModule(), in createTblForTrunc()
16435 auto *F = Intrinsic::getDeclaration(TI->getModule(), TblID, VecTy); in createTblForTrunc()
16463 TI->replaceAllUsesWith(FinalResult); in createTblForTrunc()
16464 TI->eraseFromParent(); in createTblForTrunc()
16471 if (!EnableExtToTBL || Subtarget->useSVEForFixedLengthVectors()) in optimizeExtendOrTruncateConversion()
16478 Function *F = I->getParent()->getParent(); in optimizeExtendOrTruncateConversion()
16479 if (!L || L->getHeader() != I->getParent() || F->hasMinSize() || in optimizeExtendOrTruncateConversion()
16480 F->hasOptSize()) in optimizeExtendOrTruncateConversion()
16483 auto *SrcTy = dyn_cast<FixedVectorType>(I->getOperand(0)->getType()); in optimizeExtendOrTruncateConversion()
16484 auto *DstTy = dyn_cast<FixedVectorType>(I->getType()); in optimizeExtendOrTruncateConversion()
16492 if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) { in optimizeExtendOrTruncateConversion()
16493 auto DstWidth = DstTy->getElementType()->getScalarSizeInBits(); in optimizeExtendOrTruncateConversion()
16499 // If the ZExt can be lowered to a single ZExt to the next power-of-2 and in optimizeExtendOrTruncateConversion()
16501 auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits(); in optimizeExtendOrTruncateConversion()
16502 if (TTI.getCastInstrCost(I->getOpcode(), DstTy, TruncDstType, in optimizeExtendOrTruncateConversion()
16505 if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits()) in optimizeExtendOrTruncateConversion()
16512 Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()), in optimizeExtendOrTruncateConversion()
16513 DstTy, Subtarget->isLittleEndian()); in optimizeExtendOrTruncateConversion()
16516 ZExt->replaceAllUsesWith(Result); in optimizeExtendOrTruncateConversion()
16517 ZExt->eraseFromParent(); in optimizeExtendOrTruncateConversion()
16522 if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) && in optimizeExtendOrTruncateConversion()
16523 DstTy->getElementType()->isFloatTy()) { in optimizeExtendOrTruncateConversion()
16526 Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy), in optimizeExtendOrTruncateConversion()
16527 FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian()); in optimizeExtendOrTruncateConversion()
16530 I->replaceAllUsesWith(UI); in optimizeExtendOrTruncateConversion()
16531 I->eraseFromParent(); in optimizeExtendOrTruncateConversion()
16536 if (SIToFP && SrcTy->getElementType()->isIntegerTy(8) && in optimizeExtendOrTruncateConversion()
16537 DstTy->getElementType()->isFloatTy()) { in optimizeExtendOrTruncateConversion()
16539 auto *Shuffle = createTblShuffleForSExt(Builder, I->getOperand(0), in optimizeExtendOrTruncateConversion()
16541 Subtarget->isLittleEndian()); in optimizeExtendOrTruncateConversion()
16546 I->replaceAllUsesWith(SI); in optimizeExtendOrTruncateConversion()
16547 I->eraseFromParent(); in optimizeExtendOrTruncateConversion()
16555 (SrcTy->getNumElements() == 8 || SrcTy->getNumElements() == 16) && in optimizeExtendOrTruncateConversion()
16556 SrcTy->getElementType()->isFloatTy() && in optimizeExtendOrTruncateConversion()
16557 DstTy->getElementType()->isIntegerTy(8)) { in optimizeExtendOrTruncateConversion()
16559 auto *WideConv = Builder.CreateFPToUI(FPToUI->getOperand(0), in optimizeExtendOrTruncateConversion()
16562 I->replaceAllUsesWith(TruncI); in optimizeExtendOrTruncateConversion()
16563 I->eraseFromParent(); in optimizeExtendOrTruncateConversion()
16564 createTblForTrunc(cast<TruncInst>(TruncI), Subtarget->isLittleEndian()); in optimizeExtendOrTruncateConversion()
16570 // per lane of the input that is represented using 1,2,3 or 4 128-bit table in optimizeExtendOrTruncateConversion()
16573 if (TI && DstTy->getElementType()->isIntegerTy(8) && in optimizeExtendOrTruncateConversion()
16574 ((SrcTy->getElementType()->isIntegerTy(32) || in optimizeExtendOrTruncateConversion()
16575 SrcTy->getElementType()->isIntegerTy(64)) && in optimizeExtendOrTruncateConversion()
16576 (SrcTy->getNumElements() == 16 || SrcTy->getNumElements() == 8))) { in optimizeExtendOrTruncateConversion()
16577 createTblForTrunc(TI, Subtarget->isLittleEndian()); in optimizeExtendOrTruncateConversion()
16600 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType()); in getNumInterleavedAccesses()
16601 unsigned MinElts = VecTy->getElementCount().getKnownMinValue(); in getNumInterleavedAccesses()
16603 VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u); in getNumInterleavedAccesses()
16609 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor && in getTargetMMOFlags()
16617 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType()); in isLegalInterleavedAccessType()
16618 auto EC = VecTy->getElementCount(); in isLegalInterleavedAccessType()
16623 if (isa<FixedVectorType>(VecTy) && !Subtarget->isNeonAvailable() && in isLegalInterleavedAccessType()
16624 (!Subtarget->useSVEForFixedLengthVectors() || in isLegalInterleavedAccessType()
16629 !Subtarget->isSVEorStreamingSVEAvailable()) in isLegalInterleavedAccessType()
16646 if (Subtarget->useSVEForFixedLengthVectors()) { in isLegalInterleavedAccessType()
16648 std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u); in isLegalInterleavedAccessType()
16651 (!Subtarget->isNeonAvailable() || VecSize > 128))) { in isLegalInterleavedAccessType()
16659 return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0); in isLegalInterleavedAccessType()
16663 if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext())) in getSVEContainerIRType()
16664 return ScalableVectorType::get(VTy->getElementType(), 2); in getSVEContainerIRType()
16666 if (VTy->getElementType() == Type::getFloatTy(VTy->getContext())) in getSVEContainerIRType()
16667 return ScalableVectorType::get(VTy->getElementType(), 4); in getSVEContainerIRType()
16669 if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext())) in getSVEContainerIRType()
16670 return ScalableVectorType::get(VTy->getElementType(), 8); in getSVEContainerIRType()
16672 if (VTy->getElementType() == Type::getHalfTy(VTy->getContext())) in getSVEContainerIRType()
16673 return ScalableVectorType::get(VTy->getElementType(), 8); in getSVEContainerIRType()
16675 if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext())) in getSVEContainerIRType()
16676 return ScalableVectorType::get(VTy->getElementType(), 2); in getSVEContainerIRType()
16678 if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext())) in getSVEContainerIRType()
16679 return ScalableVectorType::get(VTy->getElementType(), 4); in getSVEContainerIRType()
16681 if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext())) in getSVEContainerIRType()
16682 return ScalableVectorType::get(VTy->getElementType(), 8); in getSVEContainerIRType()
16684 if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext())) in getSVEContainerIRType()
16685 return ScalableVectorType::get(VTy->getElementType(), 16); in getSVEContainerIRType()
16701 return Intrinsic::getDeclaration(M, SVELoads[Factor - 2], {LDVTy}); in getStructuredLoadFunction()
16703 return Intrinsic::getDeclaration(M, NEONLoads[Factor - 2], {LDVTy, PtrTy}); in getStructuredLoadFunction()
16717 return Intrinsic::getDeclaration(M, SVEStores[Factor - 2], {STVTy}); in getStructuredStoreFunction()
16719 return Intrinsic::getDeclaration(M, NEONStores[Factor - 2], {STVTy, PtrTy}); in getStructuredStoreFunction()
16742 const DataLayout &DL = LI->getDataLayout(); in lowerInterleavedLoad()
16744 VectorType *VTy = Shuffles[0]->getType(); in lowerInterleavedLoad()
16759 Type *EltTy = FVTy->getElementType(); in lowerInterleavedLoad()
16760 if (EltTy->isPointerTy()) in lowerInterleavedLoad()
16762 FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements()); in lowerInterleavedLoad()
16764 // If we're going to generate more than one load, reset the sub-vector type in lowerInterleavedLoad()
16766 FVTy = FixedVectorType::get(FVTy->getElementType(), in lowerInterleavedLoad()
16767 FVTy->getNumElements() / NumLoads); in lowerInterleavedLoad()
16775 Value *BaseAddr = LI->getPointerOperand(); in lowerInterleavedLoad()
16777 Type *PtrTy = LI->getPointerOperandType(); in lowerInterleavedLoad()
16778 Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()), in lowerInterleavedLoad()
16779 LDVTy->getElementCount()); in lowerInterleavedLoad()
16781 Function *LdNFunc = getStructuredLoadFunction(LI->getModule(), Factor, in lowerInterleavedLoad()
16784 // Holds sub-vectors extracted from the load intrinsic return values. The in lowerInterleavedLoad()
16785 // sub-vectors are associated with the shufflevector instructions they will in lowerInterleavedLoad()
16792 getSVEPredPatternFromNumElements(FVTy->getNumElements()); in lowerInterleavedLoad()
16793 if (Subtarget->getMinSVEVectorSizeInBits() == in lowerInterleavedLoad()
16794 Subtarget->getMaxSVEVectorSizeInBits() && in lowerInterleavedLoad()
16795 Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy)) in lowerInterleavedLoad()
16799 ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern); in lowerInterleavedLoad()
16809 BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr, in lowerInterleavedLoad()
16810 FVTy->getNumElements() * Factor); in lowerInterleavedLoad()
16818 // Extract and store the sub-vectors returned by the load intrinsic. in lowerInterleavedLoad()
16828 ConstantInt::get(Type::getInt64Ty(VTy->getContext()), 0)); in lowerInterleavedLoad()
16831 if (EltTy->isPointerTy()) in lowerInterleavedLoad()
16833 SubVec, FixedVectorType::get(SVI->getType()->getElementType(), in lowerInterleavedLoad()
16834 FVTy->getNumElements())); in lowerInterleavedLoad()
16840 // Replace uses of the shufflevector instructions with the sub-vectors in lowerInterleavedLoad()
16842 // associated with more than one sub-vector, those sub-vectors will be in lowerInterleavedLoad()
16848 SVI->replaceAllUsesWith(WideVec); in lowerInterleavedLoad()
16860 Ptr->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); in hasNearbyPairedStore()
16863 if (It->isDebugOrPseudoInst()) in hasNearbyPairedStore()
16865 if (MaxLookupDist-- == 0) in hasNearbyPairedStore()
16869 SI->getPointerOperand()->stripAndAccumulateInBoundsConstantOffsets( in hasNearbyPairedStore()
16872 (OffsetA.sextOrTrunc(IdxWidth) - OffsetB.sextOrTrunc(IdxWidth)) in hasNearbyPairedStore()
16914 auto *VecTy = cast<FixedVectorType>(SVI->getType()); in lowerInterleavedStore()
16915 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store"); in lowerInterleavedStore()
16917 unsigned LaneLen = VecTy->getNumElements() / Factor; in lowerInterleavedStore()
16918 Type *EltTy = VecTy->getElementType(); in lowerInterleavedStore()
16921 const DataLayout &DL = SI->getDataLayout(); in lowerInterleavedStore()
16932 Value *Op0 = SVI->getOperand(0); in lowerInterleavedStore()
16933 Value *Op1 = SVI->getOperand(1); in lowerInterleavedStore()
16938 if (EltTy->isPointerTy()) { in lowerInterleavedStore()
16941 cast<FixedVectorType>(Op0->getType())->getNumElements(); in lowerInterleavedStore()
16952 // and sub-vector type to something legal. in lowerInterleavedStore()
16954 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen); in lowerInterleavedStore()
16960 Value *BaseAddr = SI->getPointerOperand(); in lowerInterleavedStore()
16962 auto Mask = SVI->getShuffleMask(); in lowerInterleavedStore()
16965 // If mask is `poison`, `Mask` may be a vector of -1s. in lowerInterleavedStore()
16972 // that points to BaseAddr+16 or BaseAddr-16 then it can be better left as a in lowerInterleavedStore()
16974 if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 && in lowerInterleavedStore()
16976 hasNearbyPairedStore(SI->getIterator(), SI->getParent()->end(), BaseAddr, in lowerInterleavedStore()
16978 hasNearbyPairedStore(SI->getReverseIterator(), SI->getParent()->rend(), in lowerInterleavedStore()
16982 Type *PtrTy = SI->getPointerOperandType(); in lowerInterleavedStore()
16983 Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()), in lowerInterleavedStore()
16984 STVTy->getElementCount()); in lowerInterleavedStore()
16986 Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor, in lowerInterleavedStore()
16992 getSVEPredPatternFromNumElements(SubVecTy->getNumElements()); in lowerInterleavedStore()
16993 if (Subtarget->getMinSVEVectorSizeInBits() == in lowerInterleavedStore()
16994 Subtarget->getMaxSVEVectorSizeInBits() && in lowerInterleavedStore()
16995 Subtarget->getMinSVEVectorSizeInBits() == in lowerInterleavedStore()
17000 ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern); in lowerInterleavedStore()
17021 StartMask = Mask[IdxJ] - j; in lowerInterleavedStore()
17037 ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0)); in lowerInterleavedStore()
17048 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(), in lowerInterleavedStore()
17060 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2) in lowerDeinterleaveIntrinsicToLoad()
17066 VectorType *VTy = cast<VectorType>(DI->getType()->getContainedType(0)); in lowerDeinterleaveIntrinsicToLoad()
17067 const DataLayout &DL = DI->getDataLayout(); in lowerDeinterleaveIntrinsicToLoad()
17074 if (UseScalable && !VTy->isScalableTy()) in lowerDeinterleaveIntrinsicToLoad()
17080 VectorType::get(VTy->getElementType(), in lowerDeinterleaveIntrinsicToLoad()
17081 VTy->getElementCount().divideCoefficientBy(NumLoads)); in lowerDeinterleaveIntrinsicToLoad()
17083 Type *PtrTy = LI->getPointerOperandType(); in lowerDeinterleaveIntrinsicToLoad()
17084 Function *LdNFunc = getStructuredLoadFunction(DI->getModule(), Factor, in lowerDeinterleaveIntrinsicToLoad()
17092 Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue()); in lowerDeinterleaveIntrinsicToLoad()
17094 Value *BaseAddr = LI->getPointerOperand(); in lowerDeinterleaveIntrinsicToLoad()
17111 Builder.getInt64(I * LdTy->getElementCount().getKnownMinValue()); in lowerDeinterleaveIntrinsicToLoad()
17118 Result = PoisonValue::get(DI->getType()); in lowerDeinterleaveIntrinsicToLoad()
17128 DI->replaceAllUsesWith(Result); in lowerDeinterleaveIntrinsicToLoad()
17135 if (II->getIntrinsicID() != Intrinsic::vector_interleave2) in lowerInterleaveIntrinsicToStore()
17141 VectorType *VTy = cast<VectorType>(II->getOperand(0)->getType()); in lowerInterleaveIntrinsicToStore()
17142 const DataLayout &DL = II->getDataLayout(); in lowerInterleaveIntrinsicToStore()
17149 if (UseScalable && !VTy->isScalableTy()) in lowerInterleaveIntrinsicToStore()
17155 VectorType::get(VTy->getElementType(), in lowerInterleaveIntrinsicToStore()
17156 VTy->getElementCount().divideCoefficientBy(NumStores)); in lowerInterleaveIntrinsicToStore()
17158 Type *PtrTy = SI->getPointerOperandType(); in lowerInterleaveIntrinsicToStore()
17159 Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor, in lowerInterleaveIntrinsicToStore()
17164 Value *BaseAddr = SI->getPointerOperand(); in lowerInterleaveIntrinsicToStore()
17169 Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue()); in lowerInterleaveIntrinsicToStore()
17171 Value *L = II->getOperand(0); in lowerInterleaveIntrinsicToStore()
17172 Value *R = II->getOperand(1); in lowerInterleaveIntrinsicToStore()
17181 Builder.getInt64(I * StTy->getElementCount().getKnownMinValue()); in lowerInterleaveIntrinsicToStore()
17182 L = Builder.CreateExtractVector(StTy, II->getOperand(0), Idx); in lowerInterleaveIntrinsicToStore()
17183 R = Builder.CreateExtractVector(StTy, II->getOperand(1), Idx); in lowerInterleaveIntrinsicToStore()
17198 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat; in getOptimalMemOpType()
17199 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat; in getOptimalMemOpType()
17200 // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have in getOptimalMemOpType()
17228 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat; in getOptimalMemOpLLT()
17229 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat; in getOptimalMemOpLLT()
17230 // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have in getOptimalMemOpLLT()
17255 // 12-bit optionally shifted immediates are legal for adds.
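// E.g. an immediate of 0xabc can be encoded directly, and 0xabc000 can be
// encoded as 0xabc shifted left by 12, but a value such as 0xabc123 fits
// neither form and has to be materialized separately.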
17273 if (!Subtarget->hasSVE2()) in isLegalAddScalableImmediate()
17304 // (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
17317 const int64_t C1 = C1Node->getSExtValue(); in isMulAddWithConstProfitable()
17318 const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue(); in isMulAddWithConstProfitable()
17338 /// isLegalAddressingMode - Return true if the addressing mode represented
17345 // reg + 9-bit signed offset in isLegalAddressingMode()
17346 // reg + SIZE_IN_BYTES * 12-bit unsigned offset in isLegalAddressingMode()
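// For an 8-byte access this means immediates in [-256, 255] for the unscaled
// form and multiples of 8 up to 32760 for the scaled unsigned form.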
17377 if (Ty->isScalableTy()) { in isLegalAddressingMode()
17379 // See if we have a foldable vscale-based offset, for vector types which in isLegalAddressingMode()
17390 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8; in isLegalAddressingMode()
17398 // No scalable offsets allowed for non-scalable types. in isLegalAddressingMode()
17405 if (Ty->isSized()) { in isLegalAddressingMode()
17412 return Subtarget->getInstrInfo()->isLegalAddressingMode(NumBytes, AM.BaseOffs, in isLegalAddressingMode()
17444 return Subtarget->hasFullFP16(); in isFMAFasterThanFMulAndFAdd()
17457 switch (Ty->getScalarType()->getTypeID()) { in isFMAFasterThanFMulAndFAdd()
17474 // LR is a callee-save register, but we must treat it as clobbered by any call in getScratchRegisters()
17476 // as implicit-defs for stackmaps and patchpoints. in getScratchRegisters()
17491 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || in isDesirableToCommuteWithShift()
17492 N->getOpcode() == ISD::SRL) && in isDesirableToCommuteWithShift()
17495 SDValue ShiftLHS = N->getOperand(0); in isDesirableToCommuteWithShift()
17496 EVT VT = N->getValueType(0); in isDesirableToCommuteWithShift()
17508 if (N->getOpcode() == ISD::SHL) in isDesirableToCommuteWithShift()
17509 if (auto *SHLC = dyn_cast<ConstantSDNode>(N->getOperand(1))) in isDesirableToCommuteWithShift()
17510 return SRLC->getZExtValue() == SHLC->getZExtValue(); in isDesirableToCommuteWithShift()
17521 assert(N->getOpcode() == ISD::XOR && in isDesirableToCommuteXorWithShift()
17522 (N->getOperand(0).getOpcode() == ISD::SHL || in isDesirableToCommuteXorWithShift()
17523 N->getOperand(0).getOpcode() == ISD::SRL) && in isDesirableToCommuteXorWithShift()
17527 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1)); in isDesirableToCommuteXorWithShift()
17528 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)); in isDesirableToCommuteXorWithShift()
17531 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) { in isDesirableToCommuteXorWithShift()
17532 unsigned ShiftAmt = ShiftC->getZExtValue(); in isDesirableToCommuteXorWithShift()
17533 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits(); in isDesirableToCommuteXorWithShift()
17534 if (N->getOperand(0).getOpcode() == ISD::SHL) in isDesirableToCommuteXorWithShift()
17535 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt); in isDesirableToCommuteXorWithShift()
17536 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt); in isDesirableToCommuteXorWithShift()
17545 assert(((N->getOpcode() == ISD::SHL && in shouldFoldConstantShiftPairToMask()
17546 N->getOperand(0).getOpcode() == ISD::SRL) || in shouldFoldConstantShiftPairToMask()
17547 (N->getOpcode() == ISD::SRL && in shouldFoldConstantShiftPairToMask()
17548 N->getOperand(0).getOpcode() == ISD::SHL)) && in shouldFoldConstantShiftPairToMask()
17549 "Expected shift-shift mask"); in shouldFoldConstantShiftPairToMask()
17551 if (!N->getOperand(0)->hasOneUse()) in shouldFoldConstantShiftPairToMask()
17555 EVT VT = N->getValueType(0); in shouldFoldConstantShiftPairToMask()
17556 if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) { in shouldFoldConstantShiftPairToMask()
17557 auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)); in shouldFoldConstantShiftPairToMask()
17558 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); in shouldFoldConstantShiftPairToMask()
17559 return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue()); in shouldFoldConstantShiftPairToMask()
17572 assert(Ty->isIntegerTy()); in shouldConvertConstantLoadToIntImm()
17574 unsigned BitSize = Ty->getPrimitiveSizeInBits(); in shouldConvertConstantLoadToIntImm()
17585 Val &= (1LL << 32) - 1; in shouldConvertConstantLoadToIntImm()
17601 /// xor (sra X, elt_size(X)-1), -1
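/// Per lane this computes not(X >> (bits-1)), which is all-ones exactly when
/// the lane is non-negative, i.e. a compare-greater-than-or-equal-to-zero
/// that can map onto a single NEON compare against zero.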
17606 EVT VT = N->getValueType(0); in foldVectorXorShiftIntoCmp()
17607 if (!Subtarget->hasNEON() || !VT.isVector()) in foldVectorXorShiftIntoCmp()
17612 SDValue Shift = N->getOperand(0); in foldVectorXorShiftIntoCmp()
17613 SDValue Ones = N->getOperand(1); in foldVectorXorShiftIntoCmp()
17621 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1) in foldVectorXorShiftIntoCmp()
17645 if (N->getValueType(0) != MVT::i32) in performVecReduceAddCombineWithUADDLP()
17648 SDValue VecReduceOp0 = N->getOperand(0); in performVecReduceAddCombineWithUADDLP()
17651 if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32) in performVecReduceAddCombineWithUADDLP()
17656 if (ABS->getOperand(0)->getOpcode() != ISD::SUB || in performVecReduceAddCombineWithUADDLP()
17657 ABS->getOperand(0)->getValueType(0) != MVT::v16i32) in performVecReduceAddCombineWithUADDLP()
17660 SDValue SUB = ABS->getOperand(0); in performVecReduceAddCombineWithUADDLP()
17661 unsigned Opcode0 = SUB->getOperand(0).getOpcode(); in performVecReduceAddCombineWithUADDLP()
17662 unsigned Opcode1 = SUB->getOperand(1).getOpcode(); in performVecReduceAddCombineWithUADDLP()
17664 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 || in performVecReduceAddCombineWithUADDLP()
17665 SUB->getOperand(1)->getValueType(0) != MVT::v16i32) in performVecReduceAddCombineWithUADDLP()
17677 SDValue EXT0 = SUB->getOperand(0); in performVecReduceAddCombineWithUADDLP()
17678 SDValue EXT1 = SUB->getOperand(1); in performVecReduceAddCombineWithUADDLP()
17680 if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 || in performVecReduceAddCombineWithUADDLP()
17681 EXT1->getOperand(0)->getValueType(0) != MVT::v16i8) in performVecReduceAddCombineWithUADDLP()
17689 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0), in performVecReduceAddCombineWithUADDLP()
17692 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0), in performVecReduceAddCombineWithUADDLP()
17700 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0), in performVecReduceAddCombineWithUADDLP()
17703 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0), in performVecReduceAddCombineWithUADDLP()
17725 if (!ST->isNeonAvailable()) in performVecReduceAddCombine()
17728 if (!ST->hasDotProd()) in performVecReduceAddCombine()
17731 SDValue Op0 = N->getOperand(0); in performVecReduceAddCombine()
17732 if (N->getValueType(0) != MVT::i32 || Op0.getValueType().isScalableVT() || in performVecReduceAddCombine()
17757 // For non-mla reductions B can be set to 1. For MLA we take the operand of in performVecReduceAddCombine()
17781 return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot); in performVecReduceAddCombine()
17804 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), ConcatSDot16); in performVecReduceAddCombine()
17820 DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot); in performVecReduceAddCombine()
17821 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), VecReduceAdd16, in performVecReduceAddCombine()
17876 // We can convert a UADDV(add(zext(64-bit source), zext(64-bit source))) into
17877 // UADDLV(concat), where the concat represents the 64-bit zext sources.
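// E.g. UADDV(add(zext(v8i8 a), zext(v8i8 b))) sums the same byte values as
// UADDLV(concat_vectors(a, b)), which widens and accumulates in one step.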
17879 // Look for add(zext(64-bit source), zext(64-bit source)), returning in performUADDVZextCombine()
17893 // Check zext VTs are the same and 64-bit length. in performUADDVZextCombine()
17917 SDValue A = N->getOperand(0); in performUADDVCombine()
17920 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R); in performUADDVCombine()
17941 if (isIntDivCheap(N->getValueType(0), Attr)) in BuildSDIVPow2()
17944 EVT VT = N->getValueType(0); in BuildSDIVPow2()
17949 (VT.isFixedLengthVector() && Subtarget->useSVEForFixedLengthVectors())) in BuildSDIVPow2()
17957 // If the divisor is 2 or -2, the default expansion is better. It will add in BuildSDIVPow2()
17958 // (N->getValueType(0) >> (BitWidth - 1)) to it before shifting right. in BuildSDIVPow2()
17960 Divisor == APInt(Divisor.getBitWidth(), -2, /*isSigned*/ true)) in BuildSDIVPow2()
17971 if (isIntDivCheap(N->getValueType(0), Attr)) in BuildSREMPow2()
17974 EVT VT = N->getValueType(0); in BuildSREMPow2()
17978 if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors()) in BuildSREMPow2()
17991 SDValue N0 = N->getOperand(0); in BuildSREMPow2()
17992 SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT); in BuildSREMPow2()
18036 /// Calculates what the pre-extend type is, based on the extension
18040 /// pre-extend type is pulled directly from the operand, while other extend
18058 return TypeNode->getVT(); in calculatePreExtendType()
18066 uint32_t Mask = Constant->getZExtValue(); in calculatePreExtendType()
18093 SDValue Extend = BV->getOperand(0); in performBuildShuffleExtendCombine()
18107 // Restrict valid pre-extend data type in performBuildShuffleExtendCombine()
18114 for (SDValue Op : drop_begin(BV->ops())) { in performBuildShuffleExtendCombine()
18131 for (SDValue Op : BV->ops()) in performBuildShuffleExtendCombine()
18142 cast<ShuffleVectorSDNode>(BV)->getMask()); in performBuildShuffleExtendCombine()
18151 EVT VT = Mul->getValueType(0); in performMulVectorExtendCombine()
18155 SDValue Op0 = performBuildShuffleExtendCombine(Mul->getOperand(0), DAG); in performMulVectorExtendCombine()
18156 SDValue Op1 = performBuildShuffleExtendCombine(Mul->getOperand(1), DAG); in performMulVectorExtendCombine()
18163 return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0), in performMulVectorExtendCombine()
18164 Op1 ? Op1 : Mul->getOperand(1)); in performMulVectorExtendCombine()
18167 // Combine v4i32 Mul(And(Srl(X, 15), 0x10001), 0xffff) -> v8i16 CMLTz
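// Per 32-bit lane, (srl X, 15) & 0x10001 moves the sign bits of the two
// 16-bit halves to bits 0 and 16, and multiplying by 0xffff smears each of
// those bits across its half, i.e. each v8i16 lane becomes all-ones exactly
// when it is negative, which is what CMLTz computes.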
18170 EVT VT = N->getValueType(0); in performMulVectorCmpZeroCombine()
18174 if (N->getOperand(0).getOpcode() != ISD::AND || in performMulVectorCmpZeroCombine()
18175 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL) in performMulVectorCmpZeroCombine()
18178 SDValue And = N->getOperand(0); in performMulVectorCmpZeroCombine()
18182 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) || in performMulVectorCmpZeroCombine()
18189 V3 != (HalfSize - 1)) in performMulVectorCmpZeroCombine()
18207 EVT VT = N->getValueType(0); in performVectorExtCombine()
18209 (N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && in performVectorExtCombine()
18210 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) || in performVectorExtCombine()
18211 (N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && in performVectorExtCombine()
18212 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) || in performVectorExtCombine()
18213 N->getOperand(0).getOperand(0).getValueType() != in performVectorExtCombine()
18214 N->getOperand(1).getOperand(0).getValueType()) in performVectorExtCombine()
18217 if (N->getOpcode() == ISD::MUL && in performVectorExtCombine()
18218 N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode()) in performVectorExtCombine()
18221 SDValue N0 = N->getOperand(0).getOperand(0); in performVectorExtCombine()
18222 SDValue N1 = N->getOperand(1).getOperand(0); in performVectorExtCombine()
18233 SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0); in performVectorExtCombine()
18234 SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1); in performVectorExtCombine()
18235 SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1); in performVectorExtCombine()
18236 return DAG.getNode(N->getOpcode() == ISD::MUL ? N->getOperand(0).getOpcode() in performVectorExtCombine()
18257 // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y, in performMulCombine()
18259 // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X. in performMulCombine()
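// Rewriting this way lets the add/sub of the product fold into a single
// MADD/MSUB instead of a separate add/sub feeding a multiply.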
18261 EVT VT = N->getValueType(0); in performMulCombine()
18262 SDValue N0 = N->getOperand(0); in performMulCombine()
18263 SDValue N1 = N->getOperand(1); in performMulCombine()
18267 auto IsAddSubWith1 = [&](SDValue V) -> bool { in performMulCombine()
18268 AddSubOpc = V->getOpcode(); in performMulCombine()
18269 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) { in performMulCombine()
18270 SDValue Opnd = V->getOperand(1); in performMulCombine()
18271 MulOper = V->getOperand(0); in performMulCombine()
18275 return C->isOne(); in performMulCombine()
18295 const APInt &ConstValue = C->getAPIntValue(); in performMulCombine()
18300 (N0->getOpcode() == ISD::TRUNCATE && in performMulCombine()
18301 (IsSVECntIntrinsic(N0->getOperand(0))))) in performMulCombine()
18308 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and in performMulCombine()
18309 // 64-bit is 5 cycles, so this is always a win. in performMulCombine()
18321 if (N0->hasOneUse() && (isSignExtended(N0, DAG) || in performMulCombine()
18326 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD || in performMulCombine()
18327 N->use_begin()->getOpcode() == ISD::SUB)) in performMulCombine()
18362 // C = 45 is equal to (1+4)*(1+8); we don't decompose it into (1+2)*(16-1) as in performMulCombine()
18363 // the (2^N - 1) factor can't be executed via a single instruction. in performMulCombine()
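// E.g. C = 45 = (1+4)*(1+8): with w = x + (x<<2), x*45 is w + (w<<3), two
// shift-and-add steps.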
18370 APInt NVMinus1 = N - 1; in performMulCombine()
18380 // C = 11 is equal to (1+4)*2+1; we don't decompose it into (1+2)*4-1 as in performMulCombine()
18381 // the (2^N - 1) factor can't be executed via a single instruction. in performMulCombine()
18383 APInt CVMinus1 = C - 1; in performMulCombine()
18387 APInt SCVMinus1 = CVMinus1.ashr(TrailingZeroes) - 1; in performMulCombine()
18397 // Can the const C be decomposed into (1 - (1 - 2^M) * 2^N), eg: in performMulCombine()
18398 // C = 29 is equal to 1 - (1 - 2^3) * 2^2. in performMulCombine()
18400 APInt CVMinus1 = C - 1; in performMulCombine()
18416 // (mul x, 2^N - 1) => (sub (shl x, N), x) in performMulCombine()
18417 // (mul x, (2^(N-M) - 1) * 2^M) => (sub (shl x, N), (shl x, M)) in performMulCombine()
18422 // (mul x, 1 - (1 - 2^M) * 2^N) in performMulCombine()
18423 // => MV = sub (x - (shl x, M)); sub (x - (shl MV, N)) in performMulCombine()
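// E.g. x*7 => (x<<3) - x, x*24 (= 3*8) => (x<<5) - (x<<3), and for x*29
// (= 1 - (1-8)*4): MV = x - (x<<3), result = x - (MV<<2).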
18424 APInt SCVMinus1 = ShiftedConstValue - 1; in performMulCombine()
18438 if (Subtarget->hasALULSLFast() && in performMulCombine()
18440 APInt CVMMinus1 = CVM - 1; in performMulCombine()
18441 APInt CVNMinus1 = CVN - 1; in performMulCombine()
18450 if (Subtarget->hasALULSLFast() && in performMulCombine()
18461 if (Subtarget->hasALULSLFast() && in performMulCombine()
18472 // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) in performMulCombine()
18473 // (mul x, -(2^N + 1)) => - (add (shl x, N), x) in performMulCombine()
18474 // (mul x, -(2^(N-M) - 1) * 2^M) => (sub (shl x, M), (shl x, N)) in performMulCombine()
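// E.g. x*-7 => x - (x<<3), x*-9 => -((x<<3) + x), and x*-24 (= -3*8)
// => (x<<3) - (x<<5).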
18475 APInt SCVPlus1 = -ShiftedConstValue + 1; in performMulCombine()
18476 APInt CVNegPlus1 = -ConstValue + 1; in performMulCombine()
18477 APInt CVNegMinus1 = -ConstValue - 1; in performMulCombine()
18495 // Take advantage of vector comparisons producing 0 or -1 in each lane to in performVectorCompareAndMaskUnaryOpCombine()
18499 // UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> in performVectorCompareAndMaskUnaryOpCombine()
18506 EVT VT = N->getValueType(0); in performVectorCompareAndMaskUnaryOpCombine()
18507 if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND || in performVectorCompareAndMaskUnaryOpCombine()
18508 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC || in performVectorCompareAndMaskUnaryOpCombine()
18509 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits()) in performVectorCompareAndMaskUnaryOpCombine()
18513 // make the transformation for non-constant splats as well, but it's unclear in performVectorCompareAndMaskUnaryOpCombine()
18517 dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) { in performVectorCompareAndMaskUnaryOpCombine()
18519 if (!BV->isConstant()) in performVectorCompareAndMaskUnaryOpCombine()
18524 EVT IntVT = BV->getValueType(0); in performVectorCompareAndMaskUnaryOpCombine()
18527 SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); in performVectorCompareAndMaskUnaryOpCombine()
18531 N->getOperand(0)->getOperand(0), MaskConst); in performVectorCompareAndMaskUnaryOpCombine()
18546 EVT VT = N->getValueType(0); in performIntToFpCombine()
18551 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits()) in performIntToFpCombine()
18554 // If the result of an integer load is only used by an integer-to-float in performIntToFpCombine()
18556 // This eliminates an "integer-to-vector-move" UOP and improves throughput. in performIntToFpCombine()
18557 SDValue N0 = N->getOperand(0); in performIntToFpCombine()
18558 if (Subtarget->isNeonAvailable() && ISD::isNormalLoad(N0.getNode()) && in performIntToFpCombine()
18561 !cast<LoadSDNode>(N0)->isVolatile()) { in performIntToFpCombine()
18563 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), in performIntToFpCombine()
18564 LN0->getPointerInfo(), LN0->getAlign(), in performIntToFpCombine()
18565 LN0->getMemOperand()->getFlags()); in performIntToFpCombine()
18572 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF; in performIntToFpCombine()
18579 /// Fold a floating-point multiply by power of two into floating-point to
18580 /// fixed-point conversion.
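/// E.g. (fptosi (fmul x, 256.0)) can use the fixed-point form of the convert
/// with 8 fractional bits rather than a separate fmul and fcvt.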
18584 if (!Subtarget->isNeonAvailable()) in performFpToIntCombine()
18587 if (!N->getValueType(0).isSimple()) in performFpToIntCombine()
18590 SDValue Op = N->getOperand(0); in performFpToIntCombine()
18597 SDValue ConstVec = Op->getOperand(1); in performFpToIntCombine()
18604 (FloatBits != 16 || !Subtarget->hasFullFP16())) in performFpToIntCombine()
18607 MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); in performFpToIntCombine()
18612 // Avoid conversions where iN is larger than the float (e.g., float -> i64). in performFpToIntCombine()
18619 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1); in performFpToIntCombine()
18620 if (C == -1 || C == 0 || C > Bits) in performFpToIntCombine()
18627 if (N->getOpcode() == ISD::FP_TO_SINT_SAT || in performFpToIntCombine()
18628 N->getOpcode() == ISD::FP_TO_UINT_SAT) { in performFpToIntCombine()
18629 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT(); in performFpToIntCombine()
18635 bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT || in performFpToIntCombine()
18636 N->getOpcode() == ISD::FP_TO_SINT_SAT); in performFpToIntCombine()
18642 Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32)); in performFpToIntCombine()
18645 FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv); in performFpToIntCombine()
18652 EVT VT = N->getValueType(0); in tryCombineToBSL()
18667 SDValue N0 = N->getOperand(0); in tryCombineToBSL()
18671 SDValue N1 = N->getOperand(1); in tryCombineToBSL()
18675 // InstCombine does (not (neg a)) => (add a -1). in tryCombineToBSL()
18676 // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c) in tryCombineToBSL()
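// When each lane of a is 0 or 1, (neg a) is an all-zeros/all-ones mask and
// (add a -1) is its complement, so the OR of the two ANDs picks b or c per
// lane, which is exactly what BSL does.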
18678 for (int i = 1; i >= 0; --i) { in tryCombineToBSL()
18679 for (int j = 1; j >= 0; --j) { in tryCombineToBSL()
18680 SDValue O0 = N0->getOperand(i); in tryCombineToBSL()
18681 SDValue O1 = N1->getOperand(j); in tryCombineToBSL()
18688 SubSibling = N0->getOperand(1 - i); in tryCombineToBSL()
18689 AddSibling = N1->getOperand(1 - j); in tryCombineToBSL()
18693 AddSibling = N0->getOperand(1 - i); in tryCombineToBSL()
18694 SubSibling = N1->getOperand(1 - j); in tryCombineToBSL()
18716 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1); in tryCombineToBSL()
18717 for (int i = 1; i >= 0; --i) in tryCombineToBSL()
18718 for (int j = 1; j >= 0; --j) { in tryCombineToBSL()
18721 if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) && in tryCombineToBSL()
18722 ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) && in tryCombineToBSL()
18724 return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i), in tryCombineToBSL()
18725 N0->getOperand(1 - i), N1->getOperand(1 - j)); in tryCombineToBSL()
18727 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i)); in tryCombineToBSL()
18728 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j)); in tryCombineToBSL()
18734 ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k)); in tryCombineToBSL()
18735 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k)); in tryCombineToBSL()
18737 CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) { in tryCombineToBSL()
18743 return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i), in tryCombineToBSL()
18744 N0->getOperand(1 - i), N1->getOperand(1 - j)); in tryCombineToBSL()
18761 EVT VT = N->getValueType(0); in performANDORCSELCombine()
18762 SDValue CSel0 = N->getOperand(0); in performANDORCSELCombine()
18763 SDValue CSel1 = N->getOperand(1); in performANDORCSELCombine()
18769 if (!CSel0->hasOneUse() || !CSel1->hasOneUse()) in performANDORCSELCombine()
18782 if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse()) in performANDORCSELCombine()
18797 if (N->getOpcode() == ISD::AND) { in performANDORCSELCombine()
18810 if (Op1 && Op1->getAPIntValue().isNegative() && in performANDORCSELCombine()
18811 Op1->getAPIntValue().sgt(-32)) { in performANDORCSELCombine()
18813 // if the Op1 is a constant in the range [-31, -1], we in performANDORCSELCombine()
18816 DAG.getConstant(Op1->getAPIntValue().abs(), DL, Op1->getValueType(0)); in performANDORCSELCombine()
18832 EVT VT = N->getValueType(0); in performORCombine()
18866 if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR) in isConstantSplatVectorMaskForType()
18867 if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0))) in isConstantSplatVectorMaskForType()
18868 return Op0->getAPIntValue().getLimitedValue() == MaskForTy; in isConstantSplatVectorMaskForType()
18875 SDValue Op = N->getOperand(0); in performReinterpretCastCombine()
18878 Op = Op->getOperand(0); in performReinterpretCastCombine()
18887 SDValue Src = N->getOperand(0); in performSVEAndCombine()
18888 unsigned Opc = Src->getOpcode(); in performSVEAndCombine()
18892 SDValue UnpkOp = Src->getOperand(0); in performSVEAndCombine()
18893 SDValue Dup = N->getOperand(1); in performSVEAndCombine()
18899 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0)); in performSVEAndCombine()
18903 uint64_t ExtVal = C->getZExtValue(); in performSVEAndCombine()
18905 auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool { in performSVEAndCombine()
18913 EVT EltTy = UnpkOp->getValueType(0).getVectorElementType(); in performSVEAndCombine()
18917 // If this is 'and (uunpklo/hi (extload MemTy -> ExtTy)), mask', then check in performSVEAndCombine()
18918 // to see if the mask is all-ones of size MemTy. in performSVEAndCombine()
18920 if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD || in performSVEAndCombine()
18921 MaskedLoadOp->getExtensionType() == ISD::EXTLOAD)) { in performSVEAndCombine()
18922 EVT EltTy = MaskedLoadOp->getMemoryVT().getVectorElementType(); in performSVEAndCombine()
18928 APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits()); in performSVEAndCombine()
18932 Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0), in performSVEAndCombine()
18936 UnpkOp->getValueType(0), UnpkOp, Dup); in performSVEAndCombine()
18938 return DAG.getNode(Opc, DL, N->getValueType(0), And); in performSVEAndCombine()
18946 if (isAllActivePredicate(DAG, N->getOperand(0))) in performSVEAndCombine()
18947 return N->getOperand(1); in performSVEAndCombine()
18948 if (isAllActivePredicate(DAG, N->getOperand(1))) in performSVEAndCombine()
18949 return N->getOperand(0); in performSVEAndCombine()
18954 SDValue Mask = N->getOperand(1); in performSVEAndCombine()
18961 // SVE load instructions perform an implicit zero-extend, which makes them in performSVEAndCombine()
18967 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT(); in performSVEAndCombine()
18984 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT(); in performSVEAndCombine()
19003 SDValue SetCC = N->getOperand(0); in performANDSETCCCombine()
19004 EVT VT = N->getValueType(0); in performANDSETCCCombine()
19010 for (auto U : N->uses()) in performANDSETCCCombine()
19011 if (U->getOpcode() == ISD::SELECT) in performANDSETCCCombine()
19014 // Check if the operand is a SETCC node with floating-point comparison in performANDSETCCCombine()
19039 SDValue LHS = N->getOperand(0); in performANDCombine()
19040 SDValue RHS = N->getOperand(1); in performANDCombine()
19041 EVT VT = N->getValueType(0); in performANDCombine()
19102 SDValue LHS = N->getOperand(0); in performFADDCombine()
19103 SDValue RHS = N->getOperand(1); in performFADDCombine()
19104 EVT VT = N->getValueType(0); in performFADDCombine()
19107 if (!N->getFlags().hasAllowReassociation()) in performFADDCombine()
19110 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), b, c) in performFADDCombine()
19122 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(1), B, N->getFlags()), in performFADDCombine()
19124 VCMLA->setFlags(A->getFlags()); in performFADDCombine()
19174 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT); in performFirstTrueTestVectorCombine()
19176 if (!Subtarget->hasSVE() || DCI.isBeforeLegalize()) in performFirstTrueTestVectorCombine()
19179 SDValue N0 = N->getOperand(0); in performFirstTrueTestVectorCombine()
19183 !isNullConstant(N->getOperand(1))) in performFirstTrueTestVectorCombine()
19187 // flag-setting operation. in performFirstTrueTestVectorCombine()
19194 return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::FIRST_ACTIVE); in performFirstTrueTestVectorCombine()
19197 // Materialize : Idx = (add (mul vscale, NumEls), -1)
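// i.e. the index of the last element of the scalable vector, e.g.
// vscale * 4 - 1 for an nxv4i1 predicate.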
19204 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT); in performLastTrueTestVectorCombine()
19206 if (!Subtarget->hasSVE() || DCI.isBeforeLegalize()) in performLastTrueTestVectorCombine()
19209 SDValue N0 = N->getOperand(0); in performLastTrueTestVectorCombine()
19215 // Idx == (add (mul vscale, NumEls), -1) in performLastTrueTestVectorCombine()
19216 SDValue Idx = N->getOperand(1); in performLastTrueTestVectorCombine()
19228 // Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0 in performLastTrueTestVectorCombine()
19231 return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE); in performLastTrueTestVectorCombine()
19237 assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT); in performExtractVectorEltCombine()
19244 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); in performExtractVectorEltCombine()
19246 EVT VT = N->getValueType(0); in performExtractVectorEltCombine()
19248 bool IsStrict = N0->isStrictFPOpcode(); in performExtractVectorEltCombine()
19250 // extract(dup x) -> x in performExtractVectorEltCombine()
19259 // -> in performExtractVectorEltCombine()
19264 if (isNullConstant(N1) && hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) && in performExtractVectorEltCombine()
19267 SDValue N00 = N0->getOperand(IsStrict ? 1 : 0); in performExtractVectorEltCombine()
19268 SDValue N01 = N0->getOperand(IsStrict ? 2 : 1); in performExtractVectorEltCombine()
19279 if (Shuffle && Shuffle->getMaskElt(0) == 1 && in performExtractVectorEltCombine()
19280 Other == Shuffle->getOperand(0)) { in performExtractVectorEltCombine()
19286 return DAG.getNode(N0->getOpcode(), DL, VT, Extract1, Extract2); in performExtractVectorEltCombine()
19289 // with the strict_fadd, but we also need uses of the chain output of the in performExtractVectorEltCombine()
19290 // original strict_fadd to use the chain output of the new strict_fadd as in performExtractVectorEltCombine()
19292 SDValue Ret = DAG.getNode(N0->getOpcode(), DL, in performExtractVectorEltCombine()
19294 {N0->getOperand(0), Extract1, Extract2}); in performExtractVectorEltCombine()
19308 EVT VT = N->getValueType(0); in performConcatVectorsCombine()
19309 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); in performConcatVectorsCombine()
19310 unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode(); in performConcatVectorsCombine()
19319 // -> in performConcatVectorsCombine()
19323 // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed in performConcatVectorsCombine()
19325 // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8. in performConcatVectorsCombine()
19326 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE && in performConcatVectorsCombine()
19328 SDValue N00 = N0->getOperand(0); in performConcatVectorsCombine()
19329 SDValue N10 = N1->getOperand(0); in performConcatVectorsCombine()
19347 if (N->getOperand(0).getValueType() == MVT::v4i8 || in performConcatVectorsCombine()
19348 N->getOperand(0).getValueType() == MVT::v2i16 || in performConcatVectorsCombine()
19349 N->getOperand(0).getValueType() == MVT::v2i8) { in performConcatVectorsCombine()
19350 EVT SrcVT = N->getOperand(0).getValueType(); in performConcatVectorsCombine()
19354 if (N->getNumOperands() % 2 == 0 && in performConcatVectorsCombine()
19355 all_of(N->op_values(), [SrcVT](SDValue V) { in performConcatVectorsCombine()
19361 return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() && in performConcatVectorsCombine()
19362 LD->getExtensionType() == ISD::NON_EXTLOAD; in performConcatVectorsCombine()
19365 EVT NVT = EVT::getVectorVT(*DAG.getContext(), FVT, N->getNumOperands()); in performConcatVectorsCombine()
19368 for (unsigned i = 0; i < N->getNumOperands(); i++) { in performConcatVectorsCombine()
19369 SDValue V = N->getOperand(i); in performConcatVectorsCombine()
19374 SDValue NewLoad = DAG.getLoad(FVT, dl, LD->getChain(), in performConcatVectorsCombine()
19375 LD->getBasePtr(), LD->getMemOperand()); in performConcatVectorsCombine()
19380 return DAG.getBitcast(N->getValueType(0), in performConcatVectorsCombine()
19390 // -> in performConcatVectorsCombine()
19393 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE && in performConcatVectorsCombine()
19394 N1Opc == ISD::TRUNCATE && N->isOnlyUserOf(N0.getNode()) && in performConcatVectorsCombine()
19395 N->isOnlyUserOf(N1.getNode())) { in performConcatVectorsCombine()
19397 return V->getOpcode() == ISD::XOR && in performConcatVectorsCombine()
19400 SDValue N00 = N0->getOperand(0); in performConcatVectorsCombine()
19401 SDValue N10 = N1->getOperand(0); in performConcatVectorsCombine()
19402 if (isBitwiseVectorNegate(N00) && N0->isOnlyUserOf(N00.getNode()) && in performConcatVectorsCombine()
19403 isBitwiseVectorNegate(N10) && N1->isOnlyUserOf(N10.getNode())) { in performConcatVectorsCombine()
19408 N00->getOperand(0)), in performConcatVectorsCombine()
19410 N10->getOperand(0))), in performConcatVectorsCombine()
19420 // Optimise concat_vectors of two identical binops with a 128-bit destination in performConcatVectorsCombine()
19422 // concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), concat(b, d)) in performConcatVectorsCombine()
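// Doing the operation once at 128 bits saves an instruction when each 64-bit
// half has no other users.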
19423 if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() && in performConcatVectorsCombine()
19424 DAG.getTargetLoweringInfo().isBinOp(N0Opc) && N0->hasOneUse() && in performConcatVectorsCombine()
19425 N1->hasOneUse()) { in performConcatVectorsCombine()
19426 SDValue N00 = N0->getOperand(0); in performConcatVectorsCombine()
19427 SDValue N01 = N0->getOperand(1); in performConcatVectorsCombine()
19428 SDValue N10 = N1->getOperand(0); in performConcatVectorsCombine()
19429 SDValue N11 = N1->getOperand(1); in performConcatVectorsCombine()
19459 if (Imm != 1ULL << (ShtAmt - 1)) in performConcatVectorsCombine()
19464 // concat(rshrn(x), rshrn(y)) -> rshrn(concat(x, y)) in performConcatVectorsCombine()
19465 if (N->getNumOperands() == 2 && IsRSHRN(N0) && in performConcatVectorsCombine()
19477 DAG.getConstant(1ULL << (N0.getConstantOperandVal(1) - 1), dl, BVT)); in performConcatVectorsCombine()
19484 if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 && in performConcatVectorsCombine()
19497 if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) { in performConcatVectorsCombine()
19503 // Canonicalise concat_vectors so that the right-hand vector has as few in performConcatVectorsCombine()
19504 // bit-casts as possible before its real operation. The primary matching in performConcatVectorsCombine()
19506 // which depend on the operation being performed on this right-hand vector. in performConcatVectorsCombine()
19512 if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST) in performConcatVectorsCombine()
19514 SDValue RHS = N1->getOperand(0); in performConcatVectorsCombine()
19521 dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n"); in performConcatVectorsCombine()
19537 EVT VT = N->getValueType(0); in performExtractSubvectorCombine()
19541 SDValue V = N->getOperand(0); in performExtractSubvectorCombine()
19544 // blocks this combine because the non-const case requires custom lowering. in performExtractSubvectorCombine()
19546 // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const) in performExtractSubvectorCombine()
19558 SDValue Vec = N->getOperand(0); in performInsertSubvectorCombine()
19559 SDValue SubVec = N->getOperand(1); in performInsertSubvectorCombine()
19560 uint64_t IdxVal = N->getConstantOperandVal(2); in performInsertSubvectorCombine()
19580 // Fold insert_subvector -> concat_vectors in performInsertSubvectorCombine()
19581 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi)) in performInsertSubvectorCombine()
19582 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub) in performInsertSubvectorCombine()
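// E.g. inserting a v2i32 Sub at index 0 of a v4i32 Vec becomes
// concat_vectors(Sub, extract_subvector(Vec, 2)).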
19609 // register allocator to avoid cross-class register copies that aren't in tryCombineFixedPointConvert()
19613 SDValue Op1 = N->getOperand(1); in tryCombineFixedPointConvert()
19618 SDValue IID = N->getOperand(0); in tryCombineFixedPointConvert()
19619 SDValue Shift = N->getOperand(2); in tryCombineFixedPointConvert()
19622 EVT ResTy = N->getValueType(0); in tryCombineFixedPointConvert()
19644 // AArch64 high-vector "long" operations are formed by performing the non-high
19653 // (dupv64 scalar) --> (extract_high (dup128 scalar))
19657 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
19693 N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops()); in tryExtendDUPToExtractHigh()
19749 SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); in isSetCC()
19755 // - csel 1, 0, cc in isSetCC()
19756 // - csel 0, 1, !cc in isSetCC()
19777 if (!TValue->isOne()) { in isSetCC()
19783 return TValue->isOne() && FValue->isZero(); in isSetCC()
19791 isSetCC(Op->getOperand(0), Info)); in isSetCCOrZExtSetCC()
19796 // -->
19801 assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!"); in performSetccAddFolding()
19802 SDValue LHS = Op->getOperand(0); in performSetccAddFolding()
19803 SDValue RHS = Op->getOperand(1); in performSetccAddFolding()
19822 ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType() in performSetccAddFolding()
19823 : InfoAndKind.Info.Generic.Opnd0->getValueType(); in performSetccAddFolding()
19841 EVT VT = Op->getValueType(0); in performSetccAddFolding()
19846 // ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
19848 EVT VT = N->getValueType(0); in performAddUADDVCombine()
19850 if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger()) in performAddUADDVCombine()
19853 SDValue LHS = N->getOperand(0); in performAddUADDVCombine()
19854 SDValue RHS = N->getOperand(1); in performAddUADDVCombine()
19859 auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); in performAddUADDVCombine()
19860 auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1)); in performAddUADDVCombine()
19861 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero()) in performAddUADDVCombine()
19864 SDValue Op1 = LHS->getOperand(0); in performAddUADDVCombine()
19865 SDValue Op2 = RHS->getOperand(0); in performAddUADDVCombine()
19875 EVT ValVT = Val1->getValueType(0); in performAddUADDVCombine()
19885 /// CSNEG(c, -1, cc) + b => CSINC(b+c, b, cc)
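/// CSNEG(c, -1, cc) is c when cc holds and 1 otherwise, so adding b yields
/// either b+c or b+1, which is precisely CSINC(b+c, b, cc).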
19887 EVT VT = N->getValueType(0); in performAddCSelIntoCSinc()
19888 if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD) in performAddCSelIntoCSinc()
19891 SDValue LHS = N->getOperand(0); in performAddCSelIntoCSinc()
19892 SDValue RHS = N->getOperand(1); in performAddCSelIntoCSinc()
19918 (CTVal->isOne() || CFVal->isOne())) && in performAddCSelIntoCSinc()
19920 (CTVal->isOne() || CFVal->isAllOnes()))) in performAddCSelIntoCSinc()
19924 if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() && in performAddCSelIntoCSinc()
19925 !CFVal->isOne()) { in performAddCSelIntoCSinc()
19931 // Switch CSNEG(1, c, cc) to CSNEG(-c, -1, !cc) in performAddCSelIntoCSinc()
19932 if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() && in performAddCSelIntoCSinc()
19933 !CFVal->isAllOnes()) { in performAddCSelIntoCSinc()
19934 APInt C = -1 * CFVal->getAPIntValue(); in performAddCSelIntoCSinc()
19942 APInt ADDC = CTVal->getAPIntValue(); in performAddCSelIntoCSinc()
19947 assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) || in performAddCSelIntoCSinc()
19948 (LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) && in performAddCSelIntoCSinc()
19958 // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
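// UDOT accumulates its dot product into the first operand, so the add of A
// folds away by accumulating into A instead of zero.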
19960 EVT VT = N->getValueType(0); in performAddDotCombine()
19961 if (N->getOpcode() != ISD::ADD) in performAddDotCombine()
19964 SDValue Dot = N->getOperand(0); in performAddDotCombine()
19965 SDValue A = N->getOperand(1); in performAddDotCombine()
19994 // (neg (csel X, Y)) -> (csel (neg X), (neg Y))
20003 SDValue CSel = N->getOperand(1); in performNegCSelCombine()
20004 if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse()) in performNegCSelCombine()
20025 // which act on the high-half of their inputs. They are normally matched by
20030 // -> uaddl2 vD, vN, vM
20041 MVT VT = N->getSimpleValueType(0); in performAddSubLongCombine()
20043 if (N->getOpcode() == ISD::ADD) in performAddSubLongCombine()
20049 SDValue LHS = N->getOperand(0); in performAddSubLongCombine()
20050 SDValue RHS = N->getOperand(1); in performAddSubLongCombine()
20074 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS); in performAddSubLongCombine()
20079 !Op.getNode()->hasAnyUseOfValue(0); in isCMP()
20103 SDValue CmpOp = Op->getOperand(2); in foldOverflowCheck()
20115 SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1); in foldOverflowCheck()
20120 return DAG.getNode(Op->getOpcode(), SDLoc(Op), Op->getVTList(), in foldOverflowCheck()
20121 Op->getOperand(0), Op->getOperand(1), in foldOverflowCheck()
20127 SDValue LHS = N->getOperand(0); in foldADCToCINC()
20128 SDValue RHS = N->getOperand(1); in foldADCToCINC()
20129 SDValue Cond = N->getOperand(2); in foldADCToCINC()
20134 EVT VT = N->getValueType(0); in foldADCToCINC()
20146 EVT VT = N->getValueType(0); in performBuildVectorCombine()
20150 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1), in performBuildVectorCombine()
20151 Elt2 = N->getOperand(2), Elt3 = N->getOperand(3); in performBuildVectorCombine()
20152 if (Elt0->getOpcode() == ISD::FP_ROUND && in performBuildVectorCombine()
20153 Elt1->getOpcode() == ISD::FP_ROUND && in performBuildVectorCombine()
20154 isa<ConstantSDNode>(Elt0->getOperand(1)) && in performBuildVectorCombine()
20155 isa<ConstantSDNode>(Elt1->getOperand(1)) && in performBuildVectorCombine()
20156 Elt0->getConstantOperandVal(1) == Elt1->getConstantOperandVal(1) && in performBuildVectorCombine()
20157 Elt0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT && in performBuildVectorCombine()
20158 Elt1->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT && in performBuildVectorCombine()
20160 isa<ConstantSDNode>(Elt0->getOperand(0)->getOperand(1)) && in performBuildVectorCombine()
20161 isa<ConstantSDNode>(Elt1->getOperand(0)->getOperand(1)) && in performBuildVectorCombine()
20162 Elt0->getOperand(0)->getOperand(0) == in performBuildVectorCombine()
20163 Elt1->getOperand(0)->getOperand(0) && in performBuildVectorCombine()
20164 Elt0->getOperand(0)->getConstantOperandVal(1) == 0 && in performBuildVectorCombine()
20165 Elt1->getOperand(0)->getConstantOperandVal(1) == 1) { in performBuildVectorCombine()
20166 SDValue LowLanesSrcVec = Elt0->getOperand(0)->getOperand(0); in performBuildVectorCombine()
20169 if (Elt2->getOpcode() == ISD::UNDEF && in performBuildVectorCombine()
20170 Elt3->getOpcode() == ISD::UNDEF) { in performBuildVectorCombine()
20172 } else if (Elt2->getOpcode() == ISD::FP_ROUND && in performBuildVectorCombine()
20173 Elt3->getOpcode() == ISD::FP_ROUND && in performBuildVectorCombine()
20174 isa<ConstantSDNode>(Elt2->getOperand(1)) && in performBuildVectorCombine()
20175 isa<ConstantSDNode>(Elt3->getOperand(1)) && in performBuildVectorCombine()
20176 Elt2->getConstantOperandVal(1) == in performBuildVectorCombine()
20177 Elt3->getConstantOperandVal(1) && in performBuildVectorCombine()
20178 Elt2->getOperand(0)->getOpcode() == in performBuildVectorCombine()
20180 Elt3->getOperand(0)->getOpcode() == in performBuildVectorCombine()
20183 isa<ConstantSDNode>(Elt2->getOperand(0)->getOperand(1)) && in performBuildVectorCombine()
20184 isa<ConstantSDNode>(Elt3->getOperand(0)->getOperand(1)) && in performBuildVectorCombine()
20185 Elt2->getOperand(0)->getOperand(0) == in performBuildVectorCombine()
20186 Elt3->getOperand(0)->getOperand(0) && in performBuildVectorCombine()
20187 Elt2->getOperand(0)->getConstantOperandVal(1) == 0 && in performBuildVectorCombine()
20188 Elt3->getOperand(0)->getConstantOperandVal(1) == 1) { in performBuildVectorCombine()
20189 SDValue HighLanesSrcVec = Elt2->getOperand(0)->getOperand(0); in performBuildVectorCombine()
20199 Elt0->getOperand(1)); in performBuildVectorCombine()
20206 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1); in performBuildVectorCombine()
20207 if (Elt0->getOpcode() == ISD::FP_EXTEND && in performBuildVectorCombine()
20208 Elt1->getOpcode() == ISD::FP_EXTEND && in performBuildVectorCombine()
20209 Elt0->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT && in performBuildVectorCombine()
20210 Elt1->getOperand(0)->getOpcode() == ISD::EXTRACT_VECTOR_ELT && in performBuildVectorCombine()
20211 Elt0->getOperand(0)->getOperand(0) == in performBuildVectorCombine()
20212 Elt1->getOperand(0)->getOperand(0) && in performBuildVectorCombine()
20214 isa<ConstantSDNode>(Elt0->getOperand(0)->getOperand(1)) && in performBuildVectorCombine()
20215 isa<ConstantSDNode>(Elt1->getOperand(0)->getOperand(1)) && in performBuildVectorCombine()
20216 Elt0->getOperand(0)->getConstantOperandVal(1) + 1 == in performBuildVectorCombine()
20217 Elt1->getOperand(0)->getConstantOperandVal(1) && in performBuildVectorCombine()
20220 Elt0->getOperand(0)->getConstantOperandVal(1) % in performBuildVectorCombine()
20223 SDValue SrcVec = Elt0->getOperand(0)->getOperand(0); in performBuildVectorCombine()
20228 SDValue SubvectorIdx = Elt0->getOperand(0)->getOperand(1); in performBuildVectorCombine()
20238 // extract subvector where the inner vector is any-extended to the extract_vector_elt VT. in performBuildVectorCombine()
20249 SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1); in performBuildVectorCombine()
20250 // Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT. in performBuildVectorCombine()
20251 if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT && in performBuildVectorCombine()
20252 Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT && in performBuildVectorCombine()
20254 isa<ConstantSDNode>(Elt0->getOperand(1)) && in performBuildVectorCombine()
20255 isa<ConstantSDNode>(Elt1->getOperand(1)) && in performBuildVectorCombine()
20257 Elt0->getOperand(0) == Elt1->getOperand(0) && in performBuildVectorCombine()
20259 Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1) && in performBuildVectorCombine()
20262 Elt0->getConstantOperandVal(1) % VT.getVectorMinNumElements() == 0) { in performBuildVectorCombine()
20263 SDValue VecToExtend = Elt0->getOperand(0); in performBuildVectorCombine()
20268 SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL); in performBuildVectorCombine()
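// [Illustrative sketch, not part of AArch64ISelLowering.cpp] The build_vector
// fold above in array form: gathering two adjacent, suitably aligned lanes of
// a wider vector yields the same values as the two-element subvector starting
// at the first lane. Sizes and element values here are arbitrary.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned NumElts = 8, SubElts = 2;
  const uint32_t Src[NumElts] = {10, 11, 12, 13, 14, 15, 16, 17};
  for (unsigned Start = 0; Start < NumElts; Start += SubElts) {
    // build_vector(extract_elt(Src, Start), extract_elt(Src, Start + 1))
    const uint32_t Built[SubElts] = {Src[Start], Src[Start + 1]};
    // ... equals extract_subvector(Src, Start).
    for (unsigned I = 0; I < SubElts; ++I)
      assert(Built[I] == Src[Start + I]);
  }
  return 0;
}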
20280 EVT VT = N->getValueType(0); in performTruncateCombine()
20281 SDValue N0 = N->getOperand(0); in performTruncateCombine()
20300 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); in isExtendOrShiftOperand()
20309 uint64_t AndMask = CSD->getZExtValue(); in isExtendOrShiftOperand()
20318 // (N - Y) + Z --> (Z - Y) + N
20341 EVT VT = N->getValueType(0); in performAddCombineSubShift()
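// [Illustrative sketch, not part of AArch64ISelLowering.cpp] The algebraic
// identity used by the (N - Y) + Z --> (Z - Y) + N rewrite above, checked with
// unsigned wrap-around, which models the DAG's two's-complement integers.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Vals[] = {0u, 1u, 42u, 0xFFFFFFFFFFFFFFFFull};
  for (uint64_t N : Vals)
    for (uint64_t Y : Vals)
      for (uint64_t Z : Vals)
        assert((N - Y) + Z == (Z - Y) + N);
  return 0;
}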
20352 if (N->getOpcode() != ISD::ADD) in performAddCombineForShiftedOperands()
20357 EVT VT = N->getValueType(0); in performAddCombineForShiftedOperands()
20362 SDValue LHS = N->getOperand(0); in performAddCombineForShiftedOperands()
20363 SDValue RHS = N->getOperand(1); in performAddCombineForShiftedOperands()
20377 // with LSL (shift > 4). For the rest of the processors, this is a no-op for in performAddCombineForShiftedOperands()
20390 if (N->getOpcode() != ISD::SUB) in performSubAddMULCombine()
20393 SDValue Add = N->getOperand(1); in performSubAddMULCombine()
20394 SDValue X = N->getOperand(0); in performSubAddMULCombine()
20412 EVT VT = N->getValueType(0); in performSubAddMULCombine()
20434 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::SUB) in performSVEMulAddSubCombine()
20437 if (!N->getValueType(0).isFixedLengthVector()) in performSVEMulAddSubCombine()
20440 auto performOpt = [&DAG, &N](SDValue Op0, SDValue Op1) -> SDValue { in performSVEMulAddSubCombine()
20444 if (!cast<ConstantSDNode>(Op1->getOperand(1))->isZero()) in performSVEMulAddSubCombine()
20447 SDValue MulValue = Op1->getOperand(0); in performSVEMulAddSubCombine()
20460 DAG.getNode(N->getOpcode(), SDLoc(N), ScalableVT, {ScaledOp, MulValue}); in performSVEMulAddSubCombine()
20461 return convertFromScalableVector(DAG, N->getValueType(0), NewValue); in performSVEMulAddSubCombine()
20464 if (SDValue res = performOpt(N->getOperand(0), N->getOperand(1))) in performSVEMulAddSubCombine()
20466 else if (N->getOpcode() == ISD::ADD) in performSVEMulAddSubCombine()
20467 return performOpt(N->getOperand(1), N->getOperand(0)); in performSVEMulAddSubCombine()
20475 EVT VT = N->getValueType(0); in performAddSubIntoVectorOp()
20477 DAG.getTargetLoweringInfo().isOperationExpand(N->getOpcode(), MVT::v1i64)) in performAddSubIntoVectorOp()
20479 SDValue Op0 = N->getOperand(0); in performAddSubIntoVectorOp()
20480 SDValue Op1 = N->getOperand(1); in performAddSubIntoVectorOp()
20504 DAG.getNode(N->getOpcode(), DL, MVT::v1i64, Op0, Op1), in performAddSubIntoVectorOp()
20510 if (!BV->hasOneUse()) in isLoadOrMultipleLoads()
20513 if (!Ld || !Ld->isSimple()) in isLoadOrMultipleLoads()
20521 if (!Ld || !Ld->isSimple() || !BV.getOperand(Op).hasOneUse()) in isLoadOrMultipleLoads()
20552 if (SV1->getMaskElt(I) != I || in isLoadOrMultipleLoads()
20553 SV1->getMaskElt(I + NumSubElts) != I + NumSubElts || in isLoadOrMultipleLoads()
20554 SV1->getMaskElt(I + NumSubElts * 2) != I + NumSubElts * 2 || in isLoadOrMultipleLoads()
20555 SV1->getMaskElt(I + NumSubElts * 3) != I + NumElts) in isLoadOrMultipleLoads()
20558 if (SV2->getMaskElt(I) != I || in isLoadOrMultipleLoads()
20559 SV2->getMaskElt(I + NumSubElts) != I + NumSubElts || in isLoadOrMultipleLoads()
20560 SV2->getMaskElt(I + NumSubElts * 2) != I + NumElts) in isLoadOrMultipleLoads()
20563 auto *Ld0 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(0)); in isLoadOrMultipleLoads()
20564 auto *Ld1 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(1)); in isLoadOrMultipleLoads()
20565 auto *Ld2 = dyn_cast<LoadSDNode>(SV2->getOperand(1).getOperand(0)); in isLoadOrMultipleLoads()
20567 if (!Ld0 || !Ld1 || !Ld2 || !Ld3 || !Ld0->isSimple() || !Ld1->isSimple() || in isLoadOrMultipleLoads()
20568 !Ld2->isSimple() || !Ld3->isSimple()) in isLoadOrMultipleLoads()
20593 unsigned Size = get<0>(L)->getValueType(0).getSizeInBits(); in areLoadedOffsetButOtherwiseSame()
20594 return Size == get<1>(L)->getValueType(0).getSizeInBits() && in areLoadedOffsetButOtherwiseSame()
20635 EVT VT = N->getValueType(0); in performExtBinopLoadFold()
20641 SDValue Other = N->getOperand(0); in performExtBinopLoadFold()
20642 SDValue Shift = N->getOperand(1); in performExtBinopLoadFold()
20643 if (Shift.getOpcode() != ISD::SHL && N->getOpcode() != ISD::SUB) in performExtBinopLoadFold()
20692 SDValue Load = DAG.getLoad(DLoadVT, SDLoc(L0), L0->getChain(), in performExtBinopLoadFold()
20693 L0->getBasePtr(), L0->getPointerInfo(), in performExtBinopLoadFold()
20694 L0->getOriginalAlign()); in performExtBinopLoadFold()
20703 for (const auto &[O0, O1] : zip(Op0->op_values(), Op1->op_values())) in performExtBinopLoadFold()
20749 return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift); in performExtBinopLoadFold()
20780 // Massage DAGs which we can use the high-half "long" operations on into
20783 // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
20793 SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1); in tryCombineLongOpWithDup()
20794 SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2); in tryCombineLongOpWithDup()
20800 // just as well use the non-high version) so look for a corresponding extract in tryCombineLongOpWithDup()
20814 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS); in tryCombineLongOpWithDup()
20816 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0), in tryCombineLongOpWithDup()
20817 N->getOperand(0), LHS, RHS); in tryCombineLongOpWithDup()
20821 MVT ElemTy = N->getSimpleValueType(0).getScalarType(); in tryCombineShiftImm()
20825 if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) { in tryCombineShiftImm()
20829 if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, in tryCombineShiftImm()
20835 } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) { in tryCombineShiftImm()
20836 ShiftAmount = CVN->getSExtValue(); in tryCombineShiftImm()
20842 return N->getOperand(1); in tryCombineShiftImm()
20877 ShiftAmount = -ShiftAmount; in tryCombineShiftImm()
20884 EVT VT = N->getValueType(0); in tryCombineShiftImm()
20885 SDValue Op = N->getOperand(1); in tryCombineShiftImm()
20892 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) { in tryCombineShiftImm()
20894 DAG.getConstant(-ShiftAmount, dl, MVT::i32)); in tryCombineShiftImm()
20895 if (N->getValueType(0) == MVT::i64) in tryCombineShiftImm()
20902 if (N->getValueType(0) == MVT::i64) in tryCombineShiftImm()
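// [Illustrative sketch, not part of AArch64ISelLowering.cpp] The shift
// intrinsics handled above encode right shifts as negative immediates; the
// lowering negates the amount and emits an ordinary right shift. shiftByImm
// below is an invented scalar model of that convention.
#include <cassert>
#include <cstdint>

static uint64_t shiftByImm(uint64_t X, int Amount) {
  return Amount >= 0 ? X << Amount : X >> -Amount;
}

int main() {
  const uint64_t X = 0xF0F0F0F0F0F0F0F0ull;
  assert(shiftByImm(X, 4) == X << 4);
  assert(shiftByImm(X, -4) == X >> 4);   // negative amount == logical shift right
  assert(shiftByImm(X, -63) == X >> 63);
  return 0;
}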
20915 SDValue AndN = N->getOperand(2); in tryCombineCRC32()
20920 if (!CMask || CMask->getZExtValue() != Mask) in tryCombineCRC32()
20924 N->getOperand(0), N->getOperand(1), AndN.getOperand(0)); in tryCombineCRC32()
20930 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), in combineAcrossLanesIntrinsic()
20932 N->getOperand(1).getSimpleValueType(), in combineAcrossLanesIntrinsic()
20933 N->getOperand(1)), in combineAcrossLanesIntrinsic()
20939 SDValue Op1 = N->getOperand(1); in LowerSVEIntrinsicIndex()
20940 SDValue Op2 = N->getOperand(2); in LowerSVEIntrinsicIndex()
20946 SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0)); in LowerSVEIntrinsicIndex()
20947 SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2); in LowerSVEIntrinsicIndex()
20948 SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step); in LowerSVEIntrinsicIndex()
20949 SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1); in LowerSVEIntrinsicIndex()
20950 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base); in LowerSVEIntrinsicIndex()
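// [Illustrative sketch, not part of AArch64ISelLowering.cpp] Per-lane view of
// the expansion above: index(Base, Step) is splat(Base) + step_vector *
// splat(Step), i.e. lane I holds Base + I * Step. The lane count is arbitrary.
#include <cassert>
#include <cstdint>

int main() {
  const int NumLanes = 8;
  const int64_t Base = 100, Step = -3;
  int64_t StepVec[NumLanes], Mul[NumLanes], Res[NumLanes];
  for (int I = 0; I < NumLanes; ++I) StepVec[I] = I;             // ISD::STEP_VECTOR
  for (int I = 0; I < NumLanes; ++I) Mul[I] = StepVec[I] * Step; // * splat(Step)
  for (int I = 0; I < NumLanes; ++I) Res[I] = Mul[I] + Base;     // + splat(Base)
  for (int I = 0; I < NumLanes; ++I)
    assert(Res[I] == Base + I * Step);                           // index(Base, Step) lane I
  return 0;
}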
20955 SDValue Scalar = N->getOperand(3); in LowerSVEIntrinsicDUP()
20961 SDValue Passthru = N->getOperand(1); in LowerSVEIntrinsicDUP()
20962 SDValue Pred = N->getOperand(2); in LowerSVEIntrinsicDUP()
20963 return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0), in LowerSVEIntrinsicDUP()
20970 EVT VT = N->getValueType(0); in LowerSVEIntrinsicEXT()
20974 // Current lowering only supports the SVE-ACLE types. in LowerSVEIntrinsicEXT()
20984 SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1)); in LowerSVEIntrinsicEXT()
20985 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2)); in LowerSVEIntrinsicEXT()
20986 SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3), in LowerSVEIntrinsicEXT()
20999 SDValue Comparator = N->getOperand(3); in tryConvertSVEWideCompare()
21003 EVT VT = N->getValueType(0); in tryConvertSVEWideCompare()
21004 EVT CmpVT = N->getOperand(2).getValueType(); in tryConvertSVEWideCompare()
21005 SDValue Pred = N->getOperand(1); in tryConvertSVEWideCompare()
21022 int64_t ImmVal = CN->getSExtValue(); in tryConvertSVEWideCompare()
21023 if (ImmVal >= -16 && ImmVal <= 15) in tryConvertSVEWideCompare()
21036 uint64_t ImmVal = CN->getZExtValue(); in tryConvertSVEWideCompare()
21051 N->getOperand(2), Splat, DAG.getCondCode(CC)); in tryConvertSVEWideCompare()
21099 SDValue Pred = N->getOperand(1); in combineSVEReductionInt()
21100 SDValue VecToReduce = N->getOperand(2); in combineSVEReductionInt()
21104 EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0)); in combineSVEReductionInt()
21110 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce, in combineSVEReductionInt()
21118 SDValue Pred = N->getOperand(1); in combineSVEReductionFP()
21119 SDValue VecToReduce = N->getOperand(2); in combineSVEReductionFP()
21127 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce, in combineSVEReductionFP()
21135 SDValue Pred = N->getOperand(1); in combineSVEReductionOrderedFP()
21136 SDValue InitVal = N->getOperand(2); in combineSVEReductionOrderedFP()
21137 SDValue VecToReduce = N->getOperand(3); in combineSVEReductionOrderedFP()
21150 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce, in combineSVEReductionOrderedFP()
21160 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!"); in convertMergedOpToPredOp()
21161 assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!"); in convertMergedOpToPredOp()
21162 SDValue Pg = N->getOperand(1); in convertMergedOpToPredOp()
21163 SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2); in convertMergedOpToPredOp()
21164 SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3); in convertMergedOpToPredOp()
21169 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2); in convertMergedOpToPredOp()
21171 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2); in convertMergedOpToPredOp()
21184 if (!Subtarget->hasSVE2p1()) in tryCombineWhileLo()
21187 if (!N->hasNUsesOfValue(2, 0)) in tryCombineWhileLo()
21190 const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2; in tryCombineWhileLo()
21194 auto It = N->use_begin(); in tryCombineWhileLo()
21198 if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR || in tryCombineWhileLo()
21199 Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR) in tryCombineWhileLo()
21202 uint64_t OffLo = Lo->getConstantOperandVal(1); in tryCombineWhileLo()
21203 uint64_t OffHi = Hi->getConstantOperandVal(1); in tryCombineWhileLo()
21213 EVT HalfVec = Lo->getValueType(0); in tryCombineWhileLo()
21214 if (HalfVec != Hi->getValueType(0) || in tryCombineWhileLo()
21222 SDValue Idx = N->getOperand(1); in tryCombineWhileLo()
21223 SDValue TC = N->getOperand(2); in tryCombineWhileLo()
21230 {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC}); in tryCombineWhileLo()
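// [Illustrative sketch, not part of AArch64ISelLowering.cpp] Assumed whilelo /
// get_active_lane_mask semantics for the split above: lane I of the full mask
// is (Idx + I < TC), so slicing the mask at HalfSize yields the low half and a
// high half whose lane I corresponds to Idx + HalfSize + I.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned NumLanes = 16, HalfSize = NumLanes / 2;
  const uint64_t Idx = 5, TC = 13;
  bool Full[NumLanes], Lo[HalfSize], Hi[HalfSize];
  for (unsigned I = 0; I < NumLanes; ++I)
    Full[I] = (Idx + I) < TC;
  for (unsigned I = 0; I < HalfSize; ++I) {
    Lo[I] = Full[I];            // extract_subvector at offset 0
    Hi[I] = Full[HalfSize + I]; // extract_subvector at offset HalfSize
  }
  for (unsigned I = 0; I < HalfSize; ++I) {
    assert(Lo[I] == ((Idx + I) < TC));
    assert(Hi[I] == ((Idx + HalfSize + I) < TC));
  }
  return 0;
}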
21262 return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21263 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21265 return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21266 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21268 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21269 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21271 return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21272 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21274 return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21275 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21277 return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21278 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21280 return DAG.getNode(AArch64ISD::PMULL, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21281 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21293 return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21294 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21296 return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21297 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21306 if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64) in performIntrinsicCombine()
21331 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21332 N->getOperand(1)); in performIntrinsicCombine()
21336 return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21337 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21339 return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21340 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21342 return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21343 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21345 return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21346 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21348 return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21349 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21351 return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21352 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21354 return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21355 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21357 return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21358 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21360 return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21361 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21363 return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21364 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21366 return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21367 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21369 return DAG.getNode(AArch64ISD::FDIV_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21370 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21372 return DAG.getNode(AArch64ISD::FMAX_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21373 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21375 return DAG.getNode(AArch64ISD::FMAXNM_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21376 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21378 return DAG.getNode(AArch64ISD::FMA_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21379 N->getOperand(1), N->getOperand(3), N->getOperand(4), in performIntrinsicCombine()
21380 N->getOperand(2)); in performIntrinsicCombine()
21382 return DAG.getNode(AArch64ISD::FMIN_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21383 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21385 return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21386 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21388 return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21389 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21391 return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21392 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21394 return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N->getOperand(2), in performIntrinsicCombine()
21395 N->getOperand(3)); in performIntrinsicCombine()
21397 return DAG.getNode(ISD::SUB, SDLoc(N), N->getValueType(0), N->getOperand(2), in performIntrinsicCombine()
21398 N->getOperand(3)); in performIntrinsicCombine()
21402 return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), N->getOperand(2), in performIntrinsicCombine()
21403 N->getOperand(3)); in performIntrinsicCombine()
21405 return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21406 N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21408 return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2), in performIntrinsicCombine()
21409 N->getOperand(3)); in performIntrinsicCombine()
21411 return DAG.getNode(ISD::OR, SDLoc(N), N->getValueType(0), N->getOperand(2), in performIntrinsicCombine()
21412 N->getOperand(3)); in performIntrinsicCombine()
21414 return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21415 N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21417 return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21418 N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21420 return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21421 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21423 return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21424 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21428 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21429 N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21433 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21434 N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21436 return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21437 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21439 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21440 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21442 return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21443 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21445 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21446 N->getOperand(1), N->getOperand(2)); in performIntrinsicCombine()
21448 return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21449 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21451 if (!N->getOperand(2).getValueType().isFloatingPoint()) in performIntrinsicCombine()
21453 N->getValueType(0), N->getOperand(1), N->getOperand(2), in performIntrinsicCombine()
21454 N->getOperand(3), DAG.getCondCode(ISD::SETUGE)); in performIntrinsicCombine()
21457 if (!N->getOperand(2).getValueType().isFloatingPoint()) in performIntrinsicCombine()
21459 N->getValueType(0), N->getOperand(1), N->getOperand(2), in performIntrinsicCombine()
21460 N->getOperand(3), DAG.getCondCode(ISD::SETUGT)); in performIntrinsicCombine()
21465 N->getValueType(0), N->getOperand(1), N->getOperand(2), in performIntrinsicCombine()
21466 N->getOperand(3), DAG.getCondCode(ISD::SETGE)); in performIntrinsicCombine()
21471 N->getValueType(0), N->getOperand(1), N->getOperand(2), in performIntrinsicCombine()
21472 N->getOperand(3), DAG.getCondCode(ISD::SETGT)); in performIntrinsicCombine()
21477 N->getValueType(0), N->getOperand(1), N->getOperand(2), in performIntrinsicCombine()
21478 N->getOperand(3), DAG.getCondCode(ISD::SETEQ)); in performIntrinsicCombine()
21483 N->getValueType(0), N->getOperand(1), N->getOperand(2), in performIntrinsicCombine()
21484 N->getOperand(3), DAG.getCondCode(ISD::SETNE)); in performIntrinsicCombine()
21488 N->getValueType(0), N->getOperand(1), N->getOperand(2), in performIntrinsicCombine()
21489 N->getOperand(3), DAG.getCondCode(ISD::SETUO)); in performIntrinsicCombine()
21504 return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0), in performIntrinsicCombine()
21505 N->getOperand(1), N->getOperand(2), N->getOperand(3)); in performIntrinsicCombine()
21527 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2), in performIntrinsicCombine()
21530 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2), in performIntrinsicCombine()
21533 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2), in performIntrinsicCombine()
21542 unsigned OC = N->getOpcode(); in isCheapToExtend()
21556 assert(N->getOpcode() == ISD::SIGN_EXTEND && in performSignExtendSetCCCombine()
21557 N->getOperand(0)->getOpcode() == ISD::SETCC); in performSignExtendSetCCCombine()
21558 const SDValue SetCC = N->getOperand(0); in performSignExtendSetCCCombine()
21562 if (!CCOp0->getValueType(0).isInteger() || in performSignExtendSetCCCombine()
21563 !CCOp1->getValueType(0).isInteger()) in performSignExtendSetCCCombine()
21567 cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get(); in performSignExtendSetCCCombine()
21575 DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0); in performSignExtendSetCCCombine()
21577 DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1); in performSignExtendSetCCCombine()
21580 SDLoc(SetCC), N->getValueType(0), Ext1, Ext2, in performSignExtendSetCCCombine()
21581 cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get()); in performSignExtendSetCCCombine()
21594 if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND && in performExtendCombine()
21595 (N->getOperand(0).getOpcode() == ISD::ABDU || in performExtendCombine()
21596 N->getOperand(0).getOpcode() == ISD::ABDS)) { in performExtendCombine()
21597 SDNode *ABDNode = N->getOperand(0).getNode(); in performExtendCombine()
21603 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD); in performExtendCombine()
21606 if (N->getValueType(0).isFixedLengthVector() && in performExtendCombine()
21607 N->getOpcode() == ISD::SIGN_EXTEND && in performExtendCombine()
21608 N->getOperand(0)->getOpcode() == ISD::SETCC) in performExtendCombine()
21631 OrigAlignment, St.getMemOperand()->getFlags()); in splitStoreSplat()
21634 if (BasePtr->getOpcode() == ISD::ADD && in splitStoreSplat()
21635 isa<ConstantSDNode>(BasePtr->getOperand(1))) { in splitStoreSplat()
21636 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue(); in splitStoreSplat()
21637 BasePtr = BasePtr->getOperand(0); in splitStoreSplat()
21641 while (--NumVecElts) { in splitStoreSplat()
21648 St.getMemOperand()->getFlags()); in splitStoreSplat()
21686 EVT VT = N->getValueType(0); in performLD1Combine()
21696 SDValue Ops[] = { N->getOperand(0), // Chain in performLD1Combine()
21697 N->getOperand(2), // Pg in performLD1Combine()
21698 N->getOperand(3), // Base in performLD1Combine()
21712 EVT VT = N->getValueType(0); in performLDNT1Combine()
21713 EVT PtrTy = N->getOperand(3).getValueType(); in performLDNT1Combine()
21721 SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(), in performLDNT1Combine()
21722 MINode->getOperand(3), DAG.getUNDEF(PtrTy), in performLDNT1Combine()
21723 MINode->getOperand(2), PassThru, in performLDNT1Combine()
21724 MINode->getMemoryVT(), MINode->getMemOperand(), in performLDNT1Combine()
21741 EVT VT = N->getValueType(0); in performLD1ReplicateCombine()
21747 SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)}; in performLD1ReplicateCombine()
21759 SDValue Data = N->getOperand(2); in performST1Combine()
21773 SDValue Ops[] = { N->getOperand(0), // Chain in performST1Combine()
21775 N->getOperand(4), // Base in performST1Combine()
21776 N->getOperand(3), // Pg in performST1Combine()
21780 return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops); in performST1Combine()
21786 SDValue Data = N->getOperand(2); in performSTNT1Combine()
21788 EVT PtrTy = N->getOperand(4).getValueType(); in performSTNT1Combine()
21794 return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4), in performSTNT1Combine()
21795 DAG.getUNDEF(PtrTy), MINode->getOperand(3), in performSTNT1Combine()
21796 MINode->getMemoryVT(), MINode->getMemOperand(), in performSTNT1Combine()
21803 /// if the zero constant is not re-used, since one instruction and one register
21849 int64_t Offset = St.getBasePtr()->getConstantOperandVal(1); in replaceZeroVectorStore()
21850 if (Offset < -512 || Offset > 504) in replaceZeroVectorStore()
21904 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1); in replaceSplatVectorStore()
21921 uint64_t IndexVal = CIndex->getZExtValue(); in replaceSplatVectorStore()
21940 if (S->isVolatile() || S->isIndexed()) in splitStores()
21943 SDValue StVal = S->getValue(); in splitStores()
21959 if (!Subtarget->isMisaligned128StoreSlow()) in splitStores()
21962 // Don't split at -Oz. in splitStores()
21967 // those up regresses performance on micro-benchmarks and olden/bh. in splitStores()
21976 if (VT.getSizeInBits() != 128 || S->getAlign() >= Align(16) || in splitStores()
21977 S->getAlign() <= Align(2)) in splitStores()
21995 SDValue BasePtr = S->getBasePtr(); in splitStores()
21997 DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(), in splitStores()
21998 S->getAlign(), S->getMemOperand()->getFlags()); in splitStores()
22002 S->getPointerInfo(), S->getAlign(), in splitStores()
22003 S->getMemOperand()->getFlags()); in splitStores()
22007 assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!"); in performSpliceCombine()
22009 // splice(pg, op1, undef) -> op1 in performSpliceCombine()
22010 if (N->getOperand(2).isUndef()) in performSpliceCombine()
22011 return N->getOperand(1); in performSpliceCombine()
22018 assert((N->getOpcode() == AArch64ISD::UUNPKHI || in performUnpackCombine()
22019 N->getOpcode() == AArch64ISD::UUNPKLO) && in performUnpackCombine()
22022 // uunpklo/hi undef -> undef in performUnpackCombine()
22023 if (N->getOperand(0).isUndef()) in performUnpackCombine()
22024 return DAG.getUNDEF(N->getValueType(0)); in performUnpackCombine()
22029 if (N->getOperand(0).getOpcode() == ISD::MLOAD && in performUnpackCombine()
22030 N->getOpcode() == AArch64ISD::UUNPKLO) { in performUnpackCombine()
22031 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N->getOperand(0)); in performUnpackCombine()
22032 SDValue Mask = MLD->getMask(); in performUnpackCombine()
22035 if (MLD->isUnindexed() && MLD->getExtensionType() != ISD::SEXTLOAD && in performUnpackCombine()
22036 SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE && in performUnpackCombine()
22037 (MLD->getPassThru()->isUndef() || in performUnpackCombine()
22038 isZerosVector(MLD->getPassThru().getNode()))) { in performUnpackCombine()
22039 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits(); in performUnpackCombine()
22040 unsigned PgPattern = Mask->getConstantOperandVal(0); in performUnpackCombine()
22041 EVT VT = N->getValueType(0); in performUnpackCombine()
22051 VT, DL, MLD->getChain(), MLD->getBasePtr(), MLD->getOffset(), Mask, in performUnpackCombine()
22052 PassThru, MLD->getMemoryVT(), MLD->getMemOperand(), in performUnpackCombine()
22053 MLD->getAddressingMode(), ISD::ZEXTLOAD); in performUnpackCombine()
22066 if (N->getOpcode() != AArch64ISD::UZP1) in isHalvingTruncateAndConcatOfLegalIntScalableType()
22068 SDValue Op0 = N->getOperand(0); in isHalvingTruncateAndConcatOfLegalIntScalableType()
22069 EVT SrcVT = Op0->getValueType(0); in isHalvingTruncateAndConcatOfLegalIntScalableType()
22070 EVT DstVT = N->getValueType(0); in isHalvingTruncateAndConcatOfLegalIntScalableType()
22078 // uzp1(rshrnb(uunpklo(X),C), rshrnb(uunpkhi(X), C)) -> urshr(X, C)
22080 assert(N->getOpcode() == AArch64ISD::UZP1 && "Only UZP1 expected."); in tryCombineExtendRShTrunc()
22081 SDValue Op0 = N->getOperand(0); in tryCombineExtendRShTrunc()
22082 SDValue Op1 = N->getOperand(1); in tryCombineExtendRShTrunc()
22083 EVT ResVT = N->getValueType(0); in tryCombineExtendRShTrunc()
22111 // t1 = nxv8i16 add(X, 1 << (ShiftValue - 1))
22121 EVT VT = Srl->getValueType(0); in trySimplifySrlAddToRshrnb()
22122 if (!VT.isScalableVector() || !Subtarget->hasSVE2()) in trySimplifySrlAddToRshrnb()
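// [Illustrative sketch, not part of AArch64ISelLowering.cpp] The add-then-srl
// pattern named above is a rounding right shift: adding 1 << (Shift - 1)
// before shifting rounds to nearest, which is what URSHR/RSHRNB compute. In
// the real pattern the value has been widened (uunpklo/uunpkhi) first, so the
// add cannot overflow; the 32-bit model below simply keeps inputs small.
#include <cassert>
#include <cstdint>

static uint32_t roundingShiftRight(uint32_t X, unsigned Shift) {
  return (X + (1u << (Shift - 1))) >> Shift; // add(X, 1 << (Shift-1)) ; srl Shift
}

int main() {
  assert(roundingShiftRight(100, 3) == 13); // 100/8 = 12.5   -> 13
  assert(roundingShiftRight(99, 3) == 12);  // 99/8  = 12.375 -> 12
  assert(roundingShiftRight(4, 3) == 1);    // 4/8   = 0.5    -> ties round up
  return 0;
}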
22149 SDValue Op0 = N->getOperand(0); in performUzpCombine()
22150 SDValue Op1 = N->getOperand(1); in performUzpCombine()
22151 EVT ResVT = N->getValueType(0); in performUzpCombine()
22153 // uzp(extract_lo(x), extract_hi(x)) -> extract_lo(uzp x, x) in performUzpCombine()
22165 SDValue Uzp = DAG.getNode(N->getOpcode(), DL, WidenedResVT, SourceVec, in performUzpCombine()
22173 if (N->getOpcode() == AArch64ISD::UZP2) in performUzpCombine()
22176 // uzp1(x, undef) -> concat(truncate(x), undef) in performUzpCombine()
22232 // uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y) in performUzpCombine()
22251 // truncating uzp1(x, y) -> xtn(concat (x, y)) in performUzpCombine()
22264 // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y)) in performUzpCombine()
22317 unsigned Opc = N->getOpcode(); in performGLD1Combine()
22335 SDValue Chain = N->getOperand(0); in performGLD1Combine()
22336 SDValue Pg = N->getOperand(1); in performGLD1Combine()
22337 SDValue Base = N->getOperand(2); in performGLD1Combine()
22338 SDValue Offset = N->getOperand(3); in performGLD1Combine()
22339 SDValue Ty = N->getOperand(4); in performGLD1Combine()
22341 EVT ResVT = N->getValueType(0); in performGLD1Combine()
22353 EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType(); in performGLD1Combine()
22355 // If the predicate for the sign- or zero-extended offset is the in performGLD1Combine()
22356 // same as the predicate used for this load and the sign-/zero-extension in performGLD1Combine()
22357 // was from a 32-bits... in performGLD1Combine()
22378 assert(N->getOpcode() == AArch64ISD::VASHR || in performVectorShiftCombine()
22379 N->getOpcode() == AArch64ISD::VLSHR); in performVectorShiftCombine()
22381 SDValue Op = N->getOperand(0); in performVectorShiftCombine()
22384 unsigned ShiftImm = N->getConstantOperandVal(1); in performVectorShiftCombine()
22388 if (N->getOpcode() == AArch64ISD::VASHR && in performVectorShiftCombine()
22390 N->getOperand(1) == Op.getOperand(1)) in performVectorShiftCombine()
22395 if (N->getFlags().hasExact()) in performVectorShiftCombine()
22408 // sunpklo(sext(pred)) -> sext(extract_low_half(pred)) in performSunpkloCombine()
22411 if (N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND && in performSunpkloCombine()
22412 N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() == in performSunpkloCombine()
22414 SDValue CC = N->getOperand(0)->getOperand(0); in performSunpkloCombine()
22415 auto VT = CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext()); in performSunpkloCombine()
22418 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Unpk); in performSunpkloCombine()
22424 /// Target-specific DAG combine function for post-increment LD1 (lane) and
22425 /// post-increment LD1R.
22433 EVT VT = N->getValueType(0); in performPostLD1Combine()
22439 SDNode *LD = N->getOperand(LoadIdx).getNode(); in performPostLD1Combine()
22441 if (LD->getOpcode() != ISD::LOAD) in performPostLD1Combine()
22447 Lane = N->getOperand(2); in performPostLD1Combine()
22449 if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements()) in performPostLD1Combine()
22454 EVT MemVT = LoadSDN->getMemoryVT(); in performPostLD1Combine()
22461 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE; in performPostLD1Combine()
22472 if (N->hasOneUse()) { in performPostLD1Combine()
22473 unsigned UseOpc = N->use_begin()->getOpcode(); in performPostLD1Combine()
22478 SDValue Addr = LD->getOperand(1); in performPostLD1Combine()
22479 SDValue Vector = N->getOperand(0); in performPostLD1Combine()
22481 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE = in performPostLD1Combine()
22482 Addr.getNode()->use_end(); UI != UE; ++UI) { in performPostLD1Combine()
22484 if (User->getOpcode() != ISD::ADD in performPostLD1Combine()
22489 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); in performPostLD1Combine()
22491 uint32_t IncVal = CInc->getZExtValue(); in performPostLD1Combine()
22511 Ops.push_back(LD->getOperand(0)); // Chain in performPostLD1Combine()
22524 LoadSDN->getMemOperand()); in performPostLD1Combine()
22558 assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) && in foldTruncStoreOfExt()
22562 if (!Store->isTruncatingStore() || Store->isIndexed()) in foldTruncStoreOfExt()
22564 SDValue Ext = Store->getValue(); in foldTruncStoreOfExt()
22569 SDValue Orig = Ext->getOperand(0); in foldTruncStoreOfExt()
22570 if (Store->getMemoryVT() != Orig.getValueType()) in foldTruncStoreOfExt()
22572 return DAG.getStore(Store->getChain(), SDLoc(Store), Orig, in foldTruncStoreOfExt()
22573 Store->getBasePtr(), Store->getMemOperand()); in foldTruncStoreOfExt()
22596 EVT MemVT = LD->getMemoryVT(); in combineV3I8LoadExt()
22598 LD->getOriginalAlign() >= 4) in combineV3I8LoadExt()
22603 SDValue Chain = LD->getChain(); in combineV3I8LoadExt()
22604 SDValue BasePtr = LD->getBasePtr(); in combineV3I8LoadExt()
22605 MachineMemOperand *MMO = LD->getMemOperand(); in combineV3I8LoadExt()
22606 assert(LD->getOffset().isUndef() && "undef offset expected"); in combineV3I8LoadExt()
22635 // nontemporal loads larger than 256-bit loads for odd types so LDNPQ 256-bit
22641 if (Subtarget->supportsAddressTopByteIgnored()) in performLOADCombine()
22642 performTBISimplification(N->getOperand(1), DCI, DAG); in performLOADCombine()
22645 if (LD->isVolatile() || !Subtarget->isLittleEndian()) in performLOADCombine()
22651 if (!LD->isNonTemporal()) in performLOADCombine()
22654 EVT MemVT = LD->getMemoryVT(); in performLOADCombine()
22661 SDValue Chain = LD->getChain(); in performLOADCombine()
22662 SDValue BasePtr = LD->getBasePtr(); in performLOADCombine()
22663 SDNodeFlags Flags = LD->getFlags(); in performLOADCombine()
22666 // Replace any non-temporal load over 256 bits with a series of 256-bit loads in performLOADCombine()
22667 // and a scalar/vector load of less than 256 bits. This way we can utilize 256-bit in performLOADCombine()
22673 // Create all 256-bit loads starting from offset 0 and up to Num256Loads-1*32. in performLOADCombine()
22678 Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset); in performLOADCombine()
22680 NewVT, DL, Chain, NewPtr, LD->getPointerInfo().getWithOffset(PtrOffset), in performLOADCombine()
22681 NewAlign, LD->getMemOperand()->getFlags(), LD->getAAInfo()); in performLOADCombine()
22688 // 256-bit loads and inserting the remaining load to it. We extract the in performLOADCombine()
22691 unsigned PtrOffset = (MemVT.getSizeInBits() - BitsRemaining) / 8; in performLOADCombine()
22697 Align NewAlign = commonAlignment(LD->getAlign(), PtrOffset); in performLOADCombine()
22700 LD->getPointerInfo().getWithOffset(PtrOffset), NewAlign, in performLOADCombine()
22701 LD->getMemOperand()->getFlags(), LD->getAAInfo()); in performLOADCombine()
22737 for (SDValue Operand : Op->op_values()) { in tryGetOriginalBoolVectorType()
22774 unsigned BitsPerElement = std::max(64 / NumElts, 8u); // >= 64-bit vector in vectorToScalarBitmask()
22794 // create 8x 16-bit values, and then perform the vector reduce. in vectorToScalarBitmask()
22814 unsigned MaxBitMask = 1u << (VecVT.getVectorNumElements() - 1); in vectorToScalarBitmask()
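// [Illustrative sketch, not part of AArch64ISelLowering.cpp] Scalar model of
// the vector-to-bitmask idea used here: AND each boolean lane with its power
// of two and OR/ADD-reduce, so lane I becomes bit I of the scalar mask. Lane
// count and values are arbitrary.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned NumLanes = 8;
  const bool Lanes[NumLanes] = {true, false, true, true, false, false, false, true};
  uint32_t Mask = 0;
  for (unsigned I = 0; I < NumLanes; ++I)
    Mask |= (Lanes[I] ? 1u : 0u) << I;   // lane I -> bit I
  assert(Mask == 0b10001101u);           // lanes 0, 2, 3 and 7 were set
  return 0;
}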
22829 if (!Store->isTruncatingStore()) in combineBoolVectorAndTruncateStore()
22833 SDValue VecOp = Store->getValue(); in combineBoolVectorAndTruncateStore()
22835 EVT MemVT = Store->getMemoryVT(); in combineBoolVectorAndTruncateStore()
22854 return DAG.getStore(Store->getChain(), DL, ExtendedBits, Store->getBasePtr(), in combineBoolVectorAndTruncateStore()
22855 Store->getMemOperand()); in combineBoolVectorAndTruncateStore()
22867 SDValue Value = ST->getValue(); in combineI8TruncStore()
22870 if (ST->isVolatile() || !Subtarget->isLittleEndian() || in combineI8TruncStore()
22875 assert(ST->getOffset().isUndef() && "undef offset expected"); in combineI8TruncStore()
22879 Value->getOperand(0).getValueType().getVectorElementType(), 4); in combineI8TruncStore()
22883 {UndefVector, Value->getOperand(0), DAG.getVectorIdxConstant(0, DL)}); in combineI8TruncStore()
22889 SDValue Chain = ST->getChain(); in combineI8TruncStore()
22890 MachineMemOperand *MMO = ST->getMemOperand(); in combineI8TruncStore()
22895 SDValue Ptr2 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset2, DL); in combineI8TruncStore()
22901 SDValue Ptr1 = DAG.getMemBasePlusOffset(ST->getBasePtr(), Offset1, DL); in combineI8TruncStore()
22906 Chain = DAG.getStore(Chain, DL, E0, ST->getBasePtr(), in combineI8TruncStore()
22916 SDValue Chain = ST->getChain(); in performSTORECombine()
22917 SDValue Value = ST->getValue(); in performSTORECombine()
22918 SDValue Ptr = ST->getBasePtr(); in performSTORECombine()
22934 Value.getNode()->hasOneUse() && ST->isUnindexed() && in performSTORECombine()
22935 Subtarget->useSVEForFixedLengthVectors() && in performSTORECombine()
22937 ValueVT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits() && in performSTORECombine()
22940 ST->getMemoryVT(), ST->getMemOperand()); in performSTORECombine()
22945 if (Subtarget->supportsAddressTopByteIgnored() && in performSTORECombine()
22946 performTBISimplification(N->getOperand(2), DCI, DAG)) in performSTORECombine()
22955 if (ST->isTruncatingStore()) { in performSTORECombine()
22956 EVT StoreVT = ST->getMemoryVT(); in performSTORECombine()
22960 trySimplifySrlAddToRshrnb(ST->getOperand(1), DAG, Subtarget)) { in performSTORECombine()
22961 return DAG.getTruncStore(ST->getChain(), ST, Rshrnb, ST->getBasePtr(), in performSTORECombine()
22962 StoreVT, ST->getMemOperand()); in performSTORECombine()
22974 SDValue Value = MST->getValue(); in performMSTORECombine()
22975 SDValue Mask = MST->getMask(); in performMSTORECombine()
22981 if (Value.getOpcode() == AArch64ISD::UZP1 && Value->hasOneUse() && in performMSTORECombine()
22982 MST->isUnindexed() && Mask->getOpcode() == AArch64ISD::PTRUE && in performMSTORECombine()
22991 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits(); in performMSTORECombine()
22992 unsigned PgPattern = Mask->getConstantOperandVal(0); in performMSTORECombine()
23000 return DAG.getMaskedStore(MST->getChain(), DL, Value.getOperand(0), in performMSTORECombine()
23001 MST->getBasePtr(), MST->getOffset(), Mask, in performMSTORECombine()
23002 MST->getMemoryVT(), MST->getMemOperand(), in performMSTORECombine()
23003 MST->getAddressingMode(), in performMSTORECombine()
23010 if (MST->isTruncatingStore()) { in performMSTORECombine()
23011 EVT ValueVT = Value->getValueType(0); in performMSTORECombine()
23012 EVT MemVT = MST->getMemoryVT(); in performMSTORECombine()
23016 return DAG.getMaskedStore(MST->getChain(), DL, Rshrnb, MST->getBasePtr(), in performMSTORECombine()
23017 MST->getOffset(), MST->getMask(), in performMSTORECombine()
23018 MST->getMemoryVT(), MST->getMemOperand(), in performMSTORECombine()
23019 MST->getAddressingMode(), true); in performMSTORECombine()
23037 // -> in foldIndexIntoBase()
23052 // -> in foldIndexIntoBase()
23083 while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG)) in findMoreOptimalIndexType()
23093 EVT DataVT = N->getOperand(1).getValueType(); in findMoreOptimalIndexType()
23095 // will later be re-extended to 64 bits in legalization in findMoreOptimalIndexType()
23098 if (ISD::isVectorShrinkable(Index.getNode(), 32, N->isIndexSigned())) { in findMoreOptimalIndexType()
23108 Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue(); in findMoreOptimalIndexType()
23118 Stride = Step << Shift->getZExtValue(); in findMoreOptimalIndexType()
23156 SDValue Chain = MGS->getChain(); in performMaskedGatherScatterCombine()
23157 SDValue Scale = MGS->getScale(); in performMaskedGatherScatterCombine()
23158 SDValue Index = MGS->getIndex(); in performMaskedGatherScatterCombine()
23159 SDValue Mask = MGS->getMask(); in performMaskedGatherScatterCombine()
23160 SDValue BasePtr = MGS->getBasePtr(); in performMaskedGatherScatterCombine()
23161 ISD::MemIndexType IndexType = MGS->getIndexType(); in performMaskedGatherScatterCombine()
23169 SDValue PassThru = MGT->getPassThru(); in performMaskedGatherScatterCombine()
23172 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, in performMaskedGatherScatterCombine()
23173 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType()); in performMaskedGatherScatterCombine()
23176 SDValue Data = MSC->getValue(); in performMaskedGatherScatterCombine()
23178 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, in performMaskedGatherScatterCombine()
23179 Ops, MSC->getMemOperand(), IndexType, in performMaskedGatherScatterCombine()
23180 MSC->isTruncatingStore()); in performMaskedGatherScatterCombine()
23183 /// Target-specific DAG combine function for NEON load/store intrinsics
23191 unsigned AddrOpIdx = N->getNumOperands() - 1; in performNEONPostLDSTCombine()
23192 SDValue Addr = N->getOperand(AddrOpIdx); in performNEONPostLDSTCombine()
23195 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), in performNEONPostLDSTCombine()
23196 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { in performNEONPostLDSTCombine()
23198 if (User->getOpcode() != ISD::ADD || in performNEONPostLDSTCombine()
23219 unsigned IntNo = N->getConstantOperandVal(1); in performNEONPostLDSTCombine()
23268 VecTy = N->getOperand(2).getValueType(); in performNEONPostLDSTCombine()
23270 VecTy = N->getValueType(0); in performNEONPostLDSTCombine()
23273 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); in performNEONPostLDSTCombine()
23275 uint32_t IncVal = CInc->getZExtValue(); in performNEONPostLDSTCombine()
23284 Ops.push_back(N->getOperand(0)); // Incoming chain in performNEONPostLDSTCombine()
23288 Ops.push_back(N->getOperand(i)); in performNEONPostLDSTCombine()
23304 MemInt->getMemoryVT(), in performNEONPostLDSTCombine()
23305 MemInt->getMemOperand()); in performNEONPostLDSTCombine()
23326 switch(V.getNode()->getOpcode()) { in checkValueWidth()
23331 if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8) in checkValueWidth()
23332 || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) { in checkValueWidth()
23333 ExtType = LoadNode->getExtensionType(); in checkValueWidth()
23339 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1)); in checkValueWidth()
23340 if ((TypeNode->getVT() == MVT::i8 && width == 8) in checkValueWidth()
23341 || (TypeNode->getVT() == MVT::i16 && width == 16)) { in checkValueWidth()
23348 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1)); in checkValueWidth()
23349 if ((TypeNode->getVT() == MVT::i8 && width == 8) in checkValueWidth()
23350 || (TypeNode->getVT() == MVT::i16 && width == 16)) { in checkValueWidth()
23358 return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) < in checkValueWidth()
23359 1LL << (width - 1); in checkValueWidth()
23370 // Diagram: an input value and a constant feed an ADD, the sum is masked by an
23371 // AND, and the masked result is compared by SUBS to drive the conditional node.
23397 // The specific equations were worked out by generating output tables for each
23401 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
23431 // symbolic values and well known constants (0, 1, -1, MaxUInt) we can in isEquivalentMaskless()
23441 AddConstant -= (1 << (width-1)); in isEquivalentMaskless()
23447 (CompConstant == MaxUInt - 1 && AddConstant < 0) || in isEquivalentMaskless()
23462 (AddConstant <= 0 && CompConstant >= -1 && in isEquivalentMaskless()
23502 // (X & C) >u Mask --> (X & (C & ~Mask)) != 0
23503 // (X & C) <u Pow2 --> (X & (C & ~(Pow2-1))) == 0
23508 ConstantSDNode *SubsC = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1)); in performSubsToAndsCombine()
23512 APInt SubsAP = SubsC->getAPIntValue(); in performSubsToAndsCombine()
23522 ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(AndNode->getOperand(1)); in performSubsToAndsCombine()
23526 APInt MaskAP = CC == AArch64CC::HI ? SubsAP : (SubsAP - 1); in performSubsToAndsCombine()
23529 APInt AndSMask = (~MaskAP) & AndC->getAPIntValue(); in performSubsToAndsCombine()
23531 AArch64ISD::ANDS, DL, SubsNode->getVTList(), AndNode->getOperand(0), in performSubsToAndsCombine()
23532 DAG.getConstant(AndSMask, DL, SubsC->getValueType(0))); in performSubsToAndsCombine()
23535 N->getOperand(CCIndex)->getValueType(0)); in performSubsToAndsCombine()
23545 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC, in performSubsToAndsCombine()
23547 return DAG.getNode(N->getOpcode(), N, N->getVTList(), Ops); in performSubsToAndsCombine()
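// [Illustrative sketch, not part of AArch64ISelLowering.cpp] Exhaustive 8-bit
// check of the identity behind the SUBS-to-ANDS rewrite above: when Mask has
// the form 2^K - 1,  (X & C) >u Mask  holds exactly when  (X & (C & ~Mask))
// is non-zero, because only bits above the low K bits can push the value past
// Mask.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned K = 1; K < 8; ++K) {
    const uint8_t Mask = uint8_t((1u << K) - 1);
    for (unsigned C = 0; C < 256; ++C)
      for (unsigned X = 0; X < 256; ++X) {
        const bool Cmp = uint8_t(X & C) > Mask;                   // (X & C) >u Mask
        const bool Ands = (uint8_t(X & C) & uint8_t(~Mask)) != 0; // ANDS with C & ~Mask
        assert(Cmp == Ands);
      }
  }
  return 0;
}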
23555 unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue(); in performCONDCombine()
23556 SDNode *SubsNode = N->getOperand(CmpIndex).getNode(); in performCONDCombine()
23557 unsigned CondOpcode = SubsNode->getOpcode(); in performCONDCombine()
23559 if (CondOpcode != AArch64ISD::SUBS || SubsNode->hasAnyUseOfValue(0) || in performCONDCombine()
23560 !SubsNode->hasOneUse()) in performCONDCombine()
23566 SDNode *AndNode = SubsNode->getOperand(0).getNode(); in performCONDCombine()
23569 if (AndNode->getOpcode() != ISD::AND) in performCONDCombine()
23576 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) { in performCONDCombine()
23577 uint32_t CNV = CN->getZExtValue(); in performCONDCombine()
23587 SDValue AddValue = AndNode->getOperand(0); in performCONDCombine()
23594 SDValue AddInputValue1 = AddValue.getNode()->getOperand(0); in performCONDCombine()
23595 SDValue AddInputValue2 = AddValue.getNode()->getOperand(1); in performCONDCombine()
23596 SDValue SubsInputValue = SubsNode->getOperand(1); in performCONDCombine()
23613 cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(), in performCONDCombine()
23614 cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue())) in performCONDCombine()
23619 SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0), in performCONDCombine()
23620 SubsNode->getValueType(1)); in performCONDCombine()
23621 SDValue Ops[] = { AddValue, SubsNode->getOperand(1) }; in performCONDCombine()
23642 SDValue Chain = N->getOperand(0); in performBRCONDCombine()
23643 SDValue Dest = N->getOperand(1); in performBRCONDCombine()
23644 SDValue CCVal = N->getOperand(2); in performBRCONDCombine()
23645 SDValue Cmp = N->getOperand(3); in performBRCONDCombine()
23648 unsigned CC = CCVal->getAsZExtVal(); in performBRCONDCombine()
23658 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1)) in performBRCONDCombine()
23693 unsigned CC = N->getConstantOperandVal(2); in foldCSELofCTTZ()
23694 SDValue SUBS = N->getOperand(3); in foldCSELofCTTZ()
23698 Zero = N->getOperand(0); in foldCSELofCTTZ()
23699 CTTZ = N->getOperand(1); in foldCSELofCTTZ()
23701 Zero = N->getOperand(1); in foldCSELofCTTZ()
23702 CTTZ = N->getOperand(0); in foldCSELofCTTZ()
23728 DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType()); in foldCSELofCTTZ()
23741 SDValue L = Op->getOperand(0); in foldCSELOfCSEL()
23742 SDValue R = Op->getOperand(1); in foldCSELOfCSEL()
23744 static_cast<AArch64CC::CondCode>(Op->getConstantOperandVal(2)); in foldCSELOfCSEL()
23746 SDValue OpCmp = Op->getOperand(3); in foldCSELOfCSEL()
23758 SDValue X = CmpLHS->getOperand(0); in foldCSELOfCSEL()
23759 SDValue Y = CmpLHS->getOperand(1); in foldCSELOfCSEL()
23769 if (CX->getAPIntValue() == CY->getAPIntValue()) in foldCSELOfCSEL()
23773 static_cast<AArch64CC::CondCode>(CmpLHS->getConstantOperandVal(2)); in foldCSELOfCSEL()
23774 SDValue Cond = CmpLHS->getOperand(3); in foldCSELOfCSEL()
23787 EVT VT = Op->getValueType(0); in foldCSELOfCSEL()
23797 // CSEL x, x, cc -> x in performCSELCombine()
23798 if (N->getOperand(0) == N->getOperand(1)) in performCSELCombine()
23799 return N->getOperand(0); in performCSELCombine()
23804 // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1 in performCSELCombine()
23805 // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1 in performCSELCombine()
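// [Illustrative sketch, not part of AArch64ISelLowering.cpp] Why the CSEL of
// CTTZ above collapses to a single AND: with a cttz that returns the bit width
// for a zero input (as the RBIT+CLZ lowering does),
//   (X == 0 ? 0 : cttz(X)) == cttz(X) & (BitWidth - 1)
// since BitWidth & (BitWidth - 1) == 0 and cttz(X) < BitWidth otherwise.
// cttz32 is an invented helper with that zero behaviour.
#include <cassert>
#include <cstdint>

static unsigned cttz32(uint32_t X) {
  if (X == 0)
    return 32;
  unsigned N = 0;
  while ((X & 1u) == 0) { X >>= 1; ++N; }
  return N;
}

int main() {
  const uint32_t Vals[] = {0u, 1u, 2u, 0x80000000u, 0xFFFF0000u, 12345u};
  for (uint32_t X : Vals) {
    const unsigned Csel = (X == 0) ? 0u : cttz32(X);
    assert(Csel == (cttz32(X) & 31u));
  }
  return 0;
}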
23812 // Try to re-use an already extended operand of a vector SetCC feeding a
23816 EVT Op0MVT = Op->getOperand(0).getValueType(); in tryToWidenSetCCOperands()
23817 if (!Op0MVT.isVector() || Op->use_empty()) in tryToWidenSetCCOperands()
23822 SDNode *FirstUse = *Op->use_begin(); in tryToWidenSetCCOperands()
23823 if (FirstUse->getOpcode() != ISD::VSELECT) in tryToWidenSetCCOperands()
23825 EVT UseMVT = FirstUse->getValueType(0); in tryToWidenSetCCOperands()
23828 if (any_of(Op->uses(), [&UseMVT](const SDNode *N) { in tryToWidenSetCCOperands()
23829 return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT; in tryToWidenSetCCOperands()
23834 if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V)) in tryToWidenSetCCOperands()
23840 ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get(); in tryToWidenSetCCOperands()
23842 // split the SET_CC and re-use the extended version of the operand. in tryToWidenSetCCOperands()
23844 Op->getOperand(0)); in tryToWidenSetCCOperands()
23846 Op->getOperand(0)); in tryToWidenSetCCOperands()
23849 Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1)); in tryToWidenSetCCOperands()
23852 Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1)); in tryToWidenSetCCOperands()
23857 Op0ExtV, Op1ExtV, Op->getOperand(2)); in tryToWidenSetCCOperands()
23863 SDValue Vec = N->getOperand(0); in performVecReduceBitwiseCombine()
23869 return getVectorBitwiseReduce(N->getOpcode(), Vec, N->getValueType(0), DL, in performVecReduceBitwiseCombine()
23879 assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!"); in performSETCCCombine()
23880 SDValue LHS = N->getOperand(0); in performSETCCCombine()
23881 SDValue RHS = N->getOperand(1); in performSETCCCombine()
23882 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); in performSETCCCombine()
23884 EVT VT = N->getValueType(0); in performSETCCCombine()
23891 LHS->getOpcode() == AArch64ISD::CSEL && in performSETCCCombine()
23892 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) && in performSETCCCombine()
23893 LHS->hasOneUse()) { in performSETCCCombine()
23907 // setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne in performSETCCCombine()
23909 LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) && in performSETCCCombine()
23910 LHS->getConstantOperandVal(1) < VT.getScalarSizeInBits() && in performSETCCCombine()
23911 LHS->hasOneUse()) { in performSETCCCombine()
23912 EVT TstVT = LHS->getValueType(0); in performSETCCCombine()
23915 uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1); in performSETCCCombine()
23916 SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0), in performSETCCCombine()
23918 return DAG.getNode(ISD::SETCC, DL, VT, TST, RHS, N->getOperand(2)); in performSETCCCombine()
23924 // setcc (iN (bitcast (vNi1 X))), -1, (eq|ne) in performSETCCCombine()
23925 // ==> setcc (iN (sext (i1 (vecreduce_and (vNi1 X))))), -1, (eq|ne) in performSETCCCombine()
23929 LHS->getOpcode() == ISD::BITCAST) { in performSETCCCombine()
23930 EVT ToVT = LHS->getValueType(0); in performSETCCCombine()
23931 EVT FromVT = LHS->getOperand(0).getValueType(); in performSETCCCombine()
23936 DL, MVT::i1, LHS->getOperand(0)); in performSETCCCombine()
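// [Illustrative sketch, not part of AArch64ISelLowering.cpp] 8-bit exhaustive
// check of the "setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne"
// rewrite above: comparing a logical right shift with zero only asks whether
// any of the kept high bits is set, which a TST with -1 << imm answers
// directly.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Imm = 0; Imm < 8; ++Imm) {
    const uint8_t HighMask = uint8_t(0xFFu << Imm);    // -1 << Imm, truncated to i8
    for (unsigned X = 0; X < 256; ++X) {
      const bool Srl = (uint8_t(X) >> Imm) != 0;
      const bool Tst = (uint8_t(X) & HighMask) != 0;   // the AND/TST form
      assert(Srl == Tst);
    }
  }
  return 0;
}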
23950 // Replace a flag-setting operator (eg ANDS) with the generic version
23956 SDValue LHS = N->getOperand(0); in performFlagSettingCombine()
23957 SDValue RHS = N->getOperand(1); in performFlagSettingCombine()
23958 EVT VT = N->getValueType(0); in performFlagSettingCombine()
23961 if (!N->hasAnyUseOfValue(1)) { in performFlagSettingCombine()
23962 SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops()); in performFlagSettingCombine()
23967 // Combine identical generic nodes into this node, re-using the result. in performFlagSettingCombine()
23979 SDValue Pred = N->getOperand(0); in performSetCCPunpkCombine()
23980 SDValue LHS = N->getOperand(1); in performSetCCPunpkCombine()
23981 SDValue RHS = N->getOperand(2); in performSetCCPunpkCombine()
23982 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get(); in performSetCCPunpkCombine()
23985 LHS->getOpcode() != ISD::SIGN_EXTEND) in performSetCCPunpkCombine()
23988 SDValue Extract = LHS->getOperand(0); in performSetCCPunpkCombine()
23989 if (Extract->getOpcode() != ISD::EXTRACT_SUBVECTOR || in performSetCCPunpkCombine()
23990 Extract->getValueType(0) != N->getValueType(0) || in performSetCCPunpkCombine()
23991 Extract->getConstantOperandVal(1) != 0) in performSetCCPunpkCombine()
23994 SDValue InnerSetCC = Extract->getOperand(0); in performSetCCPunpkCombine()
23995 if (InnerSetCC->getOpcode() != AArch64ISD::SETCC_MERGE_ZERO) in performSetCCPunpkCombine()
24006 Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 && in performSetCCPunpkCombine()
24007 Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256) in performSetCCPunpkCombine()
24015 assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO && in performSetccMergeZeroCombine()
24019 SDValue Pred = N->getOperand(0); in performSetccMergeZeroCombine()
24020 SDValue LHS = N->getOperand(1); in performSetccMergeZeroCombine()
24021 SDValue RHS = N->getOperand(2); in performSetccMergeZeroCombine()
24022 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get(); in performSetccMergeZeroCombine()
24028 LHS->getOpcode() == ISD::SIGN_EXTEND && in performSetccMergeZeroCombine()
24029 LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) { in performSetccMergeZeroCombine()
24033 if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO && in performSetccMergeZeroCombine()
24034 LHS->getOperand(0)->getOperand(0) == Pred) in performSetccMergeZeroCombine()
24035 return LHS->getOperand(0); in performSetccMergeZeroCombine()
24039 // -> nxvNi1 ... in performSetccMergeZeroCombine()
24041 return LHS->getOperand(0); in performSetccMergeZeroCombine()
24045 // -> nxvNi1 and(pred, ...) in performSetccMergeZeroCombine()
24049 return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), in performSetccMergeZeroCombine()
24050 LHS->getOperand(0), Pred); in performSetccMergeZeroCombine()
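
The two folds above turn a compare-against-zero of a sign-extended i1 vector back into the i1 value itself, ANDed with the predicate when the inactive lanes matter. The per-lane identity, sketched with plain bools (assumed values):

#include <cassert>

int main() {
  for (bool p : {false, true})
    for (bool b : {false, true}) {
      int lane = p ? (b ? -1 : 0) : 0;  // sign-extended i1 lane, zeroed where the predicate is false
      assert((lane != 0) == (p && b));  // matches the and(pred, ...) form
    }
  return 0;
}
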
24063 if (!Op->hasOneUse()) in getTestBitOperand()
24066 // We don't handle undef/constant-fold cases below, as they should have in getTestBitOperand()
24070 // (tbz (trunc x), b) -> (tbz x, b) in getTestBitOperand()
24072 if (Op->getOpcode() == ISD::TRUNCATE && in getTestBitOperand()
24073 Bit < Op->getValueType(0).getSizeInBits()) { in getTestBitOperand()
24074 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); in getTestBitOperand()
24077 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits. in getTestBitOperand()
24078 if (Op->getOpcode() == ISD::ANY_EXTEND && in getTestBitOperand()
24079 Bit < Op->getOperand(0).getValueSizeInBits()) { in getTestBitOperand()
24080 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); in getTestBitOperand()
24083 if (Op->getNumOperands() != 2) in getTestBitOperand()
24086 auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1)); in getTestBitOperand()
24090 switch (Op->getOpcode()) { in getTestBitOperand()
24094 // (tbz (and x, m), b) -> (tbz x, b) in getTestBitOperand()
24096 if ((C->getZExtValue() >> Bit) & 1) in getTestBitOperand()
24097 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); in getTestBitOperand()
24100 // (tbz (shl x, c), b) -> (tbz x, b-c) in getTestBitOperand()
24102 if (C->getZExtValue() <= Bit && in getTestBitOperand()
24103 (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) { in getTestBitOperand()
24104 Bit = Bit - C->getZExtValue(); in getTestBitOperand()
24105 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); in getTestBitOperand()
24109 // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x in getTestBitOperand()
24111 Bit = Bit + C->getZExtValue(); in getTestBitOperand()
24112 if (Bit >= Op->getValueType(0).getSizeInBits()) in getTestBitOperand()
24113 Bit = Op->getValueType(0).getSizeInBits() - 1; in getTestBitOperand()
24114 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); in getTestBitOperand()
24116 // (tbz (srl x, c), b) -> (tbz x, b+c) in getTestBitOperand()
24118 if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) { in getTestBitOperand()
24119 Bit = Bit + C->getZExtValue(); in getTestBitOperand()
24120 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); in getTestBitOperand()
24124 // (tbz (xor x, -1), b) -> (tbnz x, b) in getTestBitOperand()
24126 if ((C->getZExtValue() >> Bit) & 1) in getTestBitOperand()
24128 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG); in getTestBitOperand()
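
getTestBitOperand retargets the tested bit while walking through trunc, and, shifts and xor; the rewrites named in the comments above correspond to these scalar identities (assumed values, bit indices kept in range):

#include <cassert>
#include <cstdint>

static bool bit(uint64_t v, unsigned b) { return (v >> b) & 1; }

int main() {
  uint64_t x = 0xdeadbeefcafef00dull;
  uint64_t m = (1ull << 9) | 0xff;
  unsigned b = 9, c = 4;
  assert(bit(x & m, b) == bit(x, b));                         // (and x, m), m has bit b set
  assert(bit(x << c, b) == bit(x, b - c));                    // (shl x, c), b >= c
  assert(bit(x >> c, b) == bit(x, b + c));                    // (srl x, c), b+c in range
  assert(bit(uint64_t(int64_t(x) >> c), b) == bit(x, b + c)); // (sra x, c), b+c in range
  assert(bit(~x, b) == !bit(x, b));                           // (xor x, -1) flips tbz to tbnz
  return 0;
}
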
24132 // Optimize test single bit zero/non-zero and branch.
24136 unsigned Bit = N->getConstantOperandVal(2); in performTBZCombine()
24138 SDValue TestSrc = N->getOperand(1); in performTBZCombine()
24144 unsigned NewOpc = N->getOpcode(); in performTBZCombine()
24155 return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc, in performTBZCombine()
24156 DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3)); in performTBZCombine()
24165 auto SelectA = N->getOperand(1); in trySwapVSelectOperands()
24166 auto SelectB = N->getOperand(2); in trySwapVSelectOperands()
24167 auto NTy = N->getValueType(0); in trySwapVSelectOperands()
24171 SDValue SetCC = N->getOperand(0); in trySwapVSelectOperands()
24186 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); in trySwapVSelectOperands()
24197 // vselect (v1i1 setcc) ->
24206 SDValue N0 = N->getOperand(0); in performVSelectCombine()
24210 return N->getOperand(1); in performVSelectCombine()
24213 return N->getOperand(2); in performVSelectCombine()
24215 // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform in performVSelectCombine()
24216 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the in performVSelectCombine()

24218 SDValue SetCC = N->getOperand(0); in performVSelectCombine()
24224 SDNode *SplatLHS = N->getOperand(1).getNode(); in performVSelectCombine()
24225 SDNode *SplatRHS = N->getOperand(2).getNode(); in performVSelectCombine()
24227 if (CmpLHS.getValueType() == N->getOperand(1).getValueType() && in performVSelectCombine()
24237 NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N), in performVSelectCombine()
24242 auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1)); in performVSelectCombine()
24254 EVT ResVT = N->getValueType(0); in performVSelectCombine()
24260 SDValue IfTrue = N->getOperand(1); in performVSelectCombine()
24261 SDValue IfFalse = N->getOperand(2); in performVSelectCombine()
24264 cast<CondCodeSDNode>(N0.getOperand(2))->get()); in performVSelectCombine()
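
The sign-pattern transform above replaces (VSELECT (setgt lhs, -1), 1, -1) with (OR (ASR lhs, N-1), 1); per lane this is the following scalar identity, checked here for 32-bit lanes (values assumed):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t lhs : {INT32_MIN, -7, -1, 0, 1, 42, INT32_MAX}) {
    int32_t viaSelect = (lhs > -1) ? 1 : -1;
    int32_t viaAsrOr  = (lhs >> 31) | 1; // arithmetic shift smears the sign bit
    assert(viaSelect == viaAsrOr);
  }
  return 0;
}
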
24270 /// the compare-mask instructions rather than going via NZCV, even if LHS and
24276 SDValue N0 = N->getOperand(0); in performSelectCombine()
24277 EVT ResVT = N->getValueType(0); in performSelectCombine()
24289 "Scalar-SETCC feeding SELECT has unexpected result type!"); in performSelectCombine()
24292 // largest real NEON comparison is 64-bits per lane, which means the result is in performSelectCombine()
24293 // at most 32-bits and an illegal vector. Just bail out for now. in performSelectCombine()
24335 return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2)); in performSelectCombine()
24340 EVT VT = N->getValueType(0); in performDUPCombine()
24346 SmallVector<SDValue> Ops(N->ops()); in performDUPCombine()
24347 if (SDNode *LN = DCI.DAG.getNodeIfExists(N->getOpcode(), in performDUPCombine()
24354 if (N->getOpcode() == AArch64ISD::DUP) { in performDUPCombine()
24363 SDValue EXTRACT_VEC_ELT = N->getOperand(0); in performDUPCombine()
24381 if (N->getValueType(0) == N->getOperand(0).getValueType()) in performNVCASTCombine()
24382 return N->getOperand(0); in performNVCASTCombine()
24383 if (N->getOperand(0).getOpcode() == AArch64ISD::NVCAST) in performNVCASTCombine()
24384 return DAG.getNode(AArch64ISD::NVCAST, SDLoc(N), N->getValueType(0), in performNVCASTCombine()
24385 N->getOperand(0).getOperand(0)); in performNVCASTCombine()
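
performNVCASTCombine folds a cast to the operand's own type and collapses nested casts; the same two identities hold for any pure bit-reinterpretation, shown here with C++20 std::bit_cast as a stand-in (illustrative only, NVCAST itself is a DAG node, not bit_cast):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  double d = 3.5;
  // Reinterpreting to the same type is the identity, and nested reinterpretations
  // collapse to a single one, mirroring the two folds above.
  assert(std::bit_cast<double>(d) == d);
  assert(std::bit_cast<double>(std::bit_cast<uint64_t>(d)) == d);
  return 0;
}
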
24392 // globaladdr as (globaladdr + constant) - constant.
24397 if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) != in performGlobalAddressCombine()
24401 uint64_t MinOffset = -1ull; in performGlobalAddressCombine()
24402 for (SDNode *N : GN->uses()) { in performGlobalAddressCombine()
24403 if (N->getOpcode() != ISD::ADD) in performGlobalAddressCombine()
24405 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0)); in performGlobalAddressCombine()
24407 C = dyn_cast<ConstantSDNode>(N->getOperand(1)); in performGlobalAddressCombine()
24410 MinOffset = std::min(MinOffset, C->getZExtValue()); in performGlobalAddressCombine()
24412 uint64_t Offset = MinOffset + GN->getOffset(); in performGlobalAddressCombine()
24416 // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1). in performGlobalAddressCombine()
24417 if (Offset <= uint64_t(GN->getOffset())) in performGlobalAddressCombine()
24432 const GlobalValue *GV = GN->getGlobal(); in performGlobalAddressCombine()
24433 Type *T = GV->getValueType(); in performGlobalAddressCombine()
24434 if (!T->isSized() || in performGlobalAddressCombine()
24435 Offset > GV->getDataLayout().getTypeAllocSize(T)) in performGlobalAddressCombine()
24446 SDValue BR = N->getOperand(0); in performCTLZCombine()
24447 if (!Subtarget->hasCSSC() || BR.getOpcode() != ISD::BITREVERSE || in performCTLZCombine()
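
Only the BITREVERSE guard of performCTLZCombine is listed here; counting leading zeros of a bit-reversed value is the same as counting trailing zeros of the original, which is presumably what the combine exploits on CSSC targets. A standalone check using GCC/Clang builtins (assumed available):

#include <cassert>
#include <cstdint>

static uint64_t bitreverse64(uint64_t x) {
  uint64_t r = 0;
  for (unsigned i = 0; i < 64; ++i)
    r |= ((x >> i) & 1) << (63 - i);
  return r;
}

int main() {
  for (uint64_t x : {0x1ull, 0x80ull, 0xdeadbeefull, 1ull << 63})
    assert(__builtin_clzll(bitreverse64(x)) == __builtin_ctzll(x));
  return 0;
}
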
24499 OffsetConst->getZExtValue(), ScalarSizeInBytes); in isValidImmForSVEVecImmAddrMode()
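
Only the tail of isValidImmForSVEVecImmAddrMode is visible above; as far as the SVE "vector plus immediate" addressing form goes, the check amounts to the byte offset addressing whole elements with a per-element immediate of at most 31. A hedged sketch of that rule (hypothetical helper, not the file's exact code):

#include <cassert>
#include <cstdint>

static bool isValidSVEVecImmOffset(uint64_t OffsetInBytes, unsigned ScalarSizeInBytes) {
  // Offset must be a whole number of elements and lie within [0, 31 * element size].
  return OffsetInBytes % ScalarSizeInBytes == 0 &&
         OffsetInBytes / ScalarSizeInBytes <= 31;
}

int main() {
  assert(isValidSVEVecImmOffset(248, 8));  // 31 * 8 bytes: representable for .D elements
  assert(!isValidSVEVecImmOffset(256, 8)); // 32 elements: out of range
  assert(!isValidSVEVecImmOffset(12, 8));  // not a multiple of the element size
  return 0;
}
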
24505 const SDValue Src = N->getOperand(2); in performScatterStoreCombine()
24506 const EVT SrcVT = Src->getValueType(0); in performScatterStoreCombine()
24528 SDValue Base = N->getOperand(4); in performScatterStoreCombine()
24531 SDValue Offset = N->getOperand(5); in performScatterStoreCombine()
24534 // applies to non-temporal scatters because there's no instruction that takes in performScatterStoreCombine()
24546 // In the case of non-temporal scatter stores there's only one SVE instruction in performScatterStoreCombine()
24547 // per data-size: "scalar + vector", i.e. in performScatterStoreCombine()
24559 // immediates outside that range and non-immediate scalar offsets use SST1 or in performScatterStoreCombine()
24590 // Keep the original type of the input data to store - this is needed to be in performScatterStoreCombine()
24605 SDValue Ops[] = {N->getOperand(0), // Chain in performScatterStoreCombine()
24607 N->getOperand(3), // Pg in performScatterStoreCombine()
24618 const EVT RetVT = N->getValueType(0); in performGatherLoadCombine()
24630 SDValue Base = N->getOperand(3); in performGatherLoadCombine()
24633 SDValue Offset = N->getOperand(4); in performGatherLoadCombine()
24636 // offsets. This applies to non-temporal and quadword gathers, which do not in performGatherLoadCombine()
24648 // In the case of non-temporal gather loads and quadword gather loads there's in performGatherLoadCombine()
24662 // immediates outside that range and non-immediate scalar offsets use in performGatherLoadCombine()
24695 // Keep the original output value type around - this is needed to be able to in performGatherLoadCombine()
24703 SDValue Ops[] = {N->getOperand(0), // Chain in performGatherLoadCombine()
24704 N->getOperand(2), // Pg in performGatherLoadCombine()
24725 SDValue Src = N->getOperand(0); in performSignExtendInRegCombine()
24726 unsigned Opc = Src->getOpcode(); in performSignExtendInRegCombine()
24728 // Sign extend of an unsigned unpack -> signed unpack in performSignExtendInRegCombine()
24738 // -> in performSignExtendInRegCombine()
24740 // -> in performSignExtendInRegCombine()
24742 SDValue ExtOp = Src->getOperand(0); in performSignExtendInRegCombine()
24743 auto VT = cast<VTSDNode>(N->getOperand(1))->getVT(); in performSignExtendInRegCombine()
24755 return DAG.getNode(SOpc, DL, N->getValueType(0), Ext); in performSignExtendInRegCombine()
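
The unsigned-unpack-to-signed-unpack rewrite above uses the fact that sign-extending in place after a zero extension gives the same result as sign-extending the original narrow value directly. A scalar check with assumed 16-bit lanes:

#include <cassert>
#include <cstdint>

int main() {
  for (uint16_t lane : {uint16_t(0), uint16_t(0x7fff), uint16_t(0x8000), uint16_t(0xffff)}) {
    int32_t viaUnsignedUnpack = int32_t(int16_t(uint32_t(lane))); // zero-extend, then sign_extend_inreg
    int32_t viaSignedUnpack   = int32_t(int16_t(lane));           // sign-extend directly
    assert(viaUnsignedUnpack == viaSignedUnpack);
  }
  return 0;
}
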
24830 EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT(); in performSignExtendInRegCombine()
24831 EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT(); in performSignExtendInRegCombine()
24836 EVT DstVT = N->getValueType(0); in performSignExtendInRegCombine()
24840 for (unsigned I = 0; I < Src->getNumOperands(); ++I) in performSignExtendInRegCombine()
24841 Ops.push_back(Src->getOperand(I)); in performSignExtendInRegCombine()
24852 /// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
24856 SDValue Offset = N->getOperand(OffsetPos); in legalizeSVEGatherPrefetchOffsVec()
24862 // Extend the unpacked offset vector to 64-bit lanes. in legalizeSVEGatherPrefetchOffsVec()
24865 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end()); in legalizeSVEGatherPrefetchOffsVec()
24866 // Replace the offset operand with the 64-bit one. in legalizeSVEGatherPrefetchOffsVec()
24869 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops); in legalizeSVEGatherPrefetchOffsVec()
24881 if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes)) in combineSVEPrefetchVecBaseImmOff()
24885 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end()); in combineSVEPrefetchVecBaseImmOff()
24893 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops); in combineSVEPrefetchVecBaseImmOff()
24922 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!"); in removeRedundantInsertVectorElt()
24923 SDValue InsertVec = N->getOperand(0); in removeRedundantInsertVectorElt()
24924 SDValue InsertElt = N->getOperand(1); in removeRedundantInsertVectorElt()
24925 SDValue InsertIdx = N->getOperand(2); in removeRedundantInsertVectorElt()
24944 // If we get here we are effectively trying to zero lanes 1-N of a vector. in removeRedundantInsertVectorElt()
24947 if (N->getValueType(0) != ExtractVec.getValueType()) in removeRedundantInsertVectorElt()
24968 SDValue N0 = N->getOperand(0); in performFPExtendCombine()
24969 EVT VT = N->getValueType(0); in performFPExtendCombine()
24972 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND) in performFPExtendCombine()
24980 // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) in performFPExtendCombine()
24984 N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() && in performFPExtendCombine()
24986 VT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits()) { in performFPExtendCombine()
24989 LN0->getChain(), LN0->getBasePtr(), in performFPExtendCombine()
24990 N0.getValueType(), LN0->getMemOperand()); in performFPExtendCombine()
25005 EVT VT = N->getValueType(0); in performBSPExpandForSVE()
25008 if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME()) in performBSPExpandForSVE()
25013 SDValue Mask = N->getOperand(0); in performBSPExpandForSVE()
25014 SDValue In1 = N->getOperand(1); in performBSPExpandForSVE()
25015 SDValue In2 = N->getOperand(2); in performBSPExpandForSVE()
25024 EVT VT = N->getValueType(0); in performDupLane128Combine()
25026 SDValue Insert = N->getOperand(0); in performDupLane128Combine()
25034 uint64_t IdxDupLane = N->getConstantOperandVal(1); in performDupLane128Combine()
25052 DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2)); in performDupLane128Combine()
25054 NewInsert, N->getOperand(1)); in performDupLane128Combine()
25065 SDValue LHS = N->getOperand(0); in tryCombineMULLWithUZP1()
25066 SDValue RHS = N->getOperand(1); in tryCombineMULLWithUZP1()
25094 // You can see the regressions on test/CodeGen/AArch64/aarch64-smull.ll in tryCombineMULLWithUZP1()
25117 if (ExtractHighSrcVec->use_size() != 2) in tryCombineMULLWithUZP1()
25121 for (SDNode *User : ExtractHighSrcVec.getNode()->uses()) { in tryCombineMULLWithUZP1()
25125 if (User->getOpcode() != ISD::EXTRACT_SUBVECTOR || in tryCombineMULLWithUZP1()
25126 !isNullConstant(User->getOperand(1))) { in tryCombineMULLWithUZP1()
25134 if (!ExtractLow || !ExtractLow->hasOneUse()) in tryCombineMULLWithUZP1()
25139 SDNode *ExtractLowUser = *ExtractLow.getNode()->use_begin(); in tryCombineMULLWithUZP1()
25140 if (ExtractLowUser->getOpcode() != N->getOpcode()) { in tryCombineMULLWithUZP1()
25143 if (ExtractLowUser->getOperand(0) == ExtractLow) { in tryCombineMULLWithUZP1()
25144 if (ExtractLowUser->getOperand(1).getOpcode() == ISD::TRUNCATE) in tryCombineMULLWithUZP1()
25145 TruncLow = ExtractLowUser->getOperand(1); in tryCombineMULLWithUZP1()
25149 if (ExtractLowUser->getOperand(0).getOpcode() == ISD::TRUNCATE) in tryCombineMULLWithUZP1()
25150 TruncLow = ExtractLowUser->getOperand(0); in tryCombineMULLWithUZP1()
25159 // You can see the regressions on test/CodeGen/AArch64/aarch64-smull.ll in tryCombineMULLWithUZP1()
25222 EVT VT = N->getValueType(0); in performScalarToVectorCombine()
25226 SDValue ZEXT = N->getOperand(0); in performScalarToVectorCombine()
25258 switch (N->getOpcode()) { in PerformDAGCombine()
25291 APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits()); in PerformDAGCombine()
25293 APInt::getAllOnes(N->getValueType(0).getVectorNumElements()); in PerformDAGCombine()
25415 switch (N->getConstantOperandVal(1)) { in PerformDAGCombine()
25566 unsigned IntrinsicID = N->getConstantOperandVal(1); in PerformDAGCombine()
25573 N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64)); in PerformDAGCombine()
25583 DAG.getVTList(MVT::Other), N->getOperand(0), in PerformDAGCombine()
25584 N->getOperand(2), N->getOperand(3)); in PerformDAGCombine()
25587 DAG.getVTList(MVT::Other), N->getOperand(0), in PerformDAGCombine()
25588 N->getOperand(2), N->getOperand(3)); in PerformDAGCombine()
25604 // we can't perform a tail-call. In particular, we need to check for
25609 if (N->getNumValues() != 1) in isUsedByReturnOnly()
25611 if (!N->hasNUsesOfValue(1, 0)) in isUsedByReturnOnly()
25615 SDNode *Copy = *N->use_begin(); in isUsedByReturnOnly()
25616 if (Copy->getOpcode() == ISD::CopyToReg) { in isUsedByReturnOnly()
25619 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == in isUsedByReturnOnly()
25622 TCChain = Copy->getOperand(0); in isUsedByReturnOnly()
25623 } else if (Copy->getOpcode() != ISD::FP_EXTEND) in isUsedByReturnOnly()
25627 for (SDNode *Node : Copy->uses()) { in isUsedByReturnOnly()
25628 if (Node->getOpcode() != AArch64ISD::RET_GLUE) in isUsedByReturnOnly()
25645 return CI->isTailCall(); in mayBeEmittedAsTailCall()
25652 if (!CstOffset || CstOffset->isZero()) in isIndexingLegal()
25658 return isInt<9>(CstOffset->getSExtValue()); in isIndexingLegal()
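
isIndexingLegal above only accepts offsets that fit a signed 9-bit immediate, the range AArch64 pre/post-indexed load/store forms encode; spelled out as a standalone check:

#include <cassert>
#include <cstdint>

static bool isInt9(int64_t v) { return v >= -256 && v <= 255; } // signed 9-bit immediate

int main() {
  assert(isInt9(-256) && isInt9(255));
  assert(!isInt9(-257) && !isInt9(256));
  return 0;
}
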
25665 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) in getIndexedAddressParts()
25668 // Non-null if there is exactly one user of the loaded value (ignoring chain). in getIndexedAddressParts()
25670 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; in getIndexedAddressParts()
25677 ValOnlyUser = nullptr; // Multiple non-chain uses, bail out. in getIndexedAddressParts()
25688 if (ValOnlyUser && ValOnlyUser->getValueType(0).isScalableVector() && in getIndexedAddressParts()
25689 (ValOnlyUser->getOpcode() == ISD::SPLAT_VECTOR || in getIndexedAddressParts()
25690 (ValOnlyUser->getOpcode() == AArch64ISD::DUP_MERGE_PASSTHRU && in getIndexedAddressParts()
25691 IsUndefOrZero(ValOnlyUser->getOperand(2))))) in getIndexedAddressParts()
25694 Base = Op->getOperand(0); in getIndexedAddressParts()
25697 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { in getIndexedAddressParts()
25698 int64_t RHSC = RHS->getSExtValue(); in getIndexedAddressParts()
25699 if (Op->getOpcode() == ISD::SUB) in getIndexedAddressParts()
25700 RHSC = -(uint64_t)RHSC; in getIndexedAddressParts()
25703 // Always emit pre-inc/post-inc addressing mode. Use negated constant offset in getIndexedAddressParts()
25705 Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0)); in getIndexedAddressParts()
25718 VT = LD->getMemoryVT(); in getPreIndexedAddressParts()
25719 Ptr = LD->getBasePtr(); in getPreIndexedAddressParts()
25721 VT = ST->getMemoryVT(); in getPreIndexedAddressParts()
25722 Ptr = ST->getBasePtr(); in getPreIndexedAddressParts()
25738 VT = LD->getMemoryVT(); in getPostIndexedAddressParts()
25739 Ptr = LD->getBasePtr(); in getPostIndexedAddressParts()
25741 VT = ST->getMemoryVT(); in getPostIndexedAddressParts()
25742 Ptr = ST->getBasePtr(); in getPostIndexedAddressParts()
25748 // Post-indexing updates the base, so it's not a valid transform in getPostIndexedAddressParts()
25760 SDValue Op = N->getOperand(0); in replaceBoolVectorBitcast()
25761 EVT VT = N->getValueType(0); in replaceBoolVectorBitcast()
25788 SDValue Op = N->getOperand(0); in CustomNonLegalBITCASTResults()
25789 EVT VT = N->getValueType(0); in CustomNonLegalBITCASTResults()
25802 SDValue Op = N->getOperand(0); in ReplaceBITCASTResults()
25803 EVT VT = N->getValueType(0); in ReplaceBITCASTResults()
25823 "Expected fp->int bitcast!"); in ReplaceBITCASTResults()
25854 EVT VT = N->getValueType(0); in ReplaceAddWithADDP()
25857 !N->getFlags().hasAllowReassociation()) || in ReplaceAddWithADDP()
25858 (VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()) || in ReplaceAddWithADDP()
25862 SDValue X = N->getOperand(0); in ReplaceAddWithADDP()
25863 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(1)); in ReplaceAddWithADDP()
25865 Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0)); in ReplaceAddWithADDP()
25866 X = N->getOperand(1); in ReplaceAddWithADDP()
25871 if (Shuf->getOperand(0) != X || !Shuf->getOperand(1)->isUndef()) in ReplaceAddWithADDP()
25875 ArrayRef<int> Mask = Shuf->getMask(); in ReplaceAddWithADDP()
25877 if (Mask[I] != (I % 2 == 0 ? I + 1 : I - 1)) in ReplaceAddWithADDP()
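
The mask test above accepts exactly the adjacent-pair swap {1, 0, 3, 2, ...}; adding a vector to that swap of itself leaves each pair's sum in both of the pair's lanes, which is what makes the pairwise-add (ADDP) replacement possible. A scalar model with an assumed 4-lane vector:

#include <cassert>

int main() {
  int X[4]    = {1, 2, 3, 4};
  int Swap[4] = {X[1], X[0], X[3], X[2]}; // the shuffle mask {1, 0, 3, 2}
  int Sum[4];
  for (int I = 0; I < 4; ++I)
    Sum[I] = X[I] + Swap[I];
  // Every pair of adjacent lanes now holds that pair's sum.
  assert(Sum[0] == 3 && Sum[1] == 3 && Sum[2] == 7 && Sum[3] == 7);
  return 0;
}
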
25906 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); in ReplaceReductionResults()
25915 SDValue In = N->getOperand(0); in ReplaceExtractSubVectorResults()
25923 EVT VT = N->getValueType(0); in ReplaceExtractSubVectorResults()
25932 auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1)); in ReplaceExtractSubVectorResults()
25936 unsigned Index = CIndex->getZExtValue(); in ReplaceExtractSubVectorResults()
25943 SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0)); in ReplaceExtractSubVectorResults()
25966 assert(N->getValueType(0) == MVT::i128 && in ReplaceCMP_SWAP_128Results()
25969 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); in ReplaceCMP_SWAP_128Results()
25970 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) { in ReplaceCMP_SWAP_128Results()
25971 // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type, in ReplaceCMP_SWAP_128Results()
25974 createGPRPairNode(DAG, N->getOperand(2)), // Compare value in ReplaceCMP_SWAP_128Results()
25975 createGPRPairNode(DAG, N->getOperand(3)), // Store value in ReplaceCMP_SWAP_128Results()
25976 N->getOperand(1), // Ptr in ReplaceCMP_SWAP_128Results()
25977 N->getOperand(0), // Chain in in ReplaceCMP_SWAP_128Results()
25981 switch (MemOp->getMergedOrdering()) { in ReplaceCMP_SWAP_128Results()
26017 switch (MemOp->getMergedOrdering()) { in ReplaceCMP_SWAP_128Results()
26036 auto Desired = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64); in ReplaceCMP_SWAP_128Results()
26037 auto New = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64); in ReplaceCMP_SWAP_128Results()
26038 SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second, in ReplaceCMP_SWAP_128Results()
26039 New.first, New.second, N->getOperand(0)}; in ReplaceCMP_SWAP_128Results()
26053 // LowerATOMIC_LOAD_AND). We can't take that approach with 128-bit, because in getAtomicLoad128Opcode()
26054 // the type is not legal. Therefore we shouldn't expect to see a 128-bit in getAtomicLoad128Opcode()
26129 // LSE128 has a 128-bit RMW ops, but i128 is not a legal type, so lower it in ReplaceATOMIC_LOAD_128Results()
26137 assert(N->getValueType(0) == MVT::i128 && in ReplaceATOMIC_LOAD_128Results()
26140 if (!Subtarget->hasLSE128()) in ReplaceATOMIC_LOAD_128Results()
26143 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); in ReplaceATOMIC_LOAD_128Results()
26144 const SDValue &Chain = N->getOperand(0); in ReplaceATOMIC_LOAD_128Results()
26145 const SDValue &Ptr = N->getOperand(1); in ReplaceATOMIC_LOAD_128Results()
26146 const SDValue &Val128 = N->getOperand(2); in ReplaceATOMIC_LOAD_128Results()
26150 const unsigned ISDOpcode = N->getOpcode(); in ReplaceATOMIC_LOAD_128Results()
26152 getAtomicLoad128Opcode(ISDOpcode, MemOp->getMergedOrdering()); in ReplaceATOMIC_LOAD_128Results()
26158 DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.first); in ReplaceATOMIC_LOAD_128Results()
26161 DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.second); in ReplaceATOMIC_LOAD_128Results()
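
The -1ULL constants above appear to build the bitwise NOT of each 64-bit half on the AND path: LDCLR-family atomics clear the bits that are set in their operand, so an atomic AND with mask m is issued as a clear of ~m. The scalar identity, checked with assumed values:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t mem = 0xdeadbeefcafef00dull;
  for (uint64_t m : {0x0ull, 0xff00ff00ff00ff00ull, ~0ull}) {
    uint64_t clearMask = ~m;               // the inverted value handed to the clear op
    uint64_t viaClear  = mem & ~clearMask; // LDCLR semantics: clear every bit set in its operand
    assert(viaClear == (mem & m));         // equivalent to the original atomic AND
  }
  return 0;
}
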
26184 switch (N->getOpcode()) { in ReplaceNodeResults()
26239 assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion"); in ReplaceNodeResults()
26246 assert(N->getValueType(0) != MVT::i128 && in ReplaceNodeResults()
26247 "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP"); in ReplaceNodeResults()
26252 assert(cast<AtomicSDNode>(N)->getVal().getValueType() == MVT::i128 && in ReplaceNodeResults()
26253 "Expected 128-bit atomicrmw."); in ReplaceNodeResults()
26261 EVT MemVT = LoadNode->getMemoryVT(); in ReplaceNodeResults()
26262 // Handle lowering 256 bit non temporal loads into LDNP for little-endian in ReplaceNodeResults()
26264 if (LoadNode->isNonTemporal() && Subtarget->isLittleEndian() && in ReplaceNodeResults()
26276 {LoadNode->getChain(), LoadNode->getBasePtr()}, in ReplaceNodeResults()
26277 LoadNode->getMemoryVT(), LoadNode->getMemOperand()); in ReplaceNodeResults()
26285 if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) || in ReplaceNodeResults()
26286 LoadNode->getMemoryVT() != MVT::i128) { in ReplaceNodeResults()
26287 // Non-volatile or atomic loads are optimized later in AArch64's load/store in ReplaceNodeResults()
26295 AN && AN->getSuccessOrdering() == AtomicOrdering::Acquire; in ReplaceNodeResults()
26299 assert(Subtarget->hasFeature(AArch64::FeatureRCPC3)); in ReplaceNodeResults()
26303 {LoadNode->getChain(), LoadNode->getBasePtr()}, in ReplaceNodeResults()
26304 LoadNode->getMemoryVT(), LoadNode->getMemOperand()); in ReplaceNodeResults()
26310 Result.getValue(FirstRes), Result.getValue(1 - FirstRes)); in ReplaceNodeResults()
26321 // CONCAT_VECTORS -- but delegate to common code for result type in ReplaceNodeResults()
26325 EVT VT = N->getValueType(0); in ReplaceNodeResults()
26328 static_cast<Intrinsic::ID>(N->getConstantOperandVal(0)); in ReplaceNodeResults()
26336 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2)); in ReplaceNodeResults()
26338 N->getOperand(1), Op2, N->getOperand(3)); in ReplaceNodeResults()
26346 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2)); in ReplaceNodeResults()
26348 N->getOperand(1), Op2, N->getOperand(3)); in ReplaceNodeResults()
26357 N->getOperand(1), N->getOperand(2)); in ReplaceNodeResults()
26366 N->getOperand(1), N->getOperand(2)); in ReplaceNodeResults()
26380 auto V = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, NewVT, N->ops()); in ReplaceNodeResults()
26388 assert(N->getValueType(0) == MVT::i128 && in ReplaceNodeResults()
26389 "READ_REGISTER custom lowering is only for 128-bit sysregs"); in ReplaceNodeResults()
26390 SDValue Chain = N->getOperand(0); in ReplaceNodeResults()
26391 SDValue SysRegName = N->getOperand(1); in ReplaceNodeResults()
26398 // of the 128-bit System Register value. in ReplaceNodeResults()
26409 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia()) in useLoadStackGuardNode()
26431 // In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
26432 // provided the address is 16-byte aligned.
26434 if (!Subtarget->hasLSE2()) in isOpSuitableForLDPSTP()
26438 return LI->getType()->getPrimitiveSizeInBits() == 128 && in isOpSuitableForLDPSTP()
26439 LI->getAlign() >= Align(16); in isOpSuitableForLDPSTP()
26442 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 && in isOpSuitableForLDPSTP()
26443 SI->getAlign() >= Align(16); in isOpSuitableForLDPSTP()
26449 if (!Subtarget->hasLSE128()) in isOpSuitableForLSE128()
26455 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 && in isOpSuitableForLSE128()
26456 SI->getAlign() >= Align(16) && in isOpSuitableForLSE128()
26457 (SI->getOrdering() == AtomicOrdering::Release || in isOpSuitableForLSE128()
26458 SI->getOrdering() == AtomicOrdering::SequentiallyConsistent); in isOpSuitableForLSE128()
26461 return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 && in isOpSuitableForLSE128()
26462 RMW->getAlign() >= Align(16) && in isOpSuitableForLSE128()
26463 (RMW->getOperation() == AtomicRMWInst::Xchg || in isOpSuitableForLSE128()
26464 RMW->getOperation() == AtomicRMWInst::And || in isOpSuitableForLSE128()
26465 RMW->getOperation() == AtomicRMWInst::Or); in isOpSuitableForLSE128()
26471 if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3()) in isOpSuitableForRCPC3()
26475 return LI->getType()->getPrimitiveSizeInBits() == 128 && in isOpSuitableForRCPC3()
26476 LI->getAlign() >= Align(16) && in isOpSuitableForRCPC3()
26477 LI->getOrdering() == AtomicOrdering::Acquire; in isOpSuitableForRCPC3()
26480 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 && in isOpSuitableForRCPC3()
26481 SI->getAlign() >= Align(16) && in isOpSuitableForRCPC3()
26482 SI->getOrdering() == AtomicOrdering::Release; in isOpSuitableForRCPC3()
26500 // Store-Release instructions only provide seq_cst guarantees when paired with in shouldInsertTrailingFenceForAtomicStore()
26501 // Load-Acquire instructions. MSVC CRT does not use these instructions to in shouldInsertTrailingFenceForAtomicStore()
26504 if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) in shouldInsertTrailingFenceForAtomicStore()
26507 switch (I->getOpcode()) { in shouldInsertTrailingFenceForAtomicStore()
26511 return cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() == in shouldInsertTrailingFenceForAtomicStore()
26514 return cast<AtomicRMWInst>(I)->getOrdering() == in shouldInsertTrailingFenceForAtomicStore()
26517 return cast<StoreInst>(I)->getOrdering() == in shouldInsertTrailingFenceForAtomicStore()
26522 // Loads and stores less than 128-bits are already atomic; ones above that
26527 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); in shouldExpandAtomicStoreInIR()
26539 // Loads and stores less than 128-bits are already atomic; ones above that
26544 unsigned Size = LI->getType()->getPrimitiveSizeInBits(); in shouldExpandAtomicLoadInIR()
26554 // At -O0, fast-regalloc cannot cope with the live vregs necessary to in shouldExpandAtomicLoadInIR()
26558 // succeed. So at -O0 lower this operation to a CAS loop. in shouldExpandAtomicLoadInIR()
26564 return Subtarget->hasLSE() ? AtomicExpansionKind::CmpXChg in shouldExpandAtomicLoadInIR()
26569 // However, with the LSE instructions (or outline-atomics mode, which provides
26570 // library routines in place of the LSE-instructions), we can directly emit many
26573 // Floating-point operations are always emitted to a cmpxchg loop, because they
26577 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); in shouldExpandAtomicRMWInIR()
26580 if (AI->isFloatingPointOperation()) in shouldExpandAtomicRMWInIR()
26583 bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 && in shouldExpandAtomicRMWInIR()
26584 (AI->getOperation() == AtomicRMWInst::Xchg || in shouldExpandAtomicRMWInIR()
26585 AI->getOperation() == AtomicRMWInst::Or || in shouldExpandAtomicRMWInIR()
26586 AI->getOperation() == AtomicRMWInst::And); in shouldExpandAtomicRMWInIR()
26592 if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { in shouldExpandAtomicRMWInIR()
26593 if (Subtarget->hasLSE()) in shouldExpandAtomicRMWInIR()
26595 if (Subtarget->outlineAtomics()) { in shouldExpandAtomicRMWInIR()
26599 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf in shouldExpandAtomicRMWInIR()
26600 // (2) low level libgcc and compiler-rt support implemented by: in shouldExpandAtomicRMWInIR()
26602 if (AI->getOperation() != AtomicRMWInst::Min && in shouldExpandAtomicRMWInIR()
26603 AI->getOperation() != AtomicRMWInst::Max && in shouldExpandAtomicRMWInIR()
26604 AI->getOperation() != AtomicRMWInst::UMin && in shouldExpandAtomicRMWInIR()
26605 AI->getOperation() != AtomicRMWInst::UMax) { in shouldExpandAtomicRMWInIR()
26611 // At -O0, fast-regalloc cannot cope with the live vregs necessary to in shouldExpandAtomicRMWInIR()
26615 // succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if in shouldExpandAtomicRMWInIR()
26618 Subtarget->hasLSE()) in shouldExpandAtomicRMWInIR()
26628 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) in shouldExpandAtomicCmpXchgInIR()
26630 // At -O0, fast-regalloc cannot cope with the live vregs necessary to in shouldExpandAtomicCmpXchgInIR()
26634 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. in shouldExpandAtomicCmpXchgInIR()
26638 // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand in shouldExpandAtomicCmpXchgInIR()
26640 unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits(); in shouldExpandAtomicCmpXchgInIR()
26650 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); in emitLoadLinked()
26653 // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd in emitLoadLinked()
26656 if (ValueTy->getPrimitiveSizeInBits() == 128) { in emitLoadLinked()
26671 Type *Tys[] = { Addr->getType() }; in emitLoadLinked()
26676 const DataLayout &DL = M->getDataLayout(); in emitLoadLinked()
26679 CI->addParamAttr( in emitLoadLinked()
26688 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); in emitAtomicCmpXchgNoStoreLLBalance()
26695 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); in emitStoreConditional()
26701 if (Val->getType()->getPrimitiveSizeInBits() == 128) { in emitStoreConditional()
26705 Type *Int64Ty = Type::getInt64Ty(M->getContext()); in emitStoreConditional()
26714 Type *Tys[] = { Addr->getType() }; in emitStoreConditional()
26717 const DataLayout &DL = M->getDataLayout(); in emitStoreConditional()
26718 IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType())); in emitStoreConditional()
26723 Val, Stxr->getFunctionType()->getParamType(0)), in emitStoreConditional()
26725 CI->addParamAttr(1, Attribute::get(Builder.getContext(), in emitStoreConditional()
26726 Attribute::ElementType, Val->getType())); in emitStoreConditional()
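
The 128-bit branch above hands the value to stxp as two 64-bit halves; a standalone sketch of that split and its round-trip, using the GCC/Clang unsigned __int128 extension purely for illustration:

#include <cassert>
#include <cstdint>

int main() {
  unsigned __int128 v =
      ((unsigned __int128)0x0123456789abcdefull << 64) | 0xfedcba9876543210ull;
  uint64_t lo = (uint64_t)v;         // low 64 bits of the stored value
  uint64_t hi = (uint64_t)(v >> 64); // high 64 bits of the stored value
  unsigned __int128 rebuilt = ((unsigned __int128)hi << 64) | lo;
  assert(rebuilt == v);
  return 0;
}
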
26733 if (!Ty->isArrayTy()) { in functionArgumentNeedsConsecutiveRegisters()
26734 const TypeSize &TySize = Ty->getPrimitiveSizeInBits(); in functionArgumentNeedsConsecutiveRegisters()
26750 Module *M = IRB.GetInsertBlock()->getParent()->getParent(); in UseTlsOffset()
26763 if (Subtarget->isTargetAndroid()) in getIRStackGuard()
26768 if (Subtarget->isTargetFuchsia()) in getIRStackGuard()
26769 return UseTlsOffset(IRB, -0x10); in getIRStackGuard()
26776 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) { in insertSSPDeclarations()
26783 M.getOrInsertFunction(Subtarget->getSecurityCheckCookieName(), in insertSSPDeclarations()
26787 F->setCallingConv(CallingConv::Win64); in insertSSPDeclarations()
26788 F->addParamAttr(0, Attribute::AttrKind::InReg); in insertSSPDeclarations()
26797 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) in getSDagStackGuard()
26804 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) in getSSPStackGuardCheck()
26805 return M.getFunction(Subtarget->getSecurityCheckCookieName()); in getSSPStackGuardCheck()
26814 if (Subtarget->isTargetAndroid()) in getSafeStackPointerLocation()
26819 if (Subtarget->isTargetFuchsia()) in getSafeStackPointerLocation()
26820 return UseTlsOffset(IRB, -0x8); in getSafeStackPointerLocation()
26835 return Mask->getValue().isPowerOf2(); in isMaskAndCmp0FoldingBeneficial()
26855 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin()) in preferredShiftLegalizationStrategy()
26863 AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>(); in initializeSplitCSR()
26864 AFI->setIsSplitCSR(true); in initializeSplitCSR()
26870 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); in insertCopiesSplitCSR()
26871 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); in insertCopiesSplitCSR()
26875 const TargetInstrInfo *TII = Subtarget->getInstrInfo(); in insertCopiesSplitCSR()
26876 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); in insertCopiesSplitCSR()
26877 MachineBasicBlock::iterator MBBI = Entry->begin(); in insertCopiesSplitCSR()
26887 Register NewVR = MRI->createVirtualRegister(RC); in insertCopiesSplitCSR()
26889 // FIXME: this currently does not emit CFI pseudo-instructions, it works in insertCopiesSplitCSR()
26890 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be in insertCopiesSplitCSR()
26892 // CFI pseudo-instructions. in insertCopiesSplitCSR()
26893 assert(Entry->getParent()->getFunction().hasFnAttribute( in insertCopiesSplitCSR()
26896 Entry->addLiveIn(*I); in insertCopiesSplitCSR()
26897 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) in insertCopiesSplitCSR()
26900 // Insert the copy-back instructions right before the terminator. in insertCopiesSplitCSR()
26902 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), in insertCopiesSplitCSR()
26903 TII->get(TargetOpcode::COPY), *I) in insertCopiesSplitCSR()
26913 // integer division, leaving the division as-is is a loss even in terms of in isIntDivCheap()
26921 // We want inc-of-add for scalars and sub-of-not for vectors. in preferIncOfAddToSubOfNot()
26929 if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16()) in shouldConvertFpToSat()
26940 assert(MBBI->isCall() && MBBI->getCFIType() && in EmitKCFICheck()
26943 switch (MBBI->getOpcode()) { in EmitKCFICheck()
26955 MachineOperand &Target = MBBI->getOperand(0); in EmitKCFICheck()
26959 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(AArch64::KCFI_CHECK)) in EmitKCFICheck()
26961 .addImm(MBBI->getCFIType()) in EmitKCFICheck()
26966 return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint(); in enableAggressiveFMAFusion()
26971 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) in getVaListSizeInBits()
27029 if (GV.isThreadLocal() && Subtarget->isTargetMachO()) in shouldLocalize()
27046 auto APF = MI.getOperand(1).getFPImm()->getValueAPF(); in shouldLocalize()
27057 APInt Imm = CI->getValue(); in shouldLocalize()
27058 InstructionCost Cost = TTI->getIntImmCost( in shouldLocalize()
27059 Imm, CI->getType(), TargetTransformInfo::TCK_CodeSize); in shouldLocalize()
27068 --MaxUses; in shouldLocalize()
27089 if (Inst.getType()->isScalableTy()) { in fallBackToDAGISel()
27094 if (Inst.getOperand(i)->getType()->isScalableTy()) in fallBackToDAGISel()
27098 if (AI->getAllocatedType()->isScalableTy()) in fallBackToDAGISel()
27234 EVT MemVT = Load->getMemoryVT(); in LowerFixedLengthVectorLoadToSVE()
27244 LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg, in LowerFixedLengthVectorLoadToSVE()
27245 DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(), in LowerFixedLengthVectorLoadToSVE()
27246 Load->getAddressingMode(), Load->getExtensionType()); in LowerFixedLengthVectorLoadToSVE()
27249 if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) { in LowerFixedLengthVectorLoadToSVE()
27251 Load->getMemoryVT().getVectorElementType()); in LowerFixedLengthVectorLoadToSVE()
27292 SDValue Mask = Load->getMask(); in LowerFixedLengthVectorMLoadToSVE()
27296 assert(Load->getExtensionType() != ISD::NON_EXTLOAD && in LowerFixedLengthVectorMLoadToSVE()
27305 if (Load->getPassThru()->isUndef()) { in LowerFixedLengthVectorMLoadToSVE()
27313 if (isZerosVector(Load->getPassThru().getNode())) in LowerFixedLengthVectorMLoadToSVE()
27318 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), in LowerFixedLengthVectorMLoadToSVE()
27319 Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(), in LowerFixedLengthVectorMLoadToSVE()
27320 Load->getAddressingMode(), Load->getExtensionType()); in LowerFixedLengthVectorMLoadToSVE()
27325 convertToScalableVector(DAG, ContainerVT, Load->getPassThru()); in LowerFixedLengthVectorMLoadToSVE()
27340 EVT VT = Store->getValue().getValueType(); in LowerFixedLengthVectorStoreToSVE()
27342 EVT MemVT = Store->getMemoryVT(); in LowerFixedLengthVectorStoreToSVE()
27345 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue()); in LowerFixedLengthVectorStoreToSVE()
27347 if (VT.isFloatingPoint() && Store->isTruncatingStore()) { in LowerFixedLengthVectorStoreToSVE()
27349 Store->getMemoryVT().getVectorElementType()); in LowerFixedLengthVectorStoreToSVE()
27362 return DAG.getMaskedStore(Store->getChain(), DL, NewValue, in LowerFixedLengthVectorStoreToSVE()
27363 Store->getBasePtr(), Store->getOffset(), Pg, MemVT, in LowerFixedLengthVectorStoreToSVE()
27364 Store->getMemOperand(), Store->getAddressingMode(), in LowerFixedLengthVectorStoreToSVE()
27365 Store->isTruncatingStore()); in LowerFixedLengthVectorStoreToSVE()
27373 EVT VT = Store->getValue().getValueType(); in LowerFixedLengthVectorMStoreToSVE()
27376 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue()); in LowerFixedLengthVectorMStoreToSVE()
27377 SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG); in LowerFixedLengthVectorMStoreToSVE()
27380 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(), in LowerFixedLengthVectorMStoreToSVE()
27381 Mask, Store->getMemoryVT(), Store->getMemOperand(), in LowerFixedLengthVectorMStoreToSVE()
27382 Store->getAddressingMode(), Store->isTruncatingStore()); in LowerFixedLengthVectorMStoreToSVE()
27531 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0)); in LowerFixedLengthExtractVectorElt()
27544 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0)); in LowerFixedLengthInsertVectorElt()
27563 assert(isTypeLegal(VT) && "Expected only legal fixed-width types"); in LowerToPredicatedOp()
27568 for (const SDValue &V : Op->op_values()) { in LowerToPredicatedOp()
27575 EVT VTArg = VTNode->getVT().getVectorElementType(); in LowerToPredicatedOp()
27582 "Expected only legal fixed-width types"); in LowerToPredicatedOp()
27596 for (const SDValue &V : Op->op_values()) { in LowerToPredicatedOp()
27606 return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags()); in LowerToPredicatedOp()
27621 for (const SDValue &V : Op->op_values()) { in LowerToScalableOp()
27624 // Pass through non-vector operands. in LowerToScalableOp()
27721 /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) { in LowerReductionToSVE()
27753 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1)); in LowerFixedLengthVectorSelectToSVE()
27754 SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2)); in LowerFixedLengthVectorSelectToSVE()
27812 unsigned NumOperands = Op->getNumOperands(); in LowerFixedLengthConcatVectorsToSVE()
27827 Op->getOperand(I), Op->getOperand(I + 1))); in LowerFixedLengthConcatVectorsToSVE()
27969 SDValue Chain = HG->getChain(); in LowerVECTOR_HISTOGRAM()
27970 SDValue Inc = HG->getInc(); in LowerVECTOR_HISTOGRAM()
27971 SDValue Mask = HG->getMask(); in LowerVECTOR_HISTOGRAM()
27972 SDValue Ptr = HG->getBasePtr(); in LowerVECTOR_HISTOGRAM()
27973 SDValue Index = HG->getIndex(); in LowerVECTOR_HISTOGRAM()
27974 SDValue Scale = HG->getScale(); in LowerVECTOR_HISTOGRAM()
27975 SDValue IntID = HG->getIntID(); in LowerVECTOR_HISTOGRAM()
27980 assert(CID->getZExtValue() == Intrinsic::experimental_vector_histogram_add && in LowerVECTOR_HISTOGRAM()
27992 MachineMemOperand *MMO = HG->getMemOperand(); in LowerVECTOR_HISTOGRAM()
27995 MMO->getPointerInfo(), MachineMemOperand::MOLoad, MMO->getSize(), in LowerVECTOR_HISTOGRAM()
27996 MMO->getAlign(), MMO->getAAInfo()); in LowerVECTOR_HISTOGRAM()
27997 ISD::MemIndexType IndexType = HG->getIndexType(); in LowerVECTOR_HISTOGRAM()
28013 MMO->getPointerInfo(), MachineMemOperand::MOStore, MMO->getSize(), in LowerVECTOR_HISTOGRAM()
28014 MMO->getAlign(), MMO->getAAInfo()); in LowerVECTOR_HISTOGRAM()
28087 uint64_t MaxOffset = APInt(BitsPerElt, -1, false).getZExtValue(); in GenerateFixedLengthSVETBL()
28099 // Bail out for 8-bits element types, because with 2048-bit SVE register in GenerateFixedLengthSVETBL()
28110 // is not known at compile-time, we need to maintain a mask with 'VL' values in GenerateFixedLengthSVETBL()
28114 Index += IndexLen - ElementsPerVectorReg; in GenerateFixedLengthSVETBL()
28116 Index = Index - ElementsPerVectorReg; in GenerateFixedLengthSVETBL()
28121 // For 8-bit elements and 1024-bit SVE registers and MaxOffset equals in GenerateFixedLengthSVETBL()
28129 // Choosing an out-of-range index leads to the lane being zeroed vs zero in GenerateFixedLengthSVETBL()
28131 // index elements. For i8 elements an out-of-range index could be a valid in GenerateFixedLengthSVETBL()
28132 // for 2048-bit vector register size. in GenerateFixedLengthSVETBL()
28133 for (unsigned i = 0; i < IndexLen - ElementsPerVectorReg; ++i) { in GenerateFixedLengthSVETBL()
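
The index fix-ups above re-base shuffle indices that name lanes of the second source so they land in the second TBL register once both sources are laid out with IndexLen lanes each; a small array model with assumed lane counts:

#include <cassert>
#include <vector>

int main() {
  const unsigned ElementsPerVectorReg = 4; // lanes actually populated per source (assumed)
  const unsigned IndexLen = 8;             // lanes per TBL source register (assumed)
  std::vector<int> Op1 = {10, 11, 12, 13, 0, 0, 0, 0}; // padded to IndexLen lanes
  std::vector<int> Op2 = {20, 21, 22, 23, 0, 0, 0, 0};
  std::vector<int> Table = Op1;                        // TBL sees Op1 followed by Op2
  Table.insert(Table.end(), Op2.begin(), Op2.end());

  // Shuffle index 5 names lane 1 of Op2; adding IndexLen - ElementsPerVectorReg
  // points it at the matching lane of the concatenated table.
  unsigned ShuffleIndex = 5;
  unsigned TblIndex = ShuffleIndex + (IndexLen - ElementsPerVectorReg);
  assert(Table[TblIndex] == Op2[ShuffleIndex - ElementsPerVectorReg]);
  return 0;
}
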
28183 auto ShuffleMask = SVN->getMask(); in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28193 auto MinLegalExtractEltScalarTy = [](EVT ScalarTy) -> EVT { in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28199 if (SVN->isSplat()) { in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28200 unsigned Lane = std::max(0, SVN->getSplatIndex()); in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28211 Imm == VT.getVectorNumElements() - 1) { in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28217 DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64)); in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28242 if (Subtarget->hasSVE2p1() && EltSize == 64 && in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28284 // are actually sub-vectors of a larger SVE register. When mapping in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28292 // when converting from fixed-length to scalable vector types (i.e. the start in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28294 unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits(); in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28295 unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits(); in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28327 // 128-bits. in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28328 if (MinSVESize || !Subtarget->isNeonAvailable()) in LowerFixedLengthVECTOR_SHUFFLEToSVE()
28396 SDValue ShiftR = Op->getOperand(0); in SimplifyDemandedBitsForTargetNode()
28397 if (ShiftR->getOpcode() != AArch64ISD::VLSHR) in SimplifyDemandedBitsForTargetNode()
28403 unsigned ShiftLBits = ShiftL->getConstantOperandVal(1); in SimplifyDemandedBitsForTargetNode()
28404 unsigned ShiftRBits = ShiftR->getConstantOperandVal(1); in SimplifyDemandedBitsForTargetNode()
28421 // used - simplify to just Val. in SimplifyDemandedBitsForTargetNode()
28422 return TLO.CombineTo(Op, ShiftR->getOperand(0)); in SimplifyDemandedBitsForTargetNode()
28430 uint64_t BitsToClear = Op->getConstantOperandVal(1) in SimplifyDemandedBitsForTargetNode()
28431 << Op->getConstantOperandVal(2); in SimplifyDemandedBitsForTargetNode()
28444 unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits(); in SimplifyDemandedBitsForTargetNode()
28456 Known.Zero.setHighBits(BitWidth - RequiredBits); in SimplifyDemandedBitsForTargetNode()
28475 return Subtarget->hasSVE() || Subtarget->hasSVE2() || in isComplexDeinterleavingSupported()
28476 Subtarget->hasComplxNum(); in isComplexDeinterleavingSupported()
28487 if (!VTy->isScalableTy() && !Subtarget->hasComplxNum()) in isComplexDeinterleavingOperationSupported()
28490 auto *ScalarTy = VTy->getScalarType(); in isComplexDeinterleavingOperationSupported()
28491 unsigned NumElements = VTy->getElementCount().getKnownMinValue(); in isComplexDeinterleavingOperationSupported()
28495 // power-of-2 size, as we later split them into the smallest supported size in isComplexDeinterleavingOperationSupported()
28497 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements; in isComplexDeinterleavingOperationSupported()
28498 if ((VTyWidth < 128 && (VTy->isScalableTy() || VTyWidth != 64)) || in isComplexDeinterleavingOperationSupported()
28502 if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) { in isComplexDeinterleavingOperationSupported()
28503 unsigned ScalarWidth = ScalarTy->getScalarSizeInBits(); in isComplexDeinterleavingOperationSupported()
28507 return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) || in isComplexDeinterleavingOperationSupported()
28508 ScalarTy->isFloatTy() || ScalarTy->isDoubleTy(); in isComplexDeinterleavingOperationSupported()
28515 VectorType *Ty = cast<VectorType>(InputA->getType()); in createComplexDeinterleavingIR()
28516 bool IsScalable = Ty->isScalableTy(); in createComplexDeinterleavingIR()
28517 bool IsInt = Ty->getElementType()->isIntegerTy(); in createComplexDeinterleavingIR()
28520 Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue(); in createComplexDeinterleavingIR()
28526 int Stride = Ty->getElementCount().getKnownMinValue() / 2; in createComplexDeinterleavingIR()
28561 auto *Mask = B.getAllOnesMask(Ty->getElementCount()); in createComplexDeinterleavingIR()
28586 auto *Mask = B.getAllOnesMask(Ty->getElementCount()); in createComplexDeinterleavingIR()
28610 unsigned Opc = N->getOpcode(); in preferScalarizeSplat()
28612 if (any_of(N->uses(), in preferScalarizeSplat()
28613 [&](SDNode *Use) { return Use->getOpcode() == ISD::MUL; })) in preferScalarizeSplat()
28620 return Subtarget->getMinimumJumpTableEntries(); in getMinimumJumpTableEntries()
28628 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors()) in getRegisterTypeForCallingConv()
28643 if (!NonUnitFixedLengthVector || !Subtarget->useSVEForFixedLengthVectors()) in getNumRegistersForCallingConv()
28662 assert(Subtarget->useSVEForFixedLengthVectors() && "Unexpected mode!"); in getVectorTypeBreakdownForCallingConv()
28721 return !Subtarget->isTargetWindows() && in hasInlineStackProbe()
28722 MF.getInfo<AArch64FunctionInfo>()->hasStackProbing(); in hasInlineStackProbe()
28727 switch (N->getOpcode()) { in verifyTargetSDNode()
28734 assert(N->getNumValues() == 1 && "Expected one result!"); in verifyTargetSDNode()
28735 assert(N->getNumOperands() == 1 && "Expected one operand!"); in verifyTargetSDNode()
28736 EVT VT = N->getValueType(0); in verifyTargetSDNode()
28737 EVT OpVT = N->getOperand(0).getValueType(); in verifyTargetSDNode()
28753 assert(N->getNumValues() == 1 && "Expected one result!"); in verifyTargetSDNode()
28754 assert(N->getNumOperands() == 2 && "Expected two operands!"); in verifyTargetSDNode()
28755 EVT VT = N->getValueType(0); in verifyTargetSDNode()
28756 EVT Op0VT = N->getOperand(0).getValueType(); in verifyTargetSDNode()
28757 EVT Op1VT = N->getOperand(1).getValueType(); in verifyTargetSDNode()