Lines Matching +full:fsin +full:- +full:output
1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
106 #define DEBUG_TYPE "ppc-lowering"
108 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117 static cl::opt<bool> DisableSCO("disable-ppc-sco",
120 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
127 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
132 "disable-auto-paired-vec-st",
137 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
141 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
145 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
146 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
147 "function to use initial-exec"));
161 // A faster local-[exec|dynamic] TLS access sequence (enabled with the
162 // -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
200 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended. in PPCTargetLowering()
232 // PowerPC has pre-inc loads and stores. in PPCTargetLowering()
314 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on in PPCTargetLowering()
358 // Handle constrained floating-point operations on scalars. in PPCTargetLowering()
399 setOperationAction(ISD::FSIN , MVT::f64, Expand); in PPCTargetLowering()
404 setOperationAction(ISD::FSIN , MVT::f32, Expand); in PPCTargetLowering()
410 // MASS transformation for LLVM intrinsics with replicating fast-math flag in PPCTargetLowering()
413 setOperationAction(ISD::FSIN , MVT::f64, Custom); in PPCTargetLowering()
419 setOperationAction(ISD::FSIN , MVT::f32, Custom); in PPCTargetLowering()
537 // SPE has built-in conversions in PPCTargetLowering()
586 // SjLj exception handling but a light-weight setjmp/longjmp replacement to in PPCTargetLowering()
587 // support continuation, user-level threading, etc. As a result, no in PPCTargetLowering()
590 // LLVM/Clang supports zero-cost DWARF exception handling. in PPCTargetLowering()
618 // VAARG always uses double-word chunks, so promote anything smaller. in PPCTargetLowering()
629 // VAARG is custom lowered with the 32-bit SVR4 ABI. in PPCTargetLowering()
635 // VACOPY is custom lowered with the 32-bit SVR4 ABI. in PPCTargetLowering()
659 // To handle counter-based loop conditions. in PPCTargetLowering()
700 // This is just the low 32 bits of a (signed) fp->i64 conversion. in PPCTargetLowering()
710 // PowerPC does not have FP_TO_UINT on 32-bit implementations. in PPCTargetLowering()
744 // 64-bit PowerPC implementations can support i64 types directly in PPCTargetLowering()
748 // 64-bit PowerPC wants to expand i128 shifts itself. in PPCTargetLowering()
753 // 32-bit PowerPC wants to expand i64 shifts itself. in PPCTargetLowering()
829 // We promote all non-typed operations to v4i32. in PPCTargetLowering()
861 setOperationAction(ISD::FSIN, VT, Expand); in PPCTargetLowering()
905 // Vector truncates to sub-word integers that fit in an Altivec/VSX register in PPCTargetLowering()
994 // Altivec does not contain unordered floating-point compare instructions in PPCTargetLowering()
1020 // so we can only code-gen them with unsafe math. in PPCTargetLowering()
1089 // VSX v2i64 only supports non-arithmetic operations. in PPCTargetLowering()
1145 // Handle constrained floating-point operations on vectors. in PPCTargetLowering()
1190 setOperationAction(ISD::FSIN, MVT::f128, Expand); in PPCTargetLowering()
1245 // Handle constrained floating-point operations on fp128 in PPCTargetLowering()
1302 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE in PPCTargetLowering()
1389 // We have target-specific dag combine patterns for the following nodes: in PPCTargetLowering()
1451 // Re-evaluate this value on future HWs that can do better with mtctr. in PPCTargetLowering()
1597 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1604 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256) in getMaxByValAlign()
1606 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 && in getMaxByValAlign()
1611 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); in getMaxByValAlign()
1615 for (auto *EltTy : STy->elements()) { in getMaxByValAlign()
1626 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1656 if (VTy->getScalarType()->isIntegerTy()) { in shallExtractConstSplatVectorElementToStore()
1853 assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); in enableAggressiveFMAFusion()
1857 //===----------------------------------------------------------------------===//
1859 //===----------------------------------------------------------------------===//
1861 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1864 return CFP->getValueAPF().isZero(); in isFloatingPointZero()
1868 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) in isFloatingPointZero()
1869 return CFP->getValueAPF().isZero(); in isFloatingPointZero()
1874 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1880 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1882 /// The ShuffleKind distinguishes between big-endian operations with
1883 /// two different inputs (0), either-endian operations with two identical
1884 /// inputs (1), and little-endian operations with two different inputs (2).
1893 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) in isVPKUHUMShuffleMask()
1899 if (!isConstantOrUndef(N->getMaskElt(i), i*2)) in isVPKUHUMShuffleMask()
1904 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || in isVPKUHUMShuffleMask()
1905 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) in isVPKUHUMShuffleMask()
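The checks above compare every result lane against the low byte of the corresponding halfword of the concatenated inputs; here is a standalone C++ sketch of that test (hypothetical helper names, not LLVM's ShuffleVectorSDNode API), with -1 standing in for an undef lane.

// Standalone sketch of the vpkuhum mask test: big endian keeps the odd byte
// of each halfword (i*2+1), little endian keeps the even byte (i*2), and an
// undef lane (-1) matches anything, mirroring isConstantOrUndef().
#include <array>
#include <cstdio>

static bool laneIsOrUndef(int Lane, int Want) { return Lane < 0 || Lane == Want; }

static bool looksLikeVPKUHUM(const std::array<int, 16> &Mask, bool IsLittleEndian) {
  const int Offset = IsLittleEndian ? 0 : 1; // which byte of each halfword survives
  for (int i = 0; i != 16; ++i)
    if (!laneIsOrUndef(Mask[i], i * 2 + Offset))
      return false;
  return true;
}

int main() {
  // Little-endian, two distinct inputs: lanes 0,2,4,...,30 of the 32-byte concat.
  std::array<int, 16> Mask;
  for (int i = 0; i != 16; ++i)
    Mask[i] = i * 2;
  std::printf("LE vpkuhum-style mask: %s\n",
              looksLikeVPKUHUM(Mask, /*IsLittleEndian=*/true) ? "yes" : "no");
  return 0;
}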
1911 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1913 /// The ShuffleKind distinguishes between big-endian operations with
1914 /// two different inputs (0), either-endian operations with two identical
1915 /// inputs (1), and little-endian operations with two different inputs (2).
1924 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || in isVPKUWUMShuffleMask()
1925 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) in isVPKUWUMShuffleMask()
1931 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || in isVPKUWUMShuffleMask()
1932 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) in isVPKUWUMShuffleMask()
1937 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || in isVPKUWUMShuffleMask()
1938 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || in isVPKUWUMShuffleMask()
1939 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || in isVPKUWUMShuffleMask()
1940 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) in isVPKUWUMShuffleMask()
1946 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1950 /// The ShuffleKind distinguishes between big-endian operations with
1951 /// two different inputs (0), either-endian operations with two identical
1952 /// inputs (1), and little-endian operations with two different inputs (2).
1965 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || in isVPKUDUMShuffleMask()
1966 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || in isVPKUDUMShuffleMask()
1967 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || in isVPKUDUMShuffleMask()
1968 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) in isVPKUDUMShuffleMask()
1974 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || in isVPKUDUMShuffleMask()
1975 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || in isVPKUDUMShuffleMask()
1976 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || in isVPKUDUMShuffleMask()
1977 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) in isVPKUDUMShuffleMask()
1982 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || in isVPKUDUMShuffleMask()
1983 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || in isVPKUDUMShuffleMask()
1984 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || in isVPKUDUMShuffleMask()
1985 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || in isVPKUDUMShuffleMask()
1986 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || in isVPKUDUMShuffleMask()
1987 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || in isVPKUDUMShuffleMask()
1988 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || in isVPKUDUMShuffleMask()
1989 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) in isVPKUDUMShuffleMask()
1995 /// isVMerge - Common function, used to match vmrg* shuffles.
1999 if (N->getValueType(0) != MVT::v16i8) in isVMerge()
2006 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), in isVMerge()
2008 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), in isVMerge()
2015 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2017 /// The ShuffleKind distinguishes between big-endian merges with two
2018 /// different inputs (0), either-endian merges with two identical inputs (1),
2019 /// and little-endian merges with two different inputs (2). For the latter,
2040 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2042 /// The ShuffleKind distinguishes between big-endian merges with two
2043 /// different inputs (0), either-endian merges with two identical inputs (1),
2044 /// and little-endian merges with two different inputs (2). For the latter,
2071 * - Little Endian:
2072 * - Use offset of 0 to check for odd elements
2073 * - Use offset of 4 to check for even elements
2074 * - Big Endian:
2075 * - Use offset of 0 to check for even elements
2076 * - Use offset of 4 to check for odd elements
2079 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2080 * Targeting your applications - what little endian and big endian IBM XL C/C++
2085 * numbered in array-access order, starting with the first vector. These vectors
2088 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2093 * - If the instruction uses the same vector for both inputs, the range of the
2096 * - If the instruction has two different vectors then the range of the
2098 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2109 if (N->getValueType(0) != MVT::v16i8) in isVMerge()
2114 if (!isConstantOrUndef(N->getMaskElt(i*4+j), in isVMerge()
2116 !isConstantOrUndef(N->getMaskElt(i*4+j+8), in isVMerge()
2129 * - 0 = big-endian merge with two different inputs;
2130 * - 1 = either-endian merge with two identical inputs;
2131 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2132 * little-endian merges).
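The isVMerge fragments above encode that even/odd word-merge scheme as index arithmetic; the following standalone C++ sketch (hypothetical helpers, not LLVM's API) spells out the formula and builds its demo mask from the formula itself rather than from any particular instruction encoding.

// Result byte i*4+j must come from byte i*8+j+Offset of the first input and
// result byte i*4+j+8 from the matching byte of the second input. RHSStart is
// 16 when the inputs are distinct, 0 when both operands are the same vector.
// Offset 0 gathers the words starting at bytes 0 and 8 of each input, Offset 4
// the words at bytes 4 and 12, which is how the even/odd parity flips between
// big- and little-endian element numbering.
#include <array>
#include <cstdio>

static bool laneIsOrUndef(int Lane, int Want) { return Lane < 0 || Lane == Want; }

static bool isOffsetWordMerge(const std::array<int, 16> &Mask, int Offset, int RHSStart) {
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 4; ++j) {
      if (!laneIsOrUndef(Mask[i * 4 + j], i * 8 + j + Offset))
        return false;
      if (!laneIsOrUndef(Mask[i * 4 + j + 8], i * 8 + j + Offset + RHSStart))
        return false;
    }
  return true;
}

int main() {
  // Build the mask the formula accepts for Offset = 0 with two distinct inputs.
  std::array<int, 16> Mask;
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 4; ++j) {
      Mask[i * 4 + j] = i * 8 + j;          // bytes 0-3 and 8-11 of input A
      Mask[i * 4 + j + 8] = i * 8 + j + 16; // bytes 0-3 and 8-11 of input B
    }
  std::printf("matches: %s\n", isOffsetWordMerge(Mask, 0, 16) ? "yes" : "no");
  return 0;
}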
2159 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2160 /// amount, otherwise return -1.
2161 /// The ShuffleKind distinguishes between big-endian operations with two
2162 /// different inputs (0), either-endian operations with two identical inputs
2163 /// (1), and little-endian operations with two different inputs (2). For the
2167 if (N->getValueType(0) != MVT::v16i8) in isVSLDOIShuffleMask()
2168 return -1; in isVSLDOIShuffleMask()
2172 // Find the first non-undef value in the shuffle mask. in isVSLDOIShuffleMask()
2174 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) in isVSLDOIShuffleMask()
2177 if (i == 16) return -1; // all undef. in isVSLDOIShuffleMask()
2181 unsigned ShiftAmt = SVOp->getMaskElt(i); in isVSLDOIShuffleMask()
2182 if (ShiftAmt < i) return -1; in isVSLDOIShuffleMask()
2184 ShiftAmt -= i; in isVSLDOIShuffleMask()
2190 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) in isVSLDOIShuffleMask()
2191 return -1; in isVSLDOIShuffleMask()
2195 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) in isVSLDOIShuffleMask()
2196 return -1; in isVSLDOIShuffleMask()
2198 return -1; in isVSLDOIShuffleMask()
2201 ShiftAmt = 16 - ShiftAmt; in isVSLDOIShuffleMask()
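A standalone sketch of that shift-amount recovery under the same assumptions (helper names are hypothetical, undef lanes are modeled as -1): skip leading undef lanes, derive the shift from the first defined lane, then require the rest of the mask to continue the run, wrapping modulo 16 only when both inputs are the same vector; little-endian finally flips the amount to 16 - ShiftAmt.

#include <array>
#include <cstdio>

static bool laneIsOrUndef(int Lane, int Want) { return Lane < 0 || Lane == Want; }

// Kind 0: two distinct inputs, no wraparound. Kind 1: one input, wrap mod 16.
// Returns the byte shift, or -1 if the mask is not a vsldoi pattern.
static int vsldoiShiftAmount(const std::array<int, 16> &Mask, int Kind, bool IsLittleEndian) {
  int i = 0;
  while (i != 16 && Mask[i] < 0)
    ++i;
  if (i == 16)
    return -1; // all lanes undef
  int ShiftAmt = Mask[i];
  if (ShiftAmt < i)
    return -1;
  ShiftAmt -= i;
  for (++i; i != 16; ++i) {
    int Want = Kind == 1 ? (ShiftAmt + i) & 15 : ShiftAmt + i;
    if (!laneIsOrUndef(Mask[i], Want))
      return -1;
  }
  if (IsLittleEndian)
    ShiftAmt = 16 - ShiftAmt;
  return ShiftAmt;
}

int main() {
  std::array<int, 16> Mask;
  for (int i = 0; i != 16; ++i)
    Mask[i] = i + 3; // bytes 3..18 of the concatenated inputs
  std::printf("shift = %d\n", vsldoiShiftAmount(Mask, /*Kind=*/0, /*IsLittleEndian=*/false));
  return 0;
}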
2206 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2210 EVT VT = N->getValueType(0); in isSplatShuffleMask()
2212 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1); in isSplatShuffleMask()
2219 if (N->getMaskElt(0) % EltSize != 0) in isSplatShuffleMask()
2224 unsigned ElementBase = N->getMaskElt(0); in isSplatShuffleMask()
2230 // Check that the indices are consecutive, in the case of a multi-byte element in isSplatShuffleMask()
2233 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) in isSplatShuffleMask()
2237 if (N->getMaskElt(i) < 0) continue; in isSplatShuffleMask()
2239 if (N->getMaskElt(i+j) != N->getMaskElt(j)) in isSplatShuffleMask()
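A standalone sketch of the splat test for a v16i8 mask, with hypothetical helper names and -1 for undef lanes: the first EltSize lanes must name one whole, aligned element of the first input, and every later EltSize-lane group must repeat that pattern (groups whose leading lane is undef are skipped, as in the code above).

#include <array>
#include <cstdio>

static bool isSplatMask(const std::array<int, 16> &Mask, int EltSize) {
  if (Mask[0] % EltSize != 0 || Mask[0] >= 16) // must start an element of input 0
    return false;
  for (int i = 1; i != EltSize; ++i)           // and cover it byte by byte
    if (Mask[i] != Mask[0] + i)
      return false;
  for (int i = EltSize; i != 16; i += EltSize) {
    if (Mask[i] < 0)
      continue;                                // leading undef: group left unconstrained
    for (int j = 0; j != EltSize; ++j)
      if (Mask[i + j] != Mask[j])
        return false;
  }
  return true;
}

int main() {
  // Splat of word 2 of the first input: bytes 8..11 repeated four times.
  std::array<int, 16> Mask = {8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11};
  std::printf("splat(word)? %s\n", isSplatMask(Mask, 4) ? "yes" : "no");
  return 0;
}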
2252 /// the mask is in increasing/decreasing order then it is 1/-1.
2258 assert((StepLen == 1 || StepLen == -1) && "Unexpected element width."); in isNByteElemShuffleMask()
2263 MaskVal[0] = N->getMaskElt(i * Width); in isNByteElemShuffleMask()
2266 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { in isNByteElemShuffleMask()
2271 MaskVal[j] = N->getMaskElt(i * Width + j); in isNByteElemShuffleMask()
2272 if (MaskVal[j] != MaskVal[j-1] + StepLen) { in isNByteElemShuffleMask()
2287 unsigned M0 = N->getMaskElt(0) / 4; in isXXINSERTWMask()
2288 unsigned M1 = N->getMaskElt(4) / 4; in isXXINSERTWMask()
2289 unsigned M2 = N->getMaskElt(8) / 4; in isXXINSERTWMask()
2290 unsigned M3 = N->getMaskElt(12) / 4; in isXXINSERTWMask()
2331 if (N->getOperand(1).isUndef()) { in isXXINSERTWMask()
2358 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); in isXXSLDWIShuffleMask()
2364 unsigned M0 = N->getMaskElt(0) / 4; in isXXSLDWIShuffleMask()
2365 unsigned M1 = N->getMaskElt(4) / 4; in isXXSLDWIShuffleMask()
2366 unsigned M2 = N->getMaskElt(8) / 4; in isXXSLDWIShuffleMask()
2367 unsigned M3 = N->getMaskElt(12) / 4; in isXXSLDWIShuffleMask()
2371 if (N->getOperand(1).isUndef()) { in isXXSLDWIShuffleMask()
2376 ShiftElts = IsLE ? (4 - M0) % 4 : M0; in isXXSLDWIShuffleMask()
2391 ShiftElts = (8 - M0) % 8; in isXXSLDWIShuffleMask()
2395 // (or if we're shifting by 4 - thereby simply swapping the vectors). in isXXSLDWIShuffleMask()
2397 ShiftElts = (4 - M0) % 4; in isXXSLDWIShuffleMask()
2411 ShiftElts = M0 - 4; in isXXSLDWIShuffleMask()
2419 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); in isXXBRShuffleMaskHelper()
2421 if (!isNByteElemShuffleMask(N, Width, -1)) in isXXBRShuffleMaskHelper()
2425 if (N->getMaskElt(i) != i + Width - 1) in isXXBRShuffleMaskHelper()
2452 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2457 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); in isXXPERMDIShuffleMask()
2463 unsigned M0 = N->getMaskElt(0) / 8; in isXXPERMDIShuffleMask()
2464 unsigned M1 = N->getMaskElt(8) / 8; in isXXPERMDIShuffleMask()
2469 if (N->getOperand(1).isUndef()) { in isXXPERMDIShuffleMask()
2508 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2509 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2515 EVT VT = SVOp->getValueType(0); in getSplatIdxForPPCMnemonics()
2518 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0) in getSplatIdxForPPCMnemonics()
2519 : SVOp->getMaskElt(0); in getSplatIdxForPPCMnemonics()
2522 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); in getSplatIdxForPPCMnemonics()
2524 return SVOp->getMaskElt(0) / EltSize; in getSplatIdxForPPCMnemonics()
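The conversion is pure index arithmetic; a minimal standalone sketch of the byte-mask case (the v2i64 special case above is ignored, and the helper name is hypothetical):

// PPC splat mnemonics number elements with a big-endian bias, so on
// little-endian targets the element index recovered from the byte mask is
// mirrored within the 16-byte register.
#include <cstdio>

static unsigned splatIdxForMnemonic(unsigned FirstMaskByte, unsigned EltSize,
                                    bool IsLittleEndian) {
  unsigned Elt = FirstMaskByte / EltSize;          // element index in LLVM's order
  unsigned NumElts = 16 / EltSize;                 // elements per 16-byte register
  return IsLittleEndian ? NumElts - 1 - Elt : Elt; // mirror for LE mnemonics
}

int main() {
  // Splat of word element 2 (mask bytes start at 8): vspltw uses index 1 on LE,
  // index 2 on BE.
  std::printf("LE: %u  BE: %u\n", splatIdxForMnemonic(8, 4, true),
              splatIdxForMnemonic(8, 4, false));
  return 0;
}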
2527 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2530 /// bytes of each element [124] -> [bhw].
2538 unsigned EltSize = 16/N->getNumOperands(); in get_VSPLTI_elt()
2545 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { in get_VSPLTI_elt()
2546 if (N->getOperand(i).isUndef()) continue; in get_VSPLTI_elt()
2548 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); in get_VSPLTI_elt()
2550 if (!UniquedVals[i&(Multiple-1)].getNode()) in get_VSPLTI_elt()
2551 UniquedVals[i&(Multiple-1)] = N->getOperand(i); in get_VSPLTI_elt()
2552 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) in get_VSPLTI_elt()
2556 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains in get_VSPLTI_elt()
2560 // Check to see if all of the leading entries are either 0 or -1. If in get_VSPLTI_elt()
2564 for (unsigned i = 0; i != Multiple-1; ++i) { in get_VSPLTI_elt()
2572 if (!UniquedVals[Multiple-1].getNode()) in get_VSPLTI_elt()
2574 int Val = UniquedVals[Multiple - 1]->getAsZExtVal(); in get_VSPLTI_elt()
2575 if (Val < 16) // 0,0,0,4 -> vspltisw(4) in get_VSPLTI_elt()
2579 if (!UniquedVals[Multiple-1].getNode()) in get_VSPLTI_elt()
2580 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef in get_VSPLTI_elt()
2581 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); in get_VSPLTI_elt()
2582 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) in get_VSPLTI_elt()
2589 // Check to see if this buildvec has a single non-undef value in its elements. in get_VSPLTI_elt()
2590 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { in get_VSPLTI_elt()
2591 if (N->getOperand(i).isUndef()) continue; in get_VSPLTI_elt()
2593 OpVal = N->getOperand(i); in get_VSPLTI_elt()
2594 else if (OpVal != N->getOperand(i)) in get_VSPLTI_elt()
2603 Value = CN->getZExtValue(); in get_VSPLTI_elt()
2605 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); in get_VSPLTI_elt()
2606 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat()); in get_VSPLTI_elt()
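The core criterion get_VSPLTI_elt is after is that the splatted per-element constant fits the 5-bit signed immediate of vspltis[bhw]; a minimal sketch of just that range check (the handling of wider splat elements built from narrower operands, visible in the fragments above, is omitted):

#include <cstdint>
#include <cstdio>

static bool fitsVSPLTIS(int64_t SplatVal) {
  return SplatVal >= -16 && SplatVal <= 15; // 5-bit signed immediate
}

int main() {
  std::printf("splat 4:   %s (vspltisw 4)\n", fitsVSPLTIS(4) ? "yes" : "no");
  std::printf("splat -2:  %s (vspltisw -2)\n", fitsVSPLTIS(-2) ? "yes" : "no");
  std::printf("splat 100: %s (needs another sequence)\n", fitsVSPLTIS(100) ? "yes" : "no");
  return 0;
}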
2631 //===----------------------------------------------------------------------===//
2633 //===----------------------------------------------------------------------===//
2635 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2636 /// or 64-bit immediate, and if the value can be accurately represented as a
2637 /// sign extension from a 16-bit value. If so, this returns true and the
2643 Imm = (int16_t)N->getAsZExtVal(); in isIntS16Immediate()
2644 if (N->getValueType(0) == MVT::i32) in isIntS16Immediate()
2645 return Imm == (int32_t)N->getAsZExtVal(); in isIntS16Immediate()
2647 return Imm == (int64_t)N->getAsZExtVal(); in isIntS16Immediate()
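A minimal standalone sketch of that fit test: a constant can be used as a signed 16-bit immediate (the D field of D-form instructions such as addi/lwz) exactly when truncating it to int16_t and sign-extending it back reproduces the value.

#include <cstdint>
#include <cstdio>

static bool isIntS16(int64_t V) { return V == (int16_t)V; }

int main() {
  std::printf("%d %d %d\n", isIntS16(0x7FFF),  // 32767: fits
              isIntS16(0x8000),                // 32768: does not fit
              isIntS16(-32768));               // -32768: fits
  return 0;
}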
2668 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2673 for (SDNode *U : N->uses()) { in SelectAddressEVXRegReg()
2675 if (Memop->getMemoryVT() == MVT::f64) { in SelectAddressEVXRegReg()
2685 /// isIntS34Immediate - This method tests if value of node given can be
2686 /// accurately represented as a sign extension from a 34-bit value. If so,
2692 Imm = (int64_t)N->getAsZExtVal(); in isIntS34Immediate()
2699 /// SelectAddressRegReg - Given the specified address, check to see if it in SelectAddressRegReg()
2702 /// non-zero and N can be represented by a base register plus a signed 16-bit
2716 // SPE load/store can only handle 8-bit offsets. in SelectAddressRegReg()
2754 // less than a 4-byte alignment, then the frame-index elimination may need to
2759 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2769 // we find such a test case. The reason why this is so test-case driven is in fixupFuncForFI()
2771 // register scavenger) on not-really-valid inputs. For example, if we have: in fixupFuncForFI()
2777 // instruction-selected initially, and the problem this 'fixup' is preventing in fixupFuncForFI()
2789 FuncInfo->setHasNonRISpills(); in fixupFuncForFI()
2793 /// a signed 16-bit displacement [r+imm], and if it is not better
2794 /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2817 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); in SelectAddressRegImm()
2818 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); in SelectAddressRegImm()
2849 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); in SelectAddressRegImm()
2850 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); in SelectAddressRegImm()
2861 // If this address fits entirely in a 16-bit sext immediate field, codegen in SelectAddressRegImm()
2866 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); in SelectAddressRegImm()
2868 CN->getValueType(0)); in SelectAddressRegImm()
2872 // Handle 32-bit sext immediates with LIS + addr mode. in SelectAddressRegImm()
2873 if ((CN->getValueType(0) == MVT::i32 || in SelectAddressRegImm()
2874 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && in SelectAddressRegImm()
2876 isAligned(*EncodingAlignment, CN->getZExtValue()))) { in SelectAddressRegImm()
2877 int Addr = (int)CN->getZExtValue(); in SelectAddressRegImm()
2882 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, in SelectAddressRegImm()
2884 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; in SelectAddressRegImm()
2885 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); in SelectAddressRegImm()
2892 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); in SelectAddressRegImm()
2893 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); in SelectAddressRegImm()
2899 /// Similar to the 16-bit case but for instructions that take a 34-bit
2904 // Only on 64-bit targets. in SelectAddressRegImm34()
2916 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); in SelectAddressRegImm34()
2932 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); in SelectAddressRegImm34()
2939 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const. in SelectAddressRegImm34()
2948 /// SelectAddressRegRegOnly - Given the specified address, force it to be in SelectAddressRegRegOnly()
2963 // value and a 16-bit signed constant and both have a single use. in SelectAddressRegRegOnly()
2982 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags())); in isValidPCRelNode()
3006 // keep it as a scalar load -> direct move pattern to prevent multiple in usePartialVectorLoads()
3012 EVT MemVT = LD->getMemoryVT(); in usePartialVectorLoads()
3035 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); in usePartialVectorLoads()
3038 UI->getOpcode() != ISD::SCALAR_TO_VECTOR && in usePartialVectorLoads()
3039 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED) in usePartialVectorLoads()
3045 /// getPreIndexedAddressParts - returns true by value, base pointer and
3047 /// can be legally represented as pre-indexed load / store address.
3059 Ptr = LD->getBasePtr(); in getPreIndexedAddressParts()
3060 VT = LD->getMemoryVT(); in getPreIndexedAddressParts()
3061 Alignment = LD->getAlign(); in getPreIndexedAddressParts()
3063 Ptr = ST->getBasePtr(); in getPreIndexedAddressParts()
3064 VT = ST->getMemoryVT(); in getPreIndexedAddressParts()
3065 Alignment = ST->getAlign(); in getPreIndexedAddressParts()
3070 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector in getPreIndexedAddressParts()
3082 // Common code will reject creating a pre-inc form if the base pointer in getPreIndexedAddressParts()
3091 SDValue Val = cast<StoreSDNode>(N)->getValue(); in getPreIndexedAddressParts()
3092 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) in getPreIndexedAddressParts()
3108 // LDU/STU need an address with at least 4-byte alignment. in getPreIndexedAddressParts()
3119 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && in getPreIndexedAddressParts()
3120 LD->getExtensionType() == ISD::SEXTLOAD && in getPreIndexedAddressParts()
3129 //===----------------------------------------------------------------------===//
3131 //===----------------------------------------------------------------------===//
3162 // Generate non-pic code that has direct accesses to the constant pool. in LowerLabelRef()
3169 FuncInfo->setUsesTOCBasePtr(); in setUsesTOCBasePtr()
3195 const Constant *C = CP->getConstVal(); in LowerConstantPool()
3197 // 64-bit SVR4 ABI and AIX ABI code are always position-independent. in LowerConstantPool()
3204 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG); in LowerConstantPool()
3208 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0); in LowerConstantPool()
3218 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG); in LowerConstantPool()
3223 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag); in LowerConstantPool()
3225 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag); in LowerConstantPool()
3229 // For 64-bit PowerPC, prefer the more compact relative encodings.
3274 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); in getPICJumpTableRelocBaseExpr()
3287 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG); in LowerJumpTable()
3292 // 64-bit SVR4 ABI and AIX ABI code are always position-independent. in LowerJumpTable()
3296 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); in LowerJumpTable()
3305 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, in LowerJumpTable()
3310 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); in LowerJumpTable()
3311 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); in LowerJumpTable()
3319 const BlockAddress *BA = BASDN->getBlockAddress(); in LowerBlockAddress()
3325 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(), in LowerBlockAddress()
3331 // 64-bit SVR4 ABI and AIX ABI code are always position-independent. in LowerBlockAddress()
3335 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); in LowerBlockAddress()
3339 // 32-bit position-independent ELF stores the BlockAddress in the .got. in LowerBlockAddress()
3343 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset())); in LowerBlockAddress()
3361 /// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3367 // (1) Use initial-exec for single TLS var references within current function. in updateForAIXShLibTLSModelOpt()
3368 // (2) Use local-dynamic for multiple TLS var references within current in updateForAIXShLibTLSModelOpt()
3372 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) { in updateForAIXShLibTLSModelOpt()
3380 for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end(); in updateForAIXShLibTLSModelOpt()
3382 if (II->getOpcode() == Instruction::Call) in updateForAIXShLibTLSModelOpt()
3384 if (Function *CF = CI->getCalledFunction()) in updateForAIXShLibTLSModelOpt()
3385 if (CF->isDeclaration() && in updateForAIXShLibTLSModelOpt()
3386 CF->getIntrinsicID() == Intrinsic::threadlocal_address) in updateForAIXShLibTLSModelOpt()
3388 dyn_cast<GlobalValue>(II->getOperand(0))) { in updateForAIXShLibTLSModelOpt()
3397 FuncInfo->setAIXFuncUseTLSIEForLD(); in updateForAIXShLibTLSModelOpt()
3398 FuncInfo->setAIXFuncTLSModelOptInitDone(); in updateForAIXShLibTLSModelOpt()
3401 if (FuncInfo->isAIXFuncUseTLSIEForLD()) { in updateForAIXShLibTLSModelOpt()
3404 << " function is using the TLS-IE model for TLS-LD access.\n"); in updateForAIXShLibTLSModelOpt()
3417 const GlobalValue *GV = GA->getGlobal(); in LowerGlobalTLSAddressAIX()
3437 if (GVar->hasAttribute("aix-small-tls")) in LowerGlobalTLSAddressAIX()
3441 // For local-exec and initial-exec on AIX (64-bit), the sequence generated in LowerGlobalTLSAddressAIX()
3449 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls" in LowerGlobalTLSAddressAIX()
3451 // local-exec TLS variables where the offset from the TLS base is encoded in LowerGlobalTLSAddressAIX()
3454 // We only utilize the faster local-exec access sequence when the TLS in LowerGlobalTLSAddressAIX()
3459 Type *GVType = GV->getValueType(); in LowerGlobalTLSAddressAIX()
3460 if (GVType->isSized() && !GVType->isEmptyTy() && in LowerGlobalTLSAddressAIX()
3461 GV->getDataLayout().getTypeAllocSize(GVType) <= in LowerGlobalTLSAddressAIX()
3466 // For local-exec and initial-exec on AIX (32-bit), the sequence generated in LowerGlobalTLSAddressAIX()
3475 // We do not implement the 32-bit version of the faster access sequence in LowerGlobalTLSAddressAIX()
3476 // for local-exec that is controlled by the -maix-small-local-exec-tls in LowerGlobalTLSAddressAIX()
3477 // option, or the "aix-small-tls" global variable attribute. in LowerGlobalTLSAddressAIX()
3479 report_fatal_error("The small-local-exec TLS access sequence is " in LowerGlobalTLSAddressAIX()
3480 "currently only supported on AIX (64-bit mode)."); in LowerGlobalTLSAddressAIX()
3488 // We do not implement the 32-bit version of the faster access sequence in LowerGlobalTLSAddressAIX()
3489 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls. in LowerGlobalTLSAddressAIX()
3491 report_fatal_error("The small-local-dynamic TLS access sequence is " in LowerGlobalTLSAddressAIX()
3492 "currently only supported on AIX (64-bit mode)."); in LowerGlobalTLSAddressAIX()
3494 // For local-dynamic on AIX, we need to generate one TOC entry for each in LowerGlobalTLSAddressAIX()
3495 // variable offset, and a single module-handle TOC entry for the entire in LowerGlobalTLSAddressAIX()
3504 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal( in LowerGlobalTLSAddressAIX()
3506 TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel); in LowerGlobalTLSAddressAIX()
3514 // With the -maix-small-local-dynamic-tls option, produce a faster access in LowerGlobalTLSAddressAIX()
3515 // sequence for local-dynamic TLS variables where the offset from the in LowerGlobalTLSAddressAIX()
3516 // module-handle is encoded as an immediate operand. in LowerGlobalTLSAddressAIX()
3518 // We only utilize the faster local-dynamic access sequence when the TLS in LowerGlobalTLSAddressAIX()
3522 Type *GVType = GV->getValueType(); in LowerGlobalTLSAddressAIX()
3523 if (GVType->isSized() && !GVType->isEmptyTy() && in LowerGlobalTLSAddressAIX()
3524 GV->getDataLayout().getTypeAllocSize(GVType) <= in LowerGlobalTLSAddressAIX()
3533 // If Local- or Initial-exec or Local-dynamic is not possible or specified, in LowerGlobalTLSAddressAIX()
3534 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We in LowerGlobalTLSAddressAIX()
3560 const GlobalValue *GV = GA->getGlobal(); in LowerGlobalTLSAddressLinux()
3564 PICLevel::Level picLevel = M->getPICLevel(); in LowerGlobalTLSAddressLinux()
3682 const GlobalValue *GV = GSDN->getGlobal(); in LowerGlobalAddress()
3684 // 64-bit SVR4 ABI & AIX ABI code is always position-independent. in LowerGlobalAddress()
3690 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(), in LowerGlobalAddress()
3697 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(), in LowerGlobalAddress()
3703 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); in LowerGlobalAddress()
3713 GSDN->getOffset(), in LowerGlobalAddress()
3719 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); in LowerGlobalAddress()
3721 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); in LowerGlobalAddress()
3727 bool IsStrict = Op->isStrictFPOpcode(); in LowerSETCC()
3729 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get(); in LowerSETCC()
3741 Op->getOpcode() == ISD::STRICT_FSETCCS); in LowerSETCC()
3782 // Leave comparisons against 0 and -1 alone for now, since they're usually in LowerSETCC()
3785 if (C->isAllOnes() || C->isZero()) in LowerSETCC()
3793 // the result to other bit-twiddling opportunities. in LowerSETCC()
3804 EVT VT = Node->getValueType(0); in LowerVAARG()
3806 SDValue InChain = Node->getOperand(0); in LowerVAARG()
3807 SDValue VAListPtr = Node->getOperand(1); in LowerVAARG()
3808 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); in LowerVAARG()
3936 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue) in LowerINLINEASM()
3937 --NumOps; in LowerINLINEASM()
3956 for (; NumVals; --NumVals, ++i) { in LowerINLINEASM()
3957 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg(); in LowerINLINEASM()
4020 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); in LowerVASTART()
4021 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerVASTART()
4026 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. in LowerVASTART()
4050 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); in LowerVASTART()
4051 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); in LowerVASTART()
4052 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), in LowerVASTART()
4054 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), in LowerVASTART()
4060 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; in LowerVASTART()
4066 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerVASTART()
4094 /// FPR - The set of FP registers that should be allocated for arguments
4100 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
4111 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; in CalculateStackSlotSize()
4116 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
4156 /// CalculateStackSlotUsed - Return whether this argument will use its
4172 // use memory (this check also catches zero-sized arguments). in CalculateStackSlotUsed()
4179 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; in CalculateStackSlotUsed()
4190 --AvailableFPRs; in CalculateStackSlotUsed()
4198 --AvailableVRs; in CalculateStackSlotUsed()
4206 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
4210 return alignTo(NumBytes, Lowering->getStackAlign()); in EnsureStackAlignment()
4233 // 32-bit SVR4 ABI Stack Frame Layout: in LowerFormalArguments_32SVR4()
4234 // +-----------------------------------+ in LowerFormalArguments_32SVR4()
4235 // +--> | Back chain | in LowerFormalArguments_32SVR4()
4236 // | +-----------------------------------+ in LowerFormalArguments_32SVR4()
4237 // | | Floating-point register save area | in LowerFormalArguments_32SVR4()
4238 // | +-----------------------------------+ in LowerFormalArguments_32SVR4()
4240 // | +-----------------------------------+ in LowerFormalArguments_32SVR4()
4242 // | +-----------------------------------+ in LowerFormalArguments_32SVR4()
4244 // | +-----------------------------------+ in LowerFormalArguments_32SVR4()
4246 // | +-----------------------------------+ in LowerFormalArguments_32SVR4()
4248 // | +-----------------------------------+ in LowerFormalArguments_32SVR4()
4250 // | +-----------------------------------+ in LowerFormalArguments_32SVR4()
4252 // | +-----------------------------------+ in LowerFormalArguments_32SVR4()
4254 // | +-----------------------------------+ in LowerFormalArguments_32SVR4()
4255 // SP--> +--- | Back chain | in LowerFormalArguments_32SVR4()
4256 // +-----------------------------------+ in LowerFormalArguments_32SVR4()
4278 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); in LowerFormalArguments_32SVR4()
4364 ArgOffset += ArgSize - ObjSize; in LowerFormalArguments_32SVR4()
4396 FuncInfo->setMinReservedArea(MinReservedArea); in LowerFormalArguments_32SVR4()
4418 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); in LowerFormalArguments_32SVR4()
4419 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); in LowerFormalArguments_32SVR4()
4425 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject( in LowerFormalArguments_32SVR4()
4428 FuncInfo->setVarArgsFrameIndex( in LowerFormalArguments_32SVR4()
4430 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); in LowerFormalArguments_32SVR4()
4436 // Get an existing live-in vreg, or add a new one. in LowerFormalArguments_32SVR4()
4450 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 in LowerFormalArguments_32SVR4()
4455 // Get an existing live-in vreg, or add a new one. in LowerFormalArguments_32SVR4()
4513 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); in LowerFormalArguments_64SVR4()
4567 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); in LowerFormalArguments_64SVR4()
4570 // We re-align the argument offset for each argument, except when using the in LowerFormalArguments_64SVR4()
4587 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; in LowerFormalArguments_64SVR4()
4601 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; in LowerFormalArguments_64SVR4()
4606 // etc. However, we have to provide a place-holder in InVals, so in LowerFormalArguments_64SVR4()
4607 // pretend we have an 8-byte item at the current address for that in LowerFormalArguments_64SVR4()
4633 // address of the enclosing doubleword on big-endian systems. in LowerFormalArguments_64SVR4()
4636 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); in LowerFormalArguments_64SVR4()
4643 FuncInfo->addLiveInAttr(VReg, Flags); in LowerFormalArguments_64SVR4()
4667 FuncInfo->addLiveInAttr(VReg, Flags); in LowerFormalArguments_64SVR4()
4674 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8; in LowerFormalArguments_64SVR4()
4707 FuncInfo->addLiveInAttr(VReg, Flags); in LowerFormalArguments_64SVR4()
4746 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 in LowerFormalArguments_64SVR4()
4747 // once we support fp <-> gpr moves. in LowerFormalArguments_64SVR4()
4753 FuncInfo->addLiveInAttr(VReg, Flags); in LowerFormalArguments_64SVR4()
4778 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; in LowerFormalArguments_64SVR4()
4810 CurArgOffset += ArgSize - ObjSize; in LowerFormalArguments_64SVR4()
4832 FuncInfo->setMinReservedArea(MinReservedArea); in LowerFormalArguments_64SVR4()
4843 FuncInfo->setVarArgsFrameIndex( in LowerFormalArguments_64SVR4()
4845 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); in LowerFormalArguments_64SVR4()
4850 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; in LowerFormalArguments_64SVR4()
4869 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4877 unsigned CallerMinReservedArea = FI->getMinReservedArea(); in CalculateTailCallSPDiff()
4878 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; in CalculateTailCallSPDiff()
4880 if (SPDiff < FI->getTailCallSPDelta()) in CalculateTailCallSPDiff()
4881 FI->setTailCallSPDelta(SPDiff); in CalculateTailCallSPDiff()
4895 assert(!STICaller->isUsingPCRelativeCalls() && in callsShareTOCBase()
4906 // If the callee is preemptable, then the static linker will use a plt-stub in callsShareTOCBase()
4908 // instruction to convert to a toc-restore. in callsShareTOCBase()
4920 const GlobalObject *GlobalObj = Alias->getAliaseeObject(); in callsShareTOCBase()
4934 if (STICallee->isUsingPCRelativeCalls()) in callsShareTOCBase()
4941 if (!CalleeGV->isStrongDefinitionForLinker()) in callsShareTOCBase()
4951 // Any explicitly-specified sections and section prefixes must also match. in callsShareTOCBase()
4952 // Also, if we're using -ffunction-sections, then each function is always in in callsShareTOCBase()
4954 if (TM.getFunctionSections() || CalleeGV->hasComdat() || in callsShareTOCBase()
4955 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection()) in callsShareTOCBase()
4958 if (F->getSectionPrefix() != Caller->getSectionPrefix()) in callsShareTOCBase()
4971 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); in needStackSlotPassParameters()
5003 if (CB.arg_size() != CallerFn->arg_size()) in hasSameArgumentList()
5008 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); in hasSameArgumentList()
5020 if (CalleeArg->getType() == CallerArg->getType() && in hasSameArgumentList()
5044 // than a non-fastcc caller with the same signature so disable tail-calls in in areCallingConvEligibleForTCO_64SVR4()
5080 // b->a = v.a; in IsEligibleForTailCallOptimization_64SVR4()
5094 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that in IsEligibleForTailCallOptimization_64SVR4()
5100 // When PC-Relative addressing is used, the concept of the TOC is no longer in IsEligibleForTailCallOptimization_64SVR4()
5133 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
5152 // Non-PIC/GOT tail calls are supported. in IsEligibleForTailCallOptimization()
5159 return CalleeGV->hasHiddenVisibility() || in IsEligibleForTailCallOptimization()
5160 CalleeGV->hasProtectedVisibility(); in IsEligibleForTailCallOptimization()
5166 /// isBLACompatibleAddress - Return the immediate to use if the specified in isBLACompatibleAddress()
5167 /// 32-bit value is representable in the immediate field of a BxA instruction.
5172 int Addr = C->getZExtValue(); in isBLACompatibleAddress()
5179 (int)C->getZExtValue() >> 2, SDLoc(Op), in isBLACompatibleAddress()
5196 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5212 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5224 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); in EmitTailCallStoreFPAndRetAddr()
5235 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5253 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5269 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5284 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5336 if (GV->isThreadLocal()) in isFunctionGlobalAddress()
5339 return GV->getValueType()->isFunctionTy(); in isFunctionGlobalAddress()
5412 const GlobalValue *GV = G ? G->getGlobal() : nullptr; in isIndirectCall()
5421 // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not in isIndirectCall()
5433 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5450 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across in getCallOpcode()
5456 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC in getCallOpcode()
5473 const GlobalValue *GV = G ? G->getGlobal() : nullptr; in getCallOpcode()
5511 const GlobalValue *GV = G ? G->getGlobal() : nullptr; in transformCallee()
5517 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in in transformCallee()
5519 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are in transformCallee()
5520 // built with secure-PLT. in transformCallee()
5529 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM)); in transformCallee()
5536 const GlobalValue *GV = G ? G->getGlobal() : nullptr; in transformCallee()
5538 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); in transformCallee()
5549 const char *SymName = S->getSymbol(); in transformCallee()
5551 // If there exists a user-declared function whose name is the same as the in transformCallee()
5552 // ExternalSymbol's, then we pick up the user-declared version. in transformCallee()
5555 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) in transformCallee()
5560 // C-linkage name. A Qualname is returned here because an external in transformCallee()
5567 return Sec->getQualNameSymbol(); in transformCallee()
5570 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data(); in transformCallee()
5588 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1); in getOutputChainFromCallSeq()
5592 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2); in getOutputChainFromCallSeq()
5614 // Function pointers in the 64-bit SVR4 ABI do not point to the function in prepareDescriptorIndirectCall()
5647 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr); in prepareDescriptorIndirectCall()
5695 // The rest of the indirect call sequence is the same as the non-descriptor in prepareDescriptorIndirectCall()
5727 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not in buildCallOperands()
5733 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); in buildCallOperands()
5767 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls in buildCallOperands()
5771 // Add a register mask operand representing the call-preserved registers. in buildCallOperands()
5774 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv); in buildCallOperands()
5796 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false); in FinishCall()
5816 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || in FinishCall()
5854 CallingConv::ID CalleeCC = CB->getCallingConv(); in supportsTailCallFor()
5855 const Function *CallerFunc = CB->getCaller(); in supportsTailCallFor()
5856 CallingConv::ID CallerCC = CallerFunc->getCallingConv(); in supportsTailCallFor()
5857 const Function *CalleeFunc = CB->getCalledFunction(); in supportsTailCallFor()
5865 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(), in supportsTailCallFor()
5866 CalleeFunc->getAttributes(), Outs, *this, in supportsTailCallFor()
5867 CalleeFunc->getDataLayout()); in supportsTailCallFor()
5870 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc, in supportsTailCallFor()
5880 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall())) in isEligibleForTCO()
5912 const GlobalValue *GV = G ? G->getGlobal() : nullptr; in LowerCall()
5939 if (!isTailCall && CB && CB->isMustTailCall()) in LowerCall()
5978 // of the 32-bit SVR4 ABI stack frame layout. in LowerCall_32SVR4()
5999 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); in LowerCall_32SVR4()
6010 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), in LowerCall_32SVR4()
6087 // i - Tracks the index into the list of registers allocated for the call in LowerCall_32SVR4()
6088 // RealArgIdx - Tracks the index into the list of actual function arguments in LowerCall_32SVR4()
6089 // j - Tracks the index into the list of byval arguments in LowerCall_32SVR4()
6117 CallSeqStart.getNode()->getOperand(0), in LowerCall_32SVR4()
6178 // Build a sequence of copy-to-reg nodes chained together with token chain in LowerCall_32SVR4()
6213 CallSeqStart.getNode()->getOperand(0), in createMemcpyOutsideCallSeq()
6251 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); in LowerCall_64SVR4()
6260 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); in LowerCall_64SVR4()
6359 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; in LowerCall_64SVR4()
6429 // We re-align the argument offset for each argument, except when using the in LowerCall_64SVR4()
6447 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; in LowerCall_64SVR4()
6451 // Promote integers to 64-bit values. in LowerCall_64SVR4()
6465 // These are the proper values we need for right-justifying the in LowerCall_64SVR4()
6477 // All aggregates smaller than 8 bytes must be passed right-justified. in LowerCall_64SVR4()
6494 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, in LowerCall_64SVR4()
6510 if ((NumGPRs - GPR_idx) * PtrByteSize < Size) in LowerCall_64SVR4()
6515 // When a register is available, pass a small aggregate right-justified. in LowerCall_64SVR4()
6517 // The easiest way to get this right-justified in a register in LowerCall_64SVR4()
6527 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); in LowerCall_64SVR4()
6551 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8; in LowerCall_64SVR4()
6560 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; in LowerCall_64SVR4()
6620 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 in LowerCall_64SVR4()
6621 // once we support fp <-> gpr moves. in LowerCall_64SVR4()
6623 // In the non-vararg case, this can only ever happen in the in LowerCall_64SVR4()
6632 // Non-array float values are extended and passed in a GPR. in LowerCall_64SVR4()
6641 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); in LowerCall_64SVR4()
6655 // Non-final even elements are skipped; they will be handled in LowerCall_64SVR4()
6656 // together with the subsequent argument on the next go-around. in LowerCall_64SVR4()
6666 // Single-precision floating-point values are mapped to the in LowerCall_64SVR4()
6689 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; in LowerCall_64SVR4()
6737 // Non-varargs Altivec params go into VRs or on the stack. in LowerCall_64SVR4()
6768 // information about calls through function pointers in the 64-bit SVR4 ABI. in LowerCall_64SVR4()
6770 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the in LowerCall_64SVR4()
6778 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); in LowerCall_64SVR4()
6792 // Build a sequence of copy-to-reg nodes chained together with token chain in LowerCall_64SVR4()
6860 static const MCPhysReg GPR_32[] = {// 32-bit registers. in CC_AIX()
6863 static const MCPhysReg GPR_64[] = {// 64-bit registers. in CC_AIX()
6877 report_fatal_error("Pass-by-value arguments with alignment greater than " in CC_AIX()
6945 // Floats are always 4-byte aligned in the PSA on AIX. in CC_AIX()
6946 // This includes f64 in 64-bit mode for ABI compatibility. in CC_AIX()
6961 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR. in CC_AIX()
7022 // through ellipses) and shadow GPRs (unlike arguments to non-vaarg in CC_AIX()
7046 // Corner case for 32-bit codegen. We have 2 registers to pass the first in CC_AIX()
7089 "i64 should have been split for 32-bit codegen."); in getRegClassForSVT()
7130 const unsigned LASize = FL->getLinkageSize(); in mapArgRegToOffsetAIX()
7135 return LASize + 4 * (Reg - PPC::R3); in mapArgRegToOffsetAIX()
7141 return LASize + 8 * (Reg - PPC::X3); in mapArgRegToOffsetAIX()
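A standalone sketch of that offset arithmetic: on AIX the first eight GPR arguments shadow fixed slots in the parameter save area that starts right after the linkage area, so a register's home offset is the linkage-area size plus its argument position times the register width. The linkage-area sizes used in the demo (24 bytes for 32-bit AIX, 48 for 64-bit) are illustrative assumptions; the real value comes from the frame lowering.

#include <cstdio>

static unsigned gprArgOffsetAIX(unsigned ArgRegNo /*0 for R3/X3 .. 7 for R10/X10*/,
                                unsigned LinkageSize, unsigned RegWidthBytes) {
  return LinkageSize + RegWidthBytes * ArgRegNo;
}

int main() {
  // 32-bit AIX: assumed 24-byte linkage area, 4-byte registers. R5 is arg reg 2.
  std::printf("R5 home offset: %u\n", gprArgOffsetAIX(2, 24, 4));
  // 64-bit AIX: assumed 48-byte linkage area, 8-byte registers. X5 likewise.
  std::printf("X5 home offset: %u\n", gprArgOffsetAIX(2, 48, 8));
  return 0;
}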
7149 // Low Memory +--------------------------------------------+
7150 // SP +---> | Back chain | ---+
7151 // | +--------------------------------------------+ |
7153 // | +--------------------------------------------+ |
7155 // | +--------------------------------------------+ | Linkage Area
7157 // | +--------------------------------------------+ |
7159 // | +--------------------------------------------+ |
7160 // | | Saved TOC pointer | ---+
7161 // | +--------------------------------------------+
7163 // | +--------------------------------------------+
7165 // | +--------------------------------------------+
7167 // | +--------------------------------------------+
7169 // | +--------------------------------------------+
7171 // | +--------------------------------------------+
7173 // | +--------------------------------------------+
7175 // | +--------------------------------------------+
7177 // | +--------------------------------------------+
7179 // | +--------------------------------------------+
7180 // +---- | Back chain |
7181 // High Memory +--------------------------------------------+
7216 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); in LowerFormalArguments_AIX()
7242 // Objects are right-justified because AIX is big-endian. in LowerFormalArguments_AIX()
7244 CurArgOffset += LocSize - ValSize; in LowerFormalArguments_AIX()
7282 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in in LowerFormalArguments_AIX()
7283 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and in LowerFormalArguments_AIX()
7288 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if in LowerFormalArguments_AIX()
7292 "Only 2 custom RegLocs expected for 64-bit codegen."); in LowerFormalArguments_AIX()
7302 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType); in LowerFormalArguments_AIX()
7308 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint); in LowerFormalArguments_AIX()
7311 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint); in LowerFormalArguments_AIX()
7319 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar); in LowerFormalArguments_AIX()
7322 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort); in LowerFormalArguments_AIX()
7327 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt); in LowerFormalArguments_AIX()
7331 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat); in LowerFormalArguments_AIX()
7397 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType); in LowerFormalArguments_AIX()
7445 FuncInfo->setMinReservedArea(CallerReservedArea); in LowerFormalArguments_AIX()
7448 FuncInfo->setVarArgsFrameIndex( in LowerFormalArguments_AIX()
7450 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); in LowerFormalArguments_AIX()
7463 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize; in LowerFormalArguments_AIX()
7515 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); in LowerCall_AIX()
7554 // Nothing to do for zero-sized ByVals on the caller side. in LowerCall_AIX()
7571 // Initialize registers, which are fully occupied by the by-val argument. in LowerCall_AIX()
7578 "Unexpected location for pass-by-value argument."); in LowerCall_AIX()
7587 "Expected additional location for by-value argument."); in LowerCall_AIX()
7590 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg."); in LowerCall_AIX()
7594 MemcpyFlags.setByValSize(ByValSize - LoadOffset); in LowerCall_AIX()
7606 // Any residue that occupies the final by-val arg register must be in LowerCall_AIX()
7607 // left-justified on AIX. Loads must be a power-of-2 size and cannot be in LowerCall_AIX()
7608 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4, in LowerCall_AIX()
7612 "Unexpected register residue for by-value argument."); in LowerCall_AIX()
7615 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes); in LowerCall_AIX()
7624 // By-val arguments are passed left-justified in registers. in LowerCall_AIX()
7628 "Unexpected load emitted during handling of pass-by-value " in LowerCall_AIX()
7630 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8); in LowerCall_AIX()
7632 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout()); in LowerCall_AIX()
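A standalone sketch of the power-of-two decomposition of the residue described above (the per-piece left-justification shift is omitted); std::bit_floor plays the role of llvm::bit_floor here.

#include <bit>
#include <cstdio>

int main() {
  unsigned Residue = 7; // e.g. the 7 leftover bytes of a by-val argument
  for (unsigned Loaded = 0; Loaded < Residue;) {
    unsigned Chunk = std::bit_floor(Residue - Loaded); // largest power of two that fits
    std::printf("load %u byte(s) at offset %u\n", Chunk, Loaded);
    Loaded += Chunk;
  }
  return 0; // prints loads of 4, 2 and 1 bytes, matching the comment's example
}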
7697 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in in LowerCall_AIX()
7698 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and in LowerCall_AIX()
7706 "Only 2 custom RegLocs expected for 64-bit codegen."); in LowerCall_AIX()
7738 // f32 in 32-bit GPR in LowerCall_AIX()
7739 // f64 in 64-bit GPR in LowerCall_AIX()
7743 // f32 in 64-bit GPR. in LowerCall_AIX()
7747 // f64 in two 32-bit GPRs in LowerCall_AIX()
7777 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported."); in LowerCall_AIX()
7782 Subtarget.getFrameLowering()->getTOCSaveOffset(); in LowerCall_AIX()
7794 // Build a sequence of copy-to-reg nodes chained together with token chain in LowerCall_AIX()
7837 // Copy the result values into the output registers. in LowerReturn()
7859 // Legalize ret f64 -> ret 2 x i32. in LowerReturn()
7938 int RASI = FI->getReturnAddrSaveIndex(); in getReturnAddrFrameIndex()
7943 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset(); in getReturnAddrFrameIndex()
7947 FI->setReturnAddrSaveIndex(RASI); in getReturnAddrFrameIndex()
7961 int FPSI = FI->getFramePointerSaveIndex(); in getFramePointerFrameIndex()
7966 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset(); in getFramePointerFrameIndex()
7970 FI->setFramePointerSaveIndex(FPSI); in getFramePointerFrameIndex()
8035 SDValue Chain = LD->getChain(); in LowerLOAD()
8036 SDValue BasePtr = LD->getBasePtr(); in LowerLOAD()
8037 MachineMemOperand *MMO = LD->getMemOperand(); in LowerLOAD()
8060 SDValue Chain = ST->getChain(); in LowerSTORE()
8061 SDValue BasePtr = ST->getBasePtr(); in LowerSTORE()
8062 SDValue Value = ST->getValue(); in LowerSTORE()
8063 MachineMemOperand *MMO = ST->getMemOperand(); in LowerSTORE()
8085 // size). At that point legalization will try to custom lower the sub-legal in LowerTRUNCATEVector()
8086 // result and get here - where we can contain the truncate as a single target in LowerTRUNCATEVector()
8092 // We will implement it for big-endian ordering as this (where x denotes in LowerTRUNCATEVector()
8097 // The same operation in little-endian ordering will be: in LowerTRUNCATEVector()
8148 ShuffV.push_back(i * SizeMult - 1); in LowerTRUNCATEVector()
8160 /// LowerSELECT_CC - Lower floating-point select_cc's into fsel instructions when in LowerSELECT_CC()
8163 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); in LowerSELECT_CC()
8170 // Without power9-vector, we don't have native instruction for f128 comparison. in LowerSELECT_CC()
8172 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE in LowerSELECT_CC()
8186 SDNodeFlags Flags = Op.getNode()->getFlags(); in LowerSELECT_CC()
8204 // general, fsel-based lowering of select is a finite-math-only optimization. in LowerSELECT_CC()
8222 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits in LowerSELECT_CC()
8225 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits in LowerSELECT_CC()
8235 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits in LowerSELECT_CC()
8244 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits in LowerSELECT_CC()
8258 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits in LowerSELECT_CC()
8261 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits in LowerSELECT_CC()
8268 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits in LowerSELECT_CC()
8274 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits in LowerSELECT_CC()
8280 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits in LowerSELECT_CC()
8286 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits in LowerSELECT_CC()
8319 bool IsStrict = Op->isStrictFPOpcode(); in convertFPToInt()
8325 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); in convertFPToInt()
8376 bool IsStrict = Op->isStrictFPOpcode(); in LowerFP_TO_INTForReuse()
8382 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); in LowerFP_TO_INTForReuse()
8422 if (Op->isStrictFPOpcode()) in LowerFP_TO_INTDirectMove()
8430 bool IsStrict = Op->isStrictFPOpcode(); in LowerFP_TO_INT()
8441 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on in LowerFP_TO_INT()
8446 // set other fast-math flags to FP operations in both strict and in LowerFP_TO_INT()
8447 // non-strict cases. (FP_TO_SINT, FSUB) in LowerFP_TO_INT()
8449 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); in LowerFP_TO_INT()
8455 // Add the two halves of the long double in round-to-zero mode, and use in LowerFP_TO_INT()
8477 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs in LowerFP_TO_INT()
8504 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X in LowerFP_TO_INT()
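A small scalar sketch of the expansion quoted above (assuming a non-negative input below 2^32; names are illustrative):

    #include <cstdint>

    uint32_t fpToUint32(double X) {
      const double Two31 = 2147483648.0;                    // 2^31
      if (X >= Two31)                                       // out of signed i32 range
        return (uint32_t)(int32_t)(X - Two31) + 0x80000000u;
      return (uint32_t)(int32_t)X;                          // fits a signed convert
    }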
8541 if (Op->isStrictFPOpcode()) in canReuseLoadAddress()
8557 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || in canReuseLoadAddress()
8558 LD->isNonTemporal()) in canReuseLoadAddress()
8560 if (LD->getMemoryVT() != MemVT) in canReuseLoadAddress()
8565 // ties the legalised loads together uses a different output chain than the in canReuseLoadAddress()
8567 if (!isTypeLegal(LD->getValueType(0))) in canReuseLoadAddress()
8570 RLI.Ptr = LD->getBasePtr(); in canReuseLoadAddress()
8571 if (LD->isIndexed() && !LD->getOffset().isUndef()) { in canReuseLoadAddress()
8572 assert(LD->getAddressingMode() == ISD::PRE_INC && in canReuseLoadAddress()
8573 "Non-pre-inc AM on PPC?"); in canReuseLoadAddress()
8575 LD->getOffset()); in canReuseLoadAddress()
8578 RLI.Chain = LD->getChain(); in canReuseLoadAddress()
8579 RLI.MPI = LD->getPointerInfo(); in canReuseLoadAddress()
8580 RLI.IsDereferenceable = LD->isDereferenceable(); in canReuseLoadAddress()
8581 RLI.IsInvariant = LD->isInvariant(); in canReuseLoadAddress()
8582 RLI.Alignment = LD->getAlign(); in canReuseLoadAddress()
8583 RLI.AAInfo = LD->getAAInfo(); in canReuseLoadAddress()
8584 RLI.Ranges = LD->getRanges(); in canReuseLoadAddress()
8586 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); in canReuseLoadAddress()
8615 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode(); in directMoveIsProfitable()
8616 if (Origin->getOpcode() != ISD::LOAD) in directMoveIsProfitable()
8621 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand(); in directMoveIsProfitable()
8623 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2)) in directMoveIsProfitable()
8626 for (SDNode::use_iterator UI = Origin->use_begin(), in directMoveIsProfitable()
8627 UE = Origin->use_end(); in directMoveIsProfitable()
8634 if (UI->getOpcode() != ISD::SINT_TO_FP && in directMoveIsProfitable()
8635 UI->getOpcode() != ISD::UINT_TO_FP && in directMoveIsProfitable()
8636 UI->getOpcode() != ISD::STRICT_SINT_TO_FP && in directMoveIsProfitable()
8637 UI->getOpcode() != ISD::STRICT_UINT_TO_FP) in directMoveIsProfitable()
8653 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); in convertIntToFP()
8655 // If we have FCFIDS, then use it when converting to single-precision. in convertIntToFP()
8656 // Otherwise, convert to double-precision and then round. in convertIntToFP()
8661 if (Op->isStrictFPOpcode()) { in convertIntToFP()
8681 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0); in LowerINT_TO_FPDirectMove()
8712 bool IsStrict = Op->isStrictFPOpcode(); in LowerINT_TO_FPVector()
8723 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); in LowerINT_TO_FPVector()
8744 ShuffV[i * Stride - 1] = i - 1; in LowerINT_TO_FPVector()
8775 bool IsStrict = Op->isStrictFPOpcode(); in LowerINT_TO_FP()
8781 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); in LowerINT_TO_FP()
8818 // When converting to single-precision, we actually need to convert in LowerINT_TO_FP()
8819 // to double-precision first and then round to single-precision. in LowerINT_TO_FP()
8820 // To avoid double-rounding effects during that operation, we have in LowerINT_TO_FP()
8822 // converting to double-precision are replaced by a bit that won't in LowerINT_TO_FP()
8823 // be lost at this stage, but is below the single-precision rounding in LowerINT_TO_FP()
8826 // However, if -enable-unsafe-fp-math is in effect, accept double in LowerINT_TO_FP()
8834 // mantissa of an IEEE double-precision value without rounding.) in LowerINT_TO_FP()
8837 // to single-precision gets the correct result. in LowerINT_TO_FP()
8844 Round, DAG.getConstant(-2048, dl, MVT::i64)); in LowerINT_TO_FP()
8847 // of the input value is small, the bit-twiddling we did above might in LowerINT_TO_FP()
8848 // end up visibly changing the output. Fortunately, in that case, we in LowerINT_TO_FP()
8850 // exactly to double-precision floating-point already. Therefore, in LowerINT_TO_FP()
8852 // bits are all sign-bit copies, and use the rounded value computed in LowerINT_TO_FP()
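The arithmetic behind the 2048 used nearby: an i64 may carry up to 64 significant bits while an f64 holds 53, so the i64 -> f64 step can drop up to 64 - 53 = 11 low bits; collapsing the value to a multiple of 2^11 = 2048, with the dropped bits folded into a bit below the single-precision rounding position as the comment describes, preserves what the final f64 -> f32 rounding (24-bit significand) actually needs.

    // Worked sizes:  64 (i64) - 53 (f64 significand) = 11 bits at risk  ->  2^11 = 2048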
8910 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && in LowerINT_TO_FP()
8949 // Since we only generate this in 64-bit mode, we can take advantage of in LowerINT_TO_FP()
8950 // 64-bit registers. In particular, sign extend the input value into the in LowerINT_TO_FP()
8951 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack in LowerINT_TO_FP()
8970 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && in LowerINT_TO_FP()
8992 "i32->FP without LFIWAX supported only on PPC64"); in LowerINT_TO_FP()
9037 11 Round to -inf in LowerGET_ROUNDING()
9040 -1 Undefined in LowerGET_ROUNDING()
9044 3 Round to -inf in LowerGET_ROUNDING()
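One bit-twiddle that realizes the table fragments above (FPSCR RN field in, GET_ROUNDING value out); this formula is an illustration checked against all four entries, not a claim about the exact node sequence emitted here:

    unsigned rnToFltRounds(unsigned RN) {        // RN = FPSCR rounding-control bits
      return (RN & 3) ^ ((~RN & 3) >> 1);        // 00->1, 01->0, 10->2, 11->3
    }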
9121 DAG.getConstant(-BitWidth, dl, AmtVT)); in LowerSHL_PARTS()
9150 DAG.getConstant(-BitWidth, dl, AmtVT)); in LowerSRL_PARTS()
9178 DAG.getConstant(-BitWidth, dl, AmtVT)); in LowerSRA_PARTS()
9199 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) in LowerFunnelShift()
9200 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) in LowerFunnelShift()
9204 DAG.getConstant(BitWidth - 1, dl, AmtVT)); in LowerFunnelShift()
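A scalar sketch of the fshl identity quoted above for 32-bit operands (the Z % BW == 0 case is split out so the C shifts stay well defined):

    #include <cstdint>

    uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
      unsigned S = Z & 31;                                 // Z % BW with BW = 32
      return S ? (X << S) | (Y >> (32 - S)) : X;           // high word of (X:Y) << S
    }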
9212 //===----------------------------------------------------------------------===//
9216 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9224 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; in getCanonicalConstSplat()
9227 if (Val == ((1LLU << (SplatSize * 8)) - 1)) { in getCanonicalConstSplat()
9232 EVT CanonicalVT = VTys[SplatSize-1]; in getCanonicalConstSplat()
9238 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9247 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9257 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9267 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9284 /// \param V - pointer to the BuildVectorSDNode being matched
9285 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9290 /// - The node builds a vector out of integers that are not 32 or 64-bits
9291 /// - The node builds a vector out of constants
9292 /// - The node is a "load-and-splat"
9297 EVT VecVT = V->getValueType(0); in haveEfficientBuildVectorPattern()
9306 SDValue Op0 = V->getOperand(0); in haveEfficientBuildVectorPattern()
9311 if (V->isConstant()) in haveEfficientBuildVectorPattern()
9313 for (int i = 0, e = V->getNumOperands(); i < e; ++i) { in haveEfficientBuildVectorPattern()
9314 if (V->getOperand(i).isUndef()) in haveEfficientBuildVectorPattern()
9316 // We want to expand nodes that represent load-and-splat even if the in haveEfficientBuildVectorPattern()
9318 if (V->getOperand(i).getOpcode() == ISD::LOAD || in haveEfficientBuildVectorPattern()
9319 (V->getOperand(i).getOpcode() == ISD::FP_ROUND && in haveEfficientBuildVectorPattern()
9320 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || in haveEfficientBuildVectorPattern()
9321 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT && in haveEfficientBuildVectorPattern()
9322 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || in haveEfficientBuildVectorPattern()
9323 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT && in haveEfficientBuildVectorPattern()
9324 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD)) in haveEfficientBuildVectorPattern()
9328 if (V->getOperand(i) != Op0 || in haveEfficientBuildVectorPattern()
9329 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode()))) in haveEfficientBuildVectorPattern()
9339 SDValue Op0 = Op->getOperand(0); in LowerBITCAST()
9358 while (InputLoad->getOpcode() == ISD::BITCAST) in getNormalLoadInput()
9359 InputLoad = &InputLoad->getOperand(0); in getNormalLoadInput()
9360 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR || in getNormalLoadInput()
9361 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) { in getNormalLoadInput()
9362 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED; in getNormalLoadInput()
9363 InputLoad = &InputLoad->getOperand(0); in getNormalLoadInput()
9365 if (InputLoad->getOpcode() != ISD::LOAD) in getNormalLoadInput()
9417 EVT Ty = Op->getValueType(0); in isValidSplatLoad()
9418 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending in isValidSplatLoad()
9424 EVT MemVT = InputNode->getMemoryVT(); in isValidSplatLoad()
9434 // Check the extend type, when the input type is i32, and the output vector in isValidSplatLoad()
9463 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, in LowerBUILD_VECTOR()
9467 // non-denormal float which when converted back to double gives us the same in LowerBUILD_VECTOR()
9472 // Check the type first to short-circuit so we don't modify APSplatBits if in LowerBUILD_VECTOR()
9474 if ((Op->getValueType(0) == MVT::v2f64) && in LowerBUILD_VECTOR()
9512 // Handle load-and-splat patterns as we have instructions that will do this in LowerBUILD_VECTOR()
9519 // If the input load is an extending load, it will be an i32 -> i64 in LowerBUILD_VECTOR()
9521 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits(); in LowerBUILD_VECTOR()
9535 for (SDValue BVInOp : Op->ops()) in LowerBUILD_VECTOR()
9537 NumUsesOfInputLD--; in LowerBUILD_VECTOR()
9546 // case 1 - lfiwzx/lfiwax in LowerBUILD_VECTOR()
9552 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT && in LowerBUILD_VECTOR()
9557 // case 2 - lxvr[hb]x in LowerBUILD_VECTOR()
9567 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) && in LowerBUILD_VECTOR()
9570 LD->getChain(), // Chain in LowerBUILD_VECTOR()
9571 LD->getBasePtr(), // Ptr in LowerBUILD_VECTOR()
9576 LD->getMemoryVT(), LD->getMemOperand()); in LowerBUILD_VECTOR()
9577 // Replace all uses of the output chain of the original load with the in LowerBUILD_VECTOR()
9578 // output chain of the new load. in LowerBUILD_VECTOR()
9579 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), in LowerBUILD_VECTOR()
9586 // 32-bits can be lowered to VSX instructions under certain conditions. in LowerBUILD_VECTOR()
9612 // Given that the vector length is a multiple of 4, 2-byte splats can be replaced in LowerBUILD_VECTOR()
9613 // with 4-byte splats. We replicate the SplatBits in the case of a 2-byte splat to in LowerBUILD_VECTOR()
9614 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be in LowerBUILD_VECTOR()
9615 // turned into a 4-byte splat of 0xABABABAB. in LowerBUILD_VECTOR()
9629 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. in LowerBUILD_VECTOR()
9630 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> in LowerBUILD_VECTOR()
9631 (32-SplatBitSize)); in LowerBUILD_VECTOR()
9632 if (SextVal >= -16 && SextVal <= 15) in LowerBUILD_VECTOR()
9638 // If this value is in the range [-32,30] and is even, use: in LowerBUILD_VECTOR()
9641 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) in LowerBUILD_VECTOR()
9642 // If this value is in the range [-31,-17] and is odd, use: in LowerBUILD_VECTOR()
9643 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) in LowerBUILD_VECTOR()
9644 // Note the last two are three-instruction sequences. in LowerBUILD_VECTOR()
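Quick arithmetic check of the two three-instruction sequences above (every vsplti immediate stays inside the architectural [-16,15] range):

    //   val = 29  (odd, too large for one vsplti):  vsplti(13)  - vsplti(-16) =  13 + 16 =  29
    //   val = -29 (odd, in [-31,-17]):              vsplti(-13) + vsplti(-16) = -13 - 16 = -29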
9645 if (SextVal >= -32 && SextVal <= 31) { in LowerBUILD_VECTOR()
9664 // Make -1 and vspltisw -1: in LowerBUILD_VECTOR()
9665 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl); in LowerBUILD_VECTOR()
9678 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, in LowerBUILD_VECTOR()
9679 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 in LowerBUILD_VECTOR()
9683 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for in LowerBUILD_VECTOR()
9684 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' in LowerBUILD_VECTOR()
9689 unsigned TypeShiftAmt = i & (SplatBitSize-1); in LowerBUILD_VECTOR()
9698 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); in LowerBUILD_VECTOR()
9709 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); in LowerBUILD_VECTOR()
9715 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { in LowerBUILD_VECTOR()
9721 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); in LowerBUILD_VECTOR()
9748 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9754 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); in GeneratePerfectShuffle()
9755 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); in GeneratePerfectShuffle()
9825 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9833 SDValue V1 = N->getOperand(0); in lowerToVINSERTB()
9834 SDValue V2 = N->getOperand(1); in lowerToVINSERTB()
9844 ArrayRef<int> Mask = N->getMask(); in lowerToVINSERTB()
9862 // Go through the mask of half-words to find an element that's being moved in lowerToVINSERTB()
9896 …// Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2… in lowerToVINSERTB()
9901 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i; in lowerToVINSERTB()
9926 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9933 // Check that the shuffle is on half-words. in lowerToVINSERTH()
9939 SDValue V1 = N->getOperand(0); in lowerToVINSERTH()
9940 SDValue V2 = N->getOperand(1); in lowerToVINSERTH()
9944 // Shifts required to get the half-word we want at element 3. in lowerToVINSERTH()
9952 // 32-bit space, only need 4-bit nibbles per element. in lowerToVINSERTH()
9954 unsigned MaskShift = (NumHalfWords - 1 - i) * 4; in lowerToVINSERTH()
9955 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift); in lowerToVINSERTH()
9972 // Go through the mask of half-words to find an element that's being moved in lowerToVINSERTH()
9975 unsigned MaskShift = (NumHalfWords - 1 - i) * 4; in lowerToVINSERTH()
9992 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2; in lowerToVINSERTH()
10005 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2; in lowerToVINSERTH()
10038 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10045 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0)); in lowerToXXSPLTI32DX()
10046 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1)); in lowerToXXSPLTI32DX()
10048 auto ShuffleMask = SVN->getMask(); in lowerToXXSPLTI32DX()
10057 if (RHS->getOpcode() != ISD::BUILD_VECTOR) { in lowerToXXSPLTI32DX()
10063 ShuffleMask = CommutedSV->getMask(); in lowerToXXSPLTI32DX()
10071 // Check if RHS is a splat of 4-bytes (or smaller). in lowerToXXSPLTI32DX()
10075 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize, in lowerToXXSPLTI32DX()
10084 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively. in lowerToXXSPLTI32DX()
10100 // If the splat is narrower than 32-bits, we need to get the 32-bit value in lowerToXXSPLTI32DX()
10112 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10115 /// i.e (or (shl x, C1), (srl x, 128-C1)).
10137 DAG.getConstant(128 - SHLAmt, dl, MVT::i32)); in LowerROTL()
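A scalar analogue of the v1i128 rotate above, using the GCC/Clang unsigned __int128 extension and assuming a rotate amount in 1..127 (so neither shift is by the full width):

    unsigned __int128 rotl128(unsigned __int128 X, unsigned C1) {
      return (X << C1) | (X >> (128 - C1));      // (shl x, C1) | (srl x, 128 - C1)
    }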
10142 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10153 // Any nodes that were combined in the target-independent combiner prior in LowerVECTOR_SHUFFLE()
10170 // If this is a load-and-splat, we can do that with a single instruction in LowerVECTOR_SHUFFLE()
10177 InputLoad->hasOneUse()) { in LowerVECTOR_SHUFFLE()
10194 // For 4-byte load-and-splat, we need Power9. in LowerVECTOR_SHUFFLE()
10198 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4; in LowerVECTOR_SHUFFLE()
10200 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8; in LowerVECTOR_SHUFFLE()
10204 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64)) in LowerVECTOR_SHUFFLE()
10207 SDValue BasePtr = LD->getBasePtr(); in LowerVECTOR_SHUFFLE()
10212 LD->getChain(), // Chain in LowerVECTOR_SHUFFLE()
10220 Ops, LD->getMemoryVT(), LD->getMemOperand()); in LowerVECTOR_SHUFFLE()
10221 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1)); in LowerVECTOR_SHUFFLE()
10222 if (LdSplt.getValueType() != SVOp->getValueType(0)) in LowerVECTOR_SHUFFLE()
10223 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt); in LowerVECTOR_SHUFFLE()
10341 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || in LowerVECTOR_SHUFFLE()
10362 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || in LowerVECTOR_SHUFFLE()
10375 // Check to see if this is a shuffle of 4-byte values. If so, we can use our in LowerVECTOR_SHUFFLE()
10377 ArrayRef<int> PermMask = SVOp->getMask(); in LowerVECTOR_SHUFFLE()
10385 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. in LowerVECTOR_SHUFFLE()
10405 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the in LowerVECTOR_SHUFFLE()
10409 // little-endian perfect shuffle table. in LowerVECTOR_SHUFFLE()
10428 // for 3-operation perms and use a constant pool load instead. in LowerVECTOR_SHUFFLE()
10452 (V1->hasOneUse() || V2->hasOneUse())) { in LowerVPERM()
10453 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - using " in LowerVPERM()
10457 // The second input to XXPERM is also an output so if the second input has in LowerVPERM()
10459 // single-use operand to be used as the second input to prevent copying. in LowerVPERM()
10460 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) || in LowerVPERM()
10461 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) { in LowerVPERM()
10472 // necessary to produce proper semantics with the big-endian-based vperm in LowerVPERM()
10477 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD; in LowerVPERM()
10478 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD; in LowerVPERM()
10483 [ A | B | C | D ] -> [ C | D | A | B ] in LowerVPERM()
10484 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15 in LowerVPERM()
10485 i.e. index of A, B += 8, and index of C, D -= 8. in LowerVPERM()
10487 [ E | F | G | H ] -> [ G | H | E | F ] in LowerVPERM()
10488 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31 in LowerVPERM()
10489 i.e. index of E, F += 8, index of G, H -= 8 in LowerVPERM()
10491 [ V1 | V2 ] -> [ V2 | V1 ] in LowerVPERM()
10492 0-15 16-31 0-15 16-31 in LowerVPERM()
10493 i.e. index of V1 += 16, index of V2 -= 16 in LowerVPERM()
10504 SrcElt -= 8; in LowerVPERM()
10508 SrcElt -= 8; in LowerVPERM()
10516 SrcElt -= 16; in LowerVPERM()
10521 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32)); in LowerVPERM()
10528 dl = SDLoc(V1->getOperand(0)); in LowerVPERM()
10529 V1 = V1->getOperand(0)->getOperand(1); in LowerVPERM()
10532 dl = SDLoc(V2->getOperand(0)); in LowerVPERM()
10533 V2 = V2->getOperand(0)->getOperand(1); in LowerVPERM()
10552 SVOp->dump(); in LowerVPERM()
10572 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10578 CompareOpc = -1; in getVectorCompareInfo()
10858 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10874 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!"); in LowerINTRINSIC_WO_CHAIN()
10885 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. in LowerINTRINSIC_WO_CHAIN()
10886 if (ME < 63 - SH) { in LowerINTRINSIC_WO_CHAIN()
10889 } else if (ME > 63 - SH) { in LowerINTRINSIC_WO_CHAIN()
10891 DAG.getConstant(ME + SH - 63, dl, MVT::i32)); in LowerINTRINSIC_WO_CHAIN()
10896 DAG.getTargetConstant(63 - ME, dl, MVT::i32), in LowerINTRINSIC_WO_CHAIN()
10984 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo in LowerINTRINSIC_WO_CHAIN()
10994 // Allow pre-isa-future subtargets to lower as normal. in LowerINTRINSIC_WO_CHAIN()
11008 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) && in LowerINTRINSIC_WO_CHAIN()
11011 DAG.getConstant(!!(Idx->getSExtValue()), dl, in LowerINTRINSIC_WO_CHAIN()
11012 Idx->getValueType(0))); in LowerINTRINSIC_WO_CHAIN()
11088 all_of(Op->ops().drop_front(4), in LowerINTRINSIC_WO_CHAIN()
11097 unsigned I = Op.getNumOperands() - 2, Cnt = I; in LowerINTRINSIC_WO_CHAIN()
11099 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) { in LowerINTRINSIC_WO_CHAIN()
11114 // If this is a non-dot comparison, make the VCMP node and we are done. in LowerINTRINSIC_WO_CHAIN()
11158 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); in LowerINTRINSIC_WO_CHAIN()
11219 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11220 // compared to a value that is atomically loaded (atomic loads zero-extend).
11224 "Expecting an atomic compare-and-swap here."); in LowerATOMIC_CMP_SWAP()
11227 EVT MemVT = AtomicNode->getMemoryVT(); in LowerATOMIC_CMP_SWAP()
11232 // If this is already correctly zero-extended, leave it alone. in LowerATOMIC_CMP_SWAP()
11233 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits()); in LowerATOMIC_CMP_SWAP()
11238 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1; in LowerATOMIC_CMP_SWAP()
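For the two sub-word widths this path handles, the values above work out to (illustrative):

    //   i8  : MaskVal = (1 << 8)  - 1 = 0xFF,   HighBits = upper 32 - 8  = 24 bits (checked to already be zero)
    //   i16 : MaskVal = (1 << 16) - 1 = 0xFFFF, HighBits = upper 32 - 16 = 16 bits (checked to already be zero)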
11243 // Replace the existing compare operand with the properly zero-extended one. in LowerATOMIC_CMP_SWAP()
11245 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++) in LowerATOMIC_CMP_SWAP()
11246 Ops.push_back(AtomicNode->getOperand(i)); in LowerATOMIC_CMP_SWAP()
11248 MachineMemOperand *MMO = AtomicNode->getMemOperand(); in LowerATOMIC_CMP_SWAP()
11258 EVT MemVT = N->getMemoryVT(); in LowerATOMIC_LOAD_STORE()
11262 unsigned Opc = N->getOpcode(); in LowerATOMIC_LOAD_STORE()
11269 N->getOperand(0), in LowerATOMIC_LOAD_STORE()
11271 for (int I = 1, E = N->getNumOperands(); I < E; ++I) in LowerATOMIC_LOAD_STORE()
11272 Ops.push_back(N->getOperand(I)); in LowerATOMIC_LOAD_STORE()
11274 Ops, MemVT, N->getMemOperand()); in LowerATOMIC_LOAD_STORE()
11290 N->getOperand(0), in LowerATOMIC_LOAD_STORE()
11292 SDValue Val = N->getOperand(1); in LowerATOMIC_LOAD_STORE()
11299 Ops.push_back(N->getOperand(2)); in LowerATOMIC_LOAD_STORE()
11301 N->getMemOperand()); in LowerATOMIC_LOAD_STORE()
11341 // first, and test if it's 'not not-normal' with the expected sign. in getDataClassTest()
11458 // Create a stack slot that is 16-byte aligned. in LowerSCALAR_TO_VECTOR()
11525 unsigned InsertAtElement = C->getZExtValue(); in LowerINSERT_VECTOR_ELT()
11528 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte; in LowerINSERT_VECTOR_ELT()
11540 SDValue LoadChain = LN->getChain(); in LowerVectorLoad()
11541 SDValue BasePtr = LN->getBasePtr(); in LowerVectorLoad()
11554 Align Alignment = LN->getAlign(); in LowerVectorLoad()
11561 LN->getPointerInfo().getWithOffset(Idx * 16), in LowerVectorLoad()
11563 LN->getMemOperand()->getFlags(), LN->getAAInfo()); in LowerVectorLoad()
11585 SDValue StoreChain = SN->getChain(); in LowerVectorStore()
11586 SDValue BasePtr = SN->getBasePtr(); in LowerVectorStore()
11587 SDValue Value = SN->getValue(); in LowerVectorStore()
11588 SDValue Value2 = SN->getValue(); in LowerVectorStore()
11601 Align Alignment = SN->getAlign(); in LowerVectorStore()
11617 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx; in LowerVectorStore()
11620 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2); in LowerVectorStore()
11630 SN->getPointerInfo().getWithOffset(Idx * 16), in LowerVectorStore()
11632 SN->getMemOperand()->getFlags(), SN->getAAInfo()); in LowerVectorStore()
11648 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl); in LowerMUL()
11657 // Low parts multiplied together, generating 32-bit results (we ignore the in LowerMUL()
11672 // Multiply the even 8-bit parts, producing 16-bit sums. in LowerMUL()
11677 // Multiply the odd 8-bit parts, producing 16-bit sums. in LowerMUL()
11683 // instructions with a big-endian bias, we must reverse the in LowerMUL()
11706 bool IsStrict = Op->isStrictFPOpcode(); in LowerFP_ROUND()
11734 isa<ConstantSDNode>(Op0->getOperand(1)) && in LowerFP_EXTEND()
11767 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; in LowerFP_EXTEND()
11770 LD->getMemoryVT(), LD->getMemOperand()); in LowerFP_EXTEND()
11774 NewLoad[1], Op0.getNode()->getFlags()); in LowerFP_EXTEND()
11780 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; in LowerFP_EXTEND()
11783 LD->getMemoryVT(), LD->getMemOperand()); in LowerFP_EXTEND()
11791 /// LowerOperation - Provide custom lowering hooks for some operations.
11797 case ISD::FSIN: return lowerSin(Op, DAG); in LowerOperation()
11844 // Lower 64-bit shifts. in LowerOperation()
11852 // Vector-related lowering. in LowerOperation()
11865 // For counter-based loop handling. in LowerOperation()
11891 switch (N->getOpcode()) { in ReplaceNodeResults()
11902 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); in ReplaceNodeResults()
11910 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement) in ReplaceNodeResults()
11913 assert(N->getValueType(0) == MVT::i1 && in ReplaceNodeResults()
11916 N->getValueType(0)); in ReplaceNodeResults()
11918 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), in ReplaceNodeResults()
11919 N->getOperand(1)); in ReplaceNodeResults()
11926 switch (N->getConstantOperandVal(0)) { in ReplaceNodeResults()
11929 N->getOperand(2), N->getOperand(1))); in ReplaceNodeResults()
11944 EVT VT = N->getValueType(0); in ReplaceNodeResults()
11959 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() == in ReplaceNodeResults()
11964 if (N->isStrictFPOpcode()) in ReplaceNodeResults()
11969 if (!N->getValueType(0).isVector()) in ReplaceNodeResults()
11991 //===----------------------------------------------------------------------===//
11993 //===----------------------------------------------------------------------===//
11996 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); in callIntrinsic()
12016 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) { in emitTrailingFence()
12023 Builder.GetInsertBlock()->getParent()->getParent(), in emitTrailingFence()
12024 Intrinsic::ppc_cfence, {Inst->getType()}), in emitTrailingFence()
12066 const BasicBlock *LLVM_BB = BB->getBasicBlock(); in EmitAtomicBinary()
12067 MachineFunction *F = BB->getParent(); in EmitAtomicBinary()
12068 MachineFunction::iterator It = ++BB->getIterator(); in EmitAtomicBinary()
12076 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitAtomicBinary()
12078 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; in EmitAtomicBinary()
12079 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitAtomicBinary()
12080 F->insert(It, loopMBB); in EmitAtomicBinary()
12082 F->insert(It, loop2MBB); in EmitAtomicBinary()
12083 F->insert(It, exitMBB); in EmitAtomicBinary()
12084 exitMBB->splice(exitMBB->begin(), BB, in EmitAtomicBinary()
12085 std::next(MachineBasicBlock::iterator(MI)), BB->end()); in EmitAtomicBinary()
12086 exitMBB->transferSuccessorsAndUpdatePHIs(BB); in EmitAtomicBinary()
12088 MachineRegisterInfo &RegInfo = F->getRegInfo(); in EmitAtomicBinary()
12095 // fallthrough --> loopMBB in EmitAtomicBinary()
12096 BB->addSuccessor(loopMBB); in EmitAtomicBinary()
12102 // bne- loopMBB in EmitAtomicBinary()
12103 // fallthrough --> exitMBB in EmitAtomicBinary()
12112 // bne- loopMBB in EmitAtomicBinary()
12113 // fallthrough --> exitMBB in EmitAtomicBinary()
12116 BuildMI(BB, dl, TII->get(LoadMnemonic), dest) in EmitAtomicBinary()
12119 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); in EmitAtomicBinary()
12122 // Signed comparisons of byte or halfword values must be sign-extended. in EmitAtomicBinary()
12125 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), in EmitAtomicBinary()
12127 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr); in EmitAtomicBinary()
12129 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr); in EmitAtomicBinary()
12131 BuildMI(BB, dl, TII->get(PPC::BCC)) in EmitAtomicBinary()
12135 BB->addSuccessor(loop2MBB); in EmitAtomicBinary()
12136 BB->addSuccessor(exitMBB); in EmitAtomicBinary()
12139 BuildMI(BB, dl, TII->get(StoreMnemonic)) in EmitAtomicBinary()
12141 BuildMI(BB, dl, TII->get(PPC::BCC)) in EmitAtomicBinary()
12143 BB->addSuccessor(loopMBB); in EmitAtomicBinary()
12144 BB->addSuccessor(exitMBB); in EmitAtomicBinary()
12157 return TII->isSignExtended(MI.getOperand(1).getReg(), in isSignExtended()
12158 &MI.getMF()->getRegInfo()); in isSignExtended()
12218 MachineFunction *F = BB->getParent(); in EmitPartwordAtomicBinary()
12219 MachineRegisterInfo &RegInfo = F->getRegInfo(); in EmitPartwordAtomicBinary()
12226 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg) in EmitPartwordAtomicBinary()
12231 // If we support part-word atomic mnemonics, just use them in EmitPartwordAtomicBinary()
12237 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address in EmitPartwordAtomicBinary()
12244 const BasicBlock *LLVM_BB = BB->getBasicBlock(); in EmitPartwordAtomicBinary()
12245 MachineFunction::iterator It = ++BB->getIterator(); in EmitPartwordAtomicBinary()
12251 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitPartwordAtomicBinary()
12253 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; in EmitPartwordAtomicBinary()
12254 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitPartwordAtomicBinary()
12255 F->insert(It, loopMBB); in EmitPartwordAtomicBinary()
12257 F->insert(It, loop2MBB); in EmitPartwordAtomicBinary()
12258 F->insert(It, exitMBB); in EmitPartwordAtomicBinary()
12259 exitMBB->splice(exitMBB->begin(), BB, in EmitPartwordAtomicBinary()
12260 std::next(MachineBasicBlock::iterator(MI)), BB->end()); in EmitPartwordAtomicBinary()
12261 exitMBB->transferSuccessorsAndUpdatePHIs(BB); in EmitPartwordAtomicBinary()
12286 // fallthrough --> loopMBB in EmitPartwordAtomicBinary()
12287 BB->addSuccessor(loopMBB); in EmitPartwordAtomicBinary()
12289 // The 4-byte load must be aligned, while a char or short may be in EmitPartwordAtomicBinary()
12305 // bne- loopMBB in EmitPartwordAtomicBinary()
12306 // fallthrough --> exitMBB in EmitPartwordAtomicBinary()
12311 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) in EmitPartwordAtomicBinary()
12317 // We need to use a 32-bit subregister to avoid a register class mismatch in 64-bit in EmitPartwordAtomicBinary()
12319 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg) in EmitPartwordAtomicBinary()
12325 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg) in EmitPartwordAtomicBinary()
12329 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) in EmitPartwordAtomicBinary()
12334 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) in EmitPartwordAtomicBinary()
12339 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg); in EmitPartwordAtomicBinary()
12341 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); in EmitPartwordAtomicBinary()
12343 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); in EmitPartwordAtomicBinary()
12344 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) in EmitPartwordAtomicBinary()
12348 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) in EmitPartwordAtomicBinary()
12353 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) in EmitPartwordAtomicBinary()
12357 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) in EmitPartwordAtomicBinary()
12360 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg) in EmitPartwordAtomicBinary()
12363 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg); in EmitPartwordAtomicBinary()
12369 BuildMI(BB, dl, TII->get(PPC::AND), SReg) in EmitPartwordAtomicBinary()
12376 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) in EmitPartwordAtomicBinary()
12380 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) in EmitPartwordAtomicBinary()
12385 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg); in EmitPartwordAtomicBinary()
12386 BuildMI(BB, dl, TII->get(PPC::BCC)) in EmitPartwordAtomicBinary()
12390 BB->addSuccessor(loop2MBB); in EmitPartwordAtomicBinary()
12391 BB->addSuccessor(exitMBB); in EmitPartwordAtomicBinary()
12394 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg); in EmitPartwordAtomicBinary()
12395 BuildMI(BB, dl, TII->get(PPC::STWCX)) in EmitPartwordAtomicBinary()
12399 BuildMI(BB, dl, TII->get(PPC::BCC)) in EmitPartwordAtomicBinary()
12403 BB->addSuccessor(loopMBB); in EmitPartwordAtomicBinary()
12404 BB->addSuccessor(exitMBB); in EmitPartwordAtomicBinary()
12411 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest) in EmitPartwordAtomicBinary()
12416 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg) in EmitPartwordAtomicBinary()
12429 MachineFunction *MF = MBB->getParent(); in emitEHSjLjSetJmp()
12430 MachineRegisterInfo &MRI = MF->getRegInfo(); in emitEHSjLjSetJmp()
12432 const BasicBlock *BB = MBB->getBasicBlock(); in emitEHSjLjSetJmp()
12433 MachineFunction::iterator I = ++MBB->getIterator(); in emitEHSjLjSetJmp()
12437 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); in emitEHSjLjSetJmp()
12441 MVT PVT = getPointerTy(MF->getDataLayout()); in emitEHSjLjSetJmp()
12461 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); in emitEHSjLjSetJmp()
12462 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); in emitEHSjLjSetJmp()
12463 MF->insert(I, mainMBB); in emitEHSjLjSetJmp()
12464 MF->insert(I, sinkMBB); in emitEHSjLjSetJmp()
12469 sinkMBB->splice(sinkMBB->begin(), MBB, in emitEHSjLjSetJmp()
12470 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); in emitEHSjLjSetJmp()
12471 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); in emitEHSjLjSetJmp()
12495 setUsesTOCBasePtr(*MBB->getParent()); in emitEHSjLjSetJmp()
12496 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) in emitEHSjLjSetJmp()
12506 if (MF->getFunction().hasFnAttribute(Attribute::Naked)) in emitEHSjLjSetJmp()
12512 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) in emitEHSjLjSetJmp()
12519 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); in emitEHSjLjSetJmp()
12520 MIB.addRegMask(TRI->getNoPreservedMask()); in emitEHSjLjSetJmp()
12522 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); in emitEHSjLjSetJmp()
12524 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) in emitEHSjLjSetJmp()
12526 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); in emitEHSjLjSetJmp()
12528 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); in emitEHSjLjSetJmp()
12529 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); in emitEHSjLjSetJmp()
12535 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); in emitEHSjLjSetJmp()
12539 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) in emitEHSjLjSetJmp()
12544 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) in emitEHSjLjSetJmp()
12551 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); in emitEHSjLjSetJmp()
12552 mainMBB->addSuccessor(sinkMBB); in emitEHSjLjSetJmp()
12555 BuildMI(*sinkMBB, sinkMBB->begin(), DL, in emitEHSjLjSetJmp()
12556 TII->get(PPC::PHI), DstReg) in emitEHSjLjSetJmp()
12570 MachineFunction *MF = MBB->getParent(); in emitEHSjLjLongJmp()
12571 MachineRegisterInfo &MRI = MF->getRegInfo(); in emitEHSjLjLongJmp()
12573 MVT PVT = getPointerTy(MF->getDataLayout()); in emitEHSjLjLongJmp()
12598 // Reload FP (the jumped-to function may not have had a in emitEHSjLjLongJmp()
12602 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) in emitEHSjLjLongJmp()
12606 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) in emitEHSjLjLongJmp()
12614 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) in emitEHSjLjLongJmp()
12618 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) in emitEHSjLjLongJmp()
12626 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) in emitEHSjLjLongJmp()
12630 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) in emitEHSjLjLongJmp()
12638 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) in emitEHSjLjLongJmp()
12642 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) in emitEHSjLjLongJmp()
12650 setUsesTOCBasePtr(*MBB->getParent()); in emitEHSjLjLongJmp()
12651 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) in emitEHSjLjLongJmp()
12659 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); in emitEHSjLjLongJmp()
12660 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); in emitEHSjLjLongJmp()
12668 if (MF.getFunction().hasFnAttribute("probe-stack")) in hasInlineStackProbe()
12669 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == in hasInlineStackProbe()
12670 "inline-asm"; in hasInlineStackProbe()
12676 unsigned StackAlign = TFI->getStackAlignment(); in getStackProbeSize()
12680 // stack-probe-size attribute. in getStackProbeSize()
12683 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096); in getStackProbeSize()
12685 StackProbeSize &= ~(StackAlign - 1); in getStackProbeSize()
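For example, assuming a 16-byte StackAlign, the rounding above leaves a 4096-byte probe size untouched but trims an unaligned attribute value down:

    //   4096 & ~15 = 4096
    //   1000 & ~15 =  992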
12699 MachineFunction *MF = MBB->getParent(); in emitProbedAlloca()
12703 const BasicBlock *ProbedBB = MBB->getBasicBlock(); in emitProbedAlloca()
12704 MachineRegisterInfo &MRI = MF->getRegInfo(); in emitProbedAlloca()
12706 // +-----+ in emitProbedAlloca()
12708 // +--+--+ in emitProbedAlloca()
12710 // +----v----+ in emitProbedAlloca()
12711 // +--->+ TestMBB +---+ in emitProbedAlloca()
12712 // | +----+----+ | in emitProbedAlloca()
12714 // | +-----v----+ | in emitProbedAlloca()
12715 // +---+ BlockMBB | | in emitProbedAlloca()
12716 // +----------+ | in emitProbedAlloca()
12718 // +---------+ | in emitProbedAlloca()
12719 // | TailMBB +<--+ in emitProbedAlloca()
12720 // +---------+ in emitProbedAlloca()
12725 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB); in emitProbedAlloca()
12726 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB); in emitProbedAlloca()
12727 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB); in emitProbedAlloca()
12729 MachineFunction::iterator MBBIter = ++MBB->getIterator(); in emitProbedAlloca()
12730 MF->insert(MBBIter, TestMBB); in emitProbedAlloca()
12731 MF->insert(MBBIter, BlockMBB); in emitProbedAlloca()
12732 MF->insert(MBBIter, TailMBB); in emitProbedAlloca()
12758 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer) in emitProbedAlloca()
12765 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), in emitProbedAlloca()
12771 int64_t NegProbeSize = -(int64_t)ProbeSize; in emitProbedAlloca()
12776 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg) in emitProbedAlloca()
12778 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI), in emitProbedAlloca()
12783 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg) in emitProbedAlloca()
12789 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div) in emitProbedAlloca()
12793 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul) in emitProbedAlloca()
12797 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod) in emitProbedAlloca()
12800 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) in emitProbedAlloca()
12809 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult) in emitProbedAlloca()
12812 BuildMI(TestMBB, DL, TII->get(PPC::BCC)) in emitProbedAlloca()
12816 TestMBB->addSuccessor(BlockMBB); in emitProbedAlloca()
12817 TestMBB->addSuccessor(TailMBB); in emitProbedAlloca()
12823 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) in emitProbedAlloca()
12827 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB); in emitProbedAlloca()
12828 BlockMBB->addSuccessor(TestMBB); in emitProbedAlloca()
12836 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET), in emitProbedAlloca()
12840 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg) in emitProbedAlloca()
12845 TailMBB->splice(TailMBB->end(), MBB, in emitProbedAlloca()
12846 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); in emitProbedAlloca()
12847 TailMBB->transferSuccessorsAndUpdatePHIs(MBB); in emitProbedAlloca()
12848 MBB->addSuccessor(TestMBB); in emitProbedAlloca()
12925 // control-flow patterns. in EmitInstrWithCustomInserter()
12926 const BasicBlock *LLVM_BB = BB->getBasicBlock(); in EmitInstrWithCustomInserter()
12927 MachineFunction::iterator It = ++BB->getIterator(); in EmitInstrWithCustomInserter()
12929 MachineFunction *F = BB->getParent(); in EmitInstrWithCustomInserter()
12930 MachineRegisterInfo &MRI = F->getRegInfo(); in EmitInstrWithCustomInserter()
12945 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, in EmitInstrWithCustomInserter()
12957 // fallthrough --> copy0MBB in EmitInstrWithCustomInserter()
12959 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitInstrWithCustomInserter()
12960 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitInstrWithCustomInserter()
12962 F->insert(It, copy0MBB); in EmitInstrWithCustomInserter()
12963 F->insert(It, sinkMBB); in EmitInstrWithCustomInserter()
12967 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI); in EmitInstrWithCustomInserter()
12968 copy0MBB->setCallFrameSize(CallFrameSize); in EmitInstrWithCustomInserter()
12969 sinkMBB->setCallFrameSize(CallFrameSize); in EmitInstrWithCustomInserter()
12972 sinkMBB->splice(sinkMBB->begin(), BB, in EmitInstrWithCustomInserter()
12973 std::next(MachineBasicBlock::iterator(MI)), BB->end()); in EmitInstrWithCustomInserter()
12974 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); in EmitInstrWithCustomInserter()
12977 BB->addSuccessor(copy0MBB); in EmitInstrWithCustomInserter()
12978 BB->addSuccessor(sinkMBB); in EmitInstrWithCustomInserter()
12981 BuildMI(BB, dl, TII->get(PPC::BC)) in EmitInstrWithCustomInserter()
12986 BuildMI(BB, dl, TII->get(PPC::BCC)) in EmitInstrWithCustomInserter()
12997 // Update machine-CFG edges in EmitInstrWithCustomInserter()
12998 BB->addSuccessor(sinkMBB); in EmitInstrWithCustomInserter()
13004 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) in EmitInstrWithCustomInserter()
13010 // To read the 64-bit time-base register on a 32-bit target, we read the in EmitInstrWithCustomInserter()
13022 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitInstrWithCustomInserter()
13023 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitInstrWithCustomInserter()
13025 F->insert(It, readMBB); in EmitInstrWithCustomInserter()
13026 F->insert(It, sinkMBB); in EmitInstrWithCustomInserter()
13029 sinkMBB->splice(sinkMBB->begin(), BB, in EmitInstrWithCustomInserter()
13030 std::next(MachineBasicBlock::iterator(MI)), BB->end()); in EmitInstrWithCustomInserter()
13031 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); in EmitInstrWithCustomInserter()
13033 BB->addSuccessor(readMBB); in EmitInstrWithCustomInserter()
13036 MachineRegisterInfo &RegInfo = F->getRegInfo(); in EmitInstrWithCustomInserter()
13041 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); in EmitInstrWithCustomInserter()
13042 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); in EmitInstrWithCustomInserter()
13043 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); in EmitInstrWithCustomInserter()
13047 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) in EmitInstrWithCustomInserter()
13050 BuildMI(BB, dl, TII->get(PPC::BCC)) in EmitInstrWithCustomInserter()
13055 BB->addSuccessor(readMBB); in EmitInstrWithCustomInserter()
13056 BB->addSuccessor(sinkMBB); in EmitInstrWithCustomInserter()
13187 MachineRegisterInfo &RegInfo = F->getRegInfo(); in EmitInstrWithCustomInserter()
13196 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitInstrWithCustomInserter()
13197 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitInstrWithCustomInserter()
13198 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitInstrWithCustomInserter()
13199 F->insert(It, loop1MBB); in EmitInstrWithCustomInserter()
13200 F->insert(It, loop2MBB); in EmitInstrWithCustomInserter()
13201 F->insert(It, exitMBB); in EmitInstrWithCustomInserter()
13202 exitMBB->splice(exitMBB->begin(), BB, in EmitInstrWithCustomInserter()
13203 std::next(MachineBasicBlock::iterator(MI)), BB->end()); in EmitInstrWithCustomInserter()
13204 exitMBB->transferSuccessorsAndUpdatePHIs(BB); in EmitInstrWithCustomInserter()
13208 // fallthrough --> loopMBB in EmitInstrWithCustomInserter()
13209 BB->addSuccessor(loop1MBB); in EmitInstrWithCustomInserter()
13214 // bne- exitBB in EmitInstrWithCustomInserter()
13217 // bne- loopMBB in EmitInstrWithCustomInserter()
13221 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB); in EmitInstrWithCustomInserter()
13222 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg) in EmitInstrWithCustomInserter()
13225 BuildMI(BB, dl, TII->get(PPC::BCC)) in EmitInstrWithCustomInserter()
13229 BB->addSuccessor(loop2MBB); in EmitInstrWithCustomInserter()
13230 BB->addSuccessor(exitMBB); in EmitInstrWithCustomInserter()
13233 BuildMI(BB, dl, TII->get(StoreMnemonic)) in EmitInstrWithCustomInserter()
13237 BuildMI(BB, dl, TII->get(PPC::BCC)) in EmitInstrWithCustomInserter()
13241 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); in EmitInstrWithCustomInserter()
13242 BB->addSuccessor(loop1MBB); in EmitInstrWithCustomInserter()
13243 BB->addSuccessor(exitMBB); in EmitInstrWithCustomInserter()
13250 // We must use 64-bit registers for addresses when targeting 64-bit, in EmitInstrWithCustomInserter()
13252 // can be 32-bit. in EmitInstrWithCustomInserter()
13264 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitInstrWithCustomInserter()
13265 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitInstrWithCustomInserter()
13266 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); in EmitInstrWithCustomInserter()
13267 F->insert(It, loop1MBB); in EmitInstrWithCustomInserter()
13268 F->insert(It, loop2MBB); in EmitInstrWithCustomInserter()
13269 F->insert(It, exitMBB); in EmitInstrWithCustomInserter()
13270 exitMBB->splice(exitMBB->begin(), BB, in EmitInstrWithCustomInserter()
13271 std::next(MachineBasicBlock::iterator(MI)), BB->end()); in EmitInstrWithCustomInserter()
13272 exitMBB->transferSuccessorsAndUpdatePHIs(BB); in EmitInstrWithCustomInserter()
13274 MachineRegisterInfo &RegInfo = F->getRegInfo(); in EmitInstrWithCustomInserter()
13299 // fallthrough --> loopMBB in EmitInstrWithCustomInserter()
13300 BB->addSuccessor(loop1MBB); in EmitInstrWithCustomInserter()
13302 // The 4-byte load must be aligned, while a char or short may be in EmitInstrWithCustomInserter()
13318 // bne- exitBB in EmitInstrWithCustomInserter()
13323 // bne- loop1MBB in EmitInstrWithCustomInserter()
13329 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) in EmitInstrWithCustomInserter()
13336 // We need to use a 32-bit subregister to avoid a register class mismatch in 64-bit in EmitInstrWithCustomInserter()
13338 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg) in EmitInstrWithCustomInserter()
13344 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg) in EmitInstrWithCustomInserter()
13348 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) in EmitInstrWithCustomInserter()
13353 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) in EmitInstrWithCustomInserter()
13358 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) in EmitInstrWithCustomInserter()
13361 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) in EmitInstrWithCustomInserter()
13365 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); in EmitInstrWithCustomInserter()
13367 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); in EmitInstrWithCustomInserter()
13368 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) in EmitInstrWithCustomInserter()
13372 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) in EmitInstrWithCustomInserter()
13375 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) in EmitInstrWithCustomInserter()
13378 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) in EmitInstrWithCustomInserter()
13383 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) in EmitInstrWithCustomInserter()
13386 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg) in EmitInstrWithCustomInserter()
13389 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg) in EmitInstrWithCustomInserter()
13392 BuildMI(BB, dl, TII->get(PPC::BCC)) in EmitInstrWithCustomInserter()
13396 BB->addSuccessor(loop2MBB); in EmitInstrWithCustomInserter()
13397 BB->addSuccessor(exitMBB); in EmitInstrWithCustomInserter()
13400 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg) in EmitInstrWithCustomInserter()
13403 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg) in EmitInstrWithCustomInserter()
13406 BuildMI(BB, dl, TII->get(PPC::STWCX)) in EmitInstrWithCustomInserter()
13410 BuildMI(BB, dl, TII->get(PPC::BCC)) in EmitInstrWithCustomInserter()
13414 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); in EmitInstrWithCustomInserter()
13415 BB->addSuccessor(loop1MBB); in EmitInstrWithCustomInserter()
13416 BB->addSuccessor(exitMBB); in EmitInstrWithCustomInserter()
13421 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest) in EmitInstrWithCustomInserter()
13426 // to round-to-zero. We emit this via custom inserter since the FPSCR in EmitInstrWithCustomInserter()
13433 MachineRegisterInfo &RegInfo = F->getRegInfo(); in EmitInstrWithCustomInserter()
13437 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); in EmitInstrWithCustomInserter()
13439 // Set rounding mode to round-to-zero. in EmitInstrWithCustomInserter()
13440 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)) in EmitInstrWithCustomInserter()
13444 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)) in EmitInstrWithCustomInserter()
13449 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest) in EmitInstrWithCustomInserter()
13456 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); in EmitInstrWithCustomInserter()
13468 MachineRegisterInfo &RegInfo = F->getRegInfo(); in EmitInstrWithCustomInserter()
13473 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest) in EmitInstrWithCustomInserter()
13476 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), in EmitInstrWithCustomInserter()
13481 MachineRegisterInfo &RegInfo = F->getRegInfo(); in EmitInstrWithCustomInserter()
13483 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); in EmitInstrWithCustomInserter()
13484 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), in EmitInstrWithCustomInserter()
13490 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm); in EmitInstrWithCustomInserter()
13491 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), in EmitInstrWithCustomInserter()
13500 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg); in EmitInstrWithCustomInserter()
13502 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); in EmitInstrWithCustomInserter()
13509 // 11 Round to -inf in EmitInstrWithCustomInserter()
13514 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0)) in EmitInstrWithCustomInserter()
13518 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0)) in EmitInstrWithCustomInserter()
13531 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg) in EmitInstrWithCustomInserter()
13536 MachineRegisterInfo &RegInfo = F->getRegInfo(); in EmitInstrWithCustomInserter()
13552 MachineFrameInfo &MFI = F->getFrameInfo(); in EmitInstrWithCustomInserter()
13555 MachineMemOperand *MMOStore = F->getMachineMemOperand( in EmitInstrWithCustomInserter()
13561 BuildMI(*BB, MI, dl, TII->get(StoreOp)) in EmitInstrWithCustomInserter()
13567 MachineMemOperand *MMOLoad = F->getMachineMemOperand( in EmitInstrWithCustomInserter()
13575 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg) in EmitInstrWithCustomInserter()
13585 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); in EmitInstrWithCustomInserter()
13596 MachineRegisterInfo &RegInfo = F->getRegInfo(); in EmitInstrWithCustomInserter()
13607 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg); in EmitInstrWithCustomInserter()
13608 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg) in EmitInstrWithCustomInserter()
13614 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg) in EmitInstrWithCustomInserter()
13625 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)) in EmitInstrWithCustomInserter()
13636 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg); in EmitInstrWithCustomInserter()
13638 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg); in EmitInstrWithCustomInserter()
13642 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF)) in EmitInstrWithCustomInserter()
13655 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY)) in EmitInstrWithCustomInserter()
13658 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY)) in EmitInstrWithCustomInserter()
13668 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); in EmitInstrWithCustomInserter()
13672 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB); in EmitInstrWithCustomInserter()
13674 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ) in EmitInstrWithCustomInserter()
13675 : TII->get(PPC::STQ)) in EmitInstrWithCustomInserter()
13687 //===----------------------------------------------------------------------===//
13689 //===----------------------------------------------------------------------===//
13694 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), in getEstimateRefinementSteps()
13695 // this is 2^-14. IEEE float has 23 digits and double has 52 digits. in getEstimateRefinementSteps()
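Each Newton-Raphson refinement roughly doubles the number of accurate bits, which is where the iteration counts implied by these accuracies come from:

    //   start at 2^-5  (~5 bits):  5 -> 10 -> 20 -> 40   => 3 steps reach f32's 23 digits, 4 reach f64's 52
    //   start at 2^-14 (~14 bits): 14 -> 28 -> 56        => 1 step reaches f32, 2 reach f64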
13712 // The output register of FTSQRT is CR field. in getSqrtInputTest()
13715 // Let e_b be the unbiased exponent of the double-precision in getSqrtInputTest()
13716 // floating-point operand in register FRB. in getSqrtInputTest()
13718 // - The double-precision floating-point operand in register FRB is a zero, in getSqrtInputTest()
13720 // - e_b is less than or equal to -970. in getSqrtInputTest()
13722 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is in getSqrtInputTest()
13724 // exponent is less than -970) in getSqrtInputTest()
13755 // The Newton-Raphson computation with a single constant does not provide in getSqrtEstimate()
13779 // Note: This functionality is used only when unsafe-fp-math is enabled, and in combineRepeatedFPDivisors()
13780 // on cores with reciprocal estimates (which are used when unsafe-fp-math is in combineRepeatedFPDivisors()
13782 // combiner logic (once the division -> reciprocal/multiply transformation in combineRepeatedFPDivisors()
13807 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); in getBaseWithConstantOffset()
13821 SDValue BaseLoc = Base->getBasePtr(); in isConsecutiveLSLoc()
13826 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); in isConsecutiveLSLoc()
13827 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); in isConsecutiveLSLoc()
13859 EVT VT = LS->getMemoryVT(); in isConsecutiveLS()
13860 SDValue Loc = LS->getBasePtr(); in isConsecutiveLS()
13864 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { in isConsecutiveLS()
13866 switch (N->getConstantOperandVal(1)) { in isConsecutiveLS()
13889 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); in isConsecutiveLS()
13892 if (N->getOpcode() == ISD::INTRINSIC_VOID) { in isConsecutiveLS()
13894 switch (N->getConstantOperandVal(1)) { in isConsecutiveLS()
13921 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); in isConsecutiveLS()
13933 SDValue Chain = LD->getChain(); in findConsecutiveLoad()
13934 EVT VT = LD->getMemoryVT(); in findConsecutiveLoad()
13940 // First, search up the chain, branching to follow all token-factor operands. in findConsecutiveLoad()
13942 // nodes just above the top-level loads and token factors. in findConsecutiveLoad()
13952 if (!Visited.count(ChainLD->getChain().getNode())) in findConsecutiveLoad()
13953 Queue.push_back(ChainLD->getChain().getNode()); in findConsecutiveLoad()
13954 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { in findConsecutiveLoad()
13955 for (const SDUse &O : ChainNext->ops()) in findConsecutiveLoad()
13962 // Second, search down the chain, starting from the top-level nodes recorded in findConsecutiveLoad()
13963 // in the first phase. These top-level nodes are the nodes just above all in findConsecutiveLoad()
13982 for (SDNode *U : LoadRoot->uses()) in findConsecutiveLoad()
13984 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) || in findConsecutiveLoad()
13985 U->getOpcode() == ISD::TokenFactor) && in findConsecutiveLoad()
14000 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); in generateEquivalentSub()
14004 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0), in generateEquivalentSub()
14006 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1), in generateEquivalentSub()
14019 DAG.getConstant(Size - 1, DL, MVT::i32)); in generateEquivalentSub()
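In plain C++ terms (illustrative, not the DAG code; the helper name is invented), the zero-extend / subtract / shift-by-(Size-1) sequence above computes an unsigned comparison for one representative predicate: once both 32-bit operands are widened to 64 bits, the sign bit of their difference is exactly the setult result.

#include <cstdint>
// ultViaSub(A, B) == (A < B) for all 32-bit A and B.
static uint64_t ultViaSub(uint32_t A, uint32_t B) {
  uint64_t Diff = (uint64_t)A - (uint64_t)B; // zext both sides, then subtract
  return Diff >> 63;                         // srl by Size - 1 extracts the sign bit
}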
14032 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); in ConvertSETCCToSubtract()
14044 for (const SDNode *U : N->uses()) in ConvertSETCCToSubtract()
14045 if (U->getOpcode() != ISD::ZERO_EXTEND) in ConvertSETCCToSubtract()
14048 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); in ConvertSETCCToSubtract()
14049 auto OpSize = N->getOperand(0).getValueSizeInBits(); in ConvertSETCCToSubtract()
14077 // trunc(binary-ops(zext(x), zext(y))) in DAGCombineTruncBoolExt()
14079 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) in DAGCombineTruncBoolExt()
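The identity behind this combine, shown as a trivial host-side check (not DAG code): widening two i1 values, combining them, and truncating back yields the same i1 as combining them directly, so the extend/truncate pair can be dropped.

#include <cassert>
static void truncBoolExtIdentity(bool X, bool Y) {
  bool ViaInt = ((static_cast<int>(X) & static_cast<int>(Y)) & 1) != 0; // trunc(zext(X) & zext(Y))
  assert(ViaInt == (X && Y));
}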
14085 if (N->getOpcode() == ISD::TRUNCATE && in DAGCombineTruncBoolExt()
14086 N->getValueType(0) != MVT::i1) in DAGCombineTruncBoolExt()
14089 if (N->getOperand(0).getValueType() != MVT::i32 && in DAGCombineTruncBoolExt()
14090 N->getOperand(0).getValueType() != MVT::i64) in DAGCombineTruncBoolExt()
14093 if (N->getOpcode() == ISD::SETCC || in DAGCombineTruncBoolExt()
14094 N->getOpcode() == ISD::SELECT_CC) { in DAGCombineTruncBoolExt()
14098 cast<CondCodeSDNode>(N->getOperand( in DAGCombineTruncBoolExt()
14099 N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); in DAGCombineTruncBoolExt()
14100 unsigned OpBits = N->getOperand(0).getValueSizeInBits(); in DAGCombineTruncBoolExt()
14103 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || in DAGCombineTruncBoolExt()
14104 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) in DAGCombineTruncBoolExt()
14107 if (!DAG.MaskedValueIsZero(N->getOperand(0), in DAGCombineTruncBoolExt()
14108 APInt::getHighBitsSet(OpBits, OpBits-1)) || in DAGCombineTruncBoolExt()
14109 !DAG.MaskedValueIsZero(N->getOperand(1), in DAGCombineTruncBoolExt()
14110 APInt::getHighBitsSet(OpBits, OpBits-1))) in DAGCombineTruncBoolExt()
14111 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI) in DAGCombineTruncBoolExt()
14116 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0)); in DAGCombineTruncBoolExt()
14117 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1)); in DAGCombineTruncBoolExt()
14131 // We now know that the higher-order bits are irrelevant, we just need to in DAGCombineTruncBoolExt()
14134 if (N->getOperand(0).getOpcode() != ISD::AND && in DAGCombineTruncBoolExt()
14135 N->getOperand(0).getOpcode() != ISD::OR && in DAGCombineTruncBoolExt()
14136 N->getOperand(0).getOpcode() != ISD::XOR && in DAGCombineTruncBoolExt()
14137 N->getOperand(0).getOpcode() != ISD::SELECT && in DAGCombineTruncBoolExt()
14138 N->getOperand(0).getOpcode() != ISD::SELECT_CC && in DAGCombineTruncBoolExt()
14139 N->getOperand(0).getOpcode() != ISD::TRUNCATE && in DAGCombineTruncBoolExt()
14140 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && in DAGCombineTruncBoolExt()
14141 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && in DAGCombineTruncBoolExt()
14142 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) in DAGCombineTruncBoolExt()
14145 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && in DAGCombineTruncBoolExt()
14146 N->getOperand(1).getOpcode() != ISD::AND && in DAGCombineTruncBoolExt()
14147 N->getOperand(1).getOpcode() != ISD::OR && in DAGCombineTruncBoolExt()
14148 N->getOperand(1).getOpcode() != ISD::XOR && in DAGCombineTruncBoolExt()
14149 N->getOperand(1).getOpcode() != ISD::SELECT && in DAGCombineTruncBoolExt()
14150 N->getOperand(1).getOpcode() != ISD::SELECT_CC && in DAGCombineTruncBoolExt()
14151 N->getOperand(1).getOpcode() != ISD::TRUNCATE && in DAGCombineTruncBoolExt()
14152 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && in DAGCombineTruncBoolExt()
14153 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && in DAGCombineTruncBoolExt()
14154 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) in DAGCombineTruncBoolExt()
14162 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || in DAGCombineTruncBoolExt()
14163 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || in DAGCombineTruncBoolExt()
14164 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && in DAGCombineTruncBoolExt()
14165 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || in DAGCombineTruncBoolExt()
14166 isa<ConstantSDNode>(N->getOperand(i))) in DAGCombineTruncBoolExt()
14167 Inputs.push_back(N->getOperand(i)); in DAGCombineTruncBoolExt()
14169 BinOps.push_back(N->getOperand(i)); in DAGCombineTruncBoolExt()
14171 if (N->getOpcode() == ISD::TRUNCATE) in DAGCombineTruncBoolExt()
14216 // Make sure that this is a self-contained cluster of operations (which in DAGCombineTruncBoolExt()
14223 for (const SDNode *User : Inputs[i].getNode()->uses()) { in DAGCombineTruncBoolExt()
14227 // Make sure that we're not going to promote the non-output-value in DAGCombineTruncBoolExt()
14232 if (User->getOpcode() == ISD::SELECT) { in DAGCombineTruncBoolExt()
14233 if (User->getOperand(0) == Inputs[i]) in DAGCombineTruncBoolExt()
14235 } else if (User->getOpcode() == ISD::SELECT_CC) { in DAGCombineTruncBoolExt()
14236 if (User->getOperand(0) == Inputs[i] || in DAGCombineTruncBoolExt()
14237 User->getOperand(1) == Inputs[i]) in DAGCombineTruncBoolExt()
14244 for (const SDNode *User : PromOps[i].getNode()->uses()) { in DAGCombineTruncBoolExt()
14248 // Make sure that we're not going to promote the non-output-value in DAGCombineTruncBoolExt()
14253 if (User->getOpcode() == ISD::SELECT) { in DAGCombineTruncBoolExt()
14254 if (User->getOperand(0) == PromOps[i]) in DAGCombineTruncBoolExt()
14256 } else if (User->getOpcode() == ISD::SELECT_CC) { in DAGCombineTruncBoolExt()
14257 if (User->getOperand(0) == PromOps[i] || in DAGCombineTruncBoolExt()
14258 User->getOperand(1) == PromOps[i]) in DAGCombineTruncBoolExt()
14266 // Constants may have users outside the cluster of to-be-promoted nodes, in DAGCombineTruncBoolExt()
14317 // The to-be-promoted operands of this node have not yet been in DAGCombineTruncBoolExt()
14320 // this cluster of to-be-promoted nodes, it is possible). in DAGCombineTruncBoolExt()
14325 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), in DAGCombineTruncBoolExt()
14326 PromOp.getNode()->op_end()); in DAGCombineTruncBoolExt()
14338 if (N->getOpcode() == ISD::TRUNCATE) in DAGCombineTruncBoolExt()
14339 return N->getOperand(0); in DAGCombineTruncBoolExt()
14352 // zext(binary-ops(trunc(x), trunc(y))) in DAGCombineExtBoolTrunc()
14354 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) in DAGCombineExtBoolTrunc()
14361 // 32-to-64-bit extensions; these occur often when 32-bit values are used as in DAGCombineExtBoolTrunc()
14365 if (N->getValueType(0) != MVT::i32 && in DAGCombineExtBoolTrunc()
14366 N->getValueType(0) != MVT::i64) in DAGCombineExtBoolTrunc()
14369 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || in DAGCombineExtBoolTrunc()
14370 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) in DAGCombineExtBoolTrunc()
14373 if (N->getOperand(0).getOpcode() != ISD::AND && in DAGCombineExtBoolTrunc()
14374 N->getOperand(0).getOpcode() != ISD::OR && in DAGCombineExtBoolTrunc()
14375 N->getOperand(0).getOpcode() != ISD::XOR && in DAGCombineExtBoolTrunc()
14376 N->getOperand(0).getOpcode() != ISD::SELECT && in DAGCombineExtBoolTrunc()
14377 N->getOperand(0).getOpcode() != ISD::SELECT_CC) in DAGCombineExtBoolTrunc()
14381 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; in DAGCombineExtBoolTrunc()
14419 // promoted because the operand is actually part of the to-be-promoted set. in DAGCombineExtBoolTrunc()
14422 // Make sure that this is a self-contained cluster of operations (which in DAGCombineExtBoolTrunc()
14429 for (SDNode *User : Inputs[i].getNode()->uses()) { in DAGCombineExtBoolTrunc()
14433 // If we're going to promote the non-output-value operand(s) or SELECT or in DAGCombineExtBoolTrunc()
14435 if (User->getOpcode() == ISD::SELECT) { in DAGCombineExtBoolTrunc()
14436 if (User->getOperand(0) == Inputs[i]) in DAGCombineExtBoolTrunc()
14438 User->getOperand(0).getValueType())); in DAGCombineExtBoolTrunc()
14439 } else if (User->getOpcode() == ISD::SELECT_CC) { in DAGCombineExtBoolTrunc()
14440 if (User->getOperand(0) == Inputs[i]) in DAGCombineExtBoolTrunc()
14442 User->getOperand(0).getValueType())); in DAGCombineExtBoolTrunc()
14443 if (User->getOperand(1) == Inputs[i]) in DAGCombineExtBoolTrunc()
14445 User->getOperand(1).getValueType())); in DAGCombineExtBoolTrunc()
14451 for (SDNode *User : PromOps[i].getNode()->uses()) { in DAGCombineExtBoolTrunc()
14455 // If we're going to promote the non-output-value operand(s) or SELECT or in DAGCombineExtBoolTrunc()
14457 if (User->getOpcode() == ISD::SELECT) { in DAGCombineExtBoolTrunc()
14458 if (User->getOperand(0) == PromOps[i]) in DAGCombineExtBoolTrunc()
14460 User->getOperand(0).getValueType())); in DAGCombineExtBoolTrunc()
14461 } else if (User->getOpcode() == ISD::SELECT_CC) { in DAGCombineExtBoolTrunc()
14462 if (User->getOperand(0) == PromOps[i]) in DAGCombineExtBoolTrunc()
14464 User->getOperand(0).getValueType())); in DAGCombineExtBoolTrunc()
14465 if (User->getOperand(1) == PromOps[i]) in DAGCombineExtBoolTrunc()
14467 User->getOperand(1).getValueType())); in DAGCombineExtBoolTrunc()
14472 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); in DAGCombineExtBoolTrunc()
14474 if (N->getOpcode() != ISD::ANY_EXTEND) { in DAGCombineExtBoolTrunc()
14485 if ((N->getOpcode() == ISD::ZERO_EXTEND && in DAGCombineExtBoolTrunc()
14488 OpBits-PromBits))) || in DAGCombineExtBoolTrunc()
14489 (N->getOpcode() == ISD::SIGN_EXTEND && in DAGCombineExtBoolTrunc()
14491 (OpBits-(PromBits-1)))) { in DAGCombineExtBoolTrunc()
14499 // truncation or extension to the final output type. in DAGCombineExtBoolTrunc()
14501 // Constant inputs need to be replaced with the to-be-promoted nodes that in DAGCombineExtBoolTrunc()
14508 if (Inputs[i].getValueType() == N->getValueType(0)) in DAGCombineExtBoolTrunc()
14510 else if (N->getOpcode() == ISD::SIGN_EXTEND) in DAGCombineExtBoolTrunc()
14512 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); in DAGCombineExtBoolTrunc()
14513 else if (N->getOpcode() == ISD::ZERO_EXTEND) in DAGCombineExtBoolTrunc()
14515 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); in DAGCombineExtBoolTrunc()
14518 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); in DAGCombineExtBoolTrunc()
14541 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || in DAGCombineExtBoolTrunc()
14543 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { in DAGCombineExtBoolTrunc()
14544 // The to-be-promoted operands of this node have not yet been in DAGCombineExtBoolTrunc()
14547 // this cluster of to-be-promoted nodes, it is possible). in DAGCombineExtBoolTrunc()
14553 // to-be-promoted comparison inputs. in DAGCombineExtBoolTrunc()
14557 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || in DAGCombineExtBoolTrunc()
14559 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { in DAGCombineExtBoolTrunc()
14565 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), in DAGCombineExtBoolTrunc()
14566 PromOp.getNode()->op_end()); in DAGCombineExtBoolTrunc()
14572 if (Ops[C+i].getValueType() == N->getValueType(0)) in DAGCombineExtBoolTrunc()
14575 if (N->getOpcode() == ISD::SIGN_EXTEND) in DAGCombineExtBoolTrunc()
14576 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); in DAGCombineExtBoolTrunc()
14577 else if (N->getOpcode() == ISD::ZERO_EXTEND) in DAGCombineExtBoolTrunc()
14578 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); in DAGCombineExtBoolTrunc()
14580 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); in DAGCombineExtBoolTrunc()
14589 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); in DAGCombineExtBoolTrunc()
14592 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); in DAGCombineExtBoolTrunc()
14596 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); in DAGCombineExtBoolTrunc()
14601 return N->getOperand(0); in DAGCombineExtBoolTrunc()
14605 if (N->getOpcode() == ISD::ZERO_EXTEND) in DAGCombineExtBoolTrunc()
14606 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), in DAGCombineExtBoolTrunc()
14608 N->getValueSizeInBits(0), PromBits), in DAGCombineExtBoolTrunc()
14609 dl, N->getValueType(0))); in DAGCombineExtBoolTrunc()
14611 assert(N->getOpcode() == ISD::SIGN_EXTEND && in DAGCombineExtBoolTrunc()
14613 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); in DAGCombineExtBoolTrunc()
14615 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); in DAGCombineExtBoolTrunc()
14617 ISD::SRA, dl, N->getValueType(0), in DAGCombineExtBoolTrunc()
14618 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), in DAGCombineExtBoolTrunc()
14624 assert(N->getOpcode() == ISD::SETCC && in combineSetCC()
14627 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); in combineSetCC()
14629 SDValue LHS = N->getOperand(0); in combineSetCC()
14630 SDValue RHS = N->getOperand(1); in combineSetCC()
14632 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS. in combineSetCC()
14637 // x == 0-y --> x+y == 0 in combineSetCC()
14638 // x != 0-y --> x+y != 0 in combineSetCC()
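A tiny host-side check (illustrative, not DAG code) of the canonicalization noted above: comparing against a negation is the same as comparing the sum against zero, even when the subtraction wraps.

#include <cassert>
#include <cstdint>
static void negCompareIdentity(uint32_t X, uint32_t Y) {
  assert((X == 0u - Y) == (X + Y == 0u)); // x == 0-y  <=>  x+y == 0 (mod 2^32)
}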
14643 EVT VT = N->getValueType(0); in combineSetCC()
14656 return LD->getExtensionType() == ISD::EXTLOAD && in isFPExtLoad()
14661 /// Reduces the number of fp-to-int conversions when building a vector.
14671 assert(N->getOpcode() == ISD::BUILD_VECTOR && in combineElementTruncationToVectorTruncation()
14677 SDValue FirstInput = N->getOperand(0); in combineElementTruncationToVectorTruncation()
14679 "The input operand must be an fp-to-int conversion."); in combineElementTruncationToVectorTruncation()
14693 EVT TargetVT = N->getValueType(0); in combineElementTruncationToVectorTruncation()
14694 for (int i = 0, e = N->getNumOperands(); i < e; ++i) { in combineElementTruncationToVectorTruncation()
14695 SDValue NextOp = N->getOperand(i); in combineElementTruncationToVectorTruncation()
14701 // If we are converting to 32-bit integers, we need to add an FP_ROUND. in combineElementTruncationToVectorTruncation()
14707 if (N->getOperand(i) != FirstInput) in combineElementTruncationToVectorTruncation()
14711 // If this is a splat, we leave it as-is since there will be only a single in combineElementTruncationToVectorTruncation()
14712 // fp-to-int conversion followed by a splat of the integer. This is better in combineElementTruncationToVectorTruncation()
14713 // for 32-bit and smaller ints and neutral for 64-bit ints. in combineElementTruncationToVectorTruncation()
14718 for (int i = 0, e = N->getNumOperands(); i < e; ++i) { in combineElementTruncationToVectorTruncation()
14719 SDValue In = N->getOperand(i).getOperand(0); in combineElementTruncationToVectorTruncation()
14721 // For 32-bit values, we need to add an FP_ROUND node (if we made it in combineElementTruncationToVectorTruncation()
14756 assert(N->getOpcode() == ISD::BUILD_VECTOR && in combineBVOfConsecutiveLoads()
14761 // Return early for non-byte-sized types, as they can't be consecutive. in combineBVOfConsecutiveLoads()
14762 if (!N->getValueType(0).getVectorElementType().isByteSized()) in combineBVOfConsecutiveLoads()
14767 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize(); in combineBVOfConsecutiveLoads()
14768 SDValue FirstInput = N->getOperand(0); in combineBVOfConsecutiveLoads()
14775 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD; in combineBVOfConsecutiveLoads()
14779 N->getNumOperands() == 1) in combineBVOfConsecutiveLoads()
14787 for (int i = 1, e = N->getNumOperands(); i < e; ++i) { in combineBVOfConsecutiveLoads()
14789 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND) in combineBVOfConsecutiveLoads()
14792 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) : in combineBVOfConsecutiveLoads()
14793 N->getOperand(i); in combineBVOfConsecutiveLoads()
14798 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1); in combineBVOfConsecutiveLoads()
14803 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD) in combineBVOfConsecutiveLoads()
14806 // We only care about regular loads. The PPC-specific load intrinsics in combineBVOfConsecutiveLoads()
14826 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(), in combineBVOfConsecutiveLoads()
14827 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), in combineBVOfConsecutiveLoads()
14828 FirstLoad->getAlign()); in combineBVOfConsecutiveLoads()
14833 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(), in combineBVOfConsecutiveLoads()
14834 LastLoad->getBasePtr(), LastLoad->getPointerInfo(), in combineBVOfConsecutiveLoads()
14835 LastLoad->getAlign()); in combineBVOfConsecutiveLoads()
14837 for (int i = N->getNumOperands() - 1; i >= 0; i--) in combineBVOfConsecutiveLoads()
14840 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad, in combineBVOfConsecutiveLoads()
14841 DAG.getUNDEF(N->getValueType(0)), Ops); in combineBVOfConsecutiveLoads()
14859 SmallVector<int, 16> ShuffleMask(NumElems, -1); in addShuffleForVecExtend()
14864 for (unsigned i = 0; i < N->getNumOperands(); i++) { in addShuffleForVecExtend()
14877 EVT VT = N->getValueType(0); in addShuffleForVecExtend()
14897 // For example: 0x3074B8FC byte->word in combineBVOfVecSExt()
14900 // For example: 0x000070F8 byte->double word in combineBVOfVecSExt()
14904 0x3074B8FC, // b->w in combineBVOfVecSExt()
14905 0x000070F8, // b->d in combineBVOfVecSExt()
14906 0x10325476, // h->w in combineBVOfVecSExt()
14907 0x00003074, // h->d in combineBVOfVecSExt()
14908 0x00001032, // w->d in combineBVOfVecSExt()
14915 auto isSExtOfVecExtract = [&](SDValue Op) -> bool { in combineBVOfVecSExt()
14934 Index = ExtOp->getZExtValue(); in combineBVOfVecSExt()
14950 for (unsigned i = 0; i < N->getNumOperands(); i++) { in combineBVOfVecSExt()
14951 if (!isSExtOfVecExtract(N->getOperand(i))) { in combineBVOfVecSExt()
14960 int OutputSize = N->getValueType(0).getScalarSizeInBits(); in combineBVOfVecSExt()
14994 if (N->getValueType(0) != MVT::v1i128) in combineBVZEXTLOAD()
14997 SDValue Operand = N->getOperand(0); in combineBVZEXTLOAD()
15004 EVT MemoryType = LD->getMemoryVT(); in combineBVZEXTLOAD()
15013 (LD->getExtensionType() != ISD::ZEXTLOAD && in combineBVZEXTLOAD()
15014 LD->getExtensionType() != ISD::EXTLOAD)) in combineBVZEXTLOAD()
15018 LD->getChain(), LD->getBasePtr(), in combineBVZEXTLOAD()
15023 LoadOps, MemoryType, LD->getMemOperand()); in combineBVZEXTLOAD()
15028 assert(N->getOpcode() == ISD::BUILD_VECTOR && in DAGCombineBuildVector()
15038 // float-to-int conversions intact. We can generate MUCH better code for in DAGCombineBuildVector()
15039 // a float-to-int conversion of a vector of floats. in DAGCombineBuildVector()
15040 SDValue FirstInput = N->getOperand(0); in DAGCombineBuildVector()
15072 if (N->getValueType(0) != MVT::v2f64) in DAGCombineBuildVector()
15080 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && in DAGCombineBuildVector()
15081 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) in DAGCombineBuildVector()
15083 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode()) in DAGCombineBuildVector()
15087 SDValue Ext2 = N->getOperand(1).getOperand(0); in DAGCombineBuildVector()
15100 int FirstElem = Ext1Op->getZExtValue(); in DAGCombineBuildVector()
15101 int SecondElem = Ext2Op->getZExtValue(); in DAGCombineBuildVector()
15111 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? in DAGCombineBuildVector()
15119 assert((N->getOpcode() == ISD::SINT_TO_FP || in combineFPToIntToFP()
15120 N->getOpcode() == ISD::UINT_TO_FP) && in combineFPToIntToFP()
15121 "Need an int -> FP conversion node here"); in combineFPToIntToFP()
15130 // Don't handle ppc_fp128 here or conversions that are out-of-range capable in combineFPToIntToFP()
15145 bool Signed = N->getOpcode() == ISD::SINT_TO_FP; in combineFPToIntToFP()
15154 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; in combineFPToIntToFP()
15157 Ops, MVT::i8, LDN->getMemOperand()); in combineFPToIntToFP()
15160 // For signed conversion, we need to sign-extend the value in the VSR in combineFPToIntToFP()
15172 // scalar instructions, we have no method for zero- or sign-extending the in combineFPToIntToFP()
15180 // If we have FCFIDS, then use it when converting to single-precision. in combineFPToIntToFP()
15181 // Otherwise, convert to double-precision and then round. in combineFPToIntToFP()
15224 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15239 switch (N->getOpcode()) { in expandVSXLoadForLE()
15244 Chain = LD->getChain(); in expandVSXLoadForLE()
15245 Base = LD->getBasePtr(); in expandVSXLoadForLE()
15246 MMO = LD->getMemOperand(); in expandVSXLoadForLE()
15248 // things alone. For a built-in, we have to make the change for in expandVSXLoadForLE()
15250 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16) in expandVSXLoadForLE()
15256 Chain = Intrin->getChain(); in expandVSXLoadForLE()
15257 // Similarly to the store case below, Intrin->getBasePtr() doesn't get in expandVSXLoadForLE()
15259 Base = Intrin->getOperand(2); in expandVSXLoadForLE()
15260 MMO = Intrin->getMemOperand(); in expandVSXLoadForLE()
15265 MVT VecTy = N->getValueType(0).getSimpleVT(); in expandVSXLoadForLE()
15290 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15306 switch (N->getOpcode()) { in expandVSXStoreForLE()
15311 Chain = ST->getChain(); in expandVSXStoreForLE()
15312 Base = ST->getBasePtr(); in expandVSXStoreForLE()
15313 MMO = ST->getMemOperand(); in expandVSXStoreForLE()
15316 // things alone. For a built-in, we have to make the change for in expandVSXStoreForLE()
15318 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16) in expandVSXStoreForLE()
15324 Chain = Intrin->getChain(); in expandVSXStoreForLE()
15325 // Intrin->getBasePtr() oddly does not get what we want. in expandVSXStoreForLE()
15326 Base = Intrin->getOperand(3); in expandVSXStoreForLE()
15327 MMO = Intrin->getMemOperand(); in expandVSXStoreForLE()
15333 SDValue Src = N->getOperand(SrcOpnd); in expandVSXStoreForLE()
15359 unsigned Opcode = N->getOperand(1).getOpcode(); in combineStoreFPToInt()
15361 bool Strict = N->getOperand(1)->isStrictFPOpcode(); in combineStoreFPToInt()
15367 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0); in combineStoreFPToInt()
15368 EVT Op1VT = N->getOperand(1).getValueType(); in combineStoreFPToInt()
15384 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt) in combineStoreFPToInt()
15387 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget); in combineStoreFPToInt()
15391 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2), in combineStoreFPToInt()
15397 cast<StoreSDNode>(N)->getMemoryVT(), in combineStoreFPToInt()
15398 cast<StoreSDNode>(N)->getMemOperand()); in combineStoreFPToInt()
15405 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts). in isAlternatingShuffMask()
15422 // Find first non-undef input. in isSplatBV()
15429 // All inputs are undef or the same as the first non-undef input. in isSplatBV()
15464 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth; in fixupShuffleMaskForPermutedSToV()
15485 // Can't handle non-const element indices or different vector types in getSToVPermuted()
15486 // for the input to the extract and the output of the scalar_to_vector. in getSToVPermuted()
15492 SmallVector<int, 16> NewMask(NumElts, -1); in getSToVPermuted()
15494 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1; in getSToVPermuted()
15495 NewMask[ResultInElt] = Idx->getZExtValue(); in getSToVPermuted()
15517 SDValue LHS = SVN->getOperand(0); in combineVectorShuffle()
15518 SDValue RHS = SVN->getOperand(1); in combineVectorShuffle()
15519 auto Mask = SVN->getMask(); in combineVectorShuffle()
15540 Mask = cast<ShuffleVectorSDNode>(Res)->getMask(); in combineVectorShuffle()
15576 int LHSMaxIdx = -1; in combineVectorShuffle()
15577 int RHSMinIdx = -1; in combineVectorShuffle()
15578 int RHSMaxIdx = -1; in combineVectorShuffle()
15584 // than 64 bits since for 64-bit elements, all instructions already put in combineVectorShuffle()
15615 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); in combineVectorShuffle()
15621 Mask = cast<ShuffleVectorSDNode>(Res)->getMask(); in combineVectorShuffle()
15647 // If element from non-splat is undef, pick first element from splat. in combineVectorShuffle()
15648 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts; in combineVectorShuffle()
15656 // If element from non-splat is undef, pick first element from splat. in combineVectorShuffle()
15666 // If element from non-splat is undef, pick first element from splat. in combineVectorShuffle()
15667 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0; in combineVectorShuffle()
15675 // If element from non-splat is undef, pick first element from splat. in combineVectorShuffle()
15676 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0; in combineVectorShuffle()
15683 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue(); in combineVectorShuffle()
15690 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); in combineVectorShuffle()
15699 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool { in combineVReverseMemOP()
15700 auto Mask = SVN->getMask(); in combineVReverseMemOP()
15714 EVT VT = SVN->getValueType(0); in combineVReverseMemOP()
15728 if (LSBase->getOpcode() == ISD::LOAD) { in combineVReverseMemOP()
15732 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end(); in combineVReverseMemOP()
15734 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE) in combineVReverseMemOP()
15738 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()}; in combineVReverseMemOP()
15741 LSBase->getMemoryVT(), LSBase->getMemOperand()); in combineVReverseMemOP()
15744 if (LSBase->getOpcode() == ISD::STORE) { in combineVReverseMemOP()
15746 // Forcing the use of an X-Form (since swapped stores only have in combineVReverseMemOP()
15747 // X-Forms) without removing the swap is unprofitable. in combineVReverseMemOP()
15748 if (!SVN->hasOneUse()) in combineVReverseMemOP()
15752 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0), in combineVReverseMemOP()
15753 LSBase->getBasePtr()}; in combineVReverseMemOP()
15756 LSBase->getMemoryVT(), LSBase->getMemOperand()); in combineVReverseMemOP()
15781 switch (N->getOpcode()) { in PerformDAGCombine()
15789 SDValue Op1 = N->getOperand(0); in PerformDAGCombine()
15790 SDValue Op2 = N->getOperand(1); in PerformDAGCombine()
15793 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 || in PerformDAGCombine()
15801 uint64_t Imm = Op2->getAsZExtVal(); in PerformDAGCombine()
15807 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0)); in PerformDAGCombine()
15821 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. in PerformDAGCombine()
15822 return N->getOperand(0); in PerformDAGCombine()
15825 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. in PerformDAGCombine()
15826 return N->getOperand(0); in PerformDAGCombine()
15829 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { in PerformDAGCombine()
15830 if (C->isZero() || // 0 >>s V -> 0. in PerformDAGCombine()
15831 C->isAllOnes()) // -1 >>s V -> -1. in PerformDAGCombine()
15832 return N->getOperand(0); in PerformDAGCombine()
15851 if (ISD::isNormalLoad(N->getOperand(0).getNode())) { in PerformDAGCombine()
15852 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0)); in PerformDAGCombine()
15858 EVT Op1VT = N->getOperand(1).getValueType(); in PerformDAGCombine()
15859 unsigned Opcode = N->getOperand(1).getOpcode(); in PerformDAGCombine()
15869 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1)); in PerformDAGCombine()
15875 // Turn STORE (BSWAP) -> sthbrx/stwbrx. in PerformDAGCombine()
15876 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP && in PerformDAGCombine()
15877 N->getOperand(1).getNode()->hasOneUse() && in PerformDAGCombine()
15882 // two bytes in byte-reversed order. in PerformDAGCombine()
15883 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT(); in PerformDAGCombine()
15887 SDValue BSwapOp = N->getOperand(1).getOperand(0); in PerformDAGCombine()
15888 // Do an any-extend to 32-bits if this is a half-word input. in PerformDAGCombine()
15895 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits(); in PerformDAGCombine()
15904 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT) in PerformDAGCombine()
15908 Ops, cast<StoreSDNode>(N)->getMemoryVT(), in PerformDAGCombine()
15909 cast<StoreSDNode>(N)->getMemOperand()); in PerformDAGCombine()
15912 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0> in PerformDAGCombine()
15915 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) { in PerformDAGCombine()
15916 // Need to sign-extended to 64-bits to handle negative values. in PerformDAGCombine()
15917 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT(); in PerformDAGCombine()
15918 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1), in PerformDAGCombine()
15925 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2), in PerformDAGCombine()
15926 N->getOperand(3)); in PerformDAGCombine()
15927 cast<StoreSDNode>(N)->setTruncatingStore(true); in PerformDAGCombine()
15932 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. in PerformDAGCombine()
15944 EVT VT = LD->getValueType(0); in PerformDAGCombine()
15947 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. in PerformDAGCombine()
15956 // We sometimes end up with a 64-bit integer load, from which we extract in PerformDAGCombine()
15957 // two single-precision floating-point numbers. This happens with in PerformDAGCombine()
15961 // floating-point numbers turn into store/load pairs. Even with direct moves, in PerformDAGCombine()
15962 // just loading the two floating-point numbers is likely better. in PerformDAGCombine()
15967 if (LD->getExtensionType() != ISD::NON_EXTLOAD || in PerformDAGCombine()
15968 LD->isVolatile()) in PerformDAGCombine()
15979 if (!LD->hasNUsesOfValue(2, 0)) in PerformDAGCombine()
15982 auto UI = LD->use_begin(); in PerformDAGCombine()
15987 if (Trunc->getOpcode() != ISD::TRUNCATE) in PerformDAGCombine()
15990 if (Trunc->getOpcode() != ISD::TRUNCATE || in PerformDAGCombine()
15991 Trunc->getValueType(0) != MVT::i32 || in PerformDAGCombine()
15992 !Trunc->hasOneUse()) in PerformDAGCombine()
15994 if (RightShift->getOpcode() != ISD::SRL || in PerformDAGCombine()
15995 !isa<ConstantSDNode>(RightShift->getOperand(1)) || in PerformDAGCombine()
15996 RightShift->getConstantOperandVal(1) != 32 || in PerformDAGCombine()
15997 !RightShift->hasOneUse()) in PerformDAGCombine()
16000 SDNode *Trunc2 = *RightShift->use_begin(); in PerformDAGCombine()
16001 if (Trunc2->getOpcode() != ISD::TRUNCATE || in PerformDAGCombine()
16002 Trunc2->getValueType(0) != MVT::i32 || in PerformDAGCombine()
16003 !Trunc2->hasOneUse()) in PerformDAGCombine()
16006 SDNode *Bitcast = *Trunc->use_begin(); in PerformDAGCombine()
16007 SDNode *Bitcast2 = *Trunc2->use_begin(); in PerformDAGCombine()
16009 if (Bitcast->getOpcode() != ISD::BITCAST || in PerformDAGCombine()
16010 Bitcast->getValueType(0) != MVT::f32) in PerformDAGCombine()
16012 if (Bitcast2->getOpcode() != ISD::BITCAST || in PerformDAGCombine()
16013 Bitcast2->getValueType(0) != MVT::f32) in PerformDAGCombine()
16019 // Bitcast has the second float (in memory-layout order) and Bitcast2 in PerformDAGCombine()
16022 SDValue BasePtr = LD->getBasePtr(); in PerformDAGCombine()
16023 if (LD->isIndexed()) { in PerformDAGCombine()
16024 assert(LD->getAddressingMode() == ISD::PRE_INC && in PerformDAGCombine()
16025 "Non-pre-inc AM on PPC?"); in PerformDAGCombine()
16028 LD->getOffset()); in PerformDAGCombine()
16032 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; in PerformDAGCombine()
16033 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, in PerformDAGCombine()
16034 LD->getPointerInfo(), LD->getAlign(), in PerformDAGCombine()
16035 MMOFlags, LD->getAAInfo()); in PerformDAGCombine()
16041 LD->getPointerInfo().getWithOffset(4), in PerformDAGCombine()
16042 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo()); in PerformDAGCombine()
16044 if (LD->isIndexed()) { in PerformDAGCombine()
16045 // Note that DAGCombine should re-form any pre-increment load(s) from in PerformDAGCombine()
16053 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1), in PerformDAGCombine()
16061 EVT MemVT = LD->getMemoryVT(); in PerformDAGCombine()
16064 if (LD->isUnindexed() && VT.isVector() && in PerformDAGCombine()
16070 LD->getAlign() < ABIAlignment) { in PerformDAGCombine()
16071 // This is a type-legal unaligned Altivec load. in PerformDAGCombine()
16072 SDValue Chain = LD->getChain(); in PerformDAGCombine()
16073 SDValue Ptr = LD->getBasePtr(); in PerformDAGCombine()
16082 // loads into an alignment-based permutation-control instruction (lvsl in PerformDAGCombine()
16088 // last requested load), but rather sizeof(vector) - 1 bytes after the in PerformDAGCombine()
16115 // reading up to (sizeof(vector)-1) bytes below the address of the in PerformDAGCombine()
16119 MF.getMachineMemOperand(LD->getMemOperand(), in PerformDAGCombine()
16120 -(int64_t)MemVT.getStoreSize()+1, in PerformDAGCombine()
16121 2*MemVT.getStoreSize()-1); in PerformDAGCombine()
16147 --IncValue; in PerformDAGCombine()
16154 MF.getMachineMemOperand(LD->getMemOperand(), in PerformDAGCombine()
16155 1, 2*MemVT.getStoreSize()-1); in PerformDAGCombine()
16165 // Because vperm has a big-endian bias, we must reverse the order in PerformDAGCombine()
16186 // The output of the permutation is our loaded result, the TokenFactor is in PerformDAGCombine()
16195 unsigned IID = N->getConstantOperandVal(0); in PerformDAGCombine()
16198 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) { in PerformDAGCombine()
16199 SDValue Add = N->getOperand(1); in PerformDAGCombine()
16203 if (DAG.MaskedValueIsZero(Add->getOperand(1), in PerformDAGCombine()
16206 SDNode *BasePtr = Add->getOperand(0).getNode(); in PerformDAGCombine()
16207 for (SDNode *U : BasePtr->uses()) { in PerformDAGCombine()
16208 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN && in PerformDAGCombine()
16209 U->getConstantOperandVal(0) == IID) { in PerformDAGCombine()
16219 if (isa<ConstantSDNode>(Add->getOperand(1))) { in PerformDAGCombine()
16220 SDNode *BasePtr = Add->getOperand(0).getNode(); in PerformDAGCombine()
16221 for (SDNode *U : BasePtr->uses()) { in PerformDAGCombine()
16222 if (U->getOpcode() == ISD::ADD && in PerformDAGCombine()
16223 isa<ConstantSDNode>(U->getOperand(1)) && in PerformDAGCombine()
16224 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) % in PerformDAGCombine()
16228 for (SDNode *V : OtherAdd->uses()) { in PerformDAGCombine()
16229 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN && in PerformDAGCombine()
16230 V->getConstantOperandVal(0) == IID) { in PerformDAGCombine()
16245 SDValue V1 = N->getOperand(1); in PerformDAGCombine()
16246 SDValue V2 = N->getOperand(2); in PerformDAGCombine()
16251 // (0-a, a) in PerformDAGCombine()
16257 // (a, 0-a) in PerformDAGCombine()
16263 // (x-y, y-x) in PerformDAGCombine()
16275 switch (N->getConstantOperandVal(1)) { in PerformDAGCombine()
16281 // These sum-across intrinsics only have a chain due to the side effect in PerformDAGCombine()
16286 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) { in PerformDAGCombine()
16290 bool BVNIsConstantSplat = BVN->isConstantSplat( in PerformDAGCombine()
16295 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0)); in PerformDAGCombine()
16302 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. in PerformDAGCombine()
16310 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. in PerformDAGCombine()
16312 switch (N->getConstantOperandVal(1)) { in PerformDAGCombine()
16322 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. in PerformDAGCombine()
16324 // expansion even for 64-bit BSWAP (LOAD). in PerformDAGCombine()
16326 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64; in PerformDAGCombine()
16327 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) && in PerformDAGCombine()
16328 N->getOperand(0).hasOneUse(); in PerformDAGCombine()
16330 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || in PerformDAGCombine()
16332 SDValue Load = N->getOperand(0); in PerformDAGCombine()
16334 // Create the byte-swapping load. in PerformDAGCombine()
16336 LD->getChain(), // Chain in PerformDAGCombine()
16337 LD->getBasePtr(), // Ptr in PerformDAGCombine()
16338 DAG.getValueType(N->getValueType(0)) // VT in PerformDAGCombine()
16342 DAG.getVTList(N->getValueType(0) == MVT::i64 ? in PerformDAGCombine()
16344 Ops, LD->getMemoryVT(), LD->getMemOperand()); in PerformDAGCombine()
16348 if (N->getValueType(0) == MVT::i16) in PerformDAGCombine()
16362 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only in PerformDAGCombine()
16367 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0)); in PerformDAGCombine()
16370 if (!LD->isSimple()) in PerformDAGCombine()
16372 SDValue BasePtr = LD->getBasePtr(); in PerformDAGCombine()
16373 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, in PerformDAGCombine()
16374 LD->getPointerInfo(), LD->getAlign()); in PerformDAGCombine()
16379 LD->getMemOperand(), 4, 4); in PerformDAGCombine()
16380 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO); in PerformDAGCombine()
16396 // and a normal output). in PerformDAGCombine()
16398 if (!N->getOperand(0).hasOneUse() && in PerformDAGCombine()
16399 !N->getOperand(1).hasOneUse() && in PerformDAGCombine()
16400 !N->getOperand(2).hasOneUse()) { in PerformDAGCombine()
16405 SDNode *LHSN = N->getOperand(0).getNode(); in PerformDAGCombine()
16406 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); in PerformDAGCombine()
16408 if (UI->getOpcode() == PPCISD::VCMP_rec && in PerformDAGCombine()
16409 UI->getOperand(1) == N->getOperand(1) && in PerformDAGCombine()
16410 UI->getOperand(2) == N->getOperand(2) && in PerformDAGCombine()
16411 UI->getOperand(0) == N->getOperand(0)) { in PerformDAGCombine()
16418 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1)) in PerformDAGCombine()
16425 for (SDNode::use_iterator UI = VCMPrecNode->use_begin(); in PerformDAGCombine()
16427 assert(UI != VCMPrecNode->use_end() && "Didn't find user!"); in PerformDAGCombine()
16429 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { in PerformDAGCombine()
16430 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) { in PerformDAGCombine()
16439 if (FlagUser->getOpcode() == PPCISD::MFOCRF) in PerformDAGCombine()
16446 // lowering is done pre-legalize, because the legalizer lowers the predicate in PerformDAGCombine()
16450 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); in PerformDAGCombine()
16451 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); in PerformDAGCombine()
16459 // Since we are doing this pre-legalize, the RHS can be a constant of in PerformDAGCombine()
16462 auto RHSAPInt = RHS->getAsAPIntVal(); in PerformDAGCombine()
16472 return N->getOperand(0); in PerformDAGCombine()
16475 N->getOperand(0), N->getOperand(4)); in PerformDAGCombine()
16501 MemNode->getMemoryVT(), MemNode->getMemOperand()); in PerformDAGCombine()
16505 if (N->getOperand(0) == LHS.getValue(1)) in PerformDAGCombine()
16507 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) { in PerformDAGCombine()
16509 SDValue InTF = N->getOperand(0); in PerformDAGCombine()
16518 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4), in PerformDAGCombine()
16557 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), in PerformDAGCombine()
16560 N->getOperand(4), CompNode.getValue(1)); in PerformDAGCombine()
16576 EVT VT = N->getValueType(0); in BuildSDIVPow2()
16584 SDValue N0 = N->getOperand(0); in BuildSDIVPow2()
16587 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero(); in BuildSDIVPow2()
16601 //===----------------------------------------------------------------------===//
16603 //===----------------------------------------------------------------------===//
16615 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) in computeKnownBitsForTargetNode()
16679 // If the nested loop is an innermost loop, prefer to a 32-byte alignment, in getPrefLoopAlignment()
16680 // so that we can decrease cache misses and branch-prediction misses. in getPrefLoopAlignment()
16683 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty()) in getPrefLoopAlignment()
16689 // For small loops (between 5 and 8 instructions), align to a 32-byte in getPrefLoopAlignment()
16690 // boundary so that the entire loop fits in one instruction-cache line. in getPrefLoopAlignment()
16692 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) in getPrefLoopAlignment()
16694 LoopSize += TII->getInstSizeInBytes(J); in getPrefLoopAlignment()
16709 /// getConstraintType - Given a constraint, return the type of
16754 Type *type = CallOperandVal->getType(); in getSingleConstraintMatchWeight()
16757 if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) in getSingleConstraintMatchWeight()
16762 type->isVectorTy()) in getSingleConstraintMatchWeight()
16764 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64)) in getSingleConstraintMatchWeight()
16765 return CW_Register; // just holds 64-bit integer data. in getSingleConstraintMatchWeight()
16766 else if (StringRef(constraint) == "ws" && type->isDoubleTy()) in getSingleConstraintMatchWeight()
16768 else if (StringRef(constraint) == "ww" && type->isFloatTy()) in getSingleConstraintMatchWeight()
16776 if (type->isIntegerTy()) in getSingleConstraintMatchWeight()
16780 if (type->isFloatTy()) in getSingleConstraintMatchWeight()
16784 if (type->isDoubleTy()) in getSingleConstraintMatchWeight()
16788 if (type->isVectorTy()) in getSingleConstraintMatchWeight()
16808 case 'b': // R1-R31 in getRegForInlineAsmConstraint()
16812 case 'r': // R0-R31 in getRegForInlineAsmConstraint()
16817 // registers", where one is for 32-bit and the other for 64-bit. We don't in getRegForInlineAsmConstraint()
16870 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') { in getRegForInlineAsmConstraint()
16872 // will not recognize the correct register (their names will be VSL{0-31} in getRegForInlineAsmConstraint()
16873 // and V{0-31} so they won't match). So we match them here. in getRegForInlineAsmConstraint()
16880 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); in getRegForInlineAsmConstraint()
16903 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers in getRegForInlineAsmConstraint()
16904 // (which we call X[0-9]+). If a 64-bit value has been requested, and a in getRegForInlineAsmConstraint()
16905 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent in getRegForInlineAsmConstraint()
16911 return std::make_pair(TRI->getMatchingSuperReg(R.first, in getRegForInlineAsmConstraint()
16933 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16959 int64_t Value = CST->getSExtValue(); in LowerAsmOperandForConstraint()
16964 case 'I': // "I" is a signed 16-bit constant. in LowerAsmOperandForConstraint()
16968 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. in LowerAsmOperandForConstraint()
16972 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. in LowerAsmOperandForConstraint()
16976 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. in LowerAsmOperandForConstraint()
16992 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. in LowerAsmOperandForConstraint()
16993 if (isInt<16>(-Value)) in LowerAsmOperandForConstraint()
17017 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal(); in CollectTargetIntrinsicOperands()
17026 // isLegalAddressingMode - Return true if the addressing mode represented
17038 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector()) in isLegalAddressingMode()
17041 // PPC allows a sign-extended 16-bit immediate field. in isLegalAddressingMode()
17042 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) in isLegalAddressingMode()
17086 FuncInfo->setLRStoreRequired(); in LowerRETURNADDR()
17098 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, in LowerRETURNADDR()
17133 while (Depth--) in LowerFRAMEADDR()
17161 // 32-bit SVR4 ABI access everything as got-indirect. in isAccessedAsGotIndirect()
17176 // JumpTable and BlockAddress are accessed as got-indirect. in isAccessedAsGotIndirect()
17181 return Subtarget.isGVIndirectSymbol(G->getGlobal()); in isAccessedAsGotIndirect()
17263 Info.offset = -VT.getStoreSize()+1; in getTgtMemIntrinsic()
17264 Info.size = 2*VT.getStoreSize()-1; in getTgtMemIntrinsic()
17303 Info.offset = -VT.getStoreSize()+1; in getTgtMemIntrinsic()
17304 Info.size = 2*VT.getStoreSize()-1; in getTgtMemIntrinsic()
17348 /// target-independent logic.
17381 assert(Ty->isIntegerTy()); in shouldConvertConstantLoadToIntImm()
17383 unsigned BitSize = Ty->getPrimitiveSizeInBits(); in shouldConvertConstantLoadToIntImm()
17388 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) in isTruncateFree()
17390 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); in isTruncateFree()
17391 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); in isTruncateFree()
17407 EVT MemVT = LD->getMemoryVT(); in isZExtFree()
17410 (LD->getExtensionType() == ISD::NON_EXTLOAD || in isZExtFree()
17411 LD->getExtensionType() == ISD::ZEXTLOAD)) in isZExtFree()
17416 // - 32-bit shifts with a zext to i64 in isZExtFree()
17417 // - zext after ctlz, bswap, etc. in isZExtFree()
17418 // - zext after and by a constant mask in isZExtFree()
17446 // PowerPC supports unaligned memory access for simple non-vector types. in allowsMisalignedMemoryAccesses()
17484 if (!ConstNode->getAPIntValue().isSignedIntN(64)) in decomposeMulByConstant()
17492 int64_t Imm = ConstNode->getSExtValue(); in decomposeMulByConstant()
17498 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) || in decomposeMulByConstant()
17499 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm)) in decomposeMulByConstant()
17515 switch (Ty->getScalarType()->getTypeID()) { in isFMAFasterThanFMulAndFAdd()
17528 if (!I->hasOneUse()) in isProfitableToHoist()
17531 Instruction *User = I->user_back(); in isProfitableToHoist()
17534 switch (I->getOpcode()) { in isProfitableToHoist()
17537 if (User->getOpcode() != Instruction::FSub && in isProfitableToHoist()
17538 User->getOpcode() != Instruction::FAdd) in isProfitableToHoist()
17542 const Function *F = I->getFunction(); in isProfitableToHoist()
17543 const DataLayout &DL = F->getDataLayout(); in isProfitableToHoist()
17544 Type *Ty = User->getOperand(0)->getType(); in isProfitableToHoist()
17561 if (!LI->isUnordered()) in isProfitableToHoist()
17564 if (User->getOpcode() != Instruction::Store) in isProfitableToHoist()
17567 if (I->getType()->getTypeID() != Type::FloatTyID) in isProfitableToHoist()
17580 // LR is a callee-save register, but we must treat it as clobbered by any call in getScratchRegisters()
17582 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies in getScratchRegisters()
17628 // For example, (fma -a b c) = (fnmsub a b c)
17649 SDNodeFlags Flags = Op.getNode()->getFlags(); in getNegatedExpression()
17672 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1. in getNegatedExpression()
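The signed-zero hazard mentioned above can be reproduced on the host (a sketch assuming IEEE-754 semantics and a correctly rounded std::fma; the function name is made up): negating the whole fnmsub result and folding the negation into an operand disagree in the sign of zero.

#include <cassert>
#include <cmath>
static void signOfZeroHazard() {
  double A = 1.0, B = 1.0, C = 1.0;
  double FNMSub = -std::fma(A, B, -C);    // -(A*B - C) == -0.0 here
  double NegFolded = std::fma(-A, B, C);  // (-A)*B + C == +0.0 here
  assert(std::signbit(FNMSub) && !std::signbit(NegFolded));
}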
17751 // For exact values in the range [-16, 15] we can materialize the float. in isFPImmLegal()
17752 if (IsExact && IntResult <= 15 && IntResult >= -16) in isFPImmLegal()
17762 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17765 SDValue N0 = N->getOperand(0); in stripModuloOnShift()
17766 SDValue N1 = N->getOperand(1); in stripModuloOnShift()
17769 unsigned Opcode = N->getOpcode(); in stripModuloOnShift()
17787 N1->getOpcode() == ISD::AND) in stripModuloOnShift()
17788 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) in stripModuloOnShift()
17789 if (Mask->getZExtValue() == OpSizeInBits - 1) in stripModuloOnShift()
17790 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0)); in stripModuloOnShift()
17799 SDValue N0 = N->getOperand(0); in combineSHL()
17800 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1)); in combineSHL()
17804 N->getValueType(0) != MVT::i64) in combineSHL()
17819 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32); in combineSHL()
17821 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0), in combineSHL()
17839 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17840 // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17841 // When C is zero, the expression (addi Z, -C) can be simplified to Z
17842 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
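A host-side sketch (not DAG code; it ignores the immediate-range requirement and uses an invented name) of the carry identity the transform relies on: addic T, -1 produces a carry exactly when T != 0, and addze folds that carry into X, so X + (Z != C) needs no explicit compare.

#include <cstdint>
static uint64_t addZextSetne(uint64_t X, uint64_t Z, uint64_t C) {
  uint64_t T = Z - C;       // addi Z, -C (omitted entirely when C == 0)
  uint64_t Sum = T + ~0ULL; // addic T, -1
  unsigned CA = Sum < T;    // carry out of the addic: set iff T != 0
  return X + CA;            // addze X  ==  X + (Z != C)
}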
17848 SDValue LHS = N->getOperand(0); in combineADDToADDZE()
17849 SDValue RHS = N->getOperand(1); in combineADDToADDZE()
17862 int64_t NegConstant = 0 - Constant->getSExtValue(); in combineADDToADDZE()
17864 // -C is required to be [-32768, 32767]. in combineADDToADDZE()
17885 int64_t NegConstant = 0 - Constant->getSExtValue(); in combineADDToADDZE()
17887 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) { in combineADDToADDZE()
17891 // --> addze X, (addic Z, -1).carry in combineADDToADDZE()
17893 // add X, (zext(setne Z, C))-- in combineADDToADDZE()
17894 // \ when -32768 <= -C <= 32767 && C != 0 in combineADDToADDZE()
17895 // --> addze X, (addic (addi Z, -C), -1).carry in combineADDToADDZE()
17900 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64)); in combineADDToADDZE()
17906 // --> addze X, (subfic Z, 0).carry in combineADDToADDZE()
17908 // add X, (zext(sete Z, C))-- in combineADDToADDZE()
17909 // \ when -32768 <= -C <= 32767 && C != 0 in combineADDToADDZE()
17910 // --> addze X, (subfic (addi Z, -C), 0).carry in combineADDToADDZE()
17936 SDValue LHS = N->getOperand(0); in combineADDToMAT_PCREL_ADDR()
17937 SDValue RHS = N->getOperand(1); in combineADDToMAT_PCREL_ADDR()
17953 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue(); in combineADDToMAT_PCREL_ADDR()
17963 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0), in combineADDToMAT_PCREL_ADDR()
17964 NewOffset, GSDN->getTargetFlags()); in combineADDToMAT_PCREL_ADDR()
17966 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA); in combineADDToMAT_PCREL_ADDR()
17999 SDValue Op0 = N->getOperand(0); in combineTRUNCATE()
18002 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64) in combineTRUNCATE()
18011 if (!ConstNode || ConstNode->getZExtValue() != 64) in combineTRUNCATE()
18035 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1)); in combineMUL()
18041 isOperationLegal(ISD::MUL, N->getValueType(0))) in combineMUL()
18044 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool { in combineMUL()
18045 switch (this->Subtarget.getCPUDirective()) { in combineMUL()
18066 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6. in combineMUL()
18072 EVT VT = N->getValueType(0); in combineMUL()
18075 const APInt &MulAmt = ConstOpOrElement->getAPIntValue(); in combineMUL()
18079 if ((MulAmtAbs - 1).isPowerOf2()) { in combineMUL()
18081 // (mul x, -(2^N + 1)) => -(add (shl x, N), x) in combineMUL()
18086 SDValue Op0 = N->getOperand(0); in combineMUL()
18088 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMUL()
18089 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT)); in combineMUL()
18097 // (mul x, 2^N - 1) => (sub (shl x, N), x) in combineMUL()
18098 // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) in combineMUL()
18103 SDValue Op0 = N->getOperand(0); in combineMUL()
18105 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), in combineMUL()
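The shift/add strength reductions listed above, checked on the host for N == 4 (illustrative only; assumes operands small enough that none of the products overflow).

#include <cassert>
#include <cstdint>
static void mulByConstIdentities(int64_t X) {
  assert(X * 17 == (X << 4) + X);     // mul x, 2^N + 1    => add (shl x, N), x
  assert(X * -17 == -((X << 4) + X)); // mul x, -(2^N + 1) => -(add (shl x, N), x)
  assert(X * 15 == (X << 4) - X);     // mul x, 2^N - 1    => sub (shl x, N), x
  assert(X * -15 == X - (X << 4));    // mul x, -(2^N - 1) => sub x, (shl x, N)
}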
18118 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18122 SDValue N0 = N->getOperand(0); in combineFMALike()
18123 SDValue N1 = N->getOperand(1); in combineFMALike()
18124 SDValue N2 = N->getOperand(2); in combineFMALike()
18125 SDNodeFlags Flags = N->getFlags(); in combineFMALike()
18126 EVT VT = N->getValueType(0); in combineFMALike()
18129 unsigned Opc = N->getOpcode(); in combineFMALike()
18137 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0 in combineFMALike()
18138 // since (fnmsub a b c)=-0 while c-ab=+0. in combineFMALike()
18156 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs. in mayBeEmittedAsTailCall()
18161 if (!CI->isTailCall()) in mayBeEmittedAsTailCall()
18164 // If sibling calls have been disabled and tail-calls aren't guaranteed in mayBeEmittedAsTailCall()
18171 const Function *Callee = CI->getCalledFunction(); in mayBeEmittedAsTailCall()
18172 if (!Callee || Callee->isVarArg()) in mayBeEmittedAsTailCall()
18176 const Function *Caller = CI->getParent()->getParent(); in mayBeEmittedAsTailCall()
18177 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(), in mayBeEmittedAsTailCall()
18178 CI->getCallingConv())) in mayBeEmittedAsTailCall()
18181 // If the function is local then we have a good chance at tail-calling it in mayBeEmittedAsTailCall()
18190 // Can't handle constants wider than 64-bits. in isMaskAndCmp0FoldingBeneficial()
18191 if (CI->getBitWidth() > 64) in isMaskAndCmp0FoldingBeneficial()
18193 int64_t ConstVal = CI->getZExtValue(); in isMaskAndCmp0FoldingBeneficial()
18198 // For non-constant masks, we can always use the record-form and. in isMaskAndCmp0FoldingBeneficial()
18202 /// getAddrModeForFlags - Based on the set of address flags, select the most
18208 // Unaligned D-Forms are tried first, followed by the aligned D-Forms. in getAddrModeForFlags()
18221 // If no other forms are selected, return an X-Form as it is the most in getAddrModeForFlags()
18236 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value(); in setAlignFlagsForFI()
18261 // 4-byte or 16-byte aligned. in computeFlagsForAddressComputation()
18270 // All 32-bit constants can be computed as LIS + Disp. in computeFlagsForAddressComputation()
18271 const APInt &ConstImm = CN->getAPIntValue(); in computeFlagsForAddressComputation()
18272 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants. in computeFlagsForAddressComputation()
18277 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants. in computeFlagsForAddressComputation()
18283 // - Register + Imm16 (possibly a multiple of 4/16) in computeFlagsForAddressComputation()
18284 // - Register + Imm34 in computeFlagsForAddressComputation()
18285 // - Register + PPCISD::Lo in computeFlagsForAddressComputation()
18286 // - Register + Register in computeFlagsForAddressComputation()
18290 const APInt &ConstImm = CN->getAPIntValue(); in computeFlagsForAddressComputation()
18292 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates. in computeFlagsForAddressComputation()
18297 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates. in computeFlagsForAddressComputation()
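As a rough illustration of how those immediate widths get bucketed, here is a hypothetical helper (not the actual flag computation; the enum and function names are invented) that mirrors the signed-width tests used above:

#include <cassert>
#include <cstdint>

enum OffsetClass { RPlusSImm16, RPlusSImm16Mult4, RPlusSImm34, RPlusR };

// Sketch only: the real code also tracks multiples of 16 for DQ-Forms and a
// separate PPCISD::Lo case; this keeps just the width checks for clarity.
static OffsetClass classifyOffset(int64_t Imm) {
  if (Imm >= INT16_MIN && Imm <= INT16_MAX)
    return (Imm % 4 == 0) ? RPlusSImm16Mult4 : RPlusSImm16; // DS-Forms want a multiple of 4
  if (Imm >= -(INT64_C(1) << 33) && Imm < (INT64_C(1) << 33))
    return RPlusSImm34; // only usable when prefixed instructions are available
  return RPlusR;        // otherwise materialize the offset and use an X-Form
}

int main() {
  assert(classifyOffset(-4) == RPlusSImm16Mult4);
  assert(classifyOffset(3) == RPlusSImm16);
  assert(classifyOffset(40000) == RPlusSImm34);
  assert(classifyOffset(INT64_C(1) << 40) == RPlusR);
  return 0;
}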
18318 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18342 unsigned ParentOp = Parent->getOpcode(); in computeMOFlags()
18345 unsigned ID = Parent->getConstantOperandVal(1); in computeMOFlags()
18348 ? Parent->getOperand(2) in computeMOFlags()
18349 : Parent->getOperand(3); in computeMOFlags()
18357 // or pre-increment instruction. in computeMOFlags()
18359 if (LSB->isIndexed()) in computeMOFlags()
18362 // Compute in-memory type flags. This is based on whether there are scalars, in computeMOFlags()
18366 EVT MemVT = MN->getMemoryVT(); in computeMOFlags()
18382 "256-bit vectors are only available when paired vector memops is " in computeMOFlags()
18401 switch (LN->getExtensionType()) { in computeMOFlags()
18424 // If we don't have prefixed instructions, 34-bit constants should be in computeMOFlags()
18425 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms. in computeMOFlags()
18436 /// SelectForceXFormMode - Given the specified address, force it to be
18455 // value and a 16-bit signed constant and both have a single use. in SelectForceXFormMode()
18501 for (const SDValue &N : Op->op_values()) { in lowerToLibCall()
18516 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy()); in lowerToLibCall()
18542 SDNodeFlags Flags = Op.getNode()->getFlags(); in isLowringToMASSFiniteSafe()
18548 return Op.getNode()->getFlags().hasApproximateFuncs(); in isLowringToMASSSafe()
18602 // If we happen to match to an aligned D-Form, check if the Frame Index is
18603 // adequately aligned. If it is not, reset the mode to match to X-Form.
18613 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), in SelectOptimalAddrMode()
18629 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned. in SelectOptimalAddrMode()
18630 // Select an X-Form load if it is not. in SelectOptimalAddrMode()
18633 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node. in SelectOptimalAddrMode()
18636 "Must be using PC-Relative calls when a valid PC-Relative node is " in SelectOptimalAddrMode()
18646 // This is a register plus a 16-bit immediate. The base will be the in SelectOptimalAddrMode()
18652 int16_t Imm = Op1->getAsZExtVal(); in SelectOptimalAddrMode()
18657 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); in SelectOptimalAddrMode()
18658 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); in SelectOptimalAddrMode()
18675 // zero or load-immediate-shifted and the displacement will be in SelectOptimalAddrMode()
18679 EVT CNType = CN->getValueType(0); in SelectOptimalAddrMode()
18680 uint64_t CNImm = CN->getZExtValue(); in SelectOptimalAddrMode()
18681 // If this address fits entirely in a 16-bit sext immediate field, codegen in SelectOptimalAddrMode()
18690 // Handle 32-bit sext immediate with LIS + Addr mode. in SelectOptimalAddrMode()
18697 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32); in SelectOptimalAddrMode()
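The (Addr - (int16_t)Addr) >> 16 expression above compensates for the sign extension the low 16-bit displacement later receives; a small self-checking sketch of the split (illustrative, not from the source):

#include <cassert>
#include <cstdint>

int main() {
  int32_t Addr = 0x12348000;                 // low halfword has its sign bit set
  int16_t Lo = (int16_t)Addr;                // sign-extends to -0x8000
  int32_t Hi = (Addr - (int16_t)Addr) >> 16; // 0x1235, one above the raw top half
  assert((Hi << 16) + Lo == Addr);           // LIS Hi, then a displacement of Lo
  return 0;
}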
18703 // Otherwise, the PPC::MOF_NotAdd flag is set. Load/Store is non-foldable. in SelectOptimalAddrMode()
18706 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); in SelectOptimalAddrMode()
18707 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); in SelectOptimalAddrMode()
18717 // N is an Add/OR node, and its operand is a 34-bit signed immediate. in SelectOptimalAddrMode()
18720 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); in SelectOptimalAddrMode()
18724 // The address is a 34-bit signed immediate. in SelectOptimalAddrMode()
18731 // When selecting PC-Relative instructions, "Base" is not utilized as in SelectOptimalAddrMode()
18738 default: { // By default, X-Form is always available to be selected. in SelectOptimalAddrMode()
18768 unsigned Size = AI->getType()->getPrimitiveSizeInBits(); in shouldExpandAtomicRMWInIR()
18772 switch (AI->getOperation()) { in shouldExpandAtomicRMWInIR()
18785 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits(); in shouldExpandAtomicCmpXchgInIR()
18817 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); in emitMaskedAtomicRMWIntrinsic()
18818 Type *ValTy = Incr->getType(); in emitMaskedAtomicRMWIntrinsic()
18819 assert(ValTy->getPrimitiveSizeInBits() == 128); in emitMaskedAtomicRMWIntrinsic()
18821 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation())); in emitMaskedAtomicRMWIntrinsic()
18822 Type *Int64Ty = Type::getInt64Ty(M->getContext()); in emitMaskedAtomicRMWIntrinsic()
18839 Module *M = Builder.GetInsertBlock()->getParent()->getParent(); in emitMaskedAtomicCmpXchgIntrinsic()
18840 Type *ValTy = CmpVal->getType(); in emitMaskedAtomicCmpXchgIntrinsic()
18841 assert(ValTy->getPrimitiveSizeInBits() == 128); in emitMaskedAtomicCmpXchgIntrinsic()
18844 Type *Int64Ty = Type::getInt64Ty(M->getContext()); in emitMaskedAtomicCmpXchgIntrinsic()
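Judging from the Int64Ty uses and the 128-bit asserts above, the quadword atomic intrinsics appear to take their operand as two i64 halves; that split is easy to picture in plain C++ (an assumption for illustration only; requires a compiler with __int128 support):

#include <cassert>
#include <cstdint>

int main() {
  unsigned __int128 V =
      ((unsigned __int128)0x0123456789abcdefULL << 64) | 0xfedcba9876543210ULL;
  uint64_t Lo = (uint64_t)V;         // low doubleword, passed as one i64 value
  uint64_t Hi = (uint64_t)(V >> 64); // high doubleword, passed as the other
  assert((((unsigned __int128)Hi << 64) | Lo) == V);
  return 0;
}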