Lines Matching "fsin" and "output"
1 //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
69 #define DEBUG_TYPE "nvptx-lower"
76 "nvptx-sched4reg",
80 "nvptx-fma-level", cl::Hidden,
86 "nvptx-prec-divf32", cl::Hidden,
92 "nvptx-prec-sqrtf32", cl::Hidden,
97 "nvptx-force-min-byval-param-align", cl::Hidden,
98 cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval"
104 // If nvptx-prec-divf32=N is used on the command-line, always honor it in getDivF32Level()
117 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it in usePrecSqrtF32()
126 return MF.getDenormalMode(APFloat::IEEEsingle()).Output == in useF32FTZ()
162 /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
165 /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
175 // Special case for i128 - decompose to (i64, i64) in ComputePTXValueVTs()
176 if (Ty->isIntegerTy(128)) { in ComputePTXValueVTs()
181 Offsets->push_back(StartingOffset + 0); in ComputePTXValueVTs()
182 Offsets->push_back(StartingOffset + 8); in ComputePTXValueVTs()
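
A standalone illustration of what the i128 special case above reports (names and container are hypothetical, not from the file): two i64 parts at byte offsets 0 and 8 from the starting offset.

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Sketch only: an i128 at `StartingOffset` contributes two 64-bit pieces,
    // low half first, matching the two push_backs above.
    std::vector<std::pair<unsigned, uint64_t>> splitI128(uint64_t StartingOffset) {
      return {{64u, StartingOffset + 0}, {64u, StartingOffset + 8}};
    }
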
192 for (auto *EI : STy->elements()) { in ComputePTXValueVTs()
194 StartingOffset + SL->getElementOffset(ElementNum)); in ComputePTXValueVTs()
236 Offsets->push_back(Off + j * EltVT.getStoreSize()); in ComputePTXValueVTs()
241 Offsets->push_back(Off); in ComputePTXValueVTs()
256 "Promotion is not suitable for scalars of size larger than 64-bits"); in PromoteScalarIntegerPTX()
298 if (Offsets[Idx] & (AccessSize - 1)) in CanMergeParamLoadStoresStartingAt()
317 // PTX ISA can only deal with 2- and 4-element vector ops. in CanMergeParamLoadStoresStartingAt()
327 if (Offsets[j] - Offsets[j - 1] != EltSize) in CanMergeParamLoadStoresStartingAt()
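
A minimal sketch of the two tests quoted above, assuming equally sized elements and a precomputed offset list (function and parameter names are illustrative):

    #include <cstdint>
    #include <vector>

    // True if NumElts elements of size EltSize starting at Offsets[Idx] are
    // naturally aligned for AccessSize and packed back-to-back, mirroring the
    // alignment and contiguity checks shown above.
    bool canMergeFrom(const std::vector<uint64_t> &Offsets, unsigned Idx,
                      unsigned NumElts, uint64_t EltSize, uint64_t AccessSize) {
      if (Offsets[Idx] & (AccessSize - 1))          // misaligned vector access
        return false;
      for (unsigned J = Idx + 1; J < Idx + NumElts; ++J)
        if (Offsets[J] - Offsets[J - 1] != EltSize) // gap between elements
          return false;
      return true;
    }
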
334 // Flags for tracking per-element vectorization state of loads/stores
340 // Scalar is effectively a 1-element vector.
364 // Check what we can vectorize using 128/64/32-bit accesses. in VectorizePTXValueVTs()
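
A rough guess at how that scan groups elements, using the PVF_FIRST/PVF_INNER/PVF_LAST/PVF_SCALAR flags tracked above; the real VectorizePTXValueVTs handles more cases, so treat this only as a sketch of the greedy widest-access-first idea:

    #include <vector>

    enum ParamVectorizationFlags { PVF_INNER, PVF_FIRST, PVF_LAST, PVF_SCALAR };

    // Greedily try 128-, 64-, then 32-bit accesses at each position; PTX only
    // has 2- and 4-element vector ops, so anything else stays scalar.
    std::vector<ParamVectorizationFlags>
    vectorizeGreedy(unsigned NumElts, unsigned EltSize,
                    bool (*CanMerge)(unsigned Idx, unsigned AccessBytes)) {
      std::vector<ParamVectorizationFlags> Flags(NumElts, PVF_SCALAR);
      for (unsigned I = 0; I < NumElts;) {
        unsigned Grouped = 1;
        for (unsigned AccessBytes : {16u, 8u, 4u}) {
          if (EltSize == 0 || AccessBytes % EltSize != 0)
            continue;
          unsigned Elts = AccessBytes / EltSize;
          if ((Elts == 2 || Elts == 4) && I + Elts <= NumElts &&
              CanMerge(I, AccessBytes)) {
            Flags[I] = PVF_FIRST;
            for (unsigned J = 1; J + 1 < Elts; ++J)
              Flags[I + J] = PVF_INNER;
            Flags[I + Elts - 1] = PVF_LAST;
            Grouped = Elts;
            break;
          }
        }
        I += Grouped;
      }
      return Flags;
    }
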
570 // that don't have h/w rotation we lower them to multi-instruction assembly. in NVPTXTargetLowering()
737 // user passed --nvptx-no-fp16-math. The flag is useful because, in NVPTXTargetLowering()
820 {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS}) { in NVPTXTargetLowering()
863 // Custom lowering for inline asm with 128-bit operands in NVPTXTargetLowering()
1372 auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace()); in LowerGlobalAddress()
1373 Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT); in LowerGlobalAddress()
1378 return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) || in IsTypePassedAsArray()
1379 Ty->isHalfTy() || Ty->isBFloatTy(); in IsTypePassedAsArray()
1390 assert(isABI && "Non-ABI compilation is not supported"); in getPrototype()
1398 if (retTy->getTypeID() == Type::VoidTyID) { in getPrototype()
1402 if ((retTy->isFloatingPointTy() || retTy->isIntegerTy()) && in getPrototype()
1406 size = ITy->getBitWidth(); in getPrototype()
1408 assert(retTy->isFloatingPointTy() && in getPrototype()
1410 size = retTy->getPrimitiveSizeInBits(); in getPrototype()
1421 O << ".param .align " << (retAlignment ? retAlignment->value() : 0) in getPrototype()
1432 unsigned NumArgs = VAInfo ? VAInfo->first : Args.size(); in getPrototype()
1451 OIdx += len - 1; in getPrototype()
1461 sz = cast<IntegerType>(Ty)->getBitWidth(); in getPrototype()
1466 sz = Ty->getPrimitiveSizeInBits(); in getPrototype()
1486 O << (first ? "" : ",") << " .param .align " << VAInfo->second in getPrototype()
1509 const Function *DirectCallee = CB->getCalledFunction(); in getArgumentAlignment()
1552 // Use byte-store when the param address of the argument value is unaligned.
1584 // Use byte-load when the param address of the returned value is unaligned.
1654 assert(isABI && "Non-ABI compilation is not supported"); in LowerCall()
1668 // initially set to 0, so it can be used for non-variadic arguments (which use in LowerCall()
1688 // * if there is a vector argument with more than typical vector-length in LowerCall()
1716 ArgAlign = getFunctionByValParamAlign(CB->getCalledFunction(), ETy, in LowerCall()
1768 // than 32-bits are sign extended or zero extended, depending on in LowerCall()
1772 Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32; in LowerCall()
1810 // Use 16-bit registers for small stores as it's the in LowerCall()
1848 unsigned NumElts = StoreOperands.size() - 3; in LowerCall()
1893 --OIdx; in LowerCall()
1908 // .param .align N .b8 retval0[<size-in-bytes>], or in LowerCall()
1909 // .param .b<size-in-bits> retval0 in LowerCall()
1925 Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32), in LowerCall()
1943 VADeclareParam->getVTList(), DeclareParamOps); in LowerCall()
1960 CalleeFunc->addFnAttr("nvptx-libcall-callee", "true"); in LowerCall()
1976 CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1))) in LowerCall()
1979 const char *ProtoStr = nvTM->getStrPool().save(Proto).data(); in LowerCall()
2017 if (i == (e - 1)) in LowerCall()
2064 int VecIdx = -1; // Index of the first element of the vector. in LowerCall()
2067 // 32-bits are sign extended or zero extended, depending on whether in LowerCall()
2070 RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; in LowerCall()
2097 if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType() && in LowerCall()
2100 assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); in LowerCall()
2113 assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); in LowerCall()
2159 VecIdx = -1; in LowerCall()
2223 DAG.getContext()->diagnose(NoDynamicAlloca); in LowerDYNAMIC_STACKALLOC()
2231 uint64_t Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); in LowerDYNAMIC_STACKALLOC()
2234 // The size for the PTX alloca instruction is 64-bit for m64 and 32-bit for m32. in LowerDYNAMIC_STACKALLOC()
2235 if (nvTM->is64Bit()) in LowerDYNAMIC_STACKALLOC()
2243 nvTM->is64Bit() ? MVT::i64 : MVT::i32, AllocOps); in LowerDYNAMIC_STACKALLOC()
2257 unsigned NumOperands = Node->getNumOperands(); in LowerCONCAT_VECTORS()
2259 SDValue SubOp = Node->getOperand(i); in LowerCONCAT_VECTORS()
2260 EVT VVT = SubOp.getNode()->getValueType(0); in LowerCONCAT_VECTORS()
2268 return DAG.getBuildVector(Node->getValueType(0), dl, Ops); in LowerCONCAT_VECTORS()
2272 // would get lowered as two constant loads and vector-packing move.
2277 EVT VT = Op->getValueType(0); in LowerBUILD_VECTOR()
2283 if (!llvm::all_of(Op->ops(), [](SDValue Operand) { in LowerBUILD_VECTOR()
2284 return Operand->isUndef() || isa<ConstantSDNode>(Operand) || in LowerBUILD_VECTOR()
2287 // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us in LowerBUILD_VECTOR()
2293 DAG.getAnyExtOrTrunc(Op->getOperand(1), DL, MVT::i32), in LowerBUILD_VECTOR()
2294 DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8); in LowerBUILD_VECTOR()
2297 DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32), in LowerBUILD_VECTOR()
2301 DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32), in LowerBUILD_VECTOR()
2309 auto GetOperand = [](SDValue Op, int N) -> APInt { in LowerBUILD_VECTOR()
2310 const SDValue &Operand = Op->getOperand(N); in LowerBUILD_VECTOR()
2311 EVT VT = Op->getValueType(0); in LowerBUILD_VECTOR()
2312 if (Operand->isUndef()) in LowerBUILD_VECTOR()
2316 Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt(); in LowerBUILD_VECTOR()
2318 Value = Operand->getAsAPIntVal(); in LowerBUILD_VECTOR()
2337 return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const); in LowerBUILD_VECTOR()
2342 SDValue Index = Op->getOperand(1); in LowerEXTRACT_VECTOR_ELT()
2343 SDValue Vector = Op->getOperand(0); in LowerEXTRACT_VECTOR_ELT()
2355 return DAG.getAnyExtOrTrunc(BFE, DL, Op->getValueType(0)); in LowerEXTRACT_VECTOR_ELT()
2377 SDValue Vector = Op->getOperand(0); in LowerINSERT_VECTOR_ELT()
2383 SDValue Value = Op->getOperand(1); in LowerINSERT_VECTOR_ELT()
2384 if (Value->isUndef()) in LowerINSERT_VECTOR_ELT()
2387 SDValue Index = Op->getOperand(2); in LowerINSERT_VECTOR_ELT()
2396 return DAG.getNode(ISD::BITCAST, DL, Op->getValueType(0), BFI); in LowerINSERT_VECTOR_ELT()
2410 for (auto I : llvm::enumerate(SVN->getMask())) { in LowerVECTOR_SHUFFLE()
2411 if (I.value() != -1) // -1 is a placeholder for undef. in LowerVECTOR_SHUFFLE()
2420 /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
2427 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); in LowerShiftRightParts()
2453 // - if (Amt>=size) then in LowerShiftRightParts()
2454 // dLo = aHi >> (Amt-size) in LowerShiftRightParts()
2457 // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) in LowerShiftRightParts()
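
Written out by hand for size = 64 (an analogue of the lowering, not the DAG code itself), the two cases above become a 128-bit logical right shift built from 64-bit halves:

    #include <cstdint>

    // Logical right shift of a 128-bit value held as {Hi, Lo}; assumes Amt < 128.
    void lshr128(uint64_t Hi, uint64_t Lo, unsigned Amt,
                 uint64_t &DHi, uint64_t &DLo) {
      if (Amt >= 64) {                       // whole low word shifted away
        DLo = Hi >> (Amt - 64);
        DHi = 0;
      } else if (Amt == 0) {                 // avoid the undefined 64-bit shift below
        DLo = Lo;
        DHi = Hi;
      } else {
        DLo = (Lo >> Amt) | (Hi << (64 - Amt));
        DHi = Hi >> Amt;
      }
    }
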
2481 /// LowerShiftLeftParts - Lower SHL_PARTS, which
2488 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); in LowerShiftLeftParts()
2513 // - if (Amt>=size) then in LowerShiftLeftParts()
2515 // dLo = aLo << (Amt-size) in LowerShiftLeftParts()
2518 // dHi = (aHi << Amt) | (aLo >> (size-Amt)) in LowerShiftLeftParts()
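
And the companion left-shift decomposition, again as a plain 64-bit-halves sketch rather than the actual lowering:

    #include <cstdint>

    // Shift-left of a 128-bit value held as {Hi, Lo}; assumes Amt < 128.
    void shl128(uint64_t Hi, uint64_t Lo, unsigned Amt,
                uint64_t &DHi, uint64_t &DLo) {
      if (Amt >= 64) {                       // low word moves entirely into Hi
        DHi = Lo << (Amt - 64);
        DLo = 0;
      } else if (Amt == 0) {
        DHi = Hi;
        DLo = Lo;
      } else {
        DHi = (Hi << Amt) | (Lo >> (64 - Amt));
        DLo = Lo << Amt;
      }
    }
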
2556 // float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
2568 // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f)) in LowerFROUND32()
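
The formula in that comment is plain round-half-away-from-zero; as scalar C++ (valid only while the value fits in an int, which the surrounding lowering has to guard separately):

    // Sketch of the quoted FROUND formula for f32.
    float roundHalfAwayFromZero(float A) {
      return (float)(int)(A > 0 ? (A + 0.5f) : (A - 0.5f));
    }
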
2672 return TLI->expandFP_ROUND(Op.getNode(), DAG); in LowerFP_ROUND()
2675 // This combination was the first to support f32 -> bf16. in LowerFP_ROUND()
2682 // Round-inexact-to-odd f64 to f32, then do the final rounding using in LowerFP_ROUND()
2683 // the hardware f32 -> bf16 instruction. in LowerFP_ROUND()
2684 SDValue rod = TLI->expandRoundInexactToOdd( in LowerFP_ROUND()
2691 return TLI->expandFP_ROUND(Op.getNode(), DAG); in LowerFP_ROUND()
2736 llvm::transform(Op->ops(), std::back_inserter(ScalarArgs), in LowerVectorArith()
2826 const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); in LowerVAARG()
2827 EVT VT = Node->getValueType(0); in LowerVAARG()
2829 SDValue Tmp1 = Node->getOperand(0); in LowerVAARG()
2830 SDValue Tmp2 = Node->getOperand(1); in LowerVAARG()
2831 const MaybeAlign MA(Node->getConstantOperandVal(3)); in LowerVAARG()
2833 SDValue VAListLoad = DAG.getLoad(TLI->getPointerTy(DAG.getDataLayout()), DL, in LowerVAARG()
2837 if (MA && *MA > TLI->getMinStackArgumentAlignment()) { in LowerVAARG()
2840 DAG.getConstant(MA->value() - 1, DL, VAList.getValueType())); in LowerVAARG()
2844 DAG.getConstant(-(int64_t)MA->value(), DL, VAList.getValueType())); in LowerVAARG()
2866 EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout()); in LowerVASTART()
2869 SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT); in LowerVASTART()
2872 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerVASTART()
2878 SDValue Op0 = Op->getOperand(0); in LowerSelect()
2879 SDValue Op1 = Op->getOperand(1); in LowerSelect()
2880 SDValue Op2 = Op->getOperand(2); in LowerSelect()
2902 EVT MemVT = Load->getMemoryVT(); in LowerLOAD()
2904 MemVT, *Load->getMemOperand())) { in LowerLOAD()
2916 // v1 = ld i8* addr (-> i16)
2922 assert(LD->getExtensionType() == ISD::NON_EXTLOAD); in LowerLOADi1()
2923 assert(Node->getValueType(0) == MVT::i1 && in LowerLOADi1()
2925 SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(), in LowerLOADi1()
2926 LD->getBasePtr(), LD->getPointerInfo(), in LowerLOADi1()
2927 MVT::i8, LD->getAlign(), in LowerLOADi1()
2928 LD->getMemOperand()->getFlags()); in LowerLOADi1()
2933 SDValue Ops[] = { result, LD->getChain() }; in LowerLOADi1()
2939 EVT VT = Store->getMemoryVT(); in LowerSTORE()
2948 VT, *Store->getMemOperand())) in LowerSTORE()
2964 SDValue Val = N->getOperand(1); in LowerSTOREVector()
3001 Align Alignment = MemSD->getAlign(); in LowerSTOREVector()
3047 Ops.push_back(N->getOperand(0)); in LowerSTOREVector()
3050 // Combine f16,f16 -> v2f16 in LowerSTOREVector()
3073 Ops.append(N->op_begin() + 2, N->op_end()); in LowerSTOREVector()
3077 MemSD->getMemoryVT(), MemSD->getMemOperand()); in LowerSTOREVector()
3094 SDValue Tmp1 = ST->getChain(); in LowerSTOREi1()
3095 SDValue Tmp2 = ST->getBasePtr(); in LowerSTOREi1()
3096 SDValue Tmp3 = ST->getValue(); in LowerSTOREi1()
3100 DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8, in LowerSTOREi1()
3101 ST->getAlign(), ST->getMemOperand()->getFlags()); in LowerSTOREi1()
3107 // Change the CopyToReg to take in two 64-bit operands instead of a 128-bit in LowerCopyToReg_128()
3111 "Custom lowering for 128-bit CopyToReg only"); in LowerCopyToReg_128()
3116 SDValue Cast = DAG.getBitcast(MVT::v2i64, Op->getOperand(2)); in LowerCopyToReg_128()
3122 SmallVector<SDValue, 5> NewOps(Op->getNumOperands() + 1); in LowerCopyToReg_128()
3123 SmallVector<EVT, 3> ResultsType(Node->values()); in LowerCopyToReg_128()
3125 NewOps[0] = Op->getOperand(0); // Chain in LowerCopyToReg_128()
3126 NewOps[1] = Op->getOperand(1); // Dst Reg in LowerCopyToReg_128()
3127 NewOps[2] = Lo; // Lower 64-bit in LowerCopyToReg_128()
3128 NewOps[3] = Hi; // Higher 64-bit in LowerCopyToReg_128()
3130 NewOps[4] = Op->getOperand(3); // Glue if exists in LowerCopyToReg_128()
3159 StringRef SavedStr = nvTM->getStrPool().save( in getParamSymbol()
3173 const AttributeList &PAL = F->getAttributes(); in LowerFormalArguments()
3180 assert(isABI && "Non-ABI compilation is not supported"); in LowerFormalArguments()
3186 for (const Argument &I : F->args()) { in LowerFormalArguments()
3194 // * if there is a vector argument with more than typical vector-length in LowerFormalArguments()
3204 if (theArgs[i]->use_empty()) { in LowerFormalArguments()
3206 if (IsTypePassedAsArray(Ty) && !Ty->isVectorTy()) { in LowerFormalArguments()
3219 --InsIdx; in LowerFormalArguments()
3222 if (Ty->isVectorTy()) { in LowerFormalArguments()
3224 unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); in LowerFormalArguments()
3230 --InsIdx; in LowerFormalArguments()
3244 aggregateIsPacked = STy->isPacked(); in LowerFormalArguments()
3257 int VecIdx = -1; // Index of the first element of the current vector. in LowerFormalArguments()
3260 assert(VecIdx == -1 && "Orphaned vector."); in LowerFormalArguments()
3266 unsigned NumElts = parti - VecIdx + 1; in LowerFormalArguments()
3278 EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts); in LowerFormalArguments()
3283 EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM)); in LowerFormalArguments()
3285 const MaybeAlign PartAlign = [&]() -> MaybeAlign { in LowerFormalArguments()
3291 DL.getABITypeAlign(EltVT.getTypeForEVT(F->getContext())); in LowerFormalArguments()
3299 P.getNode()->setIROrder(i + 1); in LowerFormalArguments()
3329 VecIdx = -1; in LowerFormalArguments()
3334 --InsIdx; in LowerFormalArguments()
3351 p.getNode()->setIROrder(i + 1); in LowerFormalArguments()
3361 // Use byte-store when the param address of the return value is unaligned.
3399 assert(isABI && "Non-ABI compilation is not supported"); in LowerReturn()
3426 RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL) in LowerReturn()
3430 // 32-bits are sign extended or zero extended, depending on whether in LowerReturn()
3433 RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; in LowerReturn()
3445 // Use 16-bit registers for small load-stores as it's the in LowerReturn()
3452 if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType()) { in LowerReturn()
3455 DL.getABITypeAlign(ElementType.getTypeForEVT(RetTy->getContext())); in LowerReturn()
3482 unsigned NumElts = StoreOperands.size() - 2; in LowerReturn()
4672 Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); in getTgtMemIntrinsic()
4691 Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); in getTgtMemIntrinsic()
5086 /// getFunctionParamOptimizedAlign - since function arguments are passed via
5091 /// alignment. To allow using 128-bit vectorized loads/stores, this function
5102 if (!F || !F->hasLocalLinkage() || in getFunctionParamOptimizedAlign()
5103 F->hasAddressTaken(/*Users=*/nullptr, in getFunctionParamOptimizedAlign()
5109 assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage"); in getFunctionParamOptimizedAlign()
5130 // on non-deprecated ptxas versions. in getFunctionByValParamAlign()
5145 ParamStr << getTargetMachine().getSymbol(F)->getName(); in getParamName()
5154 /// isLegalAddressingMode - Return true if the addressing mode represented
5162 // AddrMode - This represents an addressing mode of: in isLegalAddressingMode()
5166 // - [avar] in isLegalAddressingMode()
5167 // - [areg] in isLegalAddressingMode()
5168 // - [areg+immoff] in isLegalAddressingMode()
5169 // - [immAddr] in isLegalAddressingMode()
5171 // immoff must fit in a signed 32-bit int in isLegalAddressingMode()
5193 //===----------------------------------------------------------------------===//
5195 //===----------------------------------------------------------------------===//
5197 /// getConstraintType - Given a constraint letter, return the type of
5253 //===----------------------------------------------------------------------===//
5255 //===----------------------------------------------------------------------===//
5259 // Always honor command-line argument in allowFMA()
5279 // Allow unsafe math if unsafe-fp-math attribute explicitly says so. in allowUnsafeFPMath()
5281 return F.getFnAttribute("unsafe-fp-math").getValueAsBool(); in allowUnsafeFPMath()
5286 return Const && Const->getZExtValue() == 0; in isConstZero()
5289 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
5298 // Since integer multiply-add costs the same as integer multiply in PerformADDCombineWithOperands()
5302 if (!N0.getNode()->hasOneUse()) in PerformADDCombineWithOperands()
5305 // fold (add (mul a, b), c) -> (mad a, b, c) in PerformADDCombineWithOperands()
5312 // -> (select cond, c, (mad a, b, c)) in PerformADDCombineWithOperands()
5316 if (isConstZero(N0->getOperand(1))) in PerformADDCombineWithOperands()
5318 else if (isConstZero(N0->getOperand(2))) in PerformADDCombineWithOperands()
5323 SDValue M = N0->getOperand((ZeroOpNum == 1) ? 2 : 1); in PerformADDCombineWithOperands()
5324 if (M->getOpcode() != ISD::MUL || !M.getNode()->hasOneUse()) in PerformADDCombineWithOperands()
5328 M->getOperand(0), M->getOperand(1), N1); in PerformADDCombineWithOperands()
5329 return DCI.DAG.getSelect(SDLoc(N), VT, N0->getOperand(0), in PerformADDCombineWithOperands()
5345 if (!TLI->allowFMA(DCI.DAG.getMachineFunction(), OptLevel)) in PerformFADDCombineWithOperands()
5358 for (const SDNode *User : N0.getNode()->uses()) { in PerformFADDCombineWithOperands()
5360 if (User->getOpcode() != ISD::FADD) in PerformFADDCombineWithOperands()
5366 int orderNo = N->getIROrder(); in PerformFADDCombineWithOperands()
5367 int orderNo2 = N0.getNode()->getIROrder(); in PerformFADDCombineWithOperands()
5372 if (orderNo - orderNo2 < 500) in PerformFADDCombineWithOperands()
5386 for (const SDNode *User : left->uses()) { in PerformFADDCombineWithOperands()
5387 int orderNo3 = User->getIROrder(); in PerformFADDCombineWithOperands()
5395 for (const SDNode *User : right->uses()) { in PerformFADDCombineWithOperands()
5396 int orderNo3 = User->getIROrder(); in PerformFADDCombineWithOperands()
5416 if (all_of(N->ops().drop_front(Front).drop_back(Back), in PerformStoreCombineHelper()
5417 [](const SDUse &U) { return U.get()->isUndef(); })) in PerformStoreCombineHelper()
5420 return N->getOperand(0); in PerformStoreCombineHelper()
5436 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
5444 SDValue N0 = N->getOperand(0); in PerformADDCombine()
5445 SDValue N1 = N->getOperand(1); in PerformADDCombine()
5447 // Skip non-integer, non-scalar case in PerformADDCombine()
5460 /// PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.
5465 SDValue N0 = N->getOperand(0); in PerformFADDCombine()
5466 SDValue N1 = N->getOperand(1); in PerformFADDCombine()
5485 // target-specific DAG node, the DAG combiner fails to eliminate these AND in PerformANDCombine()
5487 SDValue Val = N->getOperand(0); in PerformANDCombine()
5488 SDValue Mask = N->getOperand(1); in PerformANDCombine()
5496 // Convert BFE -> truncate i16 -> and 255 in PerformANDCombine()
5497 // To just BFE -> truncate i16, as the value already has all the bits in the in PerformANDCombine()
5507 uint64_t BFEBitsVal = BFEBits->getZExtValue(); in PerformANDCombine()
5514 uint64_t MaskVal = MaskCnst->getZExtValue(); in PerformANDCombine()
5516 if (MaskVal != (uint64_t(1) << BFEBitsVal) - 1) in PerformANDCombine()
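
The mask test there encodes when the AND is redundant: after extracting BFEBits bits, only the exact low-bit mask of that width keeps the value unchanged. A hypothetical standalone form (assumes BFEBits < 64):

    #include <cstdint>

    // True if MaskVal keeps exactly the BFEBits low bits a preceding
    // bit-field extract already guarantees, so the AND can be dropped.
    bool andIsRedundantAfterBFE(uint64_t MaskVal, uint64_t BFEBits) {
      return MaskVal == (uint64_t(1) << BFEBits) - 1;
    }
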
5521 // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and in PerformANDCombine()
5524 Val = Val->getOperand(0); in PerformANDCombine()
5527 if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { in PerformANDCombine()
5528 Val = Val->getOperand(0); in PerformANDCombine()
5531 if (Val->getOpcode() == NVPTXISD::LoadV2 || in PerformANDCombine()
5532 Val->getOpcode() == NVPTXISD::LoadV4) { in PerformANDCombine()
5539 uint64_t MaskVal = MaskCnst->getZExtValue(); in PerformANDCombine()
5551 EVT MemVT = Mem->getMemoryVT(); in PerformANDCombine()
5557 unsigned ExtType = Val->getConstantOperandVal(Val->getNumOperands() - 1); in PerformANDCombine()
5566 // Re-insert the ext as a zext. in PerformANDCombine()
5582 assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM); in PerformREMCombine()
5584 // Don't do anything at less than -O2. in PerformREMCombine()
5590 EVT VT = N->getValueType(0); in PerformREMCombine()
5591 bool IsSigned = N->getOpcode() == ISD::SREM; in PerformREMCombine()
5594 const SDValue &Num = N->getOperand(0); in PerformREMCombine()
5595 const SDValue &Den = N->getOperand(1); in PerformREMCombine()
5597 for (const SDNode *U : Num->uses()) { in PerformREMCombine()
5598 if (U->getOpcode() == DivOpc && U->getOperand(0) == Num && in PerformREMCombine()
5599 U->getOperand(1) == Den) { in PerformREMCombine()
5600 // Num % Den -> Num - (Num / Den) * Den in PerformREMCombine()
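
The rewrite named in that comment, as ordinary integer arithmetic; in the combine the division node already exists, so no extra divide is introduced (sketch only):

    // Num % Den recomputed from the quotient: Num - (Num / Den) * Den.
    int remViaExistingDiv(int Num, int Den) {
      int Quot = Num / Den;   // stands in for the already-present SDIV/UDIV node
      return Num - Quot * Den;
    }
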
5616 /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
5642 /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
5663 const APInt &Val = CI->getAPIntValue(); in AreMulWideOperandsDemotable()
5678 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
5679 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
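
Conceptually, when both operands of an M-bit multiply fit in M/2 bits, the same product can come from a widening M/2 x M/2 -> M multiply; a scalar analogue of mul.wide.s16 (illustrative, not the DAG transform):

    #include <cstdint>

    // 16 x 16 -> 32 signed widening multiply.
    int32_t mulWideS16(int16_t A, int16_t B) {
      return int32_t(A) * int32_t(B);
    }
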
5684 EVT MulType = N->getValueType(0); in TryMULWIDECombine()
5691 SDValue LHS = N->getOperand(0); in TryMULWIDECombine()
5692 SDValue RHS = N->getOperand(1); in TryMULWIDECombine()
5695 if (N->getOpcode() == ISD::MUL) { in TryMULWIDECombine()
5702 if (N->getOpcode() == ISD::SHL) { in TryMULWIDECombine()
5708 APInt ShiftAmt = ShlRHS->getAPIntValue(); in TryMULWIDECombine()
5750 return Const && Const->getZExtValue() == 1; in isConstOne()
5754 if (Add->getOpcode() != ISD::ADD) in matchMADConstOnePattern()
5757 if (isConstOne(Add->getOperand(0))) in matchMADConstOnePattern()
5758 return Add->getOperand(1); in matchMADConstOnePattern()
5760 if (isConstOne(Add->getOperand(1))) in matchMADConstOnePattern()
5761 return Add->getOperand(0); in matchMADConstOnePattern()
5778 if (Select->getOpcode() != ISD::SELECT) in combineMulSelectConstOne()
5781 SDValue Cond = Select->getOperand(0); in combineMulSelectConstOne()
5784 if (isConstOne(Select->getOperand(1))) in combineMulSelectConstOne()
5786 else if (isConstOne(Select->getOperand(2))) in combineMulSelectConstOne()
5791 SDValue Y = Select->getOperand((ConstOpNo == 1) ? 2 : 1); in combineMulSelectConstOne()
5817 // (mul x, (add y, 1)) -> (mad x, y, x) in PerformMULCombineWithOperands()
5823 // (mul x, (select y, 1)) -> (select (mul x, y), x) in PerformMULCombineWithOperands()
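
Both folds rest on simple identities, shown here as scalar arithmetic (illustrative names):

    // x * (y + 1) == x * y + x           -> a single mad
    // x * (c ? y : 1) == c ? x * y : x   -> select between (x * y) and x
    int mulAddOne(int X, int Y) { return X * Y + X; }
    int mulSelectOne(bool C, int X, int Y) { return C ? X * Y : X; }
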
5832 /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
5842 SDValue N0 = N->getOperand(0); in PerformMULCombine()
5843 SDValue N1 = N->getOperand(1); in PerformMULCombine()
5847 /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
5863 EVT CCType = N->getValueType(0); in PerformSETCCCombine()
5864 SDValue A = N->getOperand(0); in PerformSETCCCombine()
5865 SDValue B = N->getOperand(1); in PerformSETCCCombine()
5882 DL, DCI.DAG.getVTList(MVT::i1, MVT::i1), {A, B, N->getOperand(2)}); in PerformSETCCCombine()
5889 SDValue Vector = N->getOperand(0); in PerformEXTRACTCombine()
5892 if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() && in PerformEXTRACTCombine()
5903 if (Vector->isUndef() || ISD::allOperandsUndef(Vector.getNode())) in PerformEXTRACTCombine()
5907 // We only handle the types we can extract in-register. in PerformEXTRACTCombine()
5911 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(N->getOperand(1)); in PerformEXTRACTCombine()
5913 if (!Index || Index->getZExtValue() == 0) in PerformEXTRACTCombine()
5925 DCI.DAG.getConstant(Index->getZExtValue() * EltBits, DL, IVT))); in PerformEXTRACTCombine()
5927 // If element has non-integer type, bitcast it back to the expected type. in PerformEXTRACTCombine()
5930 // Past legalizer, we may need to extend i8 -> i16 to match the register type. in PerformEXTRACTCombine()
5931 if (EltVT != N->getValueType(0)) in PerformEXTRACTCombine()
5932 Result = DCI.DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Result); in PerformEXTRACTCombine()
5939 SDValue VA = N->getOperand(1); in PerformVSELECTCombine()
5944 // We need to split vselect into individual per-element operations because we in PerformVSELECTCombine()
5946 // 32-bit values, so we may as well do comparison as i32 to avoid conversions in PerformVSELECTCombine()
5950 SDValue VCond = N->getOperand(0); in PerformVSELECTCombine()
5951 SDValue VB = N->getOperand(2); in PerformVSELECTCombine()
5976 // This is done at dag-combine1 time, so that vector operations with i8 in PerformLOADCombine()
5979 EVT VT = N->getValueType(0); in PerformLOADCombine()
5992 SmallVector<SDValue, 8> Ops(N->ops()); in PerformLOADCombine()
5993 Ops.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); in PerformLOADCombine()
5995 LD->getMemOperand()); in PerformLOADCombine()
6011 switch (N->getOpcode()) { in PerformDAGCombine()
6046 /// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
6049 EVT ResVT = N->getValueType(0); in ReplaceLoadVector()
6082 Align Alignment = LD->getAlign(); in ReplaceLoadVector()
6085 TD.getPrefTypeAlign(LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); in ReplaceLoadVector()
6152 SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); in ReplaceLoadVector()
6156 OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); in ReplaceLoadVector()
6159 LD->getMemoryVT(), in ReplaceLoadVector()
6160 LD->getMemOperand()); in ReplaceLoadVector()
6194 SDValue Chain = N->getOperand(0); in ReplaceINTRINSIC_W_CHAIN()
6195 SDValue Intrin = N->getOperand(1); in ReplaceINTRINSIC_W_CHAIN()
6199 unsigned IntrinNo = Intrin.getNode()->getAsZExtVal(); in ReplaceINTRINSIC_W_CHAIN()
6209 EVT ResVT = N->getValueType(0); in ReplaceINTRINSIC_W_CHAIN()
6278 OtherOps.append(N->op_begin() + 2, N->op_end()); in ReplaceINTRINSIC_W_CHAIN()
6283 MemSD->getMemoryVT(), in ReplaceINTRINSIC_W_CHAIN()
6284 MemSD->getMemOperand()); in ReplaceINTRINSIC_W_CHAIN()
6306 "Custom handling of non-i8 ldu/ldg?"); in ReplaceINTRINSIC_W_CHAIN()
6308 // Just copy all operands as-is in ReplaceINTRINSIC_W_CHAIN()
6309 SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); in ReplaceINTRINSIC_W_CHAIN()
6311 // Force output to i16 in ReplaceINTRINSIC_W_CHAIN()
6320 MVT::i8, MemSD->getMemOperand()); in ReplaceINTRINSIC_W_CHAIN()
6332 // Change the CopyFromReg to output 2 64-bit results instead of a 128-bit in ReplaceCopyFromReg_128()
6335 SDValue Chain = N->getOperand(0); in ReplaceCopyFromReg_128()
6336 SDValue Reg = N->getOperand(1); in ReplaceCopyFromReg_128()
6337 SDValue Glue = N->getOperand(2); in ReplaceCopyFromReg_128()
6340 "Custom lowering for CopyFromReg with 128-bit reg only"); in ReplaceCopyFromReg_128()
6341 SmallVector<EVT, 4> ResultsType = {MVT::i64, MVT::i64, N->getValueType(1), in ReplaceCopyFromReg_128()
6342 N->getValueType(2)}; in ReplaceCopyFromReg_128()
6356 switch (N->getOpcode()) { in ReplaceNodeResults()
6373 Type *Ty = AI->getValOperand()->getType(); in shouldExpandAtomicRMWInIR()
6375 if (AI->isFloatingPointOperation()) { in shouldExpandAtomicRMWInIR()
6376 if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { in shouldExpandAtomicRMWInIR()
6377 if (Ty->isHalfTy() && STI.getSmVersion() >= 70 && in shouldExpandAtomicRMWInIR()
6380 if (Ty->isBFloatTy() && STI.getSmVersion() >= 90 && in shouldExpandAtomicRMWInIR()
6383 if (Ty->isFloatTy()) in shouldExpandAtomicRMWInIR()
6385 if (Ty->isDoubleTy() && STI.hasAtomAddF64()) in shouldExpandAtomicRMWInIR()
6391 assert(Ty->isIntegerTy() && "Ty should be integer at this point"); in shouldExpandAtomicRMWInIR()
6394 switch (AI->getOperation()) { in shouldExpandAtomicRMWInIR()
6401 switch (ITy->getBitWidth()) { in shouldExpandAtomicRMWInIR()
6420 switch (ITy->getBitWidth()) { in shouldExpandAtomicRMWInIR()