Lines Matching "fsin" and "output"
1 //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
69 #define DEBUG_TYPE "nvptx-lower"
76 "nvptx-sched4reg",
80 "nvptx-fma-level", cl::Hidden,
86 "nvptx-prec-divf32", cl::Hidden,
92 "nvptx-prec-sqrtf32", cl::Hidden,
97 "nvptx-force-min-byval-param-align", cl::Hidden,
98 cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval"
104 // If nvptx-prec-divf32=N is used on the command-line, always honor it in getDivF32Level()
117 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it in usePrecSqrtF32()
126 return MF.getDenormalMode(APFloat::IEEEsingle()).Output == in useF32FTZ()
162 /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
165 /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
175 // Special case for i128 - decompose to (i64, i64) in ComputePTXValueVTs()
176 if (Ty->isIntegerTy(128)) { in ComputePTXValueVTs()
181 Offsets->push_back(StartingOffset + 0); in ComputePTXValueVTs()
182 Offsets->push_back(StartingOffset + 8); in ComputePTXValueVTs()
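
A standalone illustration of what the i128 special case above reports (names and container are hypothetical, not from the file): two i64 parts at byte offsets 0 and 8 from the starting offset.

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Sketch only: an i128 at `StartingOffset` contributes two 64-bit pieces,
    // low half first, matching the two push_backs above.
    std::vector<std::pair<unsigned, uint64_t>> splitI128(uint64_t StartingOffset) {
      return {{64u, StartingOffset + 0}, {64u, StartingOffset + 8}};
    }
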
192 for (auto *EI : STy->elements()) { in ComputePTXValueVTs()
194 StartingOffset + SL->getElementOffset(ElementNum)); in ComputePTXValueVTs()
236 Offsets->push_back(Off + j * EltVT.getStoreSize()); in ComputePTXValueVTs()
241 Offsets->push_back(Off); in ComputePTXValueVTs()
256 "Promotion is not suitable for scalars of size larger than 64-bits"); in PromoteScalarIntegerPTX()
298 if (Offsets[Idx] & (AccessSize - 1)) in CanMergeParamLoadStoresStartingAt()
317 // PTX ISA can only deal with 2- and 4-element vector ops. in CanMergeParamLoadStoresStartingAt()
327 if (Offsets[j] - Offsets[j - 1] != EltSize) in CanMergeParamLoadStoresStartingAt()
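
A minimal sketch of the two tests quoted above, assuming equally sized elements and a precomputed offset list (function and parameter names are illustrative):

    #include <cstdint>
    #include <vector>

    // True if NumElts elements of size EltSize starting at Offsets[Idx] are
    // naturally aligned for AccessSize and packed back-to-back, mirroring the
    // alignment and contiguity checks shown above.
    bool canMergeFrom(const std::vector<uint64_t> &Offsets, unsigned Idx,
                      unsigned NumElts, uint64_t EltSize, uint64_t AccessSize) {
      if (Offsets[Idx] & (AccessSize - 1))          // misaligned vector access
        return false;
      for (unsigned J = Idx + 1; J < Idx + NumElts; ++J)
        if (Offsets[J] - Offsets[J - 1] != EltSize) // gap between elements
          return false;
      return true;
    }
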
334 // Flags for tracking per-element vectorization state of loads/stores
340 // Scalar is effectively a 1-element vector.
364 // Check what we can vectorize using 128/64/32-bit accesses. in VectorizePTXValueVTs()
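
A rough guess at how that scan groups elements, using the PVF_FIRST/PVF_INNER/PVF_LAST/PVF_SCALAR flags tracked above; the real VectorizePTXValueVTs handles more cases, so treat this only as a sketch of the greedy widest-access-first idea:

    #include <vector>

    enum ParamVectorizationFlags { PVF_INNER, PVF_FIRST, PVF_LAST, PVF_SCALAR };

    // Greedily try 128-, 64-, then 32-bit accesses at each position; PTX only
    // has 2- and 4-element vector ops, so anything else stays scalar.
    std::vector<ParamVectorizationFlags>
    vectorizeGreedy(unsigned NumElts, unsigned EltSize,
                    bool (*CanMerge)(unsigned Idx, unsigned AccessBytes)) {
      std::vector<ParamVectorizationFlags> Flags(NumElts, PVF_SCALAR);
      for (unsigned I = 0; I < NumElts;) {
        unsigned Grouped = 1;
        for (unsigned AccessBytes : {16u, 8u, 4u}) {
          if (EltSize == 0 || AccessBytes % EltSize != 0)
            continue;
          unsigned Elts = AccessBytes / EltSize;
          if ((Elts == 2 || Elts == 4) && I + Elts <= NumElts &&
              CanMerge(I, AccessBytes)) {
            Flags[I] = PVF_FIRST;
            for (unsigned J = 1; J + 1 < Elts; ++J)
              Flags[I + J] = PVF_INNER;
            Flags[I + Elts - 1] = PVF_LAST;
            Grouped = Elts;
            break;
          }
        }
        I += Grouped;
      }
      return Flags;
    }
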
570 // that don't have h/w rotation we lower them to multi-instruction assembly. in NVPTXTargetLowering()
737 // user passed --nvptx-no-fp16-math. The flag is useful because, in NVPTXTargetLowering()
820 {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS}) { in NVPTXTargetLowering()
863 // Custom lowering for inline asm with 128-bit operands in NVPTXTargetLowering()
1372 auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace()); in LowerGlobalAddress()
1373 Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT); in LowerGlobalAddress()
1378 return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) || in IsTypePassedAsArray()
1379 Ty->isHalfTy() || Ty->isBFloatTy(); in IsTypePassedAsArray()
1390 assert(isABI && "Non-ABI compilation is not supported"); in getPrototype()
1398 if (retTy->getTypeID() == Type::VoidTyID) { in getPrototype()
1402 if ((retTy->isFloatingPointTy() || retTy->isIntegerTy()) && in getPrototype()
1406 size = ITy->getBitWidth(); in getPrototype()
1408 assert(retTy->isFloatingPointTy() && in getPrototype()
1410 size = retTy->getPrimitiveSizeInBits(); in getPrototype()
1421 O << ".param .align " << (retAlignment ? retAlignment->value() : 0) in getPrototype()
1432 unsigned NumArgs = VAInfo ? VAInfo->first : Args.size(); in getPrototype()
1451 OIdx += len - 1; in getPrototype()
1461 sz = cast<IntegerType>(Ty)->getBitWidth(); in getPrototype()
1466 sz = Ty->getPrimitiveSizeInBits(); in getPrototype()
1486 O << (first ? "" : ",") << " .param .align " << VAInfo->second in getPrototype()
1509 const Function *DirectCallee = CB->getCalledFunction(); in getArgumentAlignment()
1552 // Use byte-store when the param address of the argument value is unaligned.
1584 // Use byte-load when the param address of the returned value is unaligned.
1654 assert(isABI && "Non-ABI compilation is not supported"); in LowerCall()
1668 // initially set to 0, so it can be used for non-variadic arguments (which use in LowerCall()
1688 // * if there is a vector argument with more than typical vector-length in LowerCall()
1716 ArgAlign = getFunctionByValParamAlign(CB->getCalledFunction(), ETy, in LowerCall()
1768 // than 32-bits are sign extended or zero extended, depending on in LowerCall()
1772 Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32; in LowerCall()
1810 // Use 16-bit registers for small stores as it's the in LowerCall()
1848 unsigned NumElts = StoreOperands.size() - 3; in LowerCall()
1893 --OIdx; in LowerCall()
1908 // .param .align N .b8 retval0[<size-in-bytes>], or in LowerCall()
1909 // .param .b<size-in-bits> retval0 in LowerCall()
1925 Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32), in LowerCall()
1943 VADeclareParam->getVTList(), DeclareParamOps); in LowerCall()
1960 CalleeFunc->addFnAttr("nvptx-libcall-callee", "true"); in LowerCall()
1976 CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1))) in LowerCall()
1979 const char *ProtoStr = nvTM->getStrPool().save(Proto).data(); in LowerCall()
2017 if (i == (e - 1)) in LowerCall()
2064 int VecIdx = -1; // Index of the first element of the vector. in LowerCall()
2067 // 32-bits are sign extended or zero extended, depending on whether in LowerCall()
2070 RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; in LowerCall()
2097 if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType() && in LowerCall()
2100 assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); in LowerCall()
2113 assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); in LowerCall()
2159 VecIdx = -1; in LowerCall()
2223 DAG.getContext()->diagnose(NoDynamicAlloca); in LowerDYNAMIC_STACKALLOC()
2231 uint64_t Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); in LowerDYNAMIC_STACKALLOC()
2234 // The size for the PTX alloca instruction is 64-bit for m64 and 32-bit for m32. in LowerDYNAMIC_STACKALLOC()
2235 if (nvTM->is64Bit()) in LowerDYNAMIC_STACKALLOC()
2243 nvTM->is64Bit() ? MVT::i64 : MVT::i32, AllocOps); in LowerDYNAMIC_STACKALLOC()
2257 unsigned NumOperands = Node->getNumOperands(); in LowerCONCAT_VECTORS()
2259 SDValue SubOp = Node->getOperand(i); in LowerCONCAT_VECTORS()
2260 EVT VVT = SubOp.getNode()->getValueType(0); in LowerCONCAT_VECTORS()
2268 return DAG.getBuildVector(Node->getValueType(0), dl, Ops); in LowerCONCAT_VECTORS()
2272 // would get lowered as two constant loads and vector-packing move.
2277 EVT VT = Op->getValueType(0); in LowerBUILD_VECTOR()
2283 if (!llvm::all_of(Op->ops(), [](SDValue Operand) { in LowerBUILD_VECTOR()
2284 return Operand->isUndef() || isa<ConstantSDNode>(Operand) || in LowerBUILD_VECTOR()
2287 // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us in LowerBUILD_VECTOR()
2293 DAG.getAnyExtOrTrunc(Op->getOperand(1), DL, MVT::i32), in LowerBUILD_VECTOR()
2294 DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8); in LowerBUILD_VECTOR()
2297 DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32), in LowerBUILD_VECTOR()
2301 DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32), in LowerBUILD_VECTOR()
2309 auto GetOperand = [](SDValue Op, int N) -> APInt { in LowerBUILD_VECTOR()
2310 const SDValue &Operand = Op->getOperand(N); in LowerBUILD_VECTOR()
2311 EVT VT = Op->getValueType(0); in LowerBUILD_VECTOR()
2312 if (Operand->isUndef()) in LowerBUILD_VECTOR()
2316 Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt(); in LowerBUILD_VECTOR()
2318 Value = Operand->getAsAPIntVal(); in LowerBUILD_VECTOR()
2337 return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const); in LowerBUILD_VECTOR()
2342 SDValue Index = Op->getOperand(1); in LowerEXTRACT_VECTOR_ELT()
2343 SDValue Vector = Op->getOperand(0); in LowerEXTRACT_VECTOR_ELT()
2355 return DAG.getAnyExtOrTrunc(BFE, DL, Op->getValueType(0)); in LowerEXTRACT_VECTOR_ELT()
2377 SDValue Vector = Op->getOperand(0); in LowerINSERT_VECTOR_ELT()
2383 SDValue Value = Op->getOperand(1); in LowerINSERT_VECTOR_ELT()
2384 if (Value->isUndef()) in LowerINSERT_VECTOR_ELT()
2387 SDValue Index = Op->getOperand(2); in LowerINSERT_VECTOR_ELT()
2396 return DAG.getNode(ISD::BITCAST, DL, Op->getValueType(0), BFI); in LowerINSERT_VECTOR_ELT()
2410 for (auto I : llvm::enumerate(SVN->getMask())) { in LowerVECTOR_SHUFFLE()
2411 if (I.value() != -1) // -1 is a placeholder for undef. in LowerVECTOR_SHUFFLE()
2420 /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
2427 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); in LowerShiftRightParts()
2453 // - if (Amt>=size) then in LowerShiftRightParts()
2454 // dLo = aHi >> (Amt-size) in LowerShiftRightParts()
2457 // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) in LowerShiftRightParts()
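
Written out by hand for size = 64 (an analogue of the lowering, not the DAG code itself), the two cases above become a 128-bit logical right shift built from 64-bit halves:

    #include <cstdint>

    // Logical right shift of a 128-bit value held as {Hi, Lo}; assumes Amt < 128.
    void lshr128(uint64_t Hi, uint64_t Lo, unsigned Amt,
                 uint64_t &DHi, uint64_t &DLo) {
      if (Amt >= 64) {                       // whole low word shifted away
        DLo = Hi >> (Amt - 64);
        DHi = 0;
      } else if (Amt == 0) {                 // avoid the undefined 64-bit shift below
        DLo = Lo;
        DHi = Hi;
      } else {
        DLo = (Lo >> Amt) | (Hi << (64 - Amt));
        DHi = Hi >> Amt;
      }
    }
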
2481 /// LowerShiftLeftParts - Lower SHL_PARTS, which
2488 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); in LowerShiftLeftParts()
2513 // - if (Amt>=size) then in LowerShiftLeftParts()
2515 // dLo = aLo << (Amt-size) in LowerShiftLeftParts()
2518 // dHi = (aHi << Amt) | (aLo >> (size-Amt)) in LowerShiftLeftParts()
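
And the companion left-shift decomposition, again as a plain 64-bit-halves sketch rather than the actual lowering:

    #include <cstdint>

    // Shift-left of a 128-bit value held as {Hi, Lo}; assumes Amt < 128.
    void shl128(uint64_t Hi, uint64_t Lo, unsigned Amt,
                uint64_t &DHi, uint64_t &DLo) {
      if (Amt >= 64) {                       // low word moves entirely into Hi
        DHi = Lo << (Amt - 64);
        DLo = 0;
      } else if (Amt == 0) {
        DHi = Hi;
        DLo = Lo;
      } else {
        DHi = (Hi << Amt) | (Lo >> (64 - Amt));
        DLo = Lo << Amt;
      }
    }
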
2556 // float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
2568 // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f)) in LowerFROUND32()
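
The formula in that comment is plain round-half-away-from-zero; as scalar C++ (valid only while the value fits in an int, which the surrounding lowering has to guard separately):

    // Sketch of the quoted FROUND formula for f32.
    float roundHalfAwayFromZero(float A) {
      return (float)(int)(A > 0 ? (A + 0.5f) : (A - 0.5f));
    }
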
2672 return TLI->expandFP_ROUND(Op.getNode(), DAG); in LowerFP_ROUND()
2675 // This combination was the first to support f32 -> bf16. in LowerFP_ROUND()
2682 // Round-inexact-to-odd f64 to f32, then do the final rounding using in LowerFP_ROUND()
2683 // the hardware f32 -> bf16 instruction. in LowerFP_ROUND()
2684 SDValue rod = TLI->expandRoundInexactToOdd( in LowerFP_ROUND()
2691 return TLI->expandFP_ROUND(Op.getNode(), DAG); in LowerFP_ROUND()
2736 llvm::transform(Op->ops(), std::back_inserter(ScalarArgs), in LowerVectorArith()
2826 const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); in LowerVAARG()
2827 EVT VT = Node->getValueType(0); in LowerVAARG()
2829 SDValue Tmp1 = Node->getOperand(0); in LowerVAARG()
2830 SDValue Tmp2 = Node->getOperand(1); in LowerVAARG()
2831 const MaybeAlign MA(Node->getConstantOperandVal(3)); in LowerVAARG()
2833 SDValue VAListLoad = DAG.getLoad(TLI->getPointerTy(DAG.getDataLayout()), DL, in LowerVAARG()
2837 if (MA && *MA > TLI->getMinStackArgumentAlignment()) { in LowerVAARG()
2840 DAG.getConstant(MA->value() - 1, DL, VAList.getValueType())); in LowerVAARG()
2844 DAG.getConstant(-(int64_t)MA->value(), DL, VAList.getValueType())); in LowerVAARG()
2866 EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout()); in LowerVASTART()
2869 SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT); in LowerVASTART()
2872 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerVASTART()
2878 SDValue Op0 = Op->getOperand(0); in LowerSelect()
2879 SDValue Op1 = Op->getOperand(1); in LowerSelect()
2880 SDValue Op2 = Op->getOperand(2); in LowerSelect()
2902 EVT MemVT = Load->getMemoryVT(); in LowerLOAD()
2904 MemVT, *Load->getMemOperand())) { in LowerLOAD()
2916 // v1 = ld i8* addr (-> i16)
2922 assert(LD->getExtensionType() == ISD::NON_EXTLOAD); in LowerLOADi1()
2923 assert(Node->getValueType(0) == MVT::i1 && in LowerLOADi1()
2925 SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(), in LowerLOADi1()
2926 LD->getBasePtr(), LD->getPointerInfo(), in LowerLOADi1()
2927 MVT::i8, LD->getAlign(), in LowerLOADi1()
2928 LD->getMemOperand()->getFlags()); in LowerLOADi1()
2933 SDValue Ops[] = { result, LD->getChain() }; in LowerLOADi1()
2939 EVT VT = Store->getMemoryVT(); in LowerSTORE()
2948 VT, *Store->getMemOperand())) in LowerSTORE()
2964 SDValue Val = N->getOperand(1); in LowerSTOREVector()
3001 Align Alignment = MemSD->getAlign(); in LowerSTOREVector()
3047 Ops.push_back(N->getOperand(0)); in LowerSTOREVector()
3050 // Combine f16,f16 -> v2f16 in LowerSTOREVector()
3073 Ops.append(N->op_begin() + 2, N->op_end()); in LowerSTOREVector()
3077 MemSD->getMemoryVT(), MemSD->getMemOperand()); in LowerSTOREVector()
3094 SDValue Tmp1 = ST->getChain(); in LowerSTOREi1()
3095 SDValue Tmp2 = ST->getBasePtr(); in LowerSTOREi1()
3096 SDValue Tmp3 = ST->getValue(); in LowerSTOREi1()
3100 DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8, in LowerSTOREi1()
3101 ST->getAlign(), ST->getMemOperand()->getFlags()); in LowerSTOREi1()
3107 // Change the CopyToReg to take in two 64-bit operands instead of a 128-bit in LowerCopyToReg_128()
3111 "Custom lowering for 128-bit CopyToReg only"); in LowerCopyToReg_128()
3116 SDValue Cast = DAG.getBitcast(MVT::v2i64, Op->getOperand(2)); in LowerCopyToReg_128()
3122 SmallVector<SDValue, 5> NewOps(Op->getNumOperands() + 1); in LowerCopyToReg_128()
3123 SmallVector<EVT, 3> ResultsType(Node->values()); in LowerCopyToReg_128()
3125 NewOps[0] = Op->getOperand(0); // Chain in LowerCopyToReg_128()
3126 NewOps[1] = Op->getOperand(1); // Dst Reg in LowerCopyToReg_128()
3127 NewOps[2] = Lo; // Lower 64-bit in LowerCopyToReg_128()
3128 NewOps[3] = Hi; // Higher 64-bit in LowerCopyToReg_128()
3130 NewOps[4] = Op->getOperand(3); // Glue if exists in LowerCopyToReg_128()
3159 StringRef SavedStr = nvTM->getStrPool().save( in getParamSymbol()
3173 const AttributeList &PAL = F->getAttributes(); in LowerFormalArguments()
3180 assert(isABI && "Non-ABI compilation is not supported"); in LowerFormalArguments()
3186 for (const Argument &I : F->args()) { in LowerFormalArguments()
3194 // * if there is a vector argument with more than typical vector-length in LowerFormalArguments()
3204 if (theArgs[i]->use_empty()) { in LowerFormalArguments()
3206 if (IsTypePassedAsArray(Ty) && !Ty->isVectorTy()) { in LowerFormalArguments()
3219 --InsIdx; in LowerFormalArguments()
3222 if (Ty->isVectorTy()) { in LowerFormalArguments()
3224 unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); in LowerFormalArguments()
3230 --InsIdx; in LowerFormalArguments()
3244 aggregateIsPacked = STy->isPacked(); in LowerFormalArguments()
3257 int VecIdx = -1; // Index of the first element of the current vector. in LowerFormalArguments()
3260 assert(VecIdx == -1 && "Orphaned vector."); in LowerFormalArguments()
3266 unsigned NumElts = parti - VecIdx + 1; in LowerFormalArguments()
3278 EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts); in LowerFormalArguments()
3283 EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM)); in LowerFormalArguments()
3285 const MaybeAlign PartAlign = [&]() -> MaybeAlign { in LowerFormalArguments()
3291 DL.getABITypeAlign(EltVT.getTypeForEVT(F->getContext())); in LowerFormalArguments()
3299 P.getNode()->setIROrder(i + 1); in LowerFormalArguments()
3329 VecIdx = -1; in LowerFormalArguments()
3334 --InsIdx; in LowerFormalArguments()
3351 p.getNode()->setIROrder(i + 1); in LowerFormalArguments()
3361 // Use byte-store when the param address of the return value is unaligned.
3399 assert(isABI && "Non-ABI compilation is not supported"); in LowerReturn()
3426 RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL) in LowerReturn()
3430 // 32-bits are sign extended or zero extended, depending on whether in LowerReturn()
3433 RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; in LowerReturn()
3445 // Use 16-bit registers for small load-stores as it's the in LowerReturn()
3452 if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType()) { in LowerReturn()
3455 DL.getABITypeAlign(ElementType.getTypeForEVT(RetTy->getContext())); in LowerReturn()
3482 unsigned NumElts = StoreOperands.size() - 2; in LowerReturn()
4672 Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); in getTgtMemIntrinsic()
4691 Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); in getTgtMemIntrinsic()
5086 /// getFunctionParamOptimizedAlign - since function arguments are passed via
5091 /// alignment. To allow using 128-bit vectorized loads/stores, this function
5102 if (!F || !F->hasLocalLinkage() || in getFunctionParamOptimizedAlign()
5103 F->hasAddressTaken(/*Users=*/nullptr, in getFunctionParamOptimizedAlign()
5109 assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage"); in getFunctionParamOptimizedAlign()
5130 // on non-deprecated ptxas versions. in getFunctionByValParamAlign()
5145 ParamStr << getTargetMachine().getSymbol(F)->getName(); in getParamName()
5154 /// isLegalAddressingMode - Return true if the addressing mode represented
5162 // AddrMode - This represents an addressing mode of: in isLegalAddressingMode()
5166 // - [avar] in isLegalAddressingMode()
5167 // - [areg] in isLegalAddressingMode()
5168 // - [areg+immoff] in isLegalAddressingMode()
5169 // - [immAddr] in isLegalAddressingMode()
5171 // immoff must fit in a signed 32-bit int in isLegalAddressingMode()
5193 //===----------------------------------------------------------------------===//
5195 //===----------------------------------------------------------------------===//
5197 /// getConstraintType - Given a constraint letter, return the type of
5253 //===----------------------------------------------------------------------===//
5255 //===----------------------------------------------------------------------===//
5259 // Always honor command-line argument in allowFMA()
5279 // Allow unsafe math if unsafe-fp-math attribute explicitly says so. in allowUnsafeFPMath()
5281 return F.getFnAttribute("unsafe-fp-math").getValueAsBool(); in allowUnsafeFPMath()
5286 return Const && Const->getZExtValue() == 0; in isConstZero()
5289 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
5298 // Since integer multiply-add costs the same as integer multiply in PerformADDCombineWithOperands()
5302 if (!N0.getNode()->hasOneUse()) in PerformADDCombineWithOperands()
5305 // fold (add (mul a, b), c) -> (mad a, b, c) in PerformADDCombineWithOperands()
5312 // -> (select cond, c, (mad a, b, c)) in PerformADDCombineWithOperands()
5316 if (isConstZero(N0->getOperand(1))) in PerformADDCombineWithOperands()
5318 else if (isConstZero(N0->getOperand(2))) in PerformADDCombineWithOperands()
5323 SDValue M = N0->getOperand((ZeroOpNum == 1) ? 2 : 1); in PerformADDCombineWithOperands()
5324 if (M->getOpcode() != ISD::MUL || !M.getNode()->hasOneUse()) in PerformADDCombineWithOperands()
5328 M->getOperand(0), M->getOperand(1), N1); in PerformADDCombineWithOperands()
5329 return DCI.DAG.getSelect(SDLoc(N), VT, N0->getOperand(0), in PerformADDCombineWithOperands()
5345 if (!TLI->allowFMA(DCI.DAG.getMachineFunction(), OptLevel)) in PerformFADDCombineWithOperands()
5358 for (const SDNode *User : N0.getNode()->uses()) { in PerformFADDCombineWithOperands()
5360 if (User->getOpcode() != ISD::FADD) in PerformFADDCombineWithOperands()
5366 int orderNo = N->getIROrder(); in PerformFADDCombineWithOperands()
5367 int orderNo2 = N0.getNode()->getIROrder(); in PerformFADDCombineWithOperands()
5372 if (orderNo - orderNo2 < 500) in PerformFADDCombineWithOperands()
5386 for (const SDNode *User : left->uses()) { in PerformFADDCombineWithOperands()
5387 int orderNo3 = User->getIROrder(); in PerformFADDCombineWithOperands()
5395 for (const SDNode *User : right->uses()) { in PerformFADDCombineWithOperands()
5396 int orderNo3 = User->getIROrder(); in PerformFADDCombineWithOperands()
5416 if (all_of(N->ops().drop_front(Front).drop_back(Back), in PerformStoreCombineHelper()
5417 [](const SDUse &U) { return U.get()->isUndef(); })) in PerformStoreCombineHelper()
5420 return N->getOperand(0); in PerformStoreCombineHelper()
5436 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
5444 SDValue N0 = N->getOperand(0); in PerformADDCombine()
5445 SDValue N1 = N->getOperand(1); in PerformADDCombine()
5447 // Skip non-integer, non-scalar case in PerformADDCombine()
5460 /// PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.
5465 SDValue N0 = N->getOperand(0); in PerformFADDCombine()
5466 SDValue N1 = N->getOperand(1); in PerformFADDCombine()
5485 // target-specific DAG node, the DAG combiner fails to eliminate these AND in PerformANDCombine()
5487 SDValue Val = N->getOperand(0); in PerformANDCombine()
5488 SDValue Mask = N->getOperand(1); in PerformANDCombine()
5496 // Convert BFE -> truncate i16 -> and 255 in PerformANDCombine()
5497 // To just BFE -> truncate i16, as the value already has all the bits in the in PerformANDCombine()
5507 uint64_t BFEBitsVal = BFEBits->getZExtValue(); in PerformANDCombine()
5514 uint64_t MaskVal = MaskCnst->getZExtValue(); in PerformANDCombine()
5516 if (MaskVal != (uint64_t(1) << BFEBitsVal) - 1) in PerformANDCombine()
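
The mask test there encodes when the AND is redundant: after extracting BFEBits bits, only the exact low-bit mask of that width keeps the value unchanged. A hypothetical standalone form (assumes BFEBits < 64):

    #include <cstdint>

    // True if MaskVal keeps exactly the BFEBits low bits a preceding
    // bit-field extract already guarantees, so the AND can be dropped.
    bool andIsRedundantAfterBFE(uint64_t MaskVal, uint64_t BFEBits) {
      return MaskVal == (uint64_t(1) << BFEBits) - 1;
    }
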
5521 // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and in PerformANDCombine()
5524 Val = Val->getOperand(0); in PerformANDCombine()
5527 if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { in PerformANDCombine()
5528 Val = Val->getOperand(0); in PerformANDCombine()
5531 if (Val->getOpcode() == NVPTXISD::LoadV2 || in PerformANDCombine()
5532 Val->getOpcode() == NVPTXISD::LoadV4) { in PerformANDCombine()
5539 uint64_t MaskVal = MaskCnst->getZExtValue(); in PerformANDCombine()
5551 EVT MemVT = Mem->getMemoryVT(); in PerformANDCombine()
5557 unsigned ExtType = Val->getConstantOperandVal(Val->getNumOperands() - 1); in PerformANDCombine()
5566 // Re-insert the ext as a zext. in PerformANDCombine()
5582 assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM); in PerformREMCombine()
5584 // Don't do anything at less than -O2. in PerformREMCombine()
5590 EVT VT = N->getValueType(0); in PerformREMCombine()
5591 bool IsSigned = N->getOpcode() == ISD::SREM; in PerformREMCombine()
5594 const SDValue &Num = N->getOperand(0); in PerformREMCombine()
5595 const SDValue &Den = N->getOperand(1); in PerformREMCombine()
5597 for (const SDNode *U : Num->uses()) { in PerformREMCombine()
5598 if (U->getOpcode() == DivOpc && U->getOperand(0) == Num && in PerformREMCombine()
5599 U->getOperand(1) == Den) { in PerformREMCombine()
5600 // Num % Den -> Num - (Num / Den) * Den in PerformREMCombine()
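
The rewrite named in that comment, as ordinary integer arithmetic; in the combine the division node already exists, so no extra divide is introduced (sketch only):

    // Num % Den recomputed from the quotient: Num - (Num / Den) * Den.
    int remViaExistingDiv(int Num, int Den) {
      int Quot = Num / Den;   // stands in for the already-present SDIV/UDIV node
      return Num - Quot * Den;
    }
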
5616 /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
5642 /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
5663 const APInt &Val = CI->getAPIntValue(); in AreMulWideOperandsDemotable()
5678 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
5679 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
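
Conceptually, when both operands of an M-bit multiply fit in M/2 bits, the same product can come from a widening M/2 x M/2 -> M multiply; a scalar analogue of mul.wide.s16 (illustrative, not the DAG transform):

    #include <cstdint>

    // 16 x 16 -> 32 signed widening multiply.
    int32_t mulWideS16(int16_t A, int16_t B) {
      return int32_t(A) * int32_t(B);
    }
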
5684 EVT MulType = N->getValueType(0); in TryMULWIDECombine()
5691 SDValue LHS = N->getOperand(0); in TryMULWIDECombine()
5692 SDValue RHS = N->getOperand(1); in TryMULWIDECombine()
5695 if (N->getOpcode() == ISD::MUL) { in TryMULWIDECombine()
5702 if (N->getOpcode() == ISD::SHL) { in TryMULWIDECombine()
5708 APInt ShiftAmt = ShlRHS->getAPIntValue(); in TryMULWIDECombine()
5750 return Const && Const->getZExtValue() == 1; in isConstOne()
5754 if (Add->getOpcode() != ISD::ADD) in matchMADConstOnePattern()
5757 if (isConstOne(Add->getOperand(0))) in matchMADConstOnePattern()
5758 return Add->getOperand(1); in matchMADConstOnePattern()
5760 if (isConstOne(Add->getOperand(1))) in matchMADConstOnePattern()
5761 return Add->getOperand(0); in matchMADConstOnePattern()
5778 if (Select->getOpcode() != ISD::SELECT) in combineMulSelectConstOne()
5781 SDValue Cond = Select->getOperand(0); in combineMulSelectConstOne()
5784 if (isConstOne(Select->getOperand(1))) in combineMulSelectConstOne()
5786 else if (isConstOne(Select->getOperand(2))) in combineMulSelectConstOne()
5791 SDValue Y = Select->getOperand((ConstOpNo == 1) ? 2 : 1); in combineMulSelectConstOne()
5817 // (mul x, (add y, 1)) -> (mad x, y, x) in PerformMULCombineWithOperands()
5823 // (mul x, (select y, 1)) -> (select (mul x, y), x) in PerformMULCombineWithOperands()
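
Both folds rest on simple identities, shown here as scalar arithmetic (illustrative names):

    // x * (y + 1) == x * y + x           -> a single mad
    // x * (c ? y : 1) == c ? x * y : x   -> select between (x * y) and x
    int mulAddOne(int X, int Y) { return X * Y + X; }
    int mulSelectOne(bool C, int X, int Y) { return C ? X * Y : X; }
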
5832 /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
5842 SDValue N0 = N->getOperand(0); in PerformMULCombine()
5843 SDValue N1 = N->getOperand(1); in PerformMULCombine()
5847 /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
5863 EVT CCType = N->getValueType(0); in PerformSETCCCombine()
5864 SDValue A = N->getOperand(0); in PerformSETCCCombine()
5865 SDValue B = N->getOperand(1); in PerformSETCCCombine()
5882 DL, DCI.DAG.getVTList(MVT::i1, MVT::i1), {A, B, N->getOperand(2)}); in PerformSETCCCombine()
5889 SDValue Vector = N->getOperand(0); in PerformEXTRACTCombine()
5892 if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() && in PerformEXTRACTCombine()
5903 if (Vector->isUndef() || ISD::allOperandsUndef(Vector.getNode())) in PerformEXTRACTCombine()
5907 // We only handle the types we can extract in-register. in PerformEXTRACTCombine()
5911 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(N->getOperand(1)); in PerformEXTRACTCombine()
5913 if (!Index || Index->getZExtValue() == 0) in PerformEXTRACTCombine()
5925 DCI.DAG.getConstant(Index->getZExtValue() * EltBits, DL, IVT))); in PerformEXTRACTCombine()
5927 // If element has non-integer type, bitcast it back to the expected type. in PerformEXTRACTCombine()
5930 // Past legalizer, we may need to extend i8 -> i16 to match the register type. in PerformEXTRACTCombine()
5931 if (EltVT != N->getValueType(0)) in PerformEXTRACTCombine()
5932 Result = DCI.DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Result); in PerformEXTRACTCombine()
5939 SDValue VA = N->getOperand(1); in PerformVSELECTCombine()
5944 // We need to split vselect into individual per-element operations because we in PerformVSELECTCombine()
5946 // 32-bit values, so we may as well do comparison as i32 to avoid conversions in PerformVSELECTCombine()
5950 SDValue VCond = N->getOperand(0); in PerformVSELECTCombine()
5951 SDValue VB = N->getOperand(2); in PerformVSELECTCombine()
5976 // This is done at dag-combine1 time, so that vector operations with i8 in PerformLOADCombine()
5979 EVT VT = N->getValueType(0); in PerformLOADCombine()
5992 SmallVector<SDValue, 8> Ops(N->ops()); in PerformLOADCombine()
5993 Ops.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); in PerformLOADCombine()
5995 LD->getMemOperand()); in PerformLOADCombine()
6011 switch (N->getOpcode()) { in PerformDAGCombine()
6046 /// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
6049 EVT ResVT = N->getValueType(0); in ReplaceLoadVector()
6082 Align Alignment = LD->getAlign(); in ReplaceLoadVector()
6085 TD.getPrefTypeAlign(LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); in ReplaceLoadVector()
6152 SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); in ReplaceLoadVector()
6156 OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); in ReplaceLoadVector()
6159 LD->getMemoryVT(), in ReplaceLoadVector()
6160 LD->getMemOperand()); in ReplaceLoadVector()
6194 SDValue Chain = N->getOperand(0); in ReplaceINTRINSIC_W_CHAIN()
6195 SDValue Intrin = N->getOperand(1); in ReplaceINTRINSIC_W_CHAIN()
6199 unsigned IntrinNo = Intrin.getNode()->getAsZExtVal(); in ReplaceINTRINSIC_W_CHAIN()
6209 EVT ResVT = N->getValueType(0); in ReplaceINTRINSIC_W_CHAIN()
6278 OtherOps.append(N->op_begin() + 2, N->op_end()); in ReplaceINTRINSIC_W_CHAIN()
6283 MemSD->getMemoryVT(), in ReplaceINTRINSIC_W_CHAIN()
6284 MemSD->getMemOperand()); in ReplaceINTRINSIC_W_CHAIN()
6306 "Custom handling of non-i8 ldu/ldg?"); in ReplaceINTRINSIC_W_CHAIN()
6308 // Just copy all operands as-is in ReplaceINTRINSIC_W_CHAIN()
6309 SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); in ReplaceINTRINSIC_W_CHAIN()
6311 // Force output to i16 in ReplaceINTRINSIC_W_CHAIN()
6320 MVT::i8, MemSD->getMemOperand()); in ReplaceINTRINSIC_W_CHAIN()
6332 // Change the CopyFromReg to output 2 64-bit results instead of a 128-bit in ReplaceCopyFromReg_128()
6335 SDValue Chain = N->getOperand(0); in ReplaceCopyFromReg_128()
6336 SDValue Reg = N->getOperand(1); in ReplaceCopyFromReg_128()
6337 SDValue Glue = N->getOperand(2); in ReplaceCopyFromReg_128()
6340 "Custom lowering for CopyFromReg with 128-bit reg only"); in ReplaceCopyFromReg_128()
6341 SmallVector<EVT, 4> ResultsType = {MVT::i64, MVT::i64, N->getValueType(1), in ReplaceCopyFromReg_128()
6342 N->getValueType(2)}; in ReplaceCopyFromReg_128()
6356 switch (N->getOpcode()) { in ReplaceNodeResults()
6373 Type *Ty = AI->getValOperand()->getType(); in shouldExpandAtomicRMWInIR()
6375 if (AI->isFloatingPointOperation()) { in shouldExpandAtomicRMWInIR()
6376 if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { in shouldExpandAtomicRMWInIR()
6377 if (Ty->isHalfTy() && STI.getSmVersion() >= 70 && in shouldExpandAtomicRMWInIR()
6380 if (Ty->isBFloatTy() && STI.getSmVersion() >= 90 && in shouldExpandAtomicRMWInIR()
6383 if (Ty->isFloatTy()) in shouldExpandAtomicRMWInIR()
6385 if (Ty->isDoubleTy() && STI.hasAtomAddF64()) in shouldExpandAtomicRMWInIR()
6391 assert(Ty->isIntegerTy() && "Ty should be integer at this point"); in shouldExpandAtomicRMWInIR()
6394 switch (AI->getOperation()) { in shouldExpandAtomicRMWInIR()
6401 switch (ITy->getBitWidth()) { in shouldExpandAtomicRMWInIR()
6420 switch (ITy->getBitWidth()) { in shouldExpandAtomicRMWInIR()