1 //===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
69 #define DEBUG_TYPE "nvptx-lower"
76 "nvptx-sched4reg",
80 "nvptx-fma-level", cl::Hidden,
86 "nvptx-prec-divf32", cl::Hidden,
92 "nvptx-prec-sqrtf32", cl::Hidden,
97 "nvptx-force-min-byval-param-align", cl::Hidden,
98 cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval"
104 // If nvptx-prec-divf32=N is used on the command-line, always honor it in getDivF32Level()
117 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it in usePrecSqrtF32()
162 /// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
165 /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
175 // Special case for i128 - decompose to (i64, i64) in ComputePTXValueVTs()
176 if (Ty->isIntegerTy(128)) { in ComputePTXValueVTs()
181 Offsets->push_back(StartingOffset + 0); in ComputePTXValueVTs()
182 Offsets->push_back(StartingOffset + 8); in ComputePTXValueVTs()
192 for (auto *EI : STy->elements()) { in ComputePTXValueVTs()
194 StartingOffset + SL->getElementOffset(ElementNum)); in ComputePTXValueVTs()
240 Offsets->push_back(Off + j * EltVT.getStoreSize()); in ComputePTXValueVTs()
245 Offsets->push_back(Off); in ComputePTXValueVTs()
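ComputePTXValueVTs, as the note above says, flattens an IR type into a list of primitive EVTs plus byte offsets, with i128 decomposed into two i64 halves at +0 and +8. A minimal standalone C++ sketch of just that i128 case (hypothetical names, not the LLVM API):

    #include <cstdint>
    #include <utility>
    #include <vector>
    // Illustrative only: an i128 value at `startingOffset` flattens to two
    // i64 pieces at byte offsets +0 and +8, as in the special case above.
    std::vector<std::pair<const char *, uint64_t>> flattenI128(uint64_t startingOffset) {
      return {{"i64", startingOffset + 0}, {"i64", startingOffset + 8}};
    }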
260 "Promotion is not suitable for scalars of size larger than 64-bits"); in PromoteScalarIntegerPTX()
302 if (Offsets[Idx] & (AccessSize - 1)) in CanMergeParamLoadStoresStartingAt()
321 // PTX ISA can only deal with 2- and 4-element vector ops. in CanMergeParamLoadStoresStartingAt()
331 if (Offsets[j] - Offsets[j - 1] != EltSize) in CanMergeParamLoadStoresStartingAt()
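The two checks above gate merging of parameter accesses: the starting offset must be a multiple of the (power-of-two) access size, and consecutive element offsets must be contiguous. A self-contained illustration of those predicates (hypothetical helper, not the LLVM code):

    #include <cstdint>
    #include <vector>
    // True when offsets[idx..idx+n) can be merged into one `accessSize`-byte
    // access of n elements of `eltSize` bytes each (accessSize a power of two).
    bool canMergeParamAccesses(const std::vector<uint64_t> &offsets, unsigned idx,
                               unsigned n, uint64_t accessSize, uint64_t eltSize) {
      if (offsets[idx] & (accessSize - 1))           // start must be aligned
        return false;
      for (unsigned j = idx + 1; j < idx + n; ++j)
        if (offsets[j] - offsets[j - 1] != eltSize)  // elements must be contiguous
          return false;
      return true;
    }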
338 // Flags for tracking per-element vectorization state of loads/stores
344 // Scalar is effectively a 1-element vector.
368 // Check what we can vectorize using 128/64/32-bit accesses. in VectorizePTXValueVTs()
574 // that don't have h/w rotation we lower them to multi-instruction assembly. in NVPTXTargetLowering()
741 // user passed --nvptx-no-fp16-math. The flag is useful because, in NVPTXTargetLowering()
867 // Custom lowering for inline asm with 128-bit operands in NVPTXTargetLowering()
1376 auto PtrVT = getPointerTy(DAG.getDataLayout(), GAN->getAddressSpace()); in LowerGlobalAddress()
1377 Op = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, PtrVT); in LowerGlobalAddress()
1382 return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) || in IsTypePassedAsArray()
1383 Ty->isHalfTy() || Ty->isBFloatTy(); in IsTypePassedAsArray()
1394 assert(isABI && "Non-ABI compilation is not supported"); in getPrototype()
1402 if (retTy->getTypeID() == Type::VoidTyID) { in getPrototype()
1406 if ((retTy->isFloatingPointTy() || retTy->isIntegerTy()) && in getPrototype()
1410 size = ITy->getBitWidth(); in getPrototype()
1412 assert(retTy->isFloatingPointTy() && in getPrototype()
1414 size = retTy->getPrimitiveSizeInBits(); in getPrototype()
1425 O << ".param .align " << (retAlignment ? retAlignment->value() : 0) in getPrototype()
1436 unsigned NumArgs = VAInfo ? VAInfo->first : Args.size(); in getPrototype()
1455 OIdx += len - 1; in getPrototype()
1465 sz = cast<IntegerType>(Ty)->getBitWidth(); in getPrototype()
1470 sz = Ty->getPrimitiveSizeInBits(); in getPrototype()
1490 O << (first ? "" : ",") << " .param .align " << VAInfo->second in getPrototype()
1513 const Function *DirectCallee = CB->getCalledFunction(); in getArgumentAlignment()
1556 // Use byte-store when the param address of the argument value is unaligned.
1588 // Use byte-load when the param address of the returned value is unaligned.
1658 assert(isABI && "Non-ABI compilation is not supported"); in LowerCall()
1672 // initially set to 0, so it can be used for non-variadic arguments (which use in LowerCall()
1692 // * if there is a vector argument with more than typical vector-length in LowerCall()
1720 ArgAlign = getFunctionByValParamAlign(CB->getCalledFunction(), ETy, in LowerCall()
1772 // than 32-bits are sign extended or zero extended, depending on in LowerCall()
1776 Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32; in LowerCall()
1814 // Use 16-bit registers for small stores as it's the in LowerCall()
1852 unsigned NumElts = StoreOperands.size() - 3; in LowerCall()
1897 --OIdx; in LowerCall()
1912 // .param .align N .b8 retval0[<size-in-bytes>], or in LowerCall()
1913 // .param .b<size-in-bits> retval0 in LowerCall()
1929 Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32), in LowerCall()
1947 VADeclareParam->getVTList(), DeclareParamOps); in LowerCall()
1964 CalleeFunc->addFnAttr("nvptx-libcall-callee", "true"); in LowerCall()
1980 CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1))) in LowerCall()
1983 const char *ProtoStr = nvTM->getStrPool().save(Proto).data(); in LowerCall()
2021 if (i == (e - 1)) in LowerCall()
2068 int VecIdx = -1; // Index of the first element of the vector. in LowerCall()
2071 // 32-bits are sign extended or zero extended, depending on whether in LowerCall()
2074 RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; in LowerCall()
2101 if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType() && in LowerCall()
2104 assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); in LowerCall()
2117 assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); in LowerCall()
2163 VecIdx = -1; in LowerCall()
2227 DAG.getContext()->diagnose(NoDynamicAlloca); in LowerDYNAMIC_STACKALLOC()
2235 uint64_t Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); in LowerDYNAMIC_STACKALLOC()
2238 // The size for ptx alloca instruction is 64-bit for m64 and 32-bit for m32. in LowerDYNAMIC_STACKALLOC()
2239 if (nvTM->is64Bit()) in LowerDYNAMIC_STACKALLOC()
2247 nvTM->is64Bit() ? MVT::i64 : MVT::i32, AllocOps); in LowerDYNAMIC_STACKALLOC()
2261 unsigned NumOperands = Node->getNumOperands(); in LowerCONCAT_VECTORS()
2263 SDValue SubOp = Node->getOperand(i); in LowerCONCAT_VECTORS()
2264 EVT VVT = SubOp.getNode()->getValueType(0); in LowerCONCAT_VECTORS()
2272 return DAG.getBuildVector(Node->getValueType(0), dl, Ops); in LowerCONCAT_VECTORS()
2276 // would get lowered as two constant loads and vector-packing move.
2281 EVT VT = Op->getValueType(0); in LowerBUILD_VECTOR()
2287 if (!llvm::all_of(Op->ops(), [](SDValue Operand) { in LowerBUILD_VECTOR()
2288 return Operand->isUndef() || isa<ConstantSDNode>(Operand) || in LowerBUILD_VECTOR()
2291 // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us in LowerBUILD_VECTOR()
2297 DAG.getAnyExtOrTrunc(Op->getOperand(1), DL, MVT::i32), in LowerBUILD_VECTOR()
2298 DAG.getAnyExtOrTrunc(Op->getOperand(0), DL, MVT::i32), C8, C8); in LowerBUILD_VECTOR()
2301 DAG.getAnyExtOrTrunc(Op->getOperand(2), DL, MVT::i32), in LowerBUILD_VECTOR()
2305 DAG.getAnyExtOrTrunc(Op->getOperand(3), DL, MVT::i32), in LowerBUILD_VECTOR()
2313 auto GetOperand = [](SDValue Op, int N) -> APInt { in LowerBUILD_VECTOR()
2314 const SDValue &Operand = Op->getOperand(N); in LowerBUILD_VECTOR()
2315 EVT VT = Op->getValueType(0); in LowerBUILD_VECTOR()
2316 if (Operand->isUndef()) in LowerBUILD_VECTOR()
2320 Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt(); in LowerBUILD_VECTOR()
2322 Value = Operand->getAsAPIntVal(); in LowerBUILD_VECTOR()
2341 return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const); in LowerBUILD_VECTOR()
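For the all-constant path above, the element values are collected and packed into one integer, then bit-cast back to the vector type. A rough standalone illustration of packing four i8 lanes into an i32 bit pattern (lane 0 in the low byte is assumed here; names are invented):

    #include <cstdint>
    // Pack four 8-bit lanes into one 32-bit constant, lane 0 in the lowest byte.
    uint32_t packV4I8(uint8_t e0, uint8_t e1, uint8_t e2, uint8_t e3) {
      return uint32_t(e0) | (uint32_t(e1) << 8) | (uint32_t(e2) << 16) |
             (uint32_t(e3) << 24);
    }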
2346 SDValue Index = Op->getOperand(1); in LowerEXTRACT_VECTOR_ELT()
2347 SDValue Vector = Op->getOperand(0); in LowerEXTRACT_VECTOR_ELT()
2359 return DAG.getAnyExtOrTrunc(BFE, DL, Op->getValueType(0)); in LowerEXTRACT_VECTOR_ELT()
2381 SDValue Vector = Op->getOperand(0); in LowerINSERT_VECTOR_ELT()
2387 SDValue Value = Op->getOperand(1); in LowerINSERT_VECTOR_ELT()
2388 if (Value->isUndef()) in LowerINSERT_VECTOR_ELT()
2391 SDValue Index = Op->getOperand(2); in LowerINSERT_VECTOR_ELT()
2400 return DAG.getNode(ISD::BITCAST, DL, Op->getValueType(0), BFI); in LowerINSERT_VECTOR_ELT()
2414 for (auto I : llvm::enumerate(SVN->getMask())) { in LowerVECTOR_SHUFFLE()
2415 if (I.value() != -1) // -1 is a placeholder for undef. in LowerVECTOR_SHUFFLE()
2424 /// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
2431 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); in LowerShiftRightParts()
2457 // - if (Amt>=size) then in LowerShiftRightParts()
2458 // dLo = aHi >> (Amt-size) in LowerShiftRightParts()
2461 // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) in LowerShiftRightParts()
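The comment block above spells out the case split for the two-part right shift: once the amount reaches the word size the low result comes entirely from the high half, otherwise the halves are combined. A compilable C++ sketch of a 128-bit logical right shift built from two 64-bit halves along the same lines (illustrative, not the DAG lowering):

    #include <cstdint>
    // Logical right shift of a 128-bit value given as (hi, lo), for 0 < amt < 128.
    void srlParts(uint64_t hi, uint64_t lo, unsigned amt, uint64_t &dHi, uint64_t &dLo) {
      const unsigned size = 64;
      if (amt >= size) {                        // result comes only from the high half
        dLo = hi >> (amt - size);
        dHi = 0;
      } else {                                  // combine both halves
        dLo = (lo >> amt) | (hi << (size - amt));
        dHi = hi >> amt;
      }
    }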
2485 /// LowerShiftLeftParts - Lower SHL_PARTS, which
2492 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); in LowerShiftLeftParts()
2517 // - if (Amt>=size) then in LowerShiftLeftParts()
2519 // dLo = aLo << (Amt-size) in LowerShiftLeftParts()
2522 // dHi = (aHi << Amt) | (aLo >> (size-Amt)) in LowerShiftLeftParts()
2560 // float RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f));
2572 // RoundedA = (float) (int) ( A > 0 ? (A + 0.5f) : (A - 0.5f)) in LowerFROUND32()
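The formula quoted above is the core of LowerFROUND32: add or subtract 0.5 depending on sign, then truncate toward zero, which rounds halfway cases away from zero. A tiny standalone check of that formula (it ignores the large-magnitude and NaN handling the real lowering also has to do):

    #include <cstdio>
    static float roundHalfAwayFromZero(float a) {
      return (float)(int)(a > 0 ? (a + 0.5f) : (a - 0.5f));
    }
    int main() {
      std::printf("%g %g\n", roundHalfAwayFromZero(2.5f),    // prints 3
                  roundHalfAwayFromZero(-2.5f));             // prints -3
      return 0;
    }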
2676 return TLI->expandFP_ROUND(Op.getNode(), DAG); in LowerFP_ROUND()
2679 // This combination was the first to support f32 -> bf16. in LowerFP_ROUND()
2686 // Round-inexact-to-odd f64 to f32, then do the final rounding using in LowerFP_ROUND()
2687 // the hardware f32 -> bf16 instruction. in LowerFP_ROUND()
2688 SDValue rod = TLI->expandRoundInexactToOdd( in LowerFP_ROUND()
2695 return TLI->expandFP_ROUND(Op.getNode(), DAG); in LowerFP_ROUND()
2740 llvm::transform(Op->ops(), std::back_inserter(ScalarArgs), in LowerVectorArith()
2830 const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); in LowerVAARG()
2831 EVT VT = Node->getValueType(0); in LowerVAARG()
2833 SDValue Tmp1 = Node->getOperand(0); in LowerVAARG()
2834 SDValue Tmp2 = Node->getOperand(1); in LowerVAARG()
2835 const MaybeAlign MA(Node->getConstantOperandVal(3)); in LowerVAARG()
2837 SDValue VAListLoad = DAG.getLoad(TLI->getPointerTy(DAG.getDataLayout()), DL, in LowerVAARG()
2841 if (MA && *MA > TLI->getMinStackArgumentAlignment()) { in LowerVAARG()
2844 DAG.getConstant(MA->value() - 1, DL, VAList.getValueType())); in LowerVAARG()
2848 DAG.getConstant(-(int64_t)MA->value(), DL, VAList.getValueType())); in LowerVAARG()
2870 EVT PtrVT = TLI->getPointerTy(DAG.getDataLayout()); in LowerVASTART()
2873 SDValue Arg = getParamSymbol(DAG, /* vararg */ -1, PtrVT); in LowerVASTART()
2876 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); in LowerVASTART()
2882 SDValue Op0 = Op->getOperand(0); in LowerSelect()
2883 SDValue Op1 = Op->getOperand(1); in LowerSelect()
2884 SDValue Op2 = Op->getOperand(2); in LowerSelect()
2906 EVT MemVT = Load->getMemoryVT(); in LowerLOAD()
2908 MemVT, *Load->getMemOperand())) { in LowerLOAD()
2920 // v1 = ld i8* addr (-> i16)
2926 assert(LD->getExtensionType() == ISD::NON_EXTLOAD); in LowerLOADi1()
2927 assert(Node->getValueType(0) == MVT::i1 && in LowerLOADi1()
2929 SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(), in LowerLOADi1()
2930 LD->getBasePtr(), LD->getPointerInfo(), in LowerLOADi1()
2931 MVT::i8, LD->getAlign(), in LowerLOADi1()
2932 LD->getMemOperand()->getFlags()); in LowerLOADi1()
2937 SDValue Ops[] = { result, LD->getChain() }; in LowerLOADi1()
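The i1 load above is widened because PTX has no 1-bit loads: the byte at the address is zero-extended (to i16) and then truncated back to i1. A plain C++ analogue of that widening (illustrative only):

    #include <cstdint>
    // Load an i1 stored as one byte: widen the byte, then truncate to the low bit.
    bool loadI1(const uint8_t *addr) {
      uint16_t widened = *addr;     // zextload i8 -> i16
      return (widened & 1) != 0;    // truncate i16 -> i1
    }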
2943 EVT VT = Store->getMemoryVT(); in LowerSTORE()
2952 VT, *Store->getMemOperand())) in LowerSTORE()
2968 SDValue Val = N->getOperand(1); in LowerSTOREVector()
3005 Align Alignment = MemSD->getAlign(); in LowerSTOREVector()
3051 Ops.push_back(N->getOperand(0)); in LowerSTOREVector()
3054 // Combine f16,f16 -> v2f16 in LowerSTOREVector()
3077 Ops.append(N->op_begin() + 2, N->op_end()); in LowerSTOREVector()
3081 MemSD->getMemoryVT(), MemSD->getMemOperand()); in LowerSTOREVector()
3098 SDValue Tmp1 = ST->getChain(); in LowerSTOREi1()
3099 SDValue Tmp2 = ST->getBasePtr(); in LowerSTOREi1()
3100 SDValue Tmp3 = ST->getValue(); in LowerSTOREi1()
3104 DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8, in LowerSTOREi1()
3105 ST->getAlign(), ST->getMemOperand()->getFlags()); in LowerSTOREi1()
3111 // Change the CopyToReg to take in two 64-bit operands instead of a 128-bit in LowerCopyToReg_128()
3115 "Custom lowering for 128-bit CopyToReg only"); in LowerCopyToReg_128()
3120 SDValue Cast = DAG.getBitcast(MVT::v2i64, Op->getOperand(2)); in LowerCopyToReg_128()
3126 SmallVector<SDValue, 5> NewOps(Op->getNumOperands() + 1); in LowerCopyToReg_128()
3127 SmallVector<EVT, 3> ResultsType(Node->values()); in LowerCopyToReg_128()
3129 NewOps[0] = Op->getOperand(0); // Chain in LowerCopyToReg_128()
3130 NewOps[1] = Op->getOperand(1); // Dst Reg in LowerCopyToReg_128()
3131 NewOps[2] = Lo; // Lower 64-bit in LowerCopyToReg_128()
3132 NewOps[3] = Hi; // Higher 64-bit in LowerCopyToReg_128()
3134 NewOps[4] = Op->getOperand(3); // Glue if exists in LowerCopyToReg_128()
3163 StringRef SavedStr = nvTM->getStrPool().save( in getParamSymbol()
3177 const AttributeList &PAL = F->getAttributes(); in LowerFormalArguments()
3184 assert(isABI && "Non-ABI compilation is not supported"); in LowerFormalArguments()
3190 for (const Argument &I : F->args()) { in LowerFormalArguments()
3198 // * if there is a vector argument with more than typical vector-length in LowerFormalArguments()
3208 if (theArgs[i]->use_empty()) { in LowerFormalArguments()
3210 if (IsTypePassedAsArray(Ty) && !Ty->isVectorTy()) { in LowerFormalArguments()
3223 --InsIdx; in LowerFormalArguments()
3226 if (Ty->isVectorTy()) { in LowerFormalArguments()
3228 unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); in LowerFormalArguments()
3234 --InsIdx; in LowerFormalArguments()
3248 aggregateIsPacked = STy->isPacked(); in LowerFormalArguments()
3261 int VecIdx = -1; // Index of the first element of the current vector. in LowerFormalArguments()
3264 assert(VecIdx == -1 && "Orphaned vector."); in LowerFormalArguments()
3270 unsigned NumElts = parti - VecIdx + 1; in LowerFormalArguments()
3282 EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts); in LowerFormalArguments()
3287 EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM)); in LowerFormalArguments()
3289 const MaybeAlign PartAlign = [&]() -> MaybeAlign { in LowerFormalArguments()
3295 DL.getABITypeAlign(EltVT.getTypeForEVT(F->getContext())); in LowerFormalArguments()
3303 P.getNode()->setIROrder(i + 1); in LowerFormalArguments()
3333 VecIdx = -1; in LowerFormalArguments()
3338 --InsIdx; in LowerFormalArguments()
3355 p.getNode()->setIROrder(i + 1); in LowerFormalArguments()
3365 // Use byte-store when the param address of the return value is unaligned.
3403 assert(isABI && "Non-ABI compilation is not supported"); in LowerReturn()
3430 RetTy->isSized() ? getFunctionParamOptimizedAlign(&F, RetTy, DL) in LowerReturn()
3434 // 32-bits are sign extended or zero extended, depending on whether in LowerReturn()
3437 RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; in LowerReturn()
3449 // Use 16-bit registers for small load-stores as it's the in LowerReturn()
3456 if (VectorInfo[i] == PVF_SCALAR && RetTy->isAggregateType()) { in LowerReturn()
3459 DL.getABITypeAlign(ElementType.getTypeForEVT(RetTy->getContext())); in LowerReturn()
3486 unsigned NumElts = StoreOperands.size() - 2; in LowerReturn()
4676 Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); in getTgtMemIntrinsic()
4695 Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); in getTgtMemIntrinsic()
5090 /// getFunctionParamOptimizedAlign - since function arguments are passed via
5095 /// alignment. To allow using 128-bit vectorized loads/stores, this function
5106 if (!F || !F->hasLocalLinkage() || in getFunctionParamOptimizedAlign()
5107 F->hasAddressTaken(/*Users=*/nullptr, in getFunctionParamOptimizedAlign()
5113 assert(!isKernelFunction(*F) && "Expect kernels to have non-local linkage"); in getFunctionParamOptimizedAlign()
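Per the doc comment above, parameters of local functions whose address is never taken may be over-aligned so that 128-bit vectorized .param loads/stores become legal. A hedged sketch of the alignment policy this describes (simplified assumption, not the real function):

    #include <algorithm>
    #include <cstdint>
    // Raise a parameter's alignment to at least 16 bytes so that 128-bit
    // (16-byte) vectorized accesses are allowed; otherwise keep the ABI value.
    uint64_t optimizedParamAlign(uint64_t abiAlign) {
      return std::max<uint64_t>(abiAlign, 16);
    }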
5134 // on non-deprecated ptxas versions. in getFunctionByValParamAlign()
5149 ParamStr << getTargetMachine().getSymbol(F)->getName(); in getParamName()
5158 /// isLegalAddressingMode - Return true if the addressing mode represented
5166 // AddrMode - This represents an addressing mode of: in isLegalAddressingMode()
5170 // - [avar] in isLegalAddressingMode()
5171 // - [areg] in isLegalAddressingMode()
5172 // - [areg+immoff] in isLegalAddressingMode()
5173 // - [immAddr] in isLegalAddressingMode()
5175 // immoff must fit in a signed 32-bit int in isLegalAddressingMode()
5197 //===----------------------------------------------------------------------===//
5199 //===----------------------------------------------------------------------===//
5201 /// getConstraintType - Given a constraint letter, return the type of
5226 NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, in getRegForInlineAsmConstraint() argument
5254 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); in getRegForInlineAsmConstraint()
5257 //===----------------------------------------------------------------------===//
5259 //===----------------------------------------------------------------------===//
5263 // Always honor command-line argument in allowFMA()
5283 // Allow unsafe math if unsafe-fp-math attribute explicitly says so. in allowUnsafeFPMath()
5285 return F.getFnAttribute("unsafe-fp-math").getValueAsBool(); in allowUnsafeFPMath()
5290 return Const && Const->getZExtValue() == 0; in isConstZero()
5293 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
5302 // Since integer multiply-add costs the same as integer multiply in PerformADDCombineWithOperands()
5306 if (!N0.getNode()->hasOneUse()) in PerformADDCombineWithOperands()
5309 // fold (add (mul a, b), c) -> (mad a, b, c) in PerformADDCombineWithOperands()
5316 // -> (select cond, c, (mad a, b, c)) in PerformADDCombineWithOperands()
5320 if (isConstZero(N0->getOperand(1))) in PerformADDCombineWithOperands()
5322 else if (isConstZero(N0->getOperand(2))) in PerformADDCombineWithOperands()
5327 SDValue M = N0->getOperand((ZeroOpNum == 1) ? 2 : 1); in PerformADDCombineWithOperands()
5328 if (M->getOpcode() != ISD::MUL || !M.getNode()->hasOneUse()) in PerformADDCombineWithOperands()
5332 M->getOperand(0), M->getOperand(1), N1); in PerformADDCombineWithOperands()
5333 return DCI.DAG.getSelect(SDLoc(N), VT, N0->getOperand(0), in PerformADDCombineWithOperands()
5349 if (!TLI->allowFMA(DCI.DAG.getMachineFunction(), OptLevel)) in PerformFADDCombineWithOperands()
5362 for (const SDNode *User : N0.getNode()->uses()) { in PerformFADDCombineWithOperands()
5364 if (User->getOpcode() != ISD::FADD) in PerformFADDCombineWithOperands()
5370 int orderNo = N->getIROrder(); in PerformFADDCombineWithOperands()
5371 int orderNo2 = N0.getNode()->getIROrder(); in PerformFADDCombineWithOperands()
5376 if (orderNo - orderNo2 < 500) in PerformFADDCombineWithOperands()
5390 for (const SDNode *User : left->uses()) { in PerformFADDCombineWithOperands()
5391 int orderNo3 = User->getIROrder(); in PerformFADDCombineWithOperands()
5399 for (const SDNode *User : right->uses()) { in PerformFADDCombineWithOperands()
5400 int orderNo3 = User->getIROrder(); in PerformFADDCombineWithOperands()
5420 if (all_of(N->ops().drop_front(Front).drop_back(Back), in PerformStoreCombineHelper()
5421 [](const SDUse &U) { return U.get()->isUndef(); })) in PerformStoreCombineHelper()
5424 return N->getOperand(0); in PerformStoreCombineHelper()
5440 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
5448 SDValue N0 = N->getOperand(0); in PerformADDCombine()
5449 SDValue N1 = N->getOperand(1); in PerformADDCombine()
5451 // Skip non-integer, non-scalar case in PerformADDCombine()
5464 /// PerformFADDCombine - Target-specific dag combine xforms for ISD::FADD.
5469 SDValue N0 = N->getOperand(0); in PerformFADDCombine()
5470 SDValue N1 = N->getOperand(1); in PerformFADDCombine()
5489 // target-specific DAG node, the DAG combiner fails to eliminate these AND in PerformANDCombine()
5491 SDValue Val = N->getOperand(0); in PerformANDCombine()
5492 SDValue Mask = N->getOperand(1); in PerformANDCombine()
5500 // Convert BFE -> truncate i16 -> and 255 in PerformANDCombine()
5501 // To just BFE -> truncate i16, as the value already has all the bits in the in PerformANDCombine()
5511 uint64_t BFEBitsVal = BFEBits->getZExtValue(); in PerformANDCombine()
5518 uint64_t MaskVal = MaskCnst->getZExtValue(); in PerformANDCombine()
5520 if (MaskVal != (uint64_t(1) << BFEBitsVal) - 1) in PerformANDCombine()
5525 // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and in PerformANDCombine()
5528 Val = Val->getOperand(0); in PerformANDCombine()
5531 if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { in PerformANDCombine()
5532 Val = Val->getOperand(0); in PerformANDCombine()
5535 if (Val->getOpcode() == NVPTXISD::LoadV2 || in PerformANDCombine()
5536 Val->getOpcode() == NVPTXISD::LoadV4) { in PerformANDCombine()
5543 uint64_t MaskVal = MaskCnst->getZExtValue(); in PerformANDCombine()
5555 EVT MemVT = Mem->getMemoryVT(); in PerformANDCombine()
5561 unsigned ExtType = Val->getConstantOperandVal(Val->getNumOperands() - 1); in PerformANDCombine()
5570 // Re-insert the ext as a zext. in PerformANDCombine()
5586 assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM); in PerformREMCombine()
5588 // Don't do anything at less than -O2. in PerformREMCombine()
5594 EVT VT = N->getValueType(0); in PerformREMCombine()
5595 bool IsSigned = N->getOpcode() == ISD::SREM; in PerformREMCombine()
5598 const SDValue &Num = N->getOperand(0); in PerformREMCombine()
5599 const SDValue &Den = N->getOperand(1); in PerformREMCombine()
5601 for (const SDNode *U : Num->uses()) { in PerformREMCombine()
5602 if (U->getOpcode() == DivOpc && U->getOperand(0) == Num && in PerformREMCombine()
5603 U->getOperand(1) == Den) { in PerformREMCombine()
5604 // Num % Den -> Num - (Num / Den) * Den in PerformREMCombine()
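The rewrite noted above fires only when the matching division already exists, so the remainder can reuse the quotient instead of emitting a second divide. The identity is easy to verify in plain C++ (illustration, not the combiner itself):

    #include <cstdint>
    // Given an existing quotient = num / den (den != 0), the remainder is
    // num % den == num - quotient * den.
    uint32_t remFromDiv(uint32_t num, uint32_t den, uint32_t quotient) {
      return num - quotient * den;
    }
    // e.g. remFromDiv(17, 5, 17 / 5) == 17 % 5 == 2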
5620 /// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
5646 /// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
5667 const APInt &Val = CI->getAPIntValue(); in AreMulWideOperandsDemotable()
5682 /// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
5683 /// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
5688 EVT MulType = N->getValueType(0); in TryMULWIDECombine()
5695 SDValue LHS = N->getOperand(0); in TryMULWIDECombine()
5696 SDValue RHS = N->getOperand(1); in TryMULWIDECombine()
5699 if (N->getOpcode() == ISD::MUL) { in TryMULWIDECombine()
5706 if (N->getOpcode() == ISD::SHL) { in TryMULWIDECombine()
5712 APInt ShiftAmt = ShlRHS->getAPIntValue(); in TryMULWIDECombine()
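TryMULWIDECombine above looks for an M-bit multiply (or shift-left) whose operands really carry only M/2 significant bits, and replaces it with a half-width multiply producing the full-width product, i.e. PTX mul.wide. In C++ terms the transformation amounts to the following (illustrative, names invented):

    #include <cstdint>
    // A 64-bit multiply whose operands are known to fit in 32 bits can be done
    // as a widening 32x32 -> 64 multiply (what PTX exposes as mul.wide.s32).
    int64_t mulWideS32(int32_t a, int32_t b) {
      return (int64_t)a * (int64_t)b;
    }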
5754 return Const && Const->getZExtValue() == 1; in isConstOne()
5758 if (Add->getOpcode() != ISD::ADD) in matchMADConstOnePattern()
5761 if (isConstOne(Add->getOperand(0))) in matchMADConstOnePattern()
5762 return Add->getOperand(1); in matchMADConstOnePattern()
5764 if (isConstOne(Add->getOperand(1))) in matchMADConstOnePattern()
5765 return Add->getOperand(0); in matchMADConstOnePattern()
5782 if (Select->getOpcode() != ISD::SELECT) in combineMulSelectConstOne()
5785 SDValue Cond = Select->getOperand(0); in combineMulSelectConstOne()
5788 if (isConstOne(Select->getOperand(1))) in combineMulSelectConstOne()
5790 else if (isConstOne(Select->getOperand(2))) in combineMulSelectConstOne()
5795 SDValue Y = Select->getOperand((ConstOpNo == 1) ? 2 : 1); in combineMulSelectConstOne()
5821 // (mul x, (add y, 1)) -> (mad x, y, x) in PerformMULCombineWithOperands()
5827 // (mul x, (select y, 1)) -> (select (mul x, y), x) in PerformMULCombineWithOperands()
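Both rewrites above are simple algebra: x * (y + 1) == x * y + x, which maps to a single mad, and when one select arm is the constant 1 the multiply folds into the other arm. A one-line check of the first identity (illustrative):

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t x = 7, y = 11;
      assert(x * (y + 1) == x * y + x);  // (mul x, (add y, 1)) == (mad x, y, x)
      return 0;
    }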
5836 /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
5846 SDValue N0 = N->getOperand(0); in PerformMULCombine()
5847 SDValue N1 = N->getOperand(1); in PerformMULCombine()
5851 /// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
5867 EVT CCType = N->getValueType(0); in PerformSETCCCombine()
5868 SDValue A = N->getOperand(0); in PerformSETCCCombine()
5869 SDValue B = N->getOperand(1); in PerformSETCCCombine()
5886 DL, DCI.DAG.getVTList(MVT::i1, MVT::i1), {A, B, N->getOperand(2)}); in PerformSETCCCombine()
5893 SDValue Vector = N->getOperand(0); in PerformEXTRACTCombine()
5896 if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() && in PerformEXTRACTCombine()
5907 if (Vector->isUndef() || ISD::allOperandsUndef(Vector.getNode())) in PerformEXTRACTCombine()
5911 // We only handle the types we can extract in-register. in PerformEXTRACTCombine()
5915 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(N->getOperand(1)); in PerformEXTRACTCombine()
5917 if (!Index || Index->getZExtValue() == 0) in PerformEXTRACTCombine()
5929 DCI.DAG.getConstant(Index->getZExtValue() * EltBits, DL, IVT))); in PerformEXTRACTCombine()
5931 // If element has non-integer type, bitcast it back to the expected type. in PerformEXTRACTCombine()
5934 // Past legalizer, we may need to extend i8 -> i16 to match the register type. in PerformEXTRACTCombine()
5935 if (EltVT != N->getValueType(0)) in PerformEXTRACTCombine()
5936 Result = DCI.DAG.getNode(ISD::ANY_EXTEND, DL, N->getValueType(0), Result); in PerformEXTRACTCombine()
5943 SDValue VA = N->getOperand(1); in PerformVSELECTCombine()
5948 // We need to split vselect into individual per-element operations because we in PerformVSELECTCombine()
5950 // 32-bit values, so we may as well do comparison as i32 to avoid conversions in PerformVSELECTCombine()
5954 SDValue VCond = N->getOperand(0); in PerformVSELECTCombine()
5955 SDValue VB = N->getOperand(2); in PerformVSELECTCombine()
5980 // This is done at dag-combine1 time, so that vector operations with i8 in PerformLOADCombine()
5983 EVT VT = N->getValueType(0); in PerformLOADCombine()
5996 SmallVector<SDValue, 8> Ops(N->ops()); in PerformLOADCombine()
5997 Ops.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); in PerformLOADCombine()
5999 LD->getMemOperand()); in PerformLOADCombine()
6015 switch (N->getOpcode()) { in PerformDAGCombine()
6050 /// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
6053 EVT ResVT = N->getValueType(0); in ReplaceLoadVector()
6086 Align Alignment = LD->getAlign(); in ReplaceLoadVector()
6089 TD.getPrefTypeAlign(LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); in ReplaceLoadVector()
6156 SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); in ReplaceLoadVector()
6160 OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); in ReplaceLoadVector()
6163 LD->getMemoryVT(), in ReplaceLoadVector()
6164 LD->getMemOperand()); in ReplaceLoadVector()
6198 SDValue Chain = N->getOperand(0); in ReplaceINTRINSIC_W_CHAIN()
6199 SDValue Intrin = N->getOperand(1); in ReplaceINTRINSIC_W_CHAIN()
6203 unsigned IntrinNo = Intrin.getNode()->getAsZExtVal(); in ReplaceINTRINSIC_W_CHAIN()
6213 EVT ResVT = N->getValueType(0); in ReplaceINTRINSIC_W_CHAIN()
6282 OtherOps.append(N->op_begin() + 2, N->op_end()); in ReplaceINTRINSIC_W_CHAIN()
6287 MemSD->getMemoryVT(), in ReplaceINTRINSIC_W_CHAIN()
6288 MemSD->getMemOperand()); in ReplaceINTRINSIC_W_CHAIN()
6310 "Custom handling of non-i8 ldu/ldg?"); in ReplaceINTRINSIC_W_CHAIN()
6312 // Just copy all operands as-is in ReplaceINTRINSIC_W_CHAIN()
6313 SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); in ReplaceINTRINSIC_W_CHAIN()
6324 MVT::i8, MemSD->getMemOperand()); in ReplaceINTRINSIC_W_CHAIN()
6336 // Change the CopyFromReg to output 2 64-bit results instead of a 128-bit in ReplaceCopyFromReg_128()
6339 SDValue Chain = N->getOperand(0); in ReplaceCopyFromReg_128()
6340 SDValue Reg = N->getOperand(1); in ReplaceCopyFromReg_128()
6341 SDValue Glue = N->getOperand(2); in ReplaceCopyFromReg_128()
6344 "Custom lowering for CopyFromReg with 128-bit reg only"); in ReplaceCopyFromReg_128()
6345 SmallVector<EVT, 4> ResultsType = {MVT::i64, MVT::i64, N->getValueType(1), in ReplaceCopyFromReg_128()
6346 N->getValueType(2)}; in ReplaceCopyFromReg_128()
6360 switch (N->getOpcode()) { in ReplaceNodeResults()
6377 Type *Ty = AI->getValOperand()->getType(); in shouldExpandAtomicRMWInIR()
6379 if (AI->isFloatingPointOperation()) { in shouldExpandAtomicRMWInIR()
6380 if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { in shouldExpandAtomicRMWInIR()
6381 if (Ty->isHalfTy() && STI.getSmVersion() >= 70 && in shouldExpandAtomicRMWInIR()
6384 if (Ty->isBFloatTy() && STI.getSmVersion() >= 90 && in shouldExpandAtomicRMWInIR()
6387 if (Ty->isFloatTy()) in shouldExpandAtomicRMWInIR()
6389 if (Ty->isDoubleTy() && STI.hasAtomAddF64()) in shouldExpandAtomicRMWInIR()
6395 assert(Ty->isIntegerTy() && "Ty should be integer at this point"); in shouldExpandAtomicRMWInIR()
6398 switch (AI->getOperation()) { in shouldExpandAtomicRMWInIR()
6405 switch (ITy->getBitWidth()) { in shouldExpandAtomicRMWInIR()
6424 switch (ITy->getBitWidth()) { in shouldExpandAtomicRMWInIR()