Lines Matching +full:abs +full:- +full:flat

1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //==-----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
39 #define DEBUG_TYPE "amdgpu-isel"
43 //===----------------------------------------------------------------------===//
45 //===----------------------------------------------------------------------===//
52 // Figure out if this is really an extract of the high 16-bits of a dword.
58 if (!Idx->isOne()) in isExtractHiElt()
71 if (ShiftAmt->getZExtValue() == 16) { in isExtractHiElt()
81 // Look through operations that obscure just looking at the low 16-bits of the
101 INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
102 "AMDGPU DAG->DAG Pattern Instruction Selection", false,
111 INITIALIZE_PASS_END(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel", in INITIALIZE_PASS_DEPENDENCY()
112 "AMDGPU DAG->DAG Pattern Instruction Selection", false, in INITIALIZE_PASS_DEPENDENCY()
115 /// This pass converts a legalized DAG into a AMDGPU-specific in INITIALIZE_PASS_DEPENDENCY()
130 Subtarget->checkSubtargetFeatures(MF.getFunction()); in runOnMachineFunction()
136 // XXX - only need to list legal operations. in fp16SrcZerosHighBits()
180 // On gfx10, all 16-bit instructions preserve the high bits. in fp16SrcZerosHighBits()
181 return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9; in fp16SrcZerosHighBits()
186 return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS; in fp16SrcZerosHighBits()
190 return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS; in fp16SrcZerosHighBits()
192 // fcopysign, select and others may be lowered to 32-bit bit operations in fp16SrcZerosHighBits()
202 for (auto &L : LI->getLoopsInPreorder()) { in runOnMachineFunction()
203 assert(L->isLCSSAForm(DT)); in runOnMachineFunction()
220 assert(Subtarget->d16PreservesUnusedBits()); in matchLoadD16FromBuildVector()
221 MVT VT = N->getValueType(0).getSimpleVT(); in matchLoadD16FromBuildVector()
225 SDValue Lo = N->getOperand(0); in matchLoadD16FromBuildVector()
226 SDValue Hi = N->getOperand(1); in matchLoadD16FromBuildVector()
230 // build_vector lo, (load ptr) -> load_d16_hi ptr, lo in matchLoadD16FromBuildVector()
231 // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo in matchLoadD16FromBuildVector()
232 // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo in matchLoadD16FromBuildVector()
236 if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) { in matchLoadD16FromBuildVector()
237 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other); in matchLoadD16FromBuildVector()
239 SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo); in matchLoadD16FromBuildVector()
241 LdHi->getChain(), LdHi->getBasePtr(), TiedIn in matchLoadD16FromBuildVector()
245 if (LdHi->getMemoryVT() == MVT::i8) { in matchLoadD16FromBuildVector()
246 LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ? in matchLoadD16FromBuildVector()
249 assert(LdHi->getMemoryVT() == MVT::i16); in matchLoadD16FromBuildVector()
253 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList, in matchLoadD16FromBuildVector()
254 Ops, LdHi->getMemoryVT(), in matchLoadD16FromBuildVector()
255 LdHi->getMemOperand()); in matchLoadD16FromBuildVector()
257 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi); in matchLoadD16FromBuildVector()
258 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1)); in matchLoadD16FromBuildVector()
262 // build_vector (load ptr), hi -> load_d16_lo ptr, hi in matchLoadD16FromBuildVector()
263 // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi in matchLoadD16FromBuildVector()
264 // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi in matchLoadD16FromBuildVector()
268 if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode())) in matchLoadD16FromBuildVector()
271 SDVTList VTList = CurDAG->getVTList(VT, MVT::Other); in matchLoadD16FromBuildVector()
273 if (LdLo->getMemoryVT() == MVT::i8) { in matchLoadD16FromBuildVector()
274 LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ? in matchLoadD16FromBuildVector()
277 assert(LdLo->getMemoryVT() == MVT::i16); in matchLoadD16FromBuildVector()
280 TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn); in matchLoadD16FromBuildVector()
283 LdLo->getChain(), LdLo->getBasePtr(), TiedIn in matchLoadD16FromBuildVector()
287 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList, in matchLoadD16FromBuildVector()
288 Ops, LdLo->getMemoryVT(), in matchLoadD16FromBuildVector()
289 LdLo->getMemOperand()); in matchLoadD16FromBuildVector()
291 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo); in matchLoadD16FromBuildVector()
292 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1)); in matchLoadD16FromBuildVector()
300 if (!Subtarget->d16PreservesUnusedBits()) in PreprocessISelDAG()
303 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); in PreprocessISelDAG()
306 while (Position != CurDAG->allnodes_begin()) { in PreprocessISelDAG()
307 SDNode *N = &*--Position; in PreprocessISelDAG()
308 if (N->use_empty()) in PreprocessISelDAG()
311 switch (N->getOpcode()) { in PreprocessISelDAG()
322 CurDAG->RemoveDeadNodes(); in PreprocessISelDAG()
324 CurDAG->dump();); in PreprocessISelDAG()
329 if (N->isUndef()) in isInlineImmediate()
332 const SIInstrInfo *TII = Subtarget->getInstrInfo(); in isInlineImmediate()
334 return TII->isInlineConstant(C->getAPIntValue()); in isInlineImmediate()
337 return TII->isInlineConstant(C->getValueAPF()); in isInlineImmediate()
348 if (!N->isMachineOpcode()) { in getOperandRegClass()
349 if (N->getOpcode() == ISD::CopyToReg) { in getOperandRegClass()
350 Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); in getOperandRegClass()
352 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); in getOperandRegClass()
357 = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo(); in getOperandRegClass()
358 return TRI->getPhysRegBaseClass(Reg); in getOperandRegClass()
364 switch (N->getMachineOpcode()) { in getOperandRegClass()
367 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); in getOperandRegClass()
372 if (RegClass == -1) in getOperandRegClass()
375 return Subtarget->getRegisterInfo()->getRegClass(RegClass); in getOperandRegClass()
378 unsigned RCID = N->getConstantOperandVal(0); in getOperandRegClass()
380 Subtarget->getRegisterInfo()->getRegClass(RCID); in getOperandRegClass()
382 SDValue SubRegOp = N->getOperand(OpNo + 1); in getOperandRegClass()
383 unsigned SubRegIdx = SubRegOp->getAsZExtVal(); in getOperandRegClass()
384 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, in getOperandRegClass()
394 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) in glueCopyToOp()
395 Ops.push_back(N->getOperand(i)); in glueCopyToOp()
398 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); in glueCopyToOp()
405 assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain"); in glueCopyToM0()
407 SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val); in glueCopyToM0()
412 unsigned AS = cast<MemSDNode>(N)->getAddressSpace(); in glueCopyToM0LDSInit()
414 if (Subtarget->ldsRequiresM0Init()) in glueCopyToM0LDSInit()
415 return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); in glueCopyToM0LDSInit()
417 MachineFunction &MF = CurDAG->getMachineFunction(); in glueCopyToM0LDSInit()
418 unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize(); in glueCopyToM0LDSInit()
420 glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32)); in glueCopyToM0LDSInit()
427 SDNode *Lo = CurDAG->getMachineNode( in buildSMovImm64()
429 CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32)); in buildSMovImm64()
431 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, in buildSMovImm64()
432 CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32)); in buildSMovImm64()
434 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), in buildSMovImm64()
435 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), in buildSMovImm64()
436 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)}; in buildSMovImm64()
438 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops); in buildSMovImm64()
442 EVT VT = N->getValueType(0); in SelectBuildVector()
446 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); in SelectBuildVector()
449 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), in SelectBuildVector()
461 bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() == in SelectBuildVector()
463 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); in SelectBuildVector()
465 unsigned NOps = N->getNumOperands(); in SelectBuildVector()
468 if (isa<RegisterSDNode>(N->getOperand(i))) { in SelectBuildVector()
474 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); in SelectBuildVector()
475 RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32); in SelectBuildVector()
479 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); in SelectBuildVector()
480 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, in SelectBuildVector()
487 CurDAG->getTargetConstant(Sub, DL, MVT::i32); in SelectBuildVector()
493 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); in SelectBuildVector()
497 unsigned int Opc = N->getOpcode(); in Select()
498 if (N->isMachineOpcode()) { in Select()
499 N->setNodeId(-1); in Select()
521 if (N->getValueType(0) != MVT::i64) in Select()
529 if (N->getValueType(0) != MVT::i32) in Select()
550 EVT VT = N->getValueType(0); in Select()
565 SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID(); in Select()
572 if (N->getValueType(0) == MVT::i128) { in Select()
573 RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32); in Select()
574 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); in Select()
575 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); in Select()
576 } else if (N->getValueType(0) == MVT::i64) { in Select()
577 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); in Select()
578 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); in Select()
579 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); in Select()
583 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, in Select()
584 N->getOperand(1), SubReg1 }; in Select()
585 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, in Select()
586 N->getValueType(0), Ops)); in Select()
592 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) in Select()
597 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); in Select()
602 Imm = C->getZExtValue(); in Select()
608 ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0))); in Select()
621 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); in Select()
625 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); in Select()
631 uint32_t OffsetVal = Offset->getZExtValue(); in Select()
632 uint32_t WidthVal = Width->getZExtValue(); in Select()
634 ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal, in Select()
660 if (N->getValueType(0) != MVT::i32) in Select()
677 if (N->getValueType(0) == MVT::i32) { in Select()
679 N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT), in Select()
680 { N->getOperand(0), N->getOperand(1) }); in Select()
713 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); in isUniformBr()
714 const Instruction *Term = BB->getTerminator(); in isUniformBr()
715 return Term->getMetadata("amdgpu.uniform") || in isUniformBr()
716 Term->getMetadata("structurizecfg.uniform"); in isUniformBr()
721 assert(N->getOpcode() == ISD::AND); in isUnneededShiftMask()
723 const APInt &RHS = N->getConstantOperandAPInt(1); in isUnneededShiftMask()
727 const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero; in isUnneededShiftMask()
735 // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e. in getBaseWithOffsetUsingSplitOR()
764 if (CurDAG->isBaseWithConstantOffset(Addr)) { in isBaseWithConstantOffset64()
779 return "AMDGPU DAG->DAG Pattern Instruction Selection"; in getPassName()
796 assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!"); in run()
801 //===----------------------------------------------------------------------===//
803 //===----------------------------------------------------------------------===//
816 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); in SelectADDRIndirect()
817 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); in SelectADDRIndirect()
820 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); in SelectADDRIndirect()
821 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); in SelectADDRIndirect()
825 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); in SelectADDRIndirect()
828 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectADDRIndirect()
836 SDNode *Mov = CurDAG->getMachineNode( in getMaterializedScalarImm32()
838 CurDAG->getTargetConstant(Val, DL, MVT::i32)); in getMaterializedScalarImm32()
845 SDValue LHS = N->getOperand(0); in SelectADD_SUB_I64()
846 SDValue RHS = N->getOperand(1); in SelectADD_SUB_I64()
848 unsigned Opcode = N->getOpcode(); in SelectADD_SUB_I64()
854 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); in SelectADD_SUB_I64()
855 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); in SelectADD_SUB_I64()
857 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, in SelectADD_SUB_I64()
859 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, in SelectADD_SUB_I64()
862 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, in SelectADD_SUB_I64()
864 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, in SelectADD_SUB_I64()
867 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); in SelectADD_SUB_I64()
875 unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd]; in SelectADD_SUB_I64()
876 unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd]; in SelectADD_SUB_I64()
881 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args); in SelectADD_SUB_I64()
883 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) }; in SelectADD_SUB_I64()
884 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args); in SelectADD_SUB_I64()
891 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs); in SelectADD_SUB_I64()
894 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), in SelectADD_SUB_I64()
900 SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, in SelectADD_SUB_I64()
904 // Replace the carry-use in SelectADD_SUB_I64()
914 SDValue LHS = N->getOperand(0); in SelectAddcSubb()
915 SDValue RHS = N->getOperand(1); in SelectAddcSubb()
916 SDValue CI = N->getOperand(2); in SelectAddcSubb()
918 if (N->isDivergent()) { in SelectAddcSubb()
919 unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY ? AMDGPU::V_ADDC_U32_e64 in SelectAddcSubb()
921 CurDAG->SelectNodeTo( in SelectAddcSubb()
922 N, Opc, N->getVTList(), in SelectAddcSubb()
924 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); in SelectAddcSubb()
926 unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY ? AMDGPU::S_ADD_CO_PSEUDO in SelectAddcSubb()
928 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI}); in SelectAddcSubb()
936 bool IsAdd = N->getOpcode() == ISD::UADDO; in SelectUADDO_USUBO()
937 bool IsVALU = N->isDivergent(); in SelectUADDO_USUBO()
939 for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; in SelectUADDO_USUBO()
942 if ((IsAdd && (UI->getOpcode() != ISD::UADDO_CARRY)) || in SelectUADDO_USUBO()
943 (!IsAdd && (UI->getOpcode() != ISD::USUBO_CARRY))) { in SelectUADDO_USUBO()
952 CurDAG->SelectNodeTo( in SelectUADDO_USUBO()
953 N, Opc, N->getVTList(), in SelectUADDO_USUBO()
954 {N->getOperand(0), N->getOperand(1), in SelectUADDO_USUBO()
955 CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); in SelectUADDO_USUBO()
957 unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO in SelectUADDO_USUBO()
960 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), in SelectUADDO_USUBO()
961 {N->getOperand(0), N->getOperand(1)}); in SelectUADDO_USUBO()
970 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]); in SelectFMA_W_CHAIN()
971 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); in SelectFMA_W_CHAIN()
972 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]); in SelectFMA_W_CHAIN()
973 Ops[8] = N->getOperand(0); in SelectFMA_W_CHAIN()
974 Ops[9] = N->getOperand(4); in SelectFMA_W_CHAIN()
978 bool UseFMAC = Subtarget->hasDLInsts() && in SelectFMA_W_CHAIN()
979 cast<ConstantSDNode>(Ops[0])->isZero() && in SelectFMA_W_CHAIN()
980 cast<ConstantSDNode>(Ops[2])->isZero() && in SelectFMA_W_CHAIN()
981 cast<ConstantSDNode>(Ops[4])->isZero(); in SelectFMA_W_CHAIN()
983 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops); in SelectFMA_W_CHAIN()
991 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]); in SelectFMUL_W_CHAIN()
992 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); in SelectFMUL_W_CHAIN()
993 Ops[6] = N->getOperand(0); in SelectFMUL_W_CHAIN()
994 Ops[7] = N->getOperand(3); in SelectFMUL_W_CHAIN()
996 CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops); in SelectFMUL_W_CHAIN()
1003 EVT VT = N->getValueType(0); in SelectDIV_SCALE()
1013 SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); in SelectDIV_SCALE()
1014 SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]); in SelectDIV_SCALE()
1015 SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]); in SelectDIV_SCALE()
1016 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); in SelectDIV_SCALE()
1023 bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; in SelectMAD_64_32()
1025 if (Subtarget->hasMADIntraFwdBug()) in SelectMAD_64_32()
1031 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); in SelectMAD_64_32()
1032 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), in SelectMAD_64_32()
1034 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); in SelectMAD_64_32()
1041 bool Signed = N->getOpcode() == ISD::SMUL_LOHI; in SelectMUL_LOHI()
1043 if (Subtarget->hasMADIntraFwdBug()) in SelectMUL_LOHI()
1049 SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64); in SelectMUL_LOHI()
1050 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); in SelectMUL_LOHI()
1051 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp}; in SelectMUL_LOHI()
1052 SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops); in SelectMUL_LOHI()
1054 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32); in SelectMUL_LOHI()
1055 SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL, in SelectMUL_LOHI()
1060 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32); in SelectMUL_LOHI()
1061 SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL, in SelectMUL_LOHI()
1065 CurDAG->RemoveDeadNode(N); in SelectMUL_LOHI()
1072 if (!Base || Subtarget->hasUsableDSOffset() || in isDSOffsetLegal()
1073 Subtarget->unsafeDSOffsetFoldingEnabled()) in isDSOffsetLegal()
1078 return CurDAG->SignBitIsZero(Base); in isDSOffsetLegal()
1084 if (CurDAG->isBaseWithConstantOffset(Addr)) { in SelectDS1Addr1Offset()
1088 if (isDSOffsetLegal(N0, C1->getSExtValue())) { in SelectDS1Addr1Offset()
1091 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); in SelectDS1Addr1Offset()
1095 // sub C, x -> add (sub 0, x), C in SelectDS1Addr1Offset()
1097 int64_t ByteOffset = C->getSExtValue(); in SelectDS1Addr1Offset()
1099 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectDS1Addr1Offset()
1101 // XXX - This is kind of hacky. Create a dummy sub node so we can check in SelectDS1Addr1Offset()
1104 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, in SelectDS1Addr1Offset()
1112 // FIXME: Select to VOP3 version for with-carry. in SelectDS1Addr1Offset()
1114 if (Subtarget->hasAddNoCarry()) { in SelectDS1Addr1Offset()
1117 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit in SelectDS1Addr1Offset()
1121 CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds); in SelectDS1Addr1Offset()
1124 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16); in SelectDS1Addr1Offset()
1137 if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) { in SelectDS1Addr1Offset()
1138 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectDS1Addr1Offset()
1139 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, in SelectDS1Addr1Offset()
1142 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); in SelectDS1Addr1Offset()
1149 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); in SelectDS1Addr1Offset()
1161 if (!Base || Subtarget->hasUsableDSOffset() || in isDSOffset2Legal()
1162 Subtarget->unsafeDSOffsetFoldingEnabled()) in isDSOffset2Legal()
1167 return CurDAG->SignBitIsZero(Base); in isDSOffset2Legal()
1173 Addr->getFlags().hasNoUnsignedWrap()) || in isNoUnsignedWrap()
1174 Addr->getOpcode() == ISD::OR; in isNoUnsignedWrap()
1177 // Check that the base address of flat scratch load/store in the form of `base +
1186 if (Subtarget->hasSignedScratchOffsets()) in isFlatScratchBaseLegal()
1198 if (ImmOp->getSExtValue() < 0 && ImmOp->getSExtValue() > -0x40000000) in isFlatScratchBaseLegal()
1202 return CurDAG->SignBitIsZero(LHS); in isFlatScratchBaseLegal()
1205 // Check address value in SGPR/VGPR are legal for flat scratch in the form
1213 if (Subtarget->hasSignedScratchOffsets()) in isFlatScratchBaseLegalSV()
1218 return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS); in isFlatScratchBaseLegalSV()
1221 // Check address value in SGPR/VGPR are legal for flat scratch in the form
1237 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000))) in isFlatScratchBaseLegalSVImm()
1242 return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS); in isFlatScratchBaseLegalSVImm()
1245 // TODO: If offset is too big, put low 16-bit into offset.
1263 if (CurDAG->isBaseWithConstantOffset(Addr)) { in SelectDSReadWrite2()
1267 unsigned OffsetValue0 = C1->getZExtValue(); in SelectDSReadWrite2()
1273 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8); in SelectDSReadWrite2()
1274 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8); in SelectDSReadWrite2()
1278 // sub C, x -> add (sub 0, x), C in SelectDSReadWrite2()
1281 unsigned OffsetValue0 = C->getZExtValue(); in SelectDSReadWrite2()
1286 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectDSReadWrite2()
1288 // XXX - This is kind of hacky. Create a dummy sub node so we can check in SelectDSReadWrite2()
1292 CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1)); in SelectDSReadWrite2()
1299 if (Subtarget->hasAddNoCarry()) { in SelectDSReadWrite2()
1302 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit in SelectDSReadWrite2()
1305 MachineSDNode *MachineSub = CurDAG->getMachineNode( in SelectDSReadWrite2()
1309 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8); in SelectDSReadWrite2()
1310 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8); in SelectDSReadWrite2()
1316 unsigned OffsetValue0 = CAddr->getZExtValue(); in SelectDSReadWrite2()
1320 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectDSReadWrite2()
1322 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero); in SelectDSReadWrite2()
1324 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8); in SelectDSReadWrite2()
1325 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8); in SelectDSReadWrite2()
1333 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8); in SelectDSReadWrite2()
1334 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8); in SelectDSReadWrite2()
1342 // Subtarget prefers to use flat instruction in SelectMUBUF()
1344 if (Subtarget->useFlatForGlobal()) in SelectMUBUF()
1349 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); in SelectMUBUF()
1350 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); in SelectMUBUF()
1351 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1); in SelectMUBUF()
1352 SOffset = Subtarget->hasRestrictedSOffset() in SelectMUBUF()
1353 ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32) in SelectMUBUF()
1354 : CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectMUBUF()
1358 if (CurDAG->isBaseWithConstantOffset(Addr)) { in SelectMUBUF()
1360 if (isUInt<32>(C1->getZExtValue())) in SelectMUBUF()
1367 // (add N2, N3) -> addr64, or in SelectMUBUF()
1368 // (add (add N2, N3), C1) -> addr64 in SelectMUBUF()
1371 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); in SelectMUBUF()
1373 if (N2->isDivergent()) { in SelectMUBUF()
1374 if (N3->isDivergent()) { in SelectMUBUF()
1389 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectMUBUF()
1390 } else if (N0->isDivergent()) { in SelectMUBUF()
1395 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); in SelectMUBUF()
1397 // N0 -> offset, or in SelectMUBUF()
1398 // (N0 + C1) -> offset in SelectMUBUF()
1399 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectMUBUF()
1405 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectMUBUF()
1409 const SIInstrInfo *TII = Subtarget->getInstrInfo(); in SelectMUBUF()
1410 if (TII->isLegalMUBUFImmOffset(C1->getZExtValue())) { in SelectMUBUF()
1412 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32); in SelectMUBUF()
1417 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectMUBUF()
1419 SDValue(CurDAG->getMachineNode( in SelectMUBUF()
1421 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), in SelectMUBUF()
1433 if (!Subtarget->hasAddr64()) in SelectMUBUFAddr64()
1440 if (C->getSExtValue()) { in SelectMUBUFAddr64()
1458 FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N; in foldFrameIndex()
1464 return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32)); in foldFrameIndex()
1473 MachineFunction &MF = CurDAG->getMachineFunction(); in SelectMUBUFScratchOffen()
1476 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); in SelectMUBUFScratchOffen()
1479 int64_t Imm = CAddr->getSExtValue(); in SelectMUBUFScratchOffen()
1486 CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32); in SelectMUBUFScratchOffen()
1487 MachineSDNode *MovHighBits = CurDAG->getMachineNode( in SelectMUBUFScratchOffen()
1491 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectMUBUFScratchOffen()
1492 ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32); in SelectMUBUFScratchOffen()
1497 if (CurDAG->isBaseWithConstantOffset(Addr)) { in SelectMUBUFScratchOffen()
1513 // check. For out-of-bounds MUBUF loads, a 0 is returned. in SelectMUBUFScratchOffen()
1518 const SIInstrInfo *TII = Subtarget->getInstrInfo(); in SelectMUBUFScratchOffen()
1519 if (TII->isLegalMUBUFImmOffset(C1) && in SelectMUBUFScratchOffen()
1520 (!Subtarget->privateMemoryResourceIsRangeChecked() || in SelectMUBUFScratchOffen()
1521 CurDAG->SignBitIsZero(N0))) { in SelectMUBUFScratchOffen()
1523 ImmOffset = CurDAG->getTargetConstant(C1, DL, MVT::i32); in SelectMUBUFScratchOffen()
1530 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectMUBUFScratchOffen()
1537 auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg(); in IsCopyFromSGPR()
1550 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); in SelectMUBUFScratchOffset()
1551 const SIInstrInfo *TII = Subtarget->getInstrInfo(); in SelectMUBUFScratchOffset()
1552 MachineFunction &MF = CurDAG->getMachineFunction(); in SelectMUBUFScratchOffset()
1558 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); in SelectMUBUFScratchOffset()
1560 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectMUBUFScratchOffset()
1568 if (!CAddr || !TII->isLegalMUBUFImmOffset(CAddr->getZExtValue())) in SelectMUBUFScratchOffset()
1575 TII->isLegalMUBUFImmOffset(CAddr->getZExtValue())) { in SelectMUBUFScratchOffset()
1577 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectMUBUFScratchOffset()
1582 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); in SelectMUBUFScratchOffset()
1584 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i32); in SelectMUBUFScratchOffset()
1592 const SIInstrInfo *TII = Subtarget->getInstrInfo(); in SelectMUBUFOffset()
1597 if (!cast<ConstantSDNode>(Offen)->getSExtValue() && in SelectMUBUFOffset()
1598 !cast<ConstantSDNode>(Idxen)->getSExtValue() && in SelectMUBUFOffset()
1599 !cast<ConstantSDNode>(Addr64)->getSExtValue()) { in SelectMUBUFOffset()
1600 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | in SelectMUBUFOffset()
1615 if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) { in SelectBUFSOffset()
1616 SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32); in SelectBUFSOffset()
1631 for (SDValue V : N->op_values()) in findMemSDNode()
1643 unsigned AS = findMemSDNode(N)->getAddressSpace(); in SelectFlatOffsetImpl()
1646 Subtarget->hasFlatSegmentOffsetBug() && in SelectFlatOffsetImpl()
1647 FlatVariant == SIInstrFlags::FLAT && in SelectFlatOffsetImpl()
1650 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) { in SelectFlatOffsetImpl()
1655 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue(); in SelectFlatOffsetImpl()
1657 const SIInstrInfo *TII = Subtarget->getInstrInfo(); in SelectFlatOffsetImpl()
1658 if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) { in SelectFlatOffsetImpl()
1665 // For a FLAT instruction the hardware decides whether to access in SelectFlatOffsetImpl()
1676 TII->splitFlatOffset(COffsetVal, AS, FlatVariant); in SelectFlatOffsetImpl()
1680 SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); in SelectFlatOffsetImpl()
1687 if (Subtarget->hasAddNoCarry()) { in SelectFlatOffsetImpl()
1691 Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0); in SelectFlatOffsetImpl()
1695 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); in SelectFlatOffsetImpl()
1696 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); in SelectFlatOffsetImpl()
1698 SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, in SelectFlatOffsetImpl()
1700 SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, in SelectFlatOffsetImpl()
1706 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1); in SelectFlatOffsetImpl()
1709 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs, in SelectFlatOffsetImpl()
1712 SDNode *Addc = CurDAG->getMachineNode( in SelectFlatOffsetImpl()
1717 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32), in SelectFlatOffsetImpl()
1720 Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, in SelectFlatOffsetImpl()
1729 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32); in SelectFlatOffsetImpl()
1736 return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT); in SelectFlatOffset()
1761 // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
1774 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue(); in SelectGlobalSAddr()
1775 const SIInstrInfo *TII = Subtarget->getInstrInfo(); in SelectGlobalSAddr()
1777 if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, in SelectGlobalSAddr()
1781 } else if (!LHS->isDivergent()) { in SelectGlobalSAddr()
1784 // saddr + large_offset -> saddr + in SelectGlobalSAddr()
1788 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset( in SelectGlobalSAddr()
1792 SDNode *VMov = CurDAG->getMachineNode( in SelectGlobalSAddr()
1794 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32)); in SelectGlobalSAddr()
1797 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32); in SelectGlobalSAddr()
1808 !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) + in SelectGlobalSAddr()
1809 !TII->isInlineConstant(APInt(32, COffsetVal >> 32)); in SelectGlobalSAddr()
1810 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals) in SelectGlobalSAddr()
1820 if (!LHS->isDivergent()) { in SelectGlobalSAddr()
1828 if (!SAddr && !RHS->isDivergent()) { in SelectGlobalSAddr()
1837 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32); in SelectGlobalSAddr()
1842 if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF || in SelectGlobalSAddr()
1846 // It's cheaper to materialize a single 32-bit zero for vaddr than the two in SelectGlobalSAddr()
1847 // moves required to copy a 64-bit SGPR to VGPR. in SelectGlobalSAddr()
1850 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32, in SelectGlobalSAddr()
1851 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32)); in SelectGlobalSAddr()
1853 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32); in SelectGlobalSAddr()
1859 SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); in SelectSAddrFI()
1865 SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), in SelectSAddrFI()
1866 FI->getValueType(0)); in SelectSAddrFI()
1867 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr), in SelectSAddrFI()
1875 // Match (32-bit SGPR base) + sext(imm offset)
1879 if (Addr->isDivergent()) in SelectScratchSAddr()
1886 if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) { in SelectScratchSAddr()
1887 COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); in SelectScratchSAddr()
1895 const SIInstrInfo *TII = Subtarget->getInstrInfo(); in SelectScratchSAddr()
1897 if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, in SelectScratchSAddr()
1900 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset( in SelectScratchSAddr()
1908 : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32); in SelectScratchSAddr()
1909 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32, in SelectScratchSAddr()
1914 Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32); in SelectScratchSAddr()
1919 // Check whether the flat scratch SVS swizzle bug affects this access.
1922 if (!Subtarget->hasFlatScratchSVSSwizzleBug()) in checkFlatScratchSVSSwizzleBug()
1928 KnownBits VKnown = CurDAG->computeKnownBits(VAddr); in checkFlatScratchSVSSwizzleBug()
1931 CurDAG->computeKnownBits(SAddr), in checkFlatScratchSVSSwizzleBug()
1946 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue(); in SelectScratchSVAddr()
1947 const SIInstrInfo *TII = Subtarget->getInstrInfo(); in SelectScratchSVAddr()
1949 if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) { in SelectScratchSVAddr()
1952 } else if (!LHS->isDivergent() && COffsetVal > 0) { in SelectScratchSVAddr()
1954 // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) + in SelectScratchSVAddr()
1958 = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true); in SelectScratchSVAddr()
1961 SDNode *VMov = CurDAG->getMachineNode( in SelectScratchSVAddr()
1963 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32)); in SelectScratchSVAddr()
1970 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32); in SelectScratchSVAddr()
1982 if (!LHS->isDivergent() && RHS->isDivergent()) { in SelectScratchSVAddr()
1985 } else if (!RHS->isDivergent() && LHS->isDivergent()) { in SelectScratchSVAddr()
2003 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32); in SelectScratchSVAddr()
2016 KnownBits SKnown = CurDAG->computeKnownBits(*SOffset); in isSOffsetLegalWithImmOffset()
2025 // not null) offset. If Imm32Only is true, match only 32-bit immediate
2060 int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue(); in SelectSMRDOffset()
2064 *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); in SelectSMRDOffset()
2074 *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); in SelectSMRDOffset()
2082 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); in SelectSMRDOffset()
2084 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0); in SelectSMRDOffset()
2095 // Zero-extend a 32-bit address. in Expand32BitAddress()
2098 const MachineFunction &MF = CurDAG->getMachineFunction(); in Expand32BitAddress()
2100 unsigned AddrHiVal = Info->get32BitAddressHighBits(); in Expand32BitAddress()
2101 SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32); in Expand32BitAddress()
2104 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32), in Expand32BitAddress()
2106 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), in Expand32BitAddress()
2107 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi), in Expand32BitAddress()
2109 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32), in Expand32BitAddress()
2112 return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64, in Expand32BitAddress()
2118 // true, match only 32-bit immediate offsets available on CI.
2133 ImmOff = C->getSExtValue(); in SelectSMRDBaseOffset()
2139 // A 32-bit (address + offset) should not cause unsigned 32-bit integer in SelectSMRDBaseOffset()
2142 !Addr->getFlags().hasNoUnsignedWrap()) in SelectSMRDBaseOffset()
2147 if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) { in SelectSMRDBaseOffset()
2179 *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); in SelectSMRD()
2193 assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS); in SelectSMRDImm32()
2216 assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS); in SelectSMRDBufferImm32()
2223 // Match the (soffset + offset) pair as a 32-bit register base and in SelectSMRDBufferSgprImm()
2236 if (CurDAG->isBaseWithConstantOffset(Index)) { in SelectMOVRELOffset()
2245 if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) || in SelectMOVRELOffset()
2246 (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) { in SelectMOVRELOffset()
2248 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32); in SelectMOVRELOffset()
2257 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); in SelectMOVRELOffset()
2264 if (Val->isDivergent()) { in getBFE32()
2266 SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32); in getBFE32()
2267 SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32); in getBFE32()
2269 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W); in getBFE32()
2276 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); in getBFE32()
2278 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); in getBFE32()
2282 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) in SelectS_BFEFromShifts()
2283 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) in SelectS_BFEFromShifts()
2286 const SDValue &Shl = N->getOperand(0); in SelectS_BFEFromShifts()
2287 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); in SelectS_BFEFromShifts()
2288 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); in SelectS_BFEFromShifts()
2291 uint32_t BVal = B->getZExtValue(); in SelectS_BFEFromShifts()
2292 uint32_t CVal = C->getZExtValue(); in SelectS_BFEFromShifts()
2295 bool Signed = N->getOpcode() == ISD::SRA; in SelectS_BFEFromShifts()
2296 ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal, in SelectS_BFEFromShifts()
2297 32 - CVal)); in SelectS_BFEFromShifts()
2305 switch (N->getOpcode()) { in SelectS_BFE()
2307 if (N->getOperand(0).getOpcode() == ISD::SRL) { in SelectS_BFE()
2308 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" in SelectS_BFE()
2310 const SDValue &Srl = N->getOperand(0); in SelectS_BFE()
2312 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); in SelectS_BFE()
2315 uint32_t ShiftVal = Shift->getZExtValue(); in SelectS_BFE()
2316 uint32_t MaskVal = Mask->getZExtValue(); in SelectS_BFE()
2328 if (N->getOperand(0).getOpcode() == ISD::AND) { in SelectS_BFE()
2329 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" in SelectS_BFE()
2331 const SDValue &And = N->getOperand(0); in SelectS_BFE()
2332 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); in SelectS_BFE()
2333 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); in SelectS_BFE()
2336 uint32_t ShiftVal = Shift->getZExtValue(); in SelectS_BFE()
2337 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; in SelectS_BFE()
2346 } else if (N->getOperand(0).getOpcode() == ISD::SHL) { in SelectS_BFE()
2352 if (N->getOperand(0).getOpcode() == ISD::SHL) { in SelectS_BFE()
2359 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8 in SelectS_BFE()
2360 SDValue Src = N->getOperand(0); in SelectS_BFE()
2368 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); in SelectS_BFE()
2370 Amt->getZExtValue(), Width)); in SelectS_BFE()
2379 assert(N->getOpcode() == ISD::BRCOND); in isCBranchSCC()
2380 if (!N->hasOneUse()) in isCBranchSCC()
2383 SDValue Cond = N->getOperand(1); in isCBranchSCC()
2397 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); in isCBranchSCC()
2398 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64(); in isCBranchSCC()
2405 assert(VCMP->getOpcode() == AMDGPUISD::SETCC); in combineBallotPattern()
2415 auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get(); in combineBallotPattern()
2420 if (ISD::isExtOpcode(Cond->getOpcode())) // Skip extension. in combineBallotPattern()
2432 SDValue Cond = N->getOperand(1); in SelectBRCOND()
2435 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other, in SelectBRCOND()
2436 N->getOperand(2), N->getOperand(0)); in SelectBRCOND()
2441 const SIRegisterInfo *TRI = ST->getRegisterInfo(); in SelectBRCOND()
2448 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) { in SelectBRCOND()
2449 SDValue VCMP = Cond->getOperand(0); in SelectBRCOND()
2450 auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get(); in SelectBRCOND()
2452 isNullConstant(Cond->getOperand(1)) && in SelectBRCOND()
2453 // We may encounter ballot.i64 in wave32 mode on -O0. in SelectBRCOND()
2454 VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) { in SelectBRCOND()
2466 UseSCCBr = !BallotCond->isDivergent(); in SelectBRCOND()
2484 Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC(); in SelectBRCOND()
2501 Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32 in SelectBRCOND()
2504 CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO in SelectBRCOND()
2511 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond); in SelectBRCOND()
2512 CurDAG->SelectNodeTo(N, BrOp, MVT::Other, in SelectBRCOND()
2513 N->getOperand(2), // Basic Block in SelectBRCOND()
2518 if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 && in SelectFP_EXTEND()
2519 !N->isDivergent()) { in SelectFP_EXTEND()
2520 SDValue Src = N->getOperand(0); in SelectFP_EXTEND()
2523 CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(), in SelectFP_EXTEND()
2539 SDValue Chain = N->getOperand(0); in SelectDSAppendConsume()
2540 SDValue Ptr = N->getOperand(2); in SelectDSAppendConsume()
2542 MachineMemOperand *MMO = M->getMemOperand(); in SelectDSAppendConsume()
2543 bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; in SelectDSAppendConsume()
2546 if (CurDAG->isBaseWithConstantOffset(Ptr)) { in SelectDSAppendConsume()
2550 const APInt &OffsetVal = PtrOffset->getAsAPIntVal(); in SelectDSAppendConsume()
2553 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32); in SelectDSAppendConsume()
2559 Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32); in SelectDSAppendConsume()
2564 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32), in SelectDSAppendConsume()
2566 N->getOperand(N->getNumOperands() - 1) // New glue in SelectDSAppendConsume()
2569 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); in SelectDSAppendConsume()
2570 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); in SelectDSAppendConsume()
2577 SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4), in SelectDSBvhStackIntrinsic()
2578 N->getOperand(5), N->getOperand(0)}; in SelectDSBvhStackIntrinsic()
2581 MachineMemOperand *MMO = M->getMemOperand(); in SelectDSBvhStackIntrinsic()
2582 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); in SelectDSBvhStackIntrinsic()
2583 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); in SelectDSBvhStackIntrinsic()
2606 if (!Subtarget->hasGWS() || in SelectDS_GWS()
2608 !Subtarget->hasGWSSemaReleaseAll())) { in SelectDS_GWS()
2615 const bool HasVSrc = N->getNumOperands() == 4; in SelectDS_GWS()
2616 assert(HasVSrc || N->getNumOperands() == 3); in SelectDS_GWS()
2619 SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2); in SelectDS_GWS()
2622 MachineMemOperand *MMO = M->getMemOperand(); in SelectDS_GWS()
2633 // default -1 only set the low 16-bits, we could leave it as-is and add 1 to in SelectDS_GWS()
2635 glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32)); in SelectDS_GWS()
2636 ImmOffset = ConstOffset->getZExtValue(); in SelectDS_GWS()
2638 if (CurDAG->isBaseWithConstantOffset(BaseOffset)) { in SelectDS_GWS()
2647 = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32, in SelectDS_GWS()
2651 = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32, in SelectDS_GWS()
2653 CurDAG->getTargetConstant(16, SL, MVT::i32)); in SelectDS_GWS()
2657 SDValue Chain = N->getOperand(0); in SelectDS_GWS()
2658 SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32); in SelectDS_GWS()
2663 Ops.push_back(N->getOperand(2)); in SelectDS_GWS()
2667 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); in SelectDS_GWS()
2668 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); in SelectDS_GWS()
2672 if (Subtarget->getLDSBankCount() != 16) { in SelectInterpP1F16()
2699 SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0, in SelectInterpP1F16()
2700 N->getOperand(5), SDValue()); in SelectInterpP1F16()
2702 SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other); in SelectInterpP1F16()
2705 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, { in SelectInterpP1F16()
2706 CurDAG->getTargetConstant(2, DL, MVT::i32), // P0 in SelectInterpP1F16()
2707 N->getOperand(3), // Attr in SelectInterpP1F16()
2708 N->getOperand(2), // Attrchan in SelectInterpP1F16()
2713 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, { in SelectInterpP1F16()
2714 CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers in SelectInterpP1F16()
2715 N->getOperand(1), // Src0 in SelectInterpP1F16()
2716 N->getOperand(3), // Attr in SelectInterpP1F16()
2717 N->getOperand(2), // Attrchan in SelectInterpP1F16()
2718 CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers in SelectInterpP1F16()
2719 SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high in SelectInterpP1F16()
2720 N->getOperand(4), // high in SelectInterpP1F16()
2721 CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp in SelectInterpP1F16()
2722 CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod in SelectInterpP1F16()
2726 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0)); in SelectInterpP1F16()
2730 unsigned IntrID = N->getConstantOperandVal(1); in SelectINTRINSIC_W_CHAIN()
2734 if (N->getValueType(0) != MVT::i32) in SelectINTRINSIC_W_CHAIN()
2748 unsigned IntrID = N->getConstantOperandVal(0); in SelectINTRINSIC_WO_CHAIN()
2750 SDNode *ConvGlueNode = N->getGluedNode(); in SelectINTRINSIC_WO_CHAIN()
2753 assert(ConvGlueNode->getOpcode() == ISD::CONVERGENCECTRL_GLUE); in SelectINTRINSIC_WO_CHAIN()
2754 ConvGlueNode = ConvGlueNode->getOperand(0).getNode(); in SelectINTRINSIC_WO_CHAIN()
2756 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {}, in SelectINTRINSIC_WO_CHAIN()
2784 SDValue Src = N->getOperand(1); in SelectINTRINSIC_WO_CHAIN()
2785 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src}); in SelectINTRINSIC_WO_CHAIN()
2789 SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); in SelectINTRINSIC_WO_CHAIN()
2791 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps); in SelectINTRINSIC_WO_CHAIN()
2796 unsigned IntrID = N->getConstantOperandVal(1); in SelectINTRINSIC_VOID()
2815 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(N), MVT::i32); in SelectWAVE_ADDRESS()
2816 CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(), in SelectWAVE_ADDRESS()
2817 {N->getOperand(0), Log2WaveSize}); in SelectWAVE_ADDRESS()
2821 SDValue SrcVal = N->getOperand(1); in SelectSTACKRESTORE()
2828 Register SP = TLI->getStackPointerRegisterToSaveRestore(); in SelectSTACKRESTORE()
2834 SDValue Log2WaveSize = CurDAG->getTargetConstant( in SelectSTACKRESTORE()
2835 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32); in SelectSTACKRESTORE()
2837 if (N->isDivergent()) { in SelectSTACKRESTORE()
2838 SrcVal = SDValue(CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, in SelectSTACKRESTORE()
2843 CopyVal = SDValue(CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32, in SelectSTACKRESTORE()
2848 SDValue CopyToSP = CurDAG->getCopyToReg(N->getOperand(0), SL, SP, CopyVal); in SelectSTACKRESTORE()
2849 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyToSP); in SelectSTACKRESTORE()
2863 // Fold fsub [+-]0 into fneg. This may not have folded depending on the in SelectVOP3ModsImpl()
2866 if (LHS && LHS->isZero()) { in SelectVOP3ModsImpl()
2873 Mods |= SISrcMods::ABS; in SelectVOP3ModsImpl()
2885 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectVOP3Mods()
2897 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectVOP3ModsNonCanonicalizing()
2910 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectVOP3BMods()
2934 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectVINTERPModsImpl()
2955 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); in SelectVOP3Mods0()
2956 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); in SelectVOP3Mods0()
2965 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); in SelectVOP3BMods0()
2966 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); in SelectVOP3BMods0()
2976 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); in SelectVOP3OMods()
2977 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); in SelectVOP3OMods()
2994 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) { in SelectVOP3PMods()
3021 Lo = CurDAG->getTargetExtractSubreg( in SelectVOP3PMods()
3027 Hi = CurDAG->getTargetExtractSubreg( in SelectVOP3PMods()
3046 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, in SelectVOP3PMods()
3048 auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID in SelectVOP3PMods()
3051 CurDAG->getTargetConstant(RC, SL, MVT::i32), in SelectVOP3PMods()
3052 Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), in SelectVOP3PMods()
3053 Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) }; in SelectVOP3PMods()
3055 Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL, in SelectVOP3PMods()
3058 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectVOP3PMods()
3063 uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF() in SelectVOP3PMods()
3065 if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) { in SelectVOP3PMods()
3066 Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64); in SelectVOP3PMods()
3067 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectVOP3PMods()
3075 // Packed instructions do not have abs modifiers. in SelectVOP3PMods()
3078 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectVOP3PMods()
3091 assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value"); in SelectVOP3PModsNeg()
3094 unsigned SrcSign = C->getZExtValue(); in SelectVOP3PModsNeg()
3098 Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectVOP3PModsNeg()
3105 assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value"); in SelectWMMAOpSelVOP3PMods()
3108 unsigned SrcVal = C->getZExtValue(); in SelectWMMAOpSelVOP3PMods()
3112 Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectWMMAOpSelVOP3PMods()
3139 Ops.push_back(CurDAG->getTargetConstant(DstRegClass, DL, MVT::i32)); in buildRegSequence32()
3142 Ops.push_back(CurDAG->getTargetConstant( in buildRegSequence32()
3145 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, DstTy, Ops); in buildRegSequence32()
3155 // Pack 16-bit elements in pairs into 32-bit register. If both elements are in buildRegSequence16()
3156 // unpacked from 32-bit source use it, otherwise pack them using v_perm. in buildRegSequence16()
3163 SDValue PackLoLo = CurDAG->getTargetConstant(0x05040100, DL, MVT::i32); in buildRegSequence16()
3165 CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64, DL, MVT::i32, in buildRegSequence16()
3190 // Check if all elements also have abs modifier in selectWMMAModsNegAbs()
3195 NegAbsElts.push_back(El->getOperand(0)); in selectWMMAModsNegAbs()
3201 // Neg and Abs in selectWMMAModsNegAbs()
3207 // Abs in selectWMMAModsNegAbs()
3218 for (unsigned i = 0; i < BV->getNumOperands(); ++i) { in checkWMMAElementsModifiersF16()
3220 dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) { in checkWMMAElementsModifiersF16()
3221 for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) { in checkWMMAElementsModifiersF16()
3222 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i)); in checkWMMAElementsModifiersF16()
3239 checkWMMAElementsModifiersF16(BV, [&](SDValue Element) -> bool { in SelectWMMAModsF16Neg()
3247 if (BV->getNumOperands() * 2 == EltsF16.size()) { in SelectWMMAModsF16Neg()
3257 for (unsigned i = 0; i < BV->getNumOperands(); ++i) { in SelectWMMAModsF16Neg()
3258 SDValue ElV2f16 = stripBitcast(BV->getOperand(i)); in SelectWMMAModsF16Neg()
3259 // Based on first element decide which mod we match, neg or abs in SelectWMMAModsF16Neg()
3266 if (BV->getNumOperands() == EltsV2F16.size()) { in SelectWMMAModsF16Neg()
3273 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectWMMAModsF16Neg()
3286 checkWMMAElementsModifiersF16(BV, [&](SDValue ElF16) -> bool { in SelectWMMAModsF16NegAbs()
3287 // Based on first element decide which mod we match, neg or abs in SelectWMMAModsF16NegAbs()
3297 if (BV->getNumOperands() * 2 == EltsF16.size()) in SelectWMMAModsF16NegAbs()
3306 for (unsigned i = 0; i < BV->getNumOperands(); ++i) { in SelectWMMAModsF16NegAbs()
3307 SDValue ElV2f16 = stripBitcast(BV->getOperand(i)); in SelectWMMAModsF16NegAbs()
3308 // Based on first element decide which mod we match, neg or abs in SelectWMMAModsF16NegAbs()
3311 if (ElV2f16->getOpcode() != ModOpcode) in SelectWMMAModsF16NegAbs()
3313 EltsV2F16.push_back(ElV2f16->getOperand(0)); in SelectWMMAModsF16NegAbs()
3317 if (BV->getNumOperands() == EltsV2F16.size()) in SelectWMMAModsF16NegAbs()
3322 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectWMMAModsF16NegAbs()
3333 assert(BV->getNumOperands() > 0); in SelectWMMAModsF32NegAbs()
3334 // Based on first element decide which mod we match, neg or abs in SelectWMMAModsF32NegAbs()
3335 SDValue ElF32 = stripBitcast(BV->getOperand(0)); in SelectWMMAModsF32NegAbs()
3338 for (unsigned i = 0; i < BV->getNumOperands(); ++i) { in SelectWMMAModsF32NegAbs()
3339 SDValue ElF32 = stripBitcast(BV->getOperand(i)); in SelectWMMAModsF32NegAbs()
3346 if (BV->getNumOperands() == EltsF32.size()) in SelectWMMAModsF32NegAbs()
3351 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectWMMAModsF32NegAbs()
3358 if (SDValue Splat = BV->getSplatValue(&UndefElements)) in SelectWMMAVISrc()
3361 unsigned Imm = C->getAPIntValue().getSExtValue(); in SelectWMMAVISrc()
3362 Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32); in SelectWMMAVISrc()
3366 unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue(); in SelectWMMAVISrc()
3367 Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32); in SelectWMMAVISrc()
3377 if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) { in SelectWMMAVISrc()
3380 if (SDValue Splat = SplatSrc16BV->getSplatValue()) { in SelectWMMAVISrc()
3381 const SIInstrInfo *TII = Subtarget->getInstrInfo(); in SelectWMMAVISrc()
3384 RawValue = C->getValueAPF().bitcastToAPInt(); in SelectWMMAVISrc()
3386 RawValue = C->getAPIntValue(); in SelectWMMAVISrc()
3395 if (TII->isInlineConstant(FloatVal)) { in SelectWMMAVISrc()
3396 Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In), in SelectWMMAVISrc()
3401 if (TII->isInlineConstant(RawValue.value())) { in SelectWMMAVISrc()
3402 Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In), in SelectWMMAVISrc()
3407 llvm_unreachable("unknown 16-bit type"); in SelectWMMAVISrc()
3424 ShiftAmt->getZExtValue() % 8 == 0) { in SelectSWMMACIndex8()
3425 Key = ShiftAmt->getZExtValue() / 8; in SelectSWMMACIndex8()
3430 IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32); in SelectSWMMACIndex8()
3443 ShiftAmt->getZExtValue() == 16) { in SelectSWMMACIndex16()
3449 IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32); in SelectSWMMACIndex16()
3457 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); in SelectVOP3OpSel()
3479 // Be careful about folding modifiers if we already have an abs. fneg is in SelectVOP3PMadMixModsImpl()
3481 if ((Mods & SISrcMods::ABS) == 0) { in SelectVOP3PMadMixModsImpl()
3488 if ((ModsTmp & SISrcMods::ABS) != 0) in SelectVOP3PMadMixModsImpl()
3489 Mods |= SISrcMods::ABS; in SelectVOP3PMadMixModsImpl()
3501 // TODO: Should we try to look for neg/abs here? in SelectVOP3PMadMixModsImpl()
3515 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectVOP3PMadMixModsExt()
3523 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); in SelectVOP3PMadMixMods()
3529 return CurDAG->getUNDEF(MVT::i32); in getHi16Elt()
3533 return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32); in getHi16Elt()
3538 return CurDAG->getConstant( in getHi16Elt()
3539 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32); in getHi16Elt()
3550 assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn); in isVGPRImm()
3553 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); in isVGPRImm()
3555 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); in isVGPRImm()
3559 for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); in isVGPRImm()
3566 if (!RC || SIRI->isSGPRClass(RC)) in isVGPRImm()
3572 if (User->isMachineOpcode()) { in isVGPRImm()
3573 unsigned Opc = User->getMachineOpcode(); in isVGPRImm()
3574 const MCInstrDesc &Desc = SII->get(Opc); in isVGPRImm()
3578 if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) { in isVGPRImm()
3579 unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs(); in isVGPRImm()
3601 const MachineMemOperand *MMO = Ld->getMemOperand(); in isUniformLoad()
3602 if (N->isDivergent() && !AMDGPUInstrInfo::isUniformMMO(MMO)) in isUniformLoad()
3605 return MMO->getSize().hasValue() && in isUniformLoad()
3606 Ld->getAlign() >= in isUniformLoad()
3607 Align(std::min(MMO->getSize().getValue().getKnownMinValue(), in isUniformLoad()
3609 ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || in isUniformLoad()
3610 Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) || in isUniformLoad()
3611 (Subtarget->getScalarizeGlobalBehavior() && in isUniformLoad()
3612 Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && in isUniformLoad()
3613 Ld->isSimple() && in isUniformLoad()
3615 ->isMemOpHasNoClobberedMemOperand(N))); in isUniformLoad()
3626 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin(); in PostprocessISelDAG()
3627 while (Position != CurDAG->allnodes_end()) { in PostprocessISelDAG()
3640 CurDAG->RemoveDeadNodes(); in PostprocessISelDAG()