xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (revision 6e516c87b6d779911edde7481d8aef165b837a03)
10b57cec5SDimitry Andric //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //==-----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// Defines an instruction selector for the AMDGPU target.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
14349cc55cSDimitry Andric #include "AMDGPUISelDAGToDAG.h"
150b57cec5SDimitry Andric #include "AMDGPU.h"
16bdd1243dSDimitry Andric #include "AMDGPUInstrInfo.h"
1781ad6265SDimitry Andric #include "AMDGPUSubtarget.h"
180b57cec5SDimitry Andric #include "AMDGPUTargetMachine.h"
1981ad6265SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20349cc55cSDimitry Andric #include "MCTargetDesc/R600MCTargetDesc.h"
21349cc55cSDimitry Andric #include "R600RegisterInfo.h"
225f757f3fSDimitry Andric #include "SIISelLowering.h"
230b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
2406c3fb27SDimitry Andric #include "llvm/Analysis/UniformityAnalysis.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/FunctionLoweringInfo.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAG.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGISel.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGNodes.h"
30e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
31480093f4SDimitry Andric #include "llvm/InitializePasses.h"
3206c3fb27SDimitry Andric #include "llvm/Support/ErrorHandling.h"
33e8d8bef9SDimitry Andric 
340b57cec5SDimitry Andric #ifdef EXPENSIVE_CHECKS
35e8d8bef9SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
360b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
370b57cec5SDimitry Andric #endif
380b57cec5SDimitry Andric 
39fcaf7f86SDimitry Andric #define DEBUG_TYPE "amdgpu-isel"
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric using namespace llvm;
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
440b57cec5SDimitry Andric // Instruction Selector Implementation
450b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric namespace {
stripBitcast(SDValue Val)480b57cec5SDimitry Andric static SDValue stripBitcast(SDValue Val) {
490b57cec5SDimitry Andric   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
500b57cec5SDimitry Andric }
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric // Figure out if this is really an extract of the high 16-bits of a dword.
isExtractHiElt(SDValue In,SDValue & Out)530b57cec5SDimitry Andric static bool isExtractHiElt(SDValue In, SDValue &Out) {
540b57cec5SDimitry Andric   In = stripBitcast(In);
55fe6060f1SDimitry Andric 
56fe6060f1SDimitry Andric   if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
57fe6060f1SDimitry Andric     if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
58fe6060f1SDimitry Andric       if (!Idx->isOne())
59fe6060f1SDimitry Andric         return false;
60fe6060f1SDimitry Andric       Out = In.getOperand(0);
61fe6060f1SDimitry Andric       return true;
62fe6060f1SDimitry Andric     }
63fe6060f1SDimitry Andric   }
64fe6060f1SDimitry Andric 
650b57cec5SDimitry Andric   if (In.getOpcode() != ISD::TRUNCATE)
660b57cec5SDimitry Andric     return false;
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   SDValue Srl = In.getOperand(0);
690b57cec5SDimitry Andric   if (Srl.getOpcode() == ISD::SRL) {
700b57cec5SDimitry Andric     if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
710b57cec5SDimitry Andric       if (ShiftAmt->getZExtValue() == 16) {
720b57cec5SDimitry Andric         Out = stripBitcast(Srl.getOperand(0));
730b57cec5SDimitry Andric         return true;
740b57cec5SDimitry Andric       }
750b57cec5SDimitry Andric     }
760b57cec5SDimitry Andric   }
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   return false;
790b57cec5SDimitry Andric }
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric // Look through operations that obscure just looking at the low 16-bits of the
820b57cec5SDimitry Andric // same register.
stripExtractLoElt(SDValue In)830b57cec5SDimitry Andric static SDValue stripExtractLoElt(SDValue In) {
84fe6060f1SDimitry Andric   if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
855f757f3fSDimitry Andric     SDValue Idx = In.getOperand(1);
865f757f3fSDimitry Andric     if (isNullConstant(Idx) && In.getValueSizeInBits() <= 32)
87fe6060f1SDimitry Andric       return In.getOperand(0);
88fe6060f1SDimitry Andric   }
89fe6060f1SDimitry Andric 
900b57cec5SDimitry Andric   if (In.getOpcode() == ISD::TRUNCATE) {
910b57cec5SDimitry Andric     SDValue Src = In.getOperand(0);
920b57cec5SDimitry Andric     if (Src.getValueType().getSizeInBits() == 32)
930b57cec5SDimitry Andric       return stripBitcast(Src);
940b57cec5SDimitry Andric   }
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric   return In;
970b57cec5SDimitry Andric }
980b57cec5SDimitry Andric 
990b57cec5SDimitry Andric } // end anonymous namespace
1000b57cec5SDimitry Andric 
1010fca6ea1SDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
1020fca6ea1SDimitry Andric                       "AMDGPU DAG->DAG Pattern Instruction Selection", false,
1030fca6ea1SDimitry Andric                       false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)1040b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
1050b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
10606c3fb27SDimitry Andric INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
1070b57cec5SDimitry Andric #ifdef EXPENSIVE_CHECKS
1080b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1090b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1100b57cec5SDimitry Andric #endif
1110fca6ea1SDimitry Andric INITIALIZE_PASS_END(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
1120fca6ea1SDimitry Andric                     "AMDGPU DAG->DAG Pattern Instruction Selection", false,
1130fca6ea1SDimitry Andric                     false)
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric /// This pass converts a legalized DAG into a AMDGPU-specific
1160b57cec5SDimitry Andric // DAG, ready for instruction scheduling.
117bdd1243dSDimitry Andric FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
1185f757f3fSDimitry Andric                                         CodeGenOptLevel OptLevel) {
1190fca6ea1SDimitry Andric   return new AMDGPUDAGToDAGISelLegacy(TM, OptLevel);
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric 
AMDGPUDAGToDAGISel(TargetMachine & TM,CodeGenOptLevel OptLevel)122bdd1243dSDimitry Andric AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM,
1235f757f3fSDimitry Andric                                        CodeGenOptLevel OptLevel)
1240fca6ea1SDimitry Andric     : SelectionDAGISel(TM, OptLevel) {
125349cc55cSDimitry Andric   EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
1260b57cec5SDimitry Andric }
1270b57cec5SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)1280b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
1290b57cec5SDimitry Andric   Subtarget = &MF.getSubtarget<GCNSubtarget>();
1300fca6ea1SDimitry Andric   Subtarget->checkSubtargetFeatures(MF.getFunction());
1315f757f3fSDimitry Andric   Mode = SIModeRegisterDefaults(MF.getFunction(), *Subtarget);
1320b57cec5SDimitry Andric   return SelectionDAGISel::runOnMachineFunction(MF);
1330b57cec5SDimitry Andric }
1340b57cec5SDimitry Andric 
fp16SrcZerosHighBits(unsigned Opc) const135fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
136fe6060f1SDimitry Andric   // XXX - only need to list legal operations.
137fe6060f1SDimitry Andric   switch (Opc) {
138fe6060f1SDimitry Andric   case ISD::FADD:
139fe6060f1SDimitry Andric   case ISD::FSUB:
140fe6060f1SDimitry Andric   case ISD::FMUL:
141fe6060f1SDimitry Andric   case ISD::FDIV:
142fe6060f1SDimitry Andric   case ISD::FREM:
143fe6060f1SDimitry Andric   case ISD::FCANONICALIZE:
144fe6060f1SDimitry Andric   case ISD::UINT_TO_FP:
145fe6060f1SDimitry Andric   case ISD::SINT_TO_FP:
146fe6060f1SDimitry Andric   case ISD::FABS:
147fe6060f1SDimitry Andric     // Fabs is lowered to a bit operation, but it's an and which will clear the
148fe6060f1SDimitry Andric     // high bits anyway.
149fe6060f1SDimitry Andric   case ISD::FSQRT:
150fe6060f1SDimitry Andric   case ISD::FSIN:
151fe6060f1SDimitry Andric   case ISD::FCOS:
152fe6060f1SDimitry Andric   case ISD::FPOWI:
153fe6060f1SDimitry Andric   case ISD::FPOW:
154fe6060f1SDimitry Andric   case ISD::FLOG:
155fe6060f1SDimitry Andric   case ISD::FLOG2:
156fe6060f1SDimitry Andric   case ISD::FLOG10:
157fe6060f1SDimitry Andric   case ISD::FEXP:
158fe6060f1SDimitry Andric   case ISD::FEXP2:
159fe6060f1SDimitry Andric   case ISD::FCEIL:
160fe6060f1SDimitry Andric   case ISD::FTRUNC:
161fe6060f1SDimitry Andric   case ISD::FRINT:
162fe6060f1SDimitry Andric   case ISD::FNEARBYINT:
1635f757f3fSDimitry Andric   case ISD::FROUNDEVEN:
164fe6060f1SDimitry Andric   case ISD::FROUND:
165fe6060f1SDimitry Andric   case ISD::FFLOOR:
166fe6060f1SDimitry Andric   case ISD::FMINNUM:
167fe6060f1SDimitry Andric   case ISD::FMAXNUM:
16806c3fb27SDimitry Andric   case ISD::FLDEXP:
169fe6060f1SDimitry Andric   case AMDGPUISD::FRACT:
170fe6060f1SDimitry Andric   case AMDGPUISD::CLAMP:
171fe6060f1SDimitry Andric   case AMDGPUISD::COS_HW:
172fe6060f1SDimitry Andric   case AMDGPUISD::SIN_HW:
173fe6060f1SDimitry Andric   case AMDGPUISD::FMIN3:
174fe6060f1SDimitry Andric   case AMDGPUISD::FMAX3:
175fe6060f1SDimitry Andric   case AMDGPUISD::FMED3:
176fe6060f1SDimitry Andric   case AMDGPUISD::FMAD_FTZ:
177fe6060f1SDimitry Andric   case AMDGPUISD::RCP:
178fe6060f1SDimitry Andric   case AMDGPUISD::RSQ:
179fe6060f1SDimitry Andric   case AMDGPUISD::RCP_IFLAG:
180fe6060f1SDimitry Andric     // On gfx10, all 16-bit instructions preserve the high bits.
181fe6060f1SDimitry Andric     return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
182fe6060f1SDimitry Andric   case ISD::FP_ROUND:
183fe6060f1SDimitry Andric     // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
184fe6060f1SDimitry Andric     // high bits on gfx9.
185fe6060f1SDimitry Andric     // TODO: If we had the source node we could see if the source was fma/mad
186fe6060f1SDimitry Andric     return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
187fe6060f1SDimitry Andric   case ISD::FMA:
188fe6060f1SDimitry Andric   case ISD::FMAD:
189fe6060f1SDimitry Andric   case AMDGPUISD::DIV_FIXUP:
190fe6060f1SDimitry Andric     return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
191fe6060f1SDimitry Andric   default:
192fe6060f1SDimitry Andric     // fcopysign, select and others may be lowered to 32-bit bit operations
193fe6060f1SDimitry Andric     // which don't zero the high bits.
194fe6060f1SDimitry Andric     return false;
195fe6060f1SDimitry Andric   }
196fe6060f1SDimitry Andric }
197fe6060f1SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)1980fca6ea1SDimitry Andric bool AMDGPUDAGToDAGISelLegacy::runOnMachineFunction(MachineFunction &MF) {
1990fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS
2000fca6ea1SDimitry Andric   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2010fca6ea1SDimitry Andric   LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2020fca6ea1SDimitry Andric   for (auto &L : LI->getLoopsInPreorder()) {
2030fca6ea1SDimitry Andric     assert(L->isLCSSAForm(DT));
2040fca6ea1SDimitry Andric   }
2050fca6ea1SDimitry Andric #endif
2060fca6ea1SDimitry Andric   return SelectionDAGISelLegacy::runOnMachineFunction(MF);
2070fca6ea1SDimitry Andric }
2080fca6ea1SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const2090fca6ea1SDimitry Andric void AMDGPUDAGToDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
210349cc55cSDimitry Andric   AU.addRequired<AMDGPUArgumentUsageInfo>();
21106c3fb27SDimitry Andric   AU.addRequired<UniformityInfoWrapperPass>();
212349cc55cSDimitry Andric #ifdef EXPENSIVE_CHECKS
213349cc55cSDimitry Andric   AU.addRequired<DominatorTreeWrapperPass>();
214349cc55cSDimitry Andric   AU.addRequired<LoopInfoWrapperPass>();
215349cc55cSDimitry Andric #endif
2160fca6ea1SDimitry Andric   SelectionDAGISelLegacy::getAnalysisUsage(AU);
217349cc55cSDimitry Andric }
218349cc55cSDimitry Andric 
matchLoadD16FromBuildVector(SDNode * N) const2190b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
2200b57cec5SDimitry Andric   assert(Subtarget->d16PreservesUnusedBits());
2210b57cec5SDimitry Andric   MVT VT = N->getValueType(0).getSimpleVT();
2220b57cec5SDimitry Andric   if (VT != MVT::v2i16 && VT != MVT::v2f16)
2230b57cec5SDimitry Andric     return false;
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric   SDValue Lo = N->getOperand(0);
2260b57cec5SDimitry Andric   SDValue Hi = N->getOperand(1);
2270b57cec5SDimitry Andric 
2280b57cec5SDimitry Andric   LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
2310b57cec5SDimitry Andric   // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
2320b57cec5SDimitry Andric   // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
2330b57cec5SDimitry Andric 
2340b57cec5SDimitry Andric   // Need to check for possible indirect dependencies on the other half of the
2350b57cec5SDimitry Andric   // vector to avoid introducing a cycle.
2360b57cec5SDimitry Andric   if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
2370b57cec5SDimitry Andric     SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
2380b57cec5SDimitry Andric 
2390b57cec5SDimitry Andric     SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
2400b57cec5SDimitry Andric     SDValue Ops[] = {
2410b57cec5SDimitry Andric       LdHi->getChain(), LdHi->getBasePtr(), TiedIn
2420b57cec5SDimitry Andric     };
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric     unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
2450b57cec5SDimitry Andric     if (LdHi->getMemoryVT() == MVT::i8) {
2460b57cec5SDimitry Andric       LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
2470b57cec5SDimitry Andric         AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
2480b57cec5SDimitry Andric     } else {
2490b57cec5SDimitry Andric       assert(LdHi->getMemoryVT() == MVT::i16);
2500b57cec5SDimitry Andric     }
2510b57cec5SDimitry Andric 
2520b57cec5SDimitry Andric     SDValue NewLoadHi =
2530b57cec5SDimitry Andric       CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
2540b57cec5SDimitry Andric                                   Ops, LdHi->getMemoryVT(),
2550b57cec5SDimitry Andric                                   LdHi->getMemOperand());
2560b57cec5SDimitry Andric 
2570b57cec5SDimitry Andric     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
2580b57cec5SDimitry Andric     CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
2590b57cec5SDimitry Andric     return true;
2600b57cec5SDimitry Andric   }
2610b57cec5SDimitry Andric 
2620b57cec5SDimitry Andric   // build_vector (load ptr), hi -> load_d16_lo ptr, hi
2630b57cec5SDimitry Andric   // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
2640b57cec5SDimitry Andric   // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
2650b57cec5SDimitry Andric   LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
2660b57cec5SDimitry Andric   if (LdLo && Lo.hasOneUse()) {
2670b57cec5SDimitry Andric     SDValue TiedIn = getHi16Elt(Hi);
2680b57cec5SDimitry Andric     if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
2690b57cec5SDimitry Andric       return false;
2700b57cec5SDimitry Andric 
2710b57cec5SDimitry Andric     SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
2720b57cec5SDimitry Andric     unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
2730b57cec5SDimitry Andric     if (LdLo->getMemoryVT() == MVT::i8) {
2740b57cec5SDimitry Andric       LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
2750b57cec5SDimitry Andric         AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
2760b57cec5SDimitry Andric     } else {
2770b57cec5SDimitry Andric       assert(LdLo->getMemoryVT() == MVT::i16);
2780b57cec5SDimitry Andric     }
2790b57cec5SDimitry Andric 
2800b57cec5SDimitry Andric     TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
2810b57cec5SDimitry Andric 
2820b57cec5SDimitry Andric     SDValue Ops[] = {
2830b57cec5SDimitry Andric       LdLo->getChain(), LdLo->getBasePtr(), TiedIn
2840b57cec5SDimitry Andric     };
2850b57cec5SDimitry Andric 
2860b57cec5SDimitry Andric     SDValue NewLoadLo =
2870b57cec5SDimitry Andric       CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
2880b57cec5SDimitry Andric                                   Ops, LdLo->getMemoryVT(),
2890b57cec5SDimitry Andric                                   LdLo->getMemOperand());
2900b57cec5SDimitry Andric 
2910b57cec5SDimitry Andric     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
2920b57cec5SDimitry Andric     CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
2930b57cec5SDimitry Andric     return true;
2940b57cec5SDimitry Andric   }
2950b57cec5SDimitry Andric 
2960b57cec5SDimitry Andric   return false;
2970b57cec5SDimitry Andric }
2980b57cec5SDimitry Andric 
PreprocessISelDAG()2990b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
3000b57cec5SDimitry Andric   if (!Subtarget->d16PreservesUnusedBits())
3010b57cec5SDimitry Andric     return;
3020b57cec5SDimitry Andric 
3030b57cec5SDimitry Andric   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric   bool MadeChange = false;
3060b57cec5SDimitry Andric   while (Position != CurDAG->allnodes_begin()) {
3070b57cec5SDimitry Andric     SDNode *N = &*--Position;
3080b57cec5SDimitry Andric     if (N->use_empty())
3090b57cec5SDimitry Andric       continue;
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric     switch (N->getOpcode()) {
3120b57cec5SDimitry Andric     case ISD::BUILD_VECTOR:
3131db9f3b2SDimitry Andric       // TODO: Match load d16 from shl (extload:i16), 16
3140b57cec5SDimitry Andric       MadeChange |= matchLoadD16FromBuildVector(N);
3150b57cec5SDimitry Andric       break;
3160b57cec5SDimitry Andric     default:
3170b57cec5SDimitry Andric       break;
3180b57cec5SDimitry Andric     }
3190b57cec5SDimitry Andric   }
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   if (MadeChange) {
3220b57cec5SDimitry Andric     CurDAG->RemoveDeadNodes();
3230b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "After PreProcess:\n";
3240b57cec5SDimitry Andric                CurDAG->dump(););
3250b57cec5SDimitry Andric   }
3260b57cec5SDimitry Andric }
3270b57cec5SDimitry Andric 
isInlineImmediate(const SDNode * N) const3281db9f3b2SDimitry Andric bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
3290b57cec5SDimitry Andric   if (N->isUndef())
3300b57cec5SDimitry Andric     return true;
3310b57cec5SDimitry Andric 
3320b57cec5SDimitry Andric   const SIInstrInfo *TII = Subtarget->getInstrInfo();
3330b57cec5SDimitry Andric   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
3340b57cec5SDimitry Andric     return TII->isInlineConstant(C->getAPIntValue());
3350b57cec5SDimitry Andric 
3360b57cec5SDimitry Andric   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
3370fca6ea1SDimitry Andric     return TII->isInlineConstant(C->getValueAPF());
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric   return false;
3400b57cec5SDimitry Andric }
3410b57cec5SDimitry Andric 
3420b57cec5SDimitry Andric /// Determine the register class for \p OpNo
3430b57cec5SDimitry Andric /// \returns The register class of the virtual register that will be used for
3440b57cec5SDimitry Andric /// the given operand number \OpNo or NULL if the register class cannot be
3450b57cec5SDimitry Andric /// determined.
getOperandRegClass(SDNode * N,unsigned OpNo) const3460b57cec5SDimitry Andric const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
3470b57cec5SDimitry Andric                                                           unsigned OpNo) const {
3480b57cec5SDimitry Andric   if (!N->isMachineOpcode()) {
3490b57cec5SDimitry Andric     if (N->getOpcode() == ISD::CopyToReg) {
350e8d8bef9SDimitry Andric       Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
351e8d8bef9SDimitry Andric       if (Reg.isVirtual()) {
3520b57cec5SDimitry Andric         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
3530b57cec5SDimitry Andric         return MRI.getRegClass(Reg);
3540b57cec5SDimitry Andric       }
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric       const SIRegisterInfo *TRI
3570b57cec5SDimitry Andric         = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
358bdd1243dSDimitry Andric       return TRI->getPhysRegBaseClass(Reg);
3590b57cec5SDimitry Andric     }
3600b57cec5SDimitry Andric 
3610b57cec5SDimitry Andric     return nullptr;
3620b57cec5SDimitry Andric   }
3630b57cec5SDimitry Andric 
3640b57cec5SDimitry Andric   switch (N->getMachineOpcode()) {
3650b57cec5SDimitry Andric   default: {
3660b57cec5SDimitry Andric     const MCInstrDesc &Desc =
3670b57cec5SDimitry Andric         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
3680b57cec5SDimitry Andric     unsigned OpIdx = Desc.getNumDefs() + OpNo;
3690b57cec5SDimitry Andric     if (OpIdx >= Desc.getNumOperands())
3700b57cec5SDimitry Andric       return nullptr;
371bdd1243dSDimitry Andric     int RegClass = Desc.operands()[OpIdx].RegClass;
3720b57cec5SDimitry Andric     if (RegClass == -1)
3730b57cec5SDimitry Andric       return nullptr;
3740b57cec5SDimitry Andric 
3750b57cec5SDimitry Andric     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
3760b57cec5SDimitry Andric   }
3770b57cec5SDimitry Andric   case AMDGPU::REG_SEQUENCE: {
378647cbc5dSDimitry Andric     unsigned RCID = N->getConstantOperandVal(0);
3790b57cec5SDimitry Andric     const TargetRegisterClass *SuperRC =
3800b57cec5SDimitry Andric         Subtarget->getRegisterInfo()->getRegClass(RCID);
3810b57cec5SDimitry Andric 
3820b57cec5SDimitry Andric     SDValue SubRegOp = N->getOperand(OpNo + 1);
3831db9f3b2SDimitry Andric     unsigned SubRegIdx = SubRegOp->getAsZExtVal();
3840b57cec5SDimitry Andric     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
3850b57cec5SDimitry Andric                                                               SubRegIdx);
3860b57cec5SDimitry Andric   }
3870b57cec5SDimitry Andric   }
3880b57cec5SDimitry Andric }
3890b57cec5SDimitry Andric 
glueCopyToOp(SDNode * N,SDValue NewChain,SDValue Glue) const3908bcb0991SDimitry Andric SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
3918bcb0991SDimitry Andric                                          SDValue Glue) const {
3928bcb0991SDimitry Andric   SmallVector <SDValue, 8> Ops;
3938bcb0991SDimitry Andric   Ops.push_back(NewChain); // Replace the chain.
3948bcb0991SDimitry Andric   for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
3958bcb0991SDimitry Andric     Ops.push_back(N->getOperand(i));
3968bcb0991SDimitry Andric 
3978bcb0991SDimitry Andric   Ops.push_back(Glue);
3988bcb0991SDimitry Andric   return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
3998bcb0991SDimitry Andric }
4008bcb0991SDimitry Andric 
glueCopyToM0(SDNode * N,SDValue Val) const4010b57cec5SDimitry Andric SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
4020b57cec5SDimitry Andric   const SITargetLowering& Lowering =
4030b57cec5SDimitry Andric     *static_cast<const SITargetLowering*>(getTargetLowering());
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric   assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
4060b57cec5SDimitry Andric 
4078bcb0991SDimitry Andric   SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
4088bcb0991SDimitry Andric   return glueCopyToOp(N, M0, M0.getValue(1));
4090b57cec5SDimitry Andric }
4100b57cec5SDimitry Andric 
glueCopyToM0LDSInit(SDNode * N) const4110b57cec5SDimitry Andric SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
4120b57cec5SDimitry Andric   unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
4130b57cec5SDimitry Andric   if (AS == AMDGPUAS::LOCAL_ADDRESS) {
4140b57cec5SDimitry Andric     if (Subtarget->ldsRequiresM0Init())
4150b57cec5SDimitry Andric       return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
4160b57cec5SDimitry Andric   } else if (AS == AMDGPUAS::REGION_ADDRESS) {
4170b57cec5SDimitry Andric     MachineFunction &MF = CurDAG->getMachineFunction();
4180b57cec5SDimitry Andric     unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
4190b57cec5SDimitry Andric     return
4200b57cec5SDimitry Andric         glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
4210b57cec5SDimitry Andric   }
4220b57cec5SDimitry Andric   return N;
4230b57cec5SDimitry Andric }
4240b57cec5SDimitry Andric 
buildSMovImm64(SDLoc & DL,uint64_t Imm,EVT VT) const4250b57cec5SDimitry Andric MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
4260b57cec5SDimitry Andric                                                   EVT VT) const {
4270b57cec5SDimitry Andric   SDNode *Lo = CurDAG->getMachineNode(
4280b57cec5SDimitry Andric       AMDGPU::S_MOV_B32, DL, MVT::i32,
4290b57cec5SDimitry Andric       CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
4300b57cec5SDimitry Andric   SDNode *Hi =
4310b57cec5SDimitry Andric       CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
4320b57cec5SDimitry Andric                              CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
4330b57cec5SDimitry Andric   const SDValue Ops[] = {
4340b57cec5SDimitry Andric       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
4350b57cec5SDimitry Andric       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
4360b57cec5SDimitry Andric       SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
4390b57cec5SDimitry Andric }
4400b57cec5SDimitry Andric 
SelectBuildVector(SDNode * N,unsigned RegClassID)4410b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
4420b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
4430b57cec5SDimitry Andric   unsigned NumVectorElts = VT.getVectorNumElements();
4440b57cec5SDimitry Andric   EVT EltVT = VT.getVectorElementType();
4450b57cec5SDimitry Andric   SDLoc DL(N);
4460b57cec5SDimitry Andric   SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
4470b57cec5SDimitry Andric 
4480b57cec5SDimitry Andric   if (NumVectorElts == 1) {
4490b57cec5SDimitry Andric     CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
4500b57cec5SDimitry Andric                          RegClass);
4510b57cec5SDimitry Andric     return;
4520b57cec5SDimitry Andric   }
4530b57cec5SDimitry Andric 
4540b57cec5SDimitry Andric   assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
4550b57cec5SDimitry Andric                                   "supported yet");
4560b57cec5SDimitry Andric   // 32 = Max Num Vector Elements
4570b57cec5SDimitry Andric   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
4580b57cec5SDimitry Andric   // 1 = Vector Register Class
4590b57cec5SDimitry Andric   SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
4600b57cec5SDimitry Andric 
4615ffd83dbSDimitry Andric   bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
4625ffd83dbSDimitry Andric                Triple::amdgcn;
4630b57cec5SDimitry Andric   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
4640b57cec5SDimitry Andric   bool IsRegSeq = true;
4650b57cec5SDimitry Andric   unsigned NOps = N->getNumOperands();
4660b57cec5SDimitry Andric   for (unsigned i = 0; i < NOps; i++) {
4670b57cec5SDimitry Andric     // XXX: Why is this here?
4680b57cec5SDimitry Andric     if (isa<RegisterSDNode>(N->getOperand(i))) {
4690b57cec5SDimitry Andric       IsRegSeq = false;
4700b57cec5SDimitry Andric       break;
4710b57cec5SDimitry Andric     }
4725ffd83dbSDimitry Andric     unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
4735ffd83dbSDimitry Andric                          : R600RegisterInfo::getSubRegFromChannel(i);
4740b57cec5SDimitry Andric     RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
4750b57cec5SDimitry Andric     RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
4760b57cec5SDimitry Andric   }
4770b57cec5SDimitry Andric   if (NOps != NumVectorElts) {
4780b57cec5SDimitry Andric     // Fill in the missing undef elements if this was a scalar_to_vector.
4790b57cec5SDimitry Andric     assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
4800b57cec5SDimitry Andric     MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
4810b57cec5SDimitry Andric                                                    DL, EltVT);
4820b57cec5SDimitry Andric     for (unsigned i = NOps; i < NumVectorElts; ++i) {
4835ffd83dbSDimitry Andric       unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
4845ffd83dbSDimitry Andric                            : R600RegisterInfo::getSubRegFromChannel(i);
4850b57cec5SDimitry Andric       RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
4860b57cec5SDimitry Andric       RegSeqArgs[1 + (2 * i) + 1] =
4870b57cec5SDimitry Andric           CurDAG->getTargetConstant(Sub, DL, MVT::i32);
4880b57cec5SDimitry Andric     }
4890b57cec5SDimitry Andric   }
4900b57cec5SDimitry Andric 
4910b57cec5SDimitry Andric   if (!IsRegSeq)
4920b57cec5SDimitry Andric     SelectCode(N);
4930b57cec5SDimitry Andric   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
4940b57cec5SDimitry Andric }
4950b57cec5SDimitry Andric 
Select(SDNode * N)4960b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::Select(SDNode *N) {
4970b57cec5SDimitry Andric   unsigned int Opc = N->getOpcode();
4980b57cec5SDimitry Andric   if (N->isMachineOpcode()) {
4990b57cec5SDimitry Andric     N->setNodeId(-1);
5000b57cec5SDimitry Andric     return;   // Already selected.
5010b57cec5SDimitry Andric   }
5020b57cec5SDimitry Andric 
5038bcb0991SDimitry Andric   // isa<MemSDNode> almost works but is slightly too permissive for some DS
5048bcb0991SDimitry Andric   // intrinsics.
5050fca6ea1SDimitry Andric   if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N)) {
5060b57cec5SDimitry Andric     N = glueCopyToM0LDSInit(N);
5078bcb0991SDimitry Andric     SelectCode(N);
5088bcb0991SDimitry Andric     return;
5098bcb0991SDimitry Andric   }
5100b57cec5SDimitry Andric 
5110b57cec5SDimitry Andric   switch (Opc) {
5120b57cec5SDimitry Andric   default:
5130b57cec5SDimitry Andric     break;
5140b57cec5SDimitry Andric   // We are selecting i64 ADD here instead of custom lower it during
5150b57cec5SDimitry Andric   // DAG legalization, so we can fold some i64 ADDs used for address
5160b57cec5SDimitry Andric   // calculation into the LOAD and STORE instructions.
5170b57cec5SDimitry Andric   case ISD::ADDC:
5180b57cec5SDimitry Andric   case ISD::ADDE:
5190b57cec5SDimitry Andric   case ISD::SUBC:
5200b57cec5SDimitry Andric   case ISD::SUBE: {
5210b57cec5SDimitry Andric     if (N->getValueType(0) != MVT::i64)
5220b57cec5SDimitry Andric       break;
5230b57cec5SDimitry Andric 
5240b57cec5SDimitry Andric     SelectADD_SUB_I64(N);
5250b57cec5SDimitry Andric     return;
5260b57cec5SDimitry Andric   }
52706c3fb27SDimitry Andric   case ISD::UADDO_CARRY:
52806c3fb27SDimitry Andric   case ISD::USUBO_CARRY:
5290b57cec5SDimitry Andric     if (N->getValueType(0) != MVT::i32)
5300b57cec5SDimitry Andric       break;
5310b57cec5SDimitry Andric 
5320b57cec5SDimitry Andric     SelectAddcSubb(N);
5330b57cec5SDimitry Andric     return;
5340b57cec5SDimitry Andric   case ISD::UADDO:
5350b57cec5SDimitry Andric   case ISD::USUBO: {
5360b57cec5SDimitry Andric     SelectUADDO_USUBO(N);
5370b57cec5SDimitry Andric     return;
5380b57cec5SDimitry Andric   }
5390b57cec5SDimitry Andric   case AMDGPUISD::FMUL_W_CHAIN: {
5400b57cec5SDimitry Andric     SelectFMUL_W_CHAIN(N);
5410b57cec5SDimitry Andric     return;
5420b57cec5SDimitry Andric   }
5430b57cec5SDimitry Andric   case AMDGPUISD::FMA_W_CHAIN: {
5440b57cec5SDimitry Andric     SelectFMA_W_CHAIN(N);
5450b57cec5SDimitry Andric     return;
5460b57cec5SDimitry Andric   }
5470b57cec5SDimitry Andric 
5480b57cec5SDimitry Andric   case ISD::SCALAR_TO_VECTOR:
5490b57cec5SDimitry Andric   case ISD::BUILD_VECTOR: {
5500b57cec5SDimitry Andric     EVT VT = N->getValueType(0);
5510b57cec5SDimitry Andric     unsigned NumVectorElts = VT.getVectorNumElements();
5520b57cec5SDimitry Andric     if (VT.getScalarSizeInBits() == 16) {
5530b57cec5SDimitry Andric       if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
5540b57cec5SDimitry Andric         if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
5550b57cec5SDimitry Andric           ReplaceNode(N, Packed);
5560b57cec5SDimitry Andric           return;
5570b57cec5SDimitry Andric         }
5580b57cec5SDimitry Andric       }
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric       break;
5610b57cec5SDimitry Andric     }
5620b57cec5SDimitry Andric 
5630b57cec5SDimitry Andric     assert(VT.getVectorElementType().bitsEq(MVT::i32));
5645ffd83dbSDimitry Andric     unsigned RegClassID =
5655ffd83dbSDimitry Andric         SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
5660b57cec5SDimitry Andric     SelectBuildVector(N, RegClassID);
5670b57cec5SDimitry Andric     return;
5680b57cec5SDimitry Andric   }
5690b57cec5SDimitry Andric   case ISD::BUILD_PAIR: {
5700b57cec5SDimitry Andric     SDValue RC, SubReg0, SubReg1;
5710b57cec5SDimitry Andric     SDLoc DL(N);
5720b57cec5SDimitry Andric     if (N->getValueType(0) == MVT::i128) {
5738bcb0991SDimitry Andric       RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
5740b57cec5SDimitry Andric       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
5750b57cec5SDimitry Andric       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
5760b57cec5SDimitry Andric     } else if (N->getValueType(0) == MVT::i64) {
5770b57cec5SDimitry Andric       RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
5780b57cec5SDimitry Andric       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
5790b57cec5SDimitry Andric       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
5800b57cec5SDimitry Andric     } else {
5810b57cec5SDimitry Andric       llvm_unreachable("Unhandled value type for BUILD_PAIR");
5820b57cec5SDimitry Andric     }
5830b57cec5SDimitry Andric     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
5840b57cec5SDimitry Andric                             N->getOperand(1), SubReg1 };
5850b57cec5SDimitry Andric     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
5860b57cec5SDimitry Andric                                           N->getValueType(0), Ops));
5870b57cec5SDimitry Andric     return;
5880b57cec5SDimitry Andric   }
5890b57cec5SDimitry Andric 
5900b57cec5SDimitry Andric   case ISD::Constant:
5910b57cec5SDimitry Andric   case ISD::ConstantFP: {
5920b57cec5SDimitry Andric     if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
5930b57cec5SDimitry Andric       break;
5940b57cec5SDimitry Andric 
5950b57cec5SDimitry Andric     uint64_t Imm;
5965f757f3fSDimitry Andric     if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) {
5970b57cec5SDimitry Andric       Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
5985f757f3fSDimitry Andric       if (AMDGPU::isValid32BitLiteral(Imm, true))
5995f757f3fSDimitry Andric         break;
6005f757f3fSDimitry Andric     } else {
6010b57cec5SDimitry Andric       ConstantSDNode *C = cast<ConstantSDNode>(N);
6020b57cec5SDimitry Andric       Imm = C->getZExtValue();
6035f757f3fSDimitry Andric       if (AMDGPU::isValid32BitLiteral(Imm, false))
6045f757f3fSDimitry Andric         break;
6050b57cec5SDimitry Andric     }
6060b57cec5SDimitry Andric 
6070b57cec5SDimitry Andric     SDLoc DL(N);
6080b57cec5SDimitry Andric     ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
6090b57cec5SDimitry Andric     return;
6100b57cec5SDimitry Andric   }
6110b57cec5SDimitry Andric   case AMDGPUISD::BFE_I32:
6120b57cec5SDimitry Andric   case AMDGPUISD::BFE_U32: {
6130b57cec5SDimitry Andric     // There is a scalar version available, but unlike the vector version which
6140b57cec5SDimitry Andric     // has a separate operand for the offset and width, the scalar version packs
6150b57cec5SDimitry Andric     // the width and offset into a single operand. Try to move to the scalar
6160b57cec5SDimitry Andric     // version if the offsets are constant, so that we can try to keep extended
6170b57cec5SDimitry Andric     // loads of kernel arguments in SGPRs.
6180b57cec5SDimitry Andric 
6190b57cec5SDimitry Andric     // TODO: Technically we could try to pattern match scalar bitshifts of
6200b57cec5SDimitry Andric     // dynamic values, but it's probably not useful.
6210b57cec5SDimitry Andric     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
6220b57cec5SDimitry Andric     if (!Offset)
6230b57cec5SDimitry Andric       break;
6240b57cec5SDimitry Andric 
6250b57cec5SDimitry Andric     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
6260b57cec5SDimitry Andric     if (!Width)
6270b57cec5SDimitry Andric       break;
6280b57cec5SDimitry Andric 
6290b57cec5SDimitry Andric     bool Signed = Opc == AMDGPUISD::BFE_I32;
6300b57cec5SDimitry Andric 
6310b57cec5SDimitry Andric     uint32_t OffsetVal = Offset->getZExtValue();
6320b57cec5SDimitry Andric     uint32_t WidthVal = Width->getZExtValue();
6330b57cec5SDimitry Andric 
634349cc55cSDimitry Andric     ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
635349cc55cSDimitry Andric                             WidthVal));
6360b57cec5SDimitry Andric     return;
6370b57cec5SDimitry Andric   }
6380b57cec5SDimitry Andric   case AMDGPUISD::DIV_SCALE: {
6390b57cec5SDimitry Andric     SelectDIV_SCALE(N);
6400b57cec5SDimitry Andric     return;
6410b57cec5SDimitry Andric   }
6420b57cec5SDimitry Andric   case AMDGPUISD::MAD_I64_I32:
6430b57cec5SDimitry Andric   case AMDGPUISD::MAD_U64_U32: {
6440b57cec5SDimitry Andric     SelectMAD_64_32(N);
6450b57cec5SDimitry Andric     return;
6460b57cec5SDimitry Andric   }
6474824e7fdSDimitry Andric   case ISD::SMUL_LOHI:
6484824e7fdSDimitry Andric   case ISD::UMUL_LOHI:
6494824e7fdSDimitry Andric     return SelectMUL_LOHI(N);
6500b57cec5SDimitry Andric   case ISD::CopyToReg: {
6510b57cec5SDimitry Andric     const SITargetLowering& Lowering =
6520b57cec5SDimitry Andric       *static_cast<const SITargetLowering*>(getTargetLowering());
6530b57cec5SDimitry Andric     N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
6540b57cec5SDimitry Andric     break;
6550b57cec5SDimitry Andric   }
6560b57cec5SDimitry Andric   case ISD::AND:
6570b57cec5SDimitry Andric   case ISD::SRL:
6580b57cec5SDimitry Andric   case ISD::SRA:
6590b57cec5SDimitry Andric   case ISD::SIGN_EXTEND_INREG:
6600b57cec5SDimitry Andric     if (N->getValueType(0) != MVT::i32)
6610b57cec5SDimitry Andric       break;
6620b57cec5SDimitry Andric 
6630b57cec5SDimitry Andric     SelectS_BFE(N);
6640b57cec5SDimitry Andric     return;
6650b57cec5SDimitry Andric   case ISD::BRCOND:
6660b57cec5SDimitry Andric     SelectBRCOND(N);
6670b57cec5SDimitry Andric     return;
6685f757f3fSDimitry Andric   case ISD::FP_EXTEND:
6695f757f3fSDimitry Andric     SelectFP_EXTEND(N);
6705f757f3fSDimitry Andric     return;
6710b57cec5SDimitry Andric   case AMDGPUISD::CVT_PKRTZ_F16_F32:
6720b57cec5SDimitry Andric   case AMDGPUISD::CVT_PKNORM_I16_F32:
6730b57cec5SDimitry Andric   case AMDGPUISD::CVT_PKNORM_U16_F32:
6740b57cec5SDimitry Andric   case AMDGPUISD::CVT_PK_U16_U32:
6750b57cec5SDimitry Andric   case AMDGPUISD::CVT_PK_I16_I32: {
6760b57cec5SDimitry Andric     // Hack around using a legal type if f16 is illegal.
6770b57cec5SDimitry Andric     if (N->getValueType(0) == MVT::i32) {
6780b57cec5SDimitry Andric       MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
6790b57cec5SDimitry Andric       N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
6800b57cec5SDimitry Andric                               { N->getOperand(0), N->getOperand(1) });
6810b57cec5SDimitry Andric       SelectCode(N);
6820b57cec5SDimitry Andric       return;
6830b57cec5SDimitry Andric     }
6840b57cec5SDimitry Andric 
6850b57cec5SDimitry Andric     break;
6860b57cec5SDimitry Andric   }
6870b57cec5SDimitry Andric   case ISD::INTRINSIC_W_CHAIN: {
6880b57cec5SDimitry Andric     SelectINTRINSIC_W_CHAIN(N);
6890b57cec5SDimitry Andric     return;
6900b57cec5SDimitry Andric   }
6918bcb0991SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN: {
6928bcb0991SDimitry Andric     SelectINTRINSIC_WO_CHAIN(N);
6938bcb0991SDimitry Andric     return;
6948bcb0991SDimitry Andric   }
6950b57cec5SDimitry Andric   case ISD::INTRINSIC_VOID: {
6960b57cec5SDimitry Andric     SelectINTRINSIC_VOID(N);
6970b57cec5SDimitry Andric     return;
6980b57cec5SDimitry Andric   }
6995f757f3fSDimitry Andric   case AMDGPUISD::WAVE_ADDRESS: {
7005f757f3fSDimitry Andric     SelectWAVE_ADDRESS(N);
7015f757f3fSDimitry Andric     return;
7025f757f3fSDimitry Andric   }
7035f757f3fSDimitry Andric   case ISD::STACKRESTORE: {
7045f757f3fSDimitry Andric     SelectSTACKRESTORE(N);
7055f757f3fSDimitry Andric     return;
7065f757f3fSDimitry Andric   }
7070b57cec5SDimitry Andric   }
7080b57cec5SDimitry Andric 
7090b57cec5SDimitry Andric   SelectCode(N);
7100b57cec5SDimitry Andric }
7110b57cec5SDimitry Andric 
isUniformBr(const SDNode * N) const7120b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
7130b57cec5SDimitry Andric   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
7140b57cec5SDimitry Andric   const Instruction *Term = BB->getTerminator();
7150b57cec5SDimitry Andric   return Term->getMetadata("amdgpu.uniform") ||
7160b57cec5SDimitry Andric          Term->getMetadata("structurizecfg.uniform");
7170b57cec5SDimitry Andric }
7180b57cec5SDimitry Andric 
isUnneededShiftMask(const SDNode * N,unsigned ShAmtBits) const7194824e7fdSDimitry Andric bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
7204824e7fdSDimitry Andric                                              unsigned ShAmtBits) const {
7214824e7fdSDimitry Andric   assert(N->getOpcode() == ISD::AND);
7224824e7fdSDimitry Andric 
723647cbc5dSDimitry Andric   const APInt &RHS = N->getConstantOperandAPInt(1);
72406c3fb27SDimitry Andric   if (RHS.countr_one() >= ShAmtBits)
7254824e7fdSDimitry Andric     return true;
7264824e7fdSDimitry Andric 
7274824e7fdSDimitry Andric   const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
72806c3fb27SDimitry Andric   return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
7294824e7fdSDimitry Andric }
7304824e7fdSDimitry Andric 
getBaseWithOffsetUsingSplitOR(SelectionDAG & DAG,SDValue Addr,SDValue & N0,SDValue & N1)731e8d8bef9SDimitry Andric static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
732e8d8bef9SDimitry Andric                                           SDValue &N0, SDValue &N1) {
733e8d8bef9SDimitry Andric   if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
734e8d8bef9SDimitry Andric       Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
735e8d8bef9SDimitry Andric     // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
736e8d8bef9SDimitry Andric     // (i64 (bitcast (v2i32 (build_vector
737e8d8bef9SDimitry Andric     //                        (or (extract_vector_elt V, 0), OFFSET),
738e8d8bef9SDimitry Andric     //                        (extract_vector_elt V, 1)))))
739e8d8bef9SDimitry Andric     SDValue Lo = Addr.getOperand(0).getOperand(0);
740e8d8bef9SDimitry Andric     if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
741e8d8bef9SDimitry Andric       SDValue BaseLo = Lo.getOperand(0);
742e8d8bef9SDimitry Andric       SDValue BaseHi = Addr.getOperand(0).getOperand(1);
743e8d8bef9SDimitry Andric       // Check that split base (Lo and Hi) are extracted from the same one.
744e8d8bef9SDimitry Andric       if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
745e8d8bef9SDimitry Andric           BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
746e8d8bef9SDimitry Andric           BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
747e8d8bef9SDimitry Andric           // Lo is statically extracted from index 0.
748e8d8bef9SDimitry Andric           isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
749e8d8bef9SDimitry Andric           BaseLo.getConstantOperandVal(1) == 0 &&
750e8d8bef9SDimitry Andric           // Hi is statically extracted from index 0.
751e8d8bef9SDimitry Andric           isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
752e8d8bef9SDimitry Andric           BaseHi.getConstantOperandVal(1) == 1) {
753e8d8bef9SDimitry Andric         N0 = BaseLo.getOperand(0).getOperand(0);
754e8d8bef9SDimitry Andric         N1 = Lo.getOperand(1);
755e8d8bef9SDimitry Andric         return true;
756e8d8bef9SDimitry Andric       }
757e8d8bef9SDimitry Andric     }
758e8d8bef9SDimitry Andric   }
759e8d8bef9SDimitry Andric   return false;
760e8d8bef9SDimitry Andric }
761e8d8bef9SDimitry Andric 
isBaseWithConstantOffset64(SDValue Addr,SDValue & LHS,SDValue & RHS) const762e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
763e8d8bef9SDimitry Andric                                                     SDValue &RHS) const {
764e8d8bef9SDimitry Andric   if (CurDAG->isBaseWithConstantOffset(Addr)) {
765e8d8bef9SDimitry Andric     LHS = Addr.getOperand(0);
766e8d8bef9SDimitry Andric     RHS = Addr.getOperand(1);
767e8d8bef9SDimitry Andric     return true;
768e8d8bef9SDimitry Andric   }
769e8d8bef9SDimitry Andric 
770e8d8bef9SDimitry Andric   if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
771e8d8bef9SDimitry Andric     assert(LHS && RHS && isa<ConstantSDNode>(RHS));
772e8d8bef9SDimitry Andric     return true;
773e8d8bef9SDimitry Andric   }
774e8d8bef9SDimitry Andric 
775e8d8bef9SDimitry Andric   return false;
776e8d8bef9SDimitry Andric }
777e8d8bef9SDimitry Andric 
getPassName() const7780fca6ea1SDimitry Andric StringRef AMDGPUDAGToDAGISelLegacy::getPassName() const {
7790b57cec5SDimitry Andric   return "AMDGPU DAG->DAG Pattern Instruction Selection";
7800b57cec5SDimitry Andric }
7810b57cec5SDimitry Andric 
/// Constructs the pass around a freshly created AMDGPUDAGToDAGISel that is
/// configured with \p TM and the optimization level reported by \p TM.
AMDGPUISelDAGToDAGPass::AMDGPUISelDAGToDAGPass(TargetMachine &TM)
    : SelectionDAGISelPass(
          std::make_unique<AMDGPUDAGToDAGISel>(TM, TM.getOptLevel())) {}
7850fca6ea1SDimitry Andric 
7860fca6ea1SDimitry Andric PreservedAnalyses
run(MachineFunction & MF,MachineFunctionAnalysisManager & MFAM)7870fca6ea1SDimitry Andric AMDGPUISelDAGToDAGPass::run(MachineFunction &MF,
7880fca6ea1SDimitry Andric                             MachineFunctionAnalysisManager &MFAM) {
7890fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS
7900fca6ea1SDimitry Andric   auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
7910fca6ea1SDimitry Andric                   .getManager();
7920fca6ea1SDimitry Andric   auto &F = MF.getFunction();
7930fca6ea1SDimitry Andric   DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
7940fca6ea1SDimitry Andric   LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
7950fca6ea1SDimitry Andric   for (auto &L : LI.getLoopsInPreorder())
7960fca6ea1SDimitry Andric     assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
7970fca6ea1SDimitry Andric #endif
7980fca6ea1SDimitry Andric   return SelectionDAGISelPass::run(MF, MFAM);
7990fca6ea1SDimitry Andric }
8000fca6ea1SDimitry Andric 
8010b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8020b57cec5SDimitry Andric // Complex Patterns
8030b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8040b57cec5SDimitry Andric 
/// Complex-pattern hook that never matches: it always returns false and
/// leaves \p Base and \p Offset untouched.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}
8090b57cec5SDimitry Andric 
SelectADDRIndirect(SDValue Addr,SDValue & Base,SDValue & Offset)8100b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
8110b57cec5SDimitry Andric                                             SDValue &Offset) {
8120b57cec5SDimitry Andric   ConstantSDNode *C;
8130b57cec5SDimitry Andric   SDLoc DL(Addr);
8140b57cec5SDimitry Andric 
8150b57cec5SDimitry Andric   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
8160b57cec5SDimitry Andric     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
8170b57cec5SDimitry Andric     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
8180b57cec5SDimitry Andric   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
8190b57cec5SDimitry Andric              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
8200b57cec5SDimitry Andric     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
8210b57cec5SDimitry Andric     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
8220b57cec5SDimitry Andric   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
8230b57cec5SDimitry Andric             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
8240b57cec5SDimitry Andric     Base = Addr.getOperand(0);
8250b57cec5SDimitry Andric     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
8260b57cec5SDimitry Andric   } else {
8270b57cec5SDimitry Andric     Base = Addr;
8280b57cec5SDimitry Andric     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
8290b57cec5SDimitry Andric   }
8300b57cec5SDimitry Andric 
8310b57cec5SDimitry Andric   return true;
8320b57cec5SDimitry Andric }
8330b57cec5SDimitry Andric 
getMaterializedScalarImm32(int64_t Val,const SDLoc & DL) const8348bcb0991SDimitry Andric SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
8358bcb0991SDimitry Andric                                                        const SDLoc &DL) const {
8368bcb0991SDimitry Andric   SDNode *Mov = CurDAG->getMachineNode(
8378bcb0991SDimitry Andric     AMDGPU::S_MOV_B32, DL, MVT::i32,
8388bcb0991SDimitry Andric     CurDAG->getTargetConstant(Val, DL, MVT::i32));
8398bcb0991SDimitry Andric   return SDValue(Mov, 0);
8408bcb0991SDimitry Andric }
8418bcb0991SDimitry Andric 
84206c3fb27SDimitry Andric // FIXME: Should only handle uaddo_carry/usubo_carry
SelectADD_SUB_I64(SDNode * N)8430b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
8440b57cec5SDimitry Andric   SDLoc DL(N);
8450b57cec5SDimitry Andric   SDValue LHS = N->getOperand(0);
8460b57cec5SDimitry Andric   SDValue RHS = N->getOperand(1);
8470b57cec5SDimitry Andric 
8480b57cec5SDimitry Andric   unsigned Opcode = N->getOpcode();
8490b57cec5SDimitry Andric   bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
8500b57cec5SDimitry Andric   bool ProduceCarry =
8510b57cec5SDimitry Andric       ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
8520b57cec5SDimitry Andric   bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
8530b57cec5SDimitry Andric 
8540b57cec5SDimitry Andric   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
8550b57cec5SDimitry Andric   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
8560b57cec5SDimitry Andric 
8570b57cec5SDimitry Andric   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
8580b57cec5SDimitry Andric                                        DL, MVT::i32, LHS, Sub0);
8590b57cec5SDimitry Andric   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
8600b57cec5SDimitry Andric                                        DL, MVT::i32, LHS, Sub1);
8610b57cec5SDimitry Andric 
8620b57cec5SDimitry Andric   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
8630b57cec5SDimitry Andric                                        DL, MVT::i32, RHS, Sub0);
8640b57cec5SDimitry Andric   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
8650b57cec5SDimitry Andric                                        DL, MVT::i32, RHS, Sub1);
8660b57cec5SDimitry Andric 
8670b57cec5SDimitry Andric   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
8680b57cec5SDimitry Andric 
8695ffd83dbSDimitry Andric   static const unsigned OpcMap[2][2][2] = {
8705ffd83dbSDimitry Andric       {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
871e8d8bef9SDimitry Andric        {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
8725ffd83dbSDimitry Andric       {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
8735ffd83dbSDimitry Andric        {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
8745ffd83dbSDimitry Andric 
8755ffd83dbSDimitry Andric   unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
8765ffd83dbSDimitry Andric   unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
8770b57cec5SDimitry Andric 
8780b57cec5SDimitry Andric   SDNode *AddLo;
8790b57cec5SDimitry Andric   if (!ConsumeCarry) {
8800b57cec5SDimitry Andric     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
8810b57cec5SDimitry Andric     AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
8820b57cec5SDimitry Andric   } else {
8830b57cec5SDimitry Andric     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
8840b57cec5SDimitry Andric     AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
8850b57cec5SDimitry Andric   }
8860b57cec5SDimitry Andric   SDValue AddHiArgs[] = {
8870b57cec5SDimitry Andric     SDValue(Hi0, 0),
8880b57cec5SDimitry Andric     SDValue(Hi1, 0),
8890b57cec5SDimitry Andric     SDValue(AddLo, 1)
8900b57cec5SDimitry Andric   };
8910b57cec5SDimitry Andric   SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
8920b57cec5SDimitry Andric 
8930b57cec5SDimitry Andric   SDValue RegSequenceArgs[] = {
8940b57cec5SDimitry Andric     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
8950b57cec5SDimitry Andric     SDValue(AddLo,0),
8960b57cec5SDimitry Andric     Sub0,
8970b57cec5SDimitry Andric     SDValue(AddHi,0),
8980b57cec5SDimitry Andric     Sub1,
8990b57cec5SDimitry Andric   };
9000b57cec5SDimitry Andric   SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
9010b57cec5SDimitry Andric                                                MVT::i64, RegSequenceArgs);
9020b57cec5SDimitry Andric 
9030b57cec5SDimitry Andric   if (ProduceCarry) {
9040b57cec5SDimitry Andric     // Replace the carry-use
9050b57cec5SDimitry Andric     ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
9060b57cec5SDimitry Andric   }
9070b57cec5SDimitry Andric 
9080b57cec5SDimitry Andric   // Replace the remaining uses.
9090b57cec5SDimitry Andric   ReplaceNode(N, RegSequence);
9100b57cec5SDimitry Andric }
9110b57cec5SDimitry Andric 
SelectAddcSubb(SDNode * N)9120b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
9130b57cec5SDimitry Andric   SDLoc DL(N);
9140b57cec5SDimitry Andric   SDValue LHS = N->getOperand(0);
9150b57cec5SDimitry Andric   SDValue RHS = N->getOperand(1);
9160b57cec5SDimitry Andric   SDValue CI = N->getOperand(2);
9170b57cec5SDimitry Andric 
9185ffd83dbSDimitry Andric   if (N->isDivergent()) {
91906c3fb27SDimitry Andric     unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY ? AMDGPU::V_ADDC_U32_e64
9200b57cec5SDimitry Andric                                                       : AMDGPU::V_SUBB_U32_e64;
9210b57cec5SDimitry Andric     CurDAG->SelectNodeTo(
9220b57cec5SDimitry Andric         N, Opc, N->getVTList(),
9235ffd83dbSDimitry Andric         {LHS, RHS, CI,
9245ffd83dbSDimitry Andric          CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
9255ffd83dbSDimitry Andric   } else {
92606c3fb27SDimitry Andric     unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY ? AMDGPU::S_ADD_CO_PSEUDO
9275ffd83dbSDimitry Andric                                                       : AMDGPU::S_SUB_CO_PSEUDO;
9285ffd83dbSDimitry Andric     CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
9295ffd83dbSDimitry Andric   }
9300b57cec5SDimitry Andric }
9310b57cec5SDimitry Andric 
SelectUADDO_USUBO(SDNode * N)9320b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
9330b57cec5SDimitry Andric   // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
9340b57cec5SDimitry Andric   // carry out despite the _i32 name. These were renamed in VI to _U32.
9350b57cec5SDimitry Andric   // FIXME: We should probably rename the opcodes here.
9365ffd83dbSDimitry Andric   bool IsAdd = N->getOpcode() == ISD::UADDO;
9375ffd83dbSDimitry Andric   bool IsVALU = N->isDivergent();
9385ffd83dbSDimitry Andric 
9395ffd83dbSDimitry Andric   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
9405ffd83dbSDimitry Andric        ++UI)
9415ffd83dbSDimitry Andric     if (UI.getUse().getResNo() == 1) {
94206c3fb27SDimitry Andric       if ((IsAdd && (UI->getOpcode() != ISD::UADDO_CARRY)) ||
94306c3fb27SDimitry Andric           (!IsAdd && (UI->getOpcode() != ISD::USUBO_CARRY))) {
9445ffd83dbSDimitry Andric         IsVALU = true;
9455ffd83dbSDimitry Andric         break;
9465ffd83dbSDimitry Andric       }
9475ffd83dbSDimitry Andric     }
9485ffd83dbSDimitry Andric 
9495ffd83dbSDimitry Andric   if (IsVALU) {
950e8d8bef9SDimitry Andric     unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
9510b57cec5SDimitry Andric 
9520b57cec5SDimitry Andric     CurDAG->SelectNodeTo(
9530b57cec5SDimitry Andric         N, Opc, N->getVTList(),
9540b57cec5SDimitry Andric         {N->getOperand(0), N->getOperand(1),
9550b57cec5SDimitry Andric          CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
9565ffd83dbSDimitry Andric   } else {
9575ffd83dbSDimitry Andric     unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
9585ffd83dbSDimitry Andric                                                 : AMDGPU::S_USUBO_PSEUDO;
9595ffd83dbSDimitry Andric 
9605ffd83dbSDimitry Andric     CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
9615ffd83dbSDimitry Andric                          {N->getOperand(0), N->getOperand(1)});
9625ffd83dbSDimitry Andric   }
9630b57cec5SDimitry Andric }
9640b57cec5SDimitry Andric 
SelectFMA_W_CHAIN(SDNode * N)9650b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
9660b57cec5SDimitry Andric   SDLoc SL(N);
9670b57cec5SDimitry Andric   //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
9680b57cec5SDimitry Andric   SDValue Ops[10];
9690b57cec5SDimitry Andric 
9700b57cec5SDimitry Andric   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
9710b57cec5SDimitry Andric   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
9720b57cec5SDimitry Andric   SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
9730b57cec5SDimitry Andric   Ops[8] = N->getOperand(0);
9740b57cec5SDimitry Andric   Ops[9] = N->getOperand(4);
9750b57cec5SDimitry Andric 
976349cc55cSDimitry Andric   // If there are no source modifiers, prefer fmac over fma because it can use
977349cc55cSDimitry Andric   // the smaller VOP2 encoding.
978349cc55cSDimitry Andric   bool UseFMAC = Subtarget->hasDLInsts() &&
979349cc55cSDimitry Andric                  cast<ConstantSDNode>(Ops[0])->isZero() &&
980349cc55cSDimitry Andric                  cast<ConstantSDNode>(Ops[2])->isZero() &&
981349cc55cSDimitry Andric                  cast<ConstantSDNode>(Ops[4])->isZero();
982349cc55cSDimitry Andric   unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
983349cc55cSDimitry Andric   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
9840b57cec5SDimitry Andric }
9850b57cec5SDimitry Andric 
SelectFMUL_W_CHAIN(SDNode * N)9860b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
9870b57cec5SDimitry Andric   SDLoc SL(N);
9880b57cec5SDimitry Andric   //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
9890b57cec5SDimitry Andric   SDValue Ops[8];
9900b57cec5SDimitry Andric 
9910b57cec5SDimitry Andric   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
9920b57cec5SDimitry Andric   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
9930b57cec5SDimitry Andric   Ops[6] = N->getOperand(0);
9940b57cec5SDimitry Andric   Ops[7] = N->getOperand(3);
9950b57cec5SDimitry Andric 
9960b57cec5SDimitry Andric   CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
9970b57cec5SDimitry Andric }
9980b57cec5SDimitry Andric 
9990b57cec5SDimitry Andric // We need to handle this here because tablegen doesn't support matching
10000b57cec5SDimitry Andric // instructions with multiple outputs.
SelectDIV_SCALE(SDNode * N)10010b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
10020b57cec5SDimitry Andric   SDLoc SL(N);
10030b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
10040b57cec5SDimitry Andric 
10050b57cec5SDimitry Andric   assert(VT == MVT::f32 || VT == MVT::f64);
10060b57cec5SDimitry Andric 
10070b57cec5SDimitry Andric   unsigned Opc
1008e8d8bef9SDimitry Andric     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
10090b57cec5SDimitry Andric 
1010e8d8bef9SDimitry Andric   // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
1011e8d8bef9SDimitry Andric   // omod
1012e8d8bef9SDimitry Andric   SDValue Ops[8];
1013e8d8bef9SDimitry Andric   SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1014e8d8bef9SDimitry Andric   SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
1015e8d8bef9SDimitry Andric   SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
10160b57cec5SDimitry Andric   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
10170b57cec5SDimitry Andric }
10180b57cec5SDimitry Andric 
10190b57cec5SDimitry Andric // We need to handle this here because tablegen doesn't support matching
10200b57cec5SDimitry Andric // instructions with multiple outputs.
SelectMAD_64_32(SDNode * N)10210b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
10220b57cec5SDimitry Andric   SDLoc SL(N);
10230b57cec5SDimitry Andric   bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
102481ad6265SDimitry Andric   unsigned Opc;
1025bdd1243dSDimitry Andric   if (Subtarget->hasMADIntraFwdBug())
102681ad6265SDimitry Andric     Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
102781ad6265SDimitry Andric                  : AMDGPU::V_MAD_U64_U32_gfx11_e64;
102881ad6265SDimitry Andric   else
102981ad6265SDimitry Andric     Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
10300b57cec5SDimitry Andric 
10310b57cec5SDimitry Andric   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
10320b57cec5SDimitry Andric   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
10330b57cec5SDimitry Andric                     Clamp };
10340b57cec5SDimitry Andric   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
10350b57cec5SDimitry Andric }
10360b57cec5SDimitry Andric 
10374824e7fdSDimitry Andric // We need to handle this here because tablegen doesn't support matching
10384824e7fdSDimitry Andric // instructions with multiple outputs.
SelectMUL_LOHI(SDNode * N)10394824e7fdSDimitry Andric void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
10404824e7fdSDimitry Andric   SDLoc SL(N);
10414824e7fdSDimitry Andric   bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
104281ad6265SDimitry Andric   unsigned Opc;
1043bdd1243dSDimitry Andric   if (Subtarget->hasMADIntraFwdBug())
104481ad6265SDimitry Andric     Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
104581ad6265SDimitry Andric                  : AMDGPU::V_MAD_U64_U32_gfx11_e64;
104681ad6265SDimitry Andric   else
104781ad6265SDimitry Andric     Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
10484824e7fdSDimitry Andric 
10494824e7fdSDimitry Andric   SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
10504824e7fdSDimitry Andric   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
10514824e7fdSDimitry Andric   SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
10524824e7fdSDimitry Andric   SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
10534824e7fdSDimitry Andric   if (!SDValue(N, 0).use_empty()) {
10544824e7fdSDimitry Andric     SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
10554824e7fdSDimitry Andric     SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
10564824e7fdSDimitry Andric                                         MVT::i32, SDValue(Mad, 0), Sub0);
10574824e7fdSDimitry Andric     ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
10584824e7fdSDimitry Andric   }
10594824e7fdSDimitry Andric   if (!SDValue(N, 1).use_empty()) {
10604824e7fdSDimitry Andric     SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
10614824e7fdSDimitry Andric     SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
10624824e7fdSDimitry Andric                                         MVT::i32, SDValue(Mad, 0), Sub1);
10634824e7fdSDimitry Andric     ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
10644824e7fdSDimitry Andric   }
10654824e7fdSDimitry Andric   CurDAG->RemoveDeadNode(N);
10664824e7fdSDimitry Andric }
10674824e7fdSDimitry Andric 
isDSOffsetLegal(SDValue Base,unsigned Offset) const1068e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
1069e8d8bef9SDimitry Andric   if (!isUInt<16>(Offset))
10700b57cec5SDimitry Andric     return false;
10710b57cec5SDimitry Andric 
1072e8d8bef9SDimitry Andric   if (!Base || Subtarget->hasUsableDSOffset() ||
10730b57cec5SDimitry Andric       Subtarget->unsafeDSOffsetFoldingEnabled())
10740b57cec5SDimitry Andric     return true;
10750b57cec5SDimitry Andric 
10760b57cec5SDimitry Andric   // On Southern Islands instruction with a negative base value and an offset
10770b57cec5SDimitry Andric   // don't seem to work.
10780b57cec5SDimitry Andric   return CurDAG->SignBitIsZero(Base);
10790b57cec5SDimitry Andric }
10800b57cec5SDimitry Andric 
SelectDS1Addr1Offset(SDValue Addr,SDValue & Base,SDValue & Offset) const10810b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
10820b57cec5SDimitry Andric                                               SDValue &Offset) const {
10830b57cec5SDimitry Andric   SDLoc DL(Addr);
10840b57cec5SDimitry Andric   if (CurDAG->isBaseWithConstantOffset(Addr)) {
10850b57cec5SDimitry Andric     SDValue N0 = Addr.getOperand(0);
10860b57cec5SDimitry Andric     SDValue N1 = Addr.getOperand(1);
10870b57cec5SDimitry Andric     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1088e8d8bef9SDimitry Andric     if (isDSOffsetLegal(N0, C1->getSExtValue())) {
10890b57cec5SDimitry Andric       // (add n0, c0)
10900b57cec5SDimitry Andric       Base = N0;
10910b57cec5SDimitry Andric       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
10920b57cec5SDimitry Andric       return true;
10930b57cec5SDimitry Andric     }
10940b57cec5SDimitry Andric   } else if (Addr.getOpcode() == ISD::SUB) {
10950b57cec5SDimitry Andric     // sub C, x -> add (sub 0, x), C
10960b57cec5SDimitry Andric     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
10970b57cec5SDimitry Andric       int64_t ByteOffset = C->getSExtValue();
1098e8d8bef9SDimitry Andric       if (isDSOffsetLegal(SDValue(), ByteOffset)) {
10990b57cec5SDimitry Andric         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
11000b57cec5SDimitry Andric 
11010b57cec5SDimitry Andric         // XXX - This is kind of hacky. Create a dummy sub node so we can check
11020b57cec5SDimitry Andric         // the known bits in isDSOffsetLegal. We need to emit the selected node
11030b57cec5SDimitry Andric         // here, so this is thrown away.
11040b57cec5SDimitry Andric         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
11050b57cec5SDimitry Andric                                       Zero, Addr.getOperand(1));
11060b57cec5SDimitry Andric 
1107e8d8bef9SDimitry Andric         if (isDSOffsetLegal(Sub, ByteOffset)) {
11080b57cec5SDimitry Andric           SmallVector<SDValue, 3> Opnds;
11090b57cec5SDimitry Andric           Opnds.push_back(Zero);
11100b57cec5SDimitry Andric           Opnds.push_back(Addr.getOperand(1));
11110b57cec5SDimitry Andric 
11120b57cec5SDimitry Andric           // FIXME: Select to VOP3 version for with-carry.
1113e8d8bef9SDimitry Andric           unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
11140b57cec5SDimitry Andric           if (Subtarget->hasAddNoCarry()) {
11150b57cec5SDimitry Andric             SubOp = AMDGPU::V_SUB_U32_e64;
11160b57cec5SDimitry Andric             Opnds.push_back(
11170b57cec5SDimitry Andric                 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
11180b57cec5SDimitry Andric           }
11190b57cec5SDimitry Andric 
11200b57cec5SDimitry Andric           MachineSDNode *MachineSub =
11210b57cec5SDimitry Andric               CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
11220b57cec5SDimitry Andric 
11230b57cec5SDimitry Andric           Base = SDValue(MachineSub, 0);
11240b57cec5SDimitry Andric           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
11250b57cec5SDimitry Andric           return true;
11260b57cec5SDimitry Andric         }
11270b57cec5SDimitry Andric       }
11280b57cec5SDimitry Andric     }
11290b57cec5SDimitry Andric   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
11300b57cec5SDimitry Andric     // If we have a constant address, prefer to put the constant into the
11310b57cec5SDimitry Andric     // offset. This can save moves to load the constant address since multiple
11320b57cec5SDimitry Andric     // operations can share the zero base address register, and enables merging
11330b57cec5SDimitry Andric     // into read2 / write2 instructions.
11340b57cec5SDimitry Andric 
11350b57cec5SDimitry Andric     SDLoc DL(Addr);
11360b57cec5SDimitry Andric 
1137e8d8bef9SDimitry Andric     if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
11380b57cec5SDimitry Andric       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
11390b57cec5SDimitry Andric       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
11400b57cec5SDimitry Andric                                  DL, MVT::i32, Zero);
11410b57cec5SDimitry Andric       Base = SDValue(MovZero, 0);
11420b57cec5SDimitry Andric       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
11430b57cec5SDimitry Andric       return true;
11440b57cec5SDimitry Andric     }
11450b57cec5SDimitry Andric   }
11460b57cec5SDimitry Andric 
11470b57cec5SDimitry Andric   // default case
11480b57cec5SDimitry Andric   Base = Addr;
11490b57cec5SDimitry Andric   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
11500b57cec5SDimitry Andric   return true;
11510b57cec5SDimitry Andric }
11520b57cec5SDimitry Andric 
isDSOffset2Legal(SDValue Base,unsigned Offset0,unsigned Offset1,unsigned Size) const1153e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1154e8d8bef9SDimitry Andric                                           unsigned Offset1,
1155e8d8bef9SDimitry Andric                                           unsigned Size) const {
1156e8d8bef9SDimitry Andric   if (Offset0 % Size != 0 || Offset1 % Size != 0)
1157e8d8bef9SDimitry Andric     return false;
1158e8d8bef9SDimitry Andric   if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
1159e8d8bef9SDimitry Andric     return false;
1160e8d8bef9SDimitry Andric 
1161e8d8bef9SDimitry Andric   if (!Base || Subtarget->hasUsableDSOffset() ||
1162e8d8bef9SDimitry Andric       Subtarget->unsafeDSOffsetFoldingEnabled())
1163e8d8bef9SDimitry Andric     return true;
1164e8d8bef9SDimitry Andric 
1165e8d8bef9SDimitry Andric   // On Southern Islands instruction with a negative base value and an offset
1166e8d8bef9SDimitry Andric   // don't seem to work.
1167e8d8bef9SDimitry Andric   return CurDAG->SignBitIsZero(Base);
1168e8d8bef9SDimitry Andric }
1169e8d8bef9SDimitry Andric 
11705f757f3fSDimitry Andric // Return whether the operation has NoUnsignedWrap property.
isNoUnsignedWrap(SDValue Addr)11715f757f3fSDimitry Andric static bool isNoUnsignedWrap(SDValue Addr) {
11725f757f3fSDimitry Andric   return (Addr.getOpcode() == ISD::ADD &&
11735f757f3fSDimitry Andric           Addr->getFlags().hasNoUnsignedWrap()) ||
11745f757f3fSDimitry Andric          Addr->getOpcode() == ISD::OR;
11755f757f3fSDimitry Andric }
11765f757f3fSDimitry Andric 
11775f757f3fSDimitry Andric // Check that the base address of flat scratch load/store in the form of `base +
11785f757f3fSDimitry Andric // offset` is legal to be put in SGPR/VGPR (i.e. unsigned per hardware
11795f757f3fSDimitry Andric // requirement). We always treat the first operand as the base address here.
isFlatScratchBaseLegal(SDValue Addr) const11805f757f3fSDimitry Andric bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
11815f757f3fSDimitry Andric   if (isNoUnsignedWrap(Addr))
118206c3fb27SDimitry Andric     return true;
11835f757f3fSDimitry Andric 
11845f757f3fSDimitry Andric   // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
11855f757f3fSDimitry Andric   // values.
11867a6dacacSDimitry Andric   if (Subtarget->hasSignedScratchOffsets())
11875f757f3fSDimitry Andric     return true;
11885f757f3fSDimitry Andric 
11895f757f3fSDimitry Andric   auto LHS = Addr.getOperand(0);
11905f757f3fSDimitry Andric   auto RHS = Addr.getOperand(1);
11915f757f3fSDimitry Andric 
11925f757f3fSDimitry Andric   // If the immediate offset is negative and within certain range, the base
11935f757f3fSDimitry Andric   // address cannot also be negative. If the base is also negative, the sum
11945f757f3fSDimitry Andric   // would be either negative or much larger than the valid range of scratch
11955f757f3fSDimitry Andric   // memory a thread can access.
11965f757f3fSDimitry Andric   ConstantSDNode *ImmOp = nullptr;
11975f757f3fSDimitry Andric   if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
11985f757f3fSDimitry Andric     if (ImmOp->getSExtValue() < 0 && ImmOp->getSExtValue() > -0x40000000)
11995f757f3fSDimitry Andric       return true;
12005f757f3fSDimitry Andric   }
12015f757f3fSDimitry Andric 
12025f757f3fSDimitry Andric   return CurDAG->SignBitIsZero(LHS);
12035f757f3fSDimitry Andric }
12045f757f3fSDimitry Andric 
12055f757f3fSDimitry Andric // Check address value in SGPR/VGPR are legal for flat scratch in the form
12065f757f3fSDimitry Andric // of: SGPR + VGPR.
isFlatScratchBaseLegalSV(SDValue Addr) const12075f757f3fSDimitry Andric bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
12085f757f3fSDimitry Andric   if (isNoUnsignedWrap(Addr))
12095f757f3fSDimitry Andric     return true;
12105f757f3fSDimitry Andric 
12117a6dacacSDimitry Andric   // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
12127a6dacacSDimitry Andric   // values.
12137a6dacacSDimitry Andric   if (Subtarget->hasSignedScratchOffsets())
12147a6dacacSDimitry Andric     return true;
12157a6dacacSDimitry Andric 
12165f757f3fSDimitry Andric   auto LHS = Addr.getOperand(0);
12175f757f3fSDimitry Andric   auto RHS = Addr.getOperand(1);
12185f757f3fSDimitry Andric   return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS);
12195f757f3fSDimitry Andric }
12205f757f3fSDimitry Andric 
12215f757f3fSDimitry Andric // Check address value in SGPR/VGPR are legal for flat scratch in the form
12225f757f3fSDimitry Andric // of: SGPR + VGPR + Imm.
isFlatScratchBaseLegalSVImm(SDValue Addr) const12235f757f3fSDimitry Andric bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
12247a6dacacSDimitry Andric   // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
12257a6dacacSDimitry Andric   // values.
12267a6dacacSDimitry Andric   if (AMDGPU::isGFX12Plus(*Subtarget))
12277a6dacacSDimitry Andric     return true;
12287a6dacacSDimitry Andric 
12295f757f3fSDimitry Andric   auto Base = Addr.getOperand(0);
12305f757f3fSDimitry Andric   auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
12315f757f3fSDimitry Andric   // If the immediate offset is negative and within certain range, the base
12325f757f3fSDimitry Andric   // address cannot also be negative. If the base is also negative, the sum
12335f757f3fSDimitry Andric   // would be either negative or much larger than the valid range of scratch
12345f757f3fSDimitry Andric   // memory a thread can access.
12355f757f3fSDimitry Andric   if (isNoUnsignedWrap(Base) &&
12365f757f3fSDimitry Andric       (isNoUnsignedWrap(Addr) ||
12375f757f3fSDimitry Andric        (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
12385f757f3fSDimitry Andric     return true;
12395f757f3fSDimitry Andric 
12405f757f3fSDimitry Andric   auto LHS = Base.getOperand(0);
12415f757f3fSDimitry Andric   auto RHS = Base.getOperand(1);
12425f757f3fSDimitry Andric   return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS);
124306c3fb27SDimitry Andric }
124406c3fb27SDimitry Andric 
12450b57cec5SDimitry Andric // TODO: If offset is too big, put low 16-bit into offset.
SelectDS64Bit4ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const12460b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
12470b57cec5SDimitry Andric                                                    SDValue &Offset0,
12480b57cec5SDimitry Andric                                                    SDValue &Offset1) const {
1249e8d8bef9SDimitry Andric   return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1250e8d8bef9SDimitry Andric }
1251e8d8bef9SDimitry Andric 
SelectDS128Bit8ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const1252e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
1253e8d8bef9SDimitry Andric                                                     SDValue &Offset0,
1254e8d8bef9SDimitry Andric                                                     SDValue &Offset1) const {
1255e8d8bef9SDimitry Andric   return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1256e8d8bef9SDimitry Andric }
1257e8d8bef9SDimitry Andric 
SelectDSReadWrite2(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1,unsigned Size) const1258e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
1259e8d8bef9SDimitry Andric                                             SDValue &Offset0, SDValue &Offset1,
1260e8d8bef9SDimitry Andric                                             unsigned Size) const {
12610b57cec5SDimitry Andric   SDLoc DL(Addr);
12620b57cec5SDimitry Andric 
12630b57cec5SDimitry Andric   if (CurDAG->isBaseWithConstantOffset(Addr)) {
12640b57cec5SDimitry Andric     SDValue N0 = Addr.getOperand(0);
12650b57cec5SDimitry Andric     SDValue N1 = Addr.getOperand(1);
12660b57cec5SDimitry Andric     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1267e8d8bef9SDimitry Andric     unsigned OffsetValue0 = C1->getZExtValue();
1268e8d8bef9SDimitry Andric     unsigned OffsetValue1 = OffsetValue0 + Size;
1269e8d8bef9SDimitry Andric 
12700b57cec5SDimitry Andric     // (add n0, c0)
1271e8d8bef9SDimitry Andric     if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
12720b57cec5SDimitry Andric       Base = N0;
1273e8d8bef9SDimitry Andric       Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1274e8d8bef9SDimitry Andric       Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
12750b57cec5SDimitry Andric       return true;
12760b57cec5SDimitry Andric     }
12770b57cec5SDimitry Andric   } else if (Addr.getOpcode() == ISD::SUB) {
12780b57cec5SDimitry Andric     // sub C, x -> add (sub 0, x), C
1279e8d8bef9SDimitry Andric     if (const ConstantSDNode *C =
1280e8d8bef9SDimitry Andric             dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1281e8d8bef9SDimitry Andric       unsigned OffsetValue0 = C->getZExtValue();
1282e8d8bef9SDimitry Andric       unsigned OffsetValue1 = OffsetValue0 + Size;
12830b57cec5SDimitry Andric 
1284e8d8bef9SDimitry Andric       if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
12850b57cec5SDimitry Andric         SDLoc DL(Addr);
12860b57cec5SDimitry Andric         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
12870b57cec5SDimitry Andric 
12880b57cec5SDimitry Andric         // XXX - This is kind of hacky. Create a dummy sub node so we can check
12890b57cec5SDimitry Andric         // the known bits in isDSOffsetLegal. We need to emit the selected node
12900b57cec5SDimitry Andric         // here, so this is thrown away.
1291e8d8bef9SDimitry Andric         SDValue Sub =
1292e8d8bef9SDimitry Andric             CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
12930b57cec5SDimitry Andric 
1294e8d8bef9SDimitry Andric         if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
12950b57cec5SDimitry Andric           SmallVector<SDValue, 3> Opnds;
12960b57cec5SDimitry Andric           Opnds.push_back(Zero);
12970b57cec5SDimitry Andric           Opnds.push_back(Addr.getOperand(1));
1298e8d8bef9SDimitry Andric           unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
12990b57cec5SDimitry Andric           if (Subtarget->hasAddNoCarry()) {
13000b57cec5SDimitry Andric             SubOp = AMDGPU::V_SUB_U32_e64;
13010b57cec5SDimitry Andric             Opnds.push_back(
13020b57cec5SDimitry Andric                 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
13030b57cec5SDimitry Andric           }
13040b57cec5SDimitry Andric 
1305e8d8bef9SDimitry Andric           MachineSDNode *MachineSub = CurDAG->getMachineNode(
1306e8d8bef9SDimitry Andric               SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
13070b57cec5SDimitry Andric 
13080b57cec5SDimitry Andric           Base = SDValue(MachineSub, 0);
1309e8d8bef9SDimitry Andric           Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1310e8d8bef9SDimitry Andric           Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
13110b57cec5SDimitry Andric           return true;
13120b57cec5SDimitry Andric         }
13130b57cec5SDimitry Andric       }
13140b57cec5SDimitry Andric     }
13150b57cec5SDimitry Andric   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1316e8d8bef9SDimitry Andric     unsigned OffsetValue0 = CAddr->getZExtValue();
1317e8d8bef9SDimitry Andric     unsigned OffsetValue1 = OffsetValue0 + Size;
13180b57cec5SDimitry Andric 
1319e8d8bef9SDimitry Andric     if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
13200b57cec5SDimitry Andric       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1321e8d8bef9SDimitry Andric       MachineSDNode *MovZero =
1322e8d8bef9SDimitry Andric           CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
13230b57cec5SDimitry Andric       Base = SDValue(MovZero, 0);
1324e8d8bef9SDimitry Andric       Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1325e8d8bef9SDimitry Andric       Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
13260b57cec5SDimitry Andric       return true;
13270b57cec5SDimitry Andric     }
13280b57cec5SDimitry Andric   }
13290b57cec5SDimitry Andric 
13300b57cec5SDimitry Andric   // default case
13310b57cec5SDimitry Andric 
13320b57cec5SDimitry Andric   Base = Addr;
13330b57cec5SDimitry Andric   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
13340b57cec5SDimitry Andric   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
13350b57cec5SDimitry Andric   return true;
13360b57cec5SDimitry Andric }
13370b57cec5SDimitry Andric 
SelectMUBUF(SDValue Addr,SDValue & Ptr,SDValue & VAddr,SDValue & SOffset,SDValue & Offset,SDValue & Offen,SDValue & Idxen,SDValue & Addr64) const1338fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
1339fe6060f1SDimitry Andric                                      SDValue &SOffset, SDValue &Offset,
1340fe6060f1SDimitry Andric                                      SDValue &Offen, SDValue &Idxen,
1341fe6060f1SDimitry Andric                                      SDValue &Addr64) const {
13420b57cec5SDimitry Andric   // Subtarget prefers to use flat instruction
13435ffd83dbSDimitry Andric   // FIXME: This should be a pattern predicate and not reach here
13440b57cec5SDimitry Andric   if (Subtarget->useFlatForGlobal())
13450b57cec5SDimitry Andric     return false;
13460b57cec5SDimitry Andric 
13470b57cec5SDimitry Andric   SDLoc DL(Addr);
13480b57cec5SDimitry Andric 
13490b57cec5SDimitry Andric   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
13500b57cec5SDimitry Andric   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
13510b57cec5SDimitry Andric   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
13525f757f3fSDimitry Andric   SOffset = Subtarget->hasRestrictedSOffset()
13535f757f3fSDimitry Andric                 ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
13545f757f3fSDimitry Andric                 : CurDAG->getTargetConstant(0, DL, MVT::i32);
13550b57cec5SDimitry Andric 
13560b57cec5SDimitry Andric   ConstantSDNode *C1 = nullptr;
13570b57cec5SDimitry Andric   SDValue N0 = Addr;
13580b57cec5SDimitry Andric   if (CurDAG->isBaseWithConstantOffset(Addr)) {
13590b57cec5SDimitry Andric     C1 = cast<ConstantSDNode>(Addr.getOperand(1));
13600b57cec5SDimitry Andric     if (isUInt<32>(C1->getZExtValue()))
13610b57cec5SDimitry Andric       N0 = Addr.getOperand(0);
13620b57cec5SDimitry Andric     else
13630b57cec5SDimitry Andric       C1 = nullptr;
13640b57cec5SDimitry Andric   }
13650b57cec5SDimitry Andric 
13660b57cec5SDimitry Andric   if (N0.getOpcode() == ISD::ADD) {
13670b57cec5SDimitry Andric     // (add N2, N3) -> addr64, or
13680b57cec5SDimitry Andric     // (add (add N2, N3), C1) -> addr64
13690b57cec5SDimitry Andric     SDValue N2 = N0.getOperand(0);
13700b57cec5SDimitry Andric     SDValue N3 = N0.getOperand(1);
13710b57cec5SDimitry Andric     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
13720b57cec5SDimitry Andric 
13730b57cec5SDimitry Andric     if (N2->isDivergent()) {
13740b57cec5SDimitry Andric       if (N3->isDivergent()) {
13750b57cec5SDimitry Andric         // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
13760b57cec5SDimitry Andric         // addr64, and construct the resource from a 0 address.
13770b57cec5SDimitry Andric         Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
13780b57cec5SDimitry Andric         VAddr = N0;
13790b57cec5SDimitry Andric       } else {
13800b57cec5SDimitry Andric         // N2 is divergent, N3 is not.
13810b57cec5SDimitry Andric         Ptr = N3;
13820b57cec5SDimitry Andric         VAddr = N2;
13830b57cec5SDimitry Andric       }
13840b57cec5SDimitry Andric     } else {
13850b57cec5SDimitry Andric       // N2 is not divergent.
13860b57cec5SDimitry Andric       Ptr = N2;
13870b57cec5SDimitry Andric       VAddr = N3;
13880b57cec5SDimitry Andric     }
138906c3fb27SDimitry Andric     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
13900b57cec5SDimitry Andric   } else if (N0->isDivergent()) {
13910b57cec5SDimitry Andric     // N0 is divergent. Use it as the addr64, and construct the resource from a
13920b57cec5SDimitry Andric     // 0 address.
13930b57cec5SDimitry Andric     Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
13940b57cec5SDimitry Andric     VAddr = N0;
13950b57cec5SDimitry Andric     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
13960b57cec5SDimitry Andric   } else {
13970b57cec5SDimitry Andric     // N0 -> offset, or
13980b57cec5SDimitry Andric     // (N0 + C1) -> offset
13990b57cec5SDimitry Andric     VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
14000b57cec5SDimitry Andric     Ptr = N0;
14010b57cec5SDimitry Andric   }
14020b57cec5SDimitry Andric 
14030b57cec5SDimitry Andric   if (!C1) {
14040b57cec5SDimitry Andric     // No offset.
140506c3fb27SDimitry Andric     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
14060b57cec5SDimitry Andric     return true;
14070b57cec5SDimitry Andric   }
14080b57cec5SDimitry Andric 
14095f757f3fSDimitry Andric   const SIInstrInfo *TII = Subtarget->getInstrInfo();
14105f757f3fSDimitry Andric   if (TII->isLegalMUBUFImmOffset(C1->getZExtValue())) {
14110b57cec5SDimitry Andric     // Legal offset for instruction.
141206c3fb27SDimitry Andric     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
14130b57cec5SDimitry Andric     return true;
14140b57cec5SDimitry Andric   }
14150b57cec5SDimitry Andric 
14160b57cec5SDimitry Andric   // Illegal offset, store it in soffset.
141706c3fb27SDimitry Andric   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
14180b57cec5SDimitry Andric   SOffset =
14190b57cec5SDimitry Andric       SDValue(CurDAG->getMachineNode(
14200b57cec5SDimitry Andric                   AMDGPU::S_MOV_B32, DL, MVT::i32,
14210b57cec5SDimitry Andric                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
14220b57cec5SDimitry Andric               0);
14230b57cec5SDimitry Andric   return true;
14240b57cec5SDimitry Andric }
14250b57cec5SDimitry Andric 
SelectMUBUFAddr64(SDValue Addr,SDValue & SRsrc,SDValue & VAddr,SDValue & SOffset,SDValue & Offset) const14260b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
14270b57cec5SDimitry Andric                                            SDValue &VAddr, SDValue &SOffset,
1428fe6060f1SDimitry Andric                                            SDValue &Offset) const {
14290b57cec5SDimitry Andric   SDValue Ptr, Offen, Idxen, Addr64;
14300b57cec5SDimitry Andric 
14310b57cec5SDimitry Andric   // addr64 bit was removed for volcanic islands.
14325ffd83dbSDimitry Andric   // FIXME: This should be a pattern predicate and not reach here
14330b57cec5SDimitry Andric   if (!Subtarget->hasAddr64())
14340b57cec5SDimitry Andric     return false;
14350b57cec5SDimitry Andric 
1436fe6060f1SDimitry Andric   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
14370b57cec5SDimitry Andric     return false;
14380b57cec5SDimitry Andric 
14390b57cec5SDimitry Andric   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
14400b57cec5SDimitry Andric   if (C->getSExtValue()) {
14410b57cec5SDimitry Andric     SDLoc DL(Addr);
14420b57cec5SDimitry Andric 
14430b57cec5SDimitry Andric     const SITargetLowering& Lowering =
14440b57cec5SDimitry Andric       *static_cast<const SITargetLowering*>(getTargetLowering());
14450b57cec5SDimitry Andric 
14460b57cec5SDimitry Andric     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
14470b57cec5SDimitry Andric     return true;
14480b57cec5SDimitry Andric   }
14490b57cec5SDimitry Andric 
14500b57cec5SDimitry Andric   return false;
14510b57cec5SDimitry Andric }
14520b57cec5SDimitry Andric 
foldFrameIndex(SDValue N) const14530b57cec5SDimitry Andric std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
14545ffd83dbSDimitry Andric   SDLoc DL(N);
14550b57cec5SDimitry Andric 
1456e8d8bef9SDimitry Andric   auto *FI = dyn_cast<FrameIndexSDNode>(N);
1457e8d8bef9SDimitry Andric   SDValue TFI =
1458e8d8bef9SDimitry Andric       FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
14590b57cec5SDimitry Andric 
1460e8d8bef9SDimitry Andric   // We rebase the base address into an absolute stack address and hence
1461e8d8bef9SDimitry Andric   // use constant 0 for soffset. This value must be retained until
1462e8d8bef9SDimitry Andric   // frame elimination and eliminateFrameIndex will choose the appropriate
1463e8d8bef9SDimitry Andric   // frame register if need be.
1464bdd1243dSDimitry Andric   return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
14650b57cec5SDimitry Andric }
14660b57cec5SDimitry Andric 
SelectMUBUFScratchOffen(SDNode * Parent,SDValue Addr,SDValue & Rsrc,SDValue & VAddr,SDValue & SOffset,SDValue & ImmOffset) const14670b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
14680b57cec5SDimitry Andric                                                  SDValue Addr, SDValue &Rsrc,
14690b57cec5SDimitry Andric                                                  SDValue &VAddr, SDValue &SOffset,
14700b57cec5SDimitry Andric                                                  SDValue &ImmOffset) const {
14710b57cec5SDimitry Andric 
14720b57cec5SDimitry Andric   SDLoc DL(Addr);
14730b57cec5SDimitry Andric   MachineFunction &MF = CurDAG->getMachineFunction();
14740b57cec5SDimitry Andric   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
14750b57cec5SDimitry Andric 
14760b57cec5SDimitry Andric   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
14770b57cec5SDimitry Andric 
14780b57cec5SDimitry Andric   if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
14795ffd83dbSDimitry Andric     int64_t Imm = CAddr->getSExtValue();
14805ffd83dbSDimitry Andric     const int64_t NullPtr =
14815ffd83dbSDimitry Andric         AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
14825ffd83dbSDimitry Andric     // Don't fold null pointer.
14835ffd83dbSDimitry Andric     if (Imm != NullPtr) {
14845f757f3fSDimitry Andric       const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
148506c3fb27SDimitry Andric       SDValue HighBits =
148606c3fb27SDimitry Andric           CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
14875ffd83dbSDimitry Andric       MachineSDNode *MovHighBits = CurDAG->getMachineNode(
14885ffd83dbSDimitry Andric         AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
14890b57cec5SDimitry Andric       VAddr = SDValue(MovHighBits, 0);
14900b57cec5SDimitry Andric 
1491fe6060f1SDimitry Andric       SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
149206c3fb27SDimitry Andric       ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32);
14930b57cec5SDimitry Andric       return true;
14940b57cec5SDimitry Andric     }
14955ffd83dbSDimitry Andric   }
14960b57cec5SDimitry Andric 
14970b57cec5SDimitry Andric   if (CurDAG->isBaseWithConstantOffset(Addr)) {
14980b57cec5SDimitry Andric     // (add n0, c1)
14990b57cec5SDimitry Andric 
15000b57cec5SDimitry Andric     SDValue N0 = Addr.getOperand(0);
15010fca6ea1SDimitry Andric     uint64_t C1 = Addr.getConstantOperandVal(1);
15020b57cec5SDimitry Andric 
15030b57cec5SDimitry Andric     // Offsets in vaddr must be positive if range checking is enabled.
15040b57cec5SDimitry Andric     //
15050b57cec5SDimitry Andric     // The total computation of vaddr + soffset + offset must not overflow.  If
15060b57cec5SDimitry Andric     // vaddr is negative, even if offset is 0 the sgpr offset add will end up
15070b57cec5SDimitry Andric     // overflowing.
15080b57cec5SDimitry Andric     //
15090b57cec5SDimitry Andric     // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
15100b57cec5SDimitry Andric     // always perform a range check. If a negative vaddr base index was used,
15110b57cec5SDimitry Andric     // this would fail the range check. The overall address computation would
15120b57cec5SDimitry Andric     // compute a valid address, but this doesn't happen due to the range
15130b57cec5SDimitry Andric     // check. For out-of-bounds MUBUF loads, a 0 is returned.
15140b57cec5SDimitry Andric     //
15150b57cec5SDimitry Andric     // Therefore it should be safe to fold any VGPR offset on gfx9 into the
15160b57cec5SDimitry Andric     // MUBUF vaddr, but not on older subtargets which can only do this if the
15170b57cec5SDimitry Andric     // sign bit is known 0.
15185f757f3fSDimitry Andric     const SIInstrInfo *TII = Subtarget->getInstrInfo();
15190fca6ea1SDimitry Andric     if (TII->isLegalMUBUFImmOffset(C1) &&
15200b57cec5SDimitry Andric         (!Subtarget->privateMemoryResourceIsRangeChecked() ||
15210b57cec5SDimitry Andric          CurDAG->SignBitIsZero(N0))) {
15220b57cec5SDimitry Andric       std::tie(VAddr, SOffset) = foldFrameIndex(N0);
15230fca6ea1SDimitry Andric       ImmOffset = CurDAG->getTargetConstant(C1, DL, MVT::i32);
15240b57cec5SDimitry Andric       return true;
15250b57cec5SDimitry Andric     }
15260b57cec5SDimitry Andric   }
15270b57cec5SDimitry Andric 
15280b57cec5SDimitry Andric   // (node)
15290b57cec5SDimitry Andric   std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
153006c3fb27SDimitry Andric   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
15310b57cec5SDimitry Andric   return true;
15320b57cec5SDimitry Andric }
15330b57cec5SDimitry Andric 
IsCopyFromSGPR(const SIRegisterInfo & TRI,SDValue Val)1534fe6060f1SDimitry Andric static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
1535fe6060f1SDimitry Andric   if (Val.getOpcode() != ISD::CopyFromReg)
1536fe6060f1SDimitry Andric     return false;
1537bdd1243dSDimitry Andric   auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
1538bdd1243dSDimitry Andric   if (!Reg.isPhysical())
1539bdd1243dSDimitry Andric     return false;
1540bdd1243dSDimitry Andric   auto RC = TRI.getPhysRegBaseClass(Reg);
1541fe6060f1SDimitry Andric   return RC && TRI.isSGPRClass(RC);
1542fe6060f1SDimitry Andric }
1543fe6060f1SDimitry Andric 
SelectMUBUFScratchOffset(SDNode * Parent,SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const15440b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
15450b57cec5SDimitry Andric                                                   SDValue Addr,
15460b57cec5SDimitry Andric                                                   SDValue &SRsrc,
15470b57cec5SDimitry Andric                                                   SDValue &SOffset,
15480b57cec5SDimitry Andric                                                   SDValue &Offset) const {
1549fe6060f1SDimitry Andric   const SIRegisterInfo *TRI =
1550fe6060f1SDimitry Andric       static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
15515f757f3fSDimitry Andric   const SIInstrInfo *TII = Subtarget->getInstrInfo();
15520b57cec5SDimitry Andric   MachineFunction &MF = CurDAG->getMachineFunction();
15530b57cec5SDimitry Andric   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1554fe6060f1SDimitry Andric   SDLoc DL(Addr);
1555fe6060f1SDimitry Andric 
1556fe6060f1SDimitry Andric   // CopyFromReg <sgpr>
1557fe6060f1SDimitry Andric   if (IsCopyFromSGPR(*TRI, Addr)) {
1558fe6060f1SDimitry Andric     SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1559fe6060f1SDimitry Andric     SOffset = Addr;
156006c3fb27SDimitry Andric     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1561fe6060f1SDimitry Andric     return true;
1562fe6060f1SDimitry Andric   }
1563fe6060f1SDimitry Andric 
1564fe6060f1SDimitry Andric   ConstantSDNode *CAddr;
1565fe6060f1SDimitry Andric   if (Addr.getOpcode() == ISD::ADD) {
1566fe6060f1SDimitry Andric     // Add (CopyFromReg <sgpr>) <constant>
1567fe6060f1SDimitry Andric     CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
15685f757f3fSDimitry Andric     if (!CAddr || !TII->isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1569fe6060f1SDimitry Andric       return false;
1570fe6060f1SDimitry Andric     if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
1571fe6060f1SDimitry Andric       return false;
1572fe6060f1SDimitry Andric 
1573fe6060f1SDimitry Andric     SOffset = Addr.getOperand(0);
1574fe6060f1SDimitry Andric   } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
15755f757f3fSDimitry Andric              TII->isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
1576fe6060f1SDimitry Andric     // <constant>
1577fe6060f1SDimitry Andric     SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1578fe6060f1SDimitry Andric   } else {
1579fe6060f1SDimitry Andric     return false;
1580fe6060f1SDimitry Andric   }
15810b57cec5SDimitry Andric 
15820b57cec5SDimitry Andric   SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
15830b57cec5SDimitry Andric 
158406c3fb27SDimitry Andric   Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i32);
15850b57cec5SDimitry Andric   return true;
15860b57cec5SDimitry Andric }
15870b57cec5SDimitry Andric 
SelectMUBUFOffset(SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const15880b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1589fe6060f1SDimitry Andric                                            SDValue &SOffset, SDValue &Offset
1590fe6060f1SDimitry Andric                                            ) const {
15910b57cec5SDimitry Andric   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
15925f757f3fSDimitry Andric   const SIInstrInfo *TII = Subtarget->getInstrInfo();
15930b57cec5SDimitry Andric 
1594fe6060f1SDimitry Andric   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
15950b57cec5SDimitry Andric     return false;
15960b57cec5SDimitry Andric 
15970b57cec5SDimitry Andric   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
15980b57cec5SDimitry Andric       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
15990b57cec5SDimitry Andric       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
16000b57cec5SDimitry Andric     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1601349cc55cSDimitry Andric                     APInt::getAllOnes(32).getZExtValue(); // Size
16020b57cec5SDimitry Andric     SDLoc DL(Addr);
16030b57cec5SDimitry Andric 
16040b57cec5SDimitry Andric     const SITargetLowering& Lowering =
16050b57cec5SDimitry Andric       *static_cast<const SITargetLowering*>(getTargetLowering());
16060b57cec5SDimitry Andric 
16070b57cec5SDimitry Andric     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
16080b57cec5SDimitry Andric     return true;
16090b57cec5SDimitry Andric   }
16100b57cec5SDimitry Andric   return false;
16110b57cec5SDimitry Andric }
16120b57cec5SDimitry Andric 
SelectBUFSOffset(SDValue ByteOffsetNode,SDValue & SOffset) const16135f757f3fSDimitry Andric bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
16145f757f3fSDimitry Andric                                           SDValue &SOffset) const {
1615297eecfbSDimitry Andric   if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {
16165f757f3fSDimitry Andric     SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
16175f757f3fSDimitry Andric     return true;
16185f757f3fSDimitry Andric   }
16195f757f3fSDimitry Andric 
16205f757f3fSDimitry Andric   SOffset = ByteOffsetNode;
16215f757f3fSDimitry Andric   return true;
16225f757f3fSDimitry Andric }
16235f757f3fSDimitry Andric 
16248bcb0991SDimitry Andric // Find a load or store from corresponding pattern root.
16258bcb0991SDimitry Andric // Roots may be build_vector, bitconvert or their combinations.
findMemSDNode(SDNode * N)16268bcb0991SDimitry Andric static MemSDNode* findMemSDNode(SDNode *N) {
16278bcb0991SDimitry Andric   N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
16288bcb0991SDimitry Andric   if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
16298bcb0991SDimitry Andric     return MN;
16308bcb0991SDimitry Andric   assert(isa<BuildVectorSDNode>(N));
16318bcb0991SDimitry Andric   for (SDValue V : N->op_values())
16328bcb0991SDimitry Andric     if (MemSDNode *MN =
16338bcb0991SDimitry Andric           dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
16348bcb0991SDimitry Andric       return MN;
16358bcb0991SDimitry Andric   llvm_unreachable("cannot find MemSDNode in the pattern!");
16360b57cec5SDimitry Andric }
16370b57cec5SDimitry Andric 
// Common implementation behind SelectFlatOffset, SelectGlobalOffset and
// SelectScratchOffset. Splits Addr into a register address (VAddr) and an
// immediate (Offset) for the instruction kind given by FlatVariant. The
// address space is taken from the memory node found under N. This selector
// always returns true; when nothing can be folded, VAddr is Addr and Offset
// is 0.
SelectFlatOffsetImpl(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset,uint64_t FlatVariant) const1638fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
1639fe6060f1SDimitry Andric                                               SDValue &VAddr, SDValue &Offset,
1640fe6060f1SDimitry Andric                                               uint64_t FlatVariant) const {
16418bcb0991SDimitry Andric   int64_t OffsetVal = 0;
16428bcb0991SDimitry Andric 
1643e8d8bef9SDimitry Andric   unsigned AS = findMemSDNode(N)->getAddressSpace();
1644e8d8bef9SDimitry Andric 
  // No offset is folded for FLAT-variant accesses to the flat or global
  // address space on subtargets that report the flat segment offset bug.
1645fe6060f1SDimitry Andric   bool CanHaveFlatSegmentOffsetBug =
1646fe6060f1SDimitry Andric       Subtarget->hasFlatSegmentOffsetBug() &&
1647fe6060f1SDimitry Andric       FlatVariant == SIInstrFlags::FLAT &&
1648fe6060f1SDimitry Andric       (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);
1649fe6060f1SDimitry Andric 
1650fe6060f1SDimitry Andric   if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
16515ffd83dbSDimitry Andric     SDValue N0, N1;
    // The extra base-legality check is applied only to the FlatScratch
    // variant.
165206c3fb27SDimitry Andric     if (isBaseWithConstantOffset64(Addr, N0, N1) &&
16535f757f3fSDimitry Andric         (FlatVariant != SIInstrFlags::FlatScratch ||
16545f757f3fSDimitry Andric          isFlatScratchBaseLegal(Addr))) {
1655fe6060f1SDimitry Andric       int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
16568bcb0991SDimitry Andric 
16578bcb0991SDimitry Andric       const SIInstrInfo *TII = Subtarget->getInstrInfo();
1658fe6060f1SDimitry Andric       if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        // The whole constant fits in the immediate field.
16598bcb0991SDimitry Andric         Addr = N0;
16608bcb0991SDimitry Andric         OffsetVal = COffsetVal;
16618bcb0991SDimitry Andric       } else {
16628bcb0991SDimitry Andric         // If the offset doesn't fit, put the low bits into the offset field and
16638bcb0991SDimitry Andric         // add the rest.
1664e8d8bef9SDimitry Andric         //
1665e8d8bef9SDimitry Andric         // For a FLAT instruction the hardware decides whether to access
1666e8d8bef9SDimitry Andric         // global/scratch/shared memory based on the high bits of vaddr,
1667e8d8bef9SDimitry Andric         // ignoring the offset field, so we have to ensure that when we add
1668e8d8bef9SDimitry Andric         // remainder to vaddr it still points into the same underlying object.
1669e8d8bef9SDimitry Andric         // The easiest way to do that is to make sure that we split the offset
1670e8d8bef9SDimitry Andric         // into two pieces that are both >= 0 or both <= 0.
16718bcb0991SDimitry Andric 
16728bcb0991SDimitry Andric         SDLoc DL(N);
1673e8d8bef9SDimitry Andric         uint64_t RemainderOffset;
16748bcb0991SDimitry Andric 
1675fe6060f1SDimitry Andric         std::tie(OffsetVal, RemainderOffset) =
1676fe6060f1SDimitry Andric             TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
16778bcb0991SDimitry Andric 
1678e8d8bef9SDimitry Andric         SDValue AddOffsetLo =
1679e8d8bef9SDimitry Andric             getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1680e8d8bef9SDimitry Andric         SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1681e8d8bef9SDimitry Andric 
1682e8d8bef9SDimitry Andric         if (Addr.getValueType().getSizeInBits() == 32) {
          // 32-bit address: one VALU add of the low remainder bits. The
          // V_ADD_U32_e64 form, used when hasAddNoCarry() is true, takes an
          // additional clamp operand.
1683e8d8bef9SDimitry Andric           SmallVector<SDValue, 3> Opnds;
1684e8d8bef9SDimitry Andric           Opnds.push_back(N0);
1685e8d8bef9SDimitry Andric           Opnds.push_back(AddOffsetLo);
1686e8d8bef9SDimitry Andric           unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1687e8d8bef9SDimitry Andric           if (Subtarget->hasAddNoCarry()) {
1688e8d8bef9SDimitry Andric             AddOp = AMDGPU::V_ADD_U32_e64;
1689e8d8bef9SDimitry Andric             Opnds.push_back(Clamp);
16908bcb0991SDimitry Andric           }
1691e8d8bef9SDimitry Andric           Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
16928bcb0991SDimitry Andric         } else {
16935ffd83dbSDimitry Andric           // TODO: Should this try to use a scalar add pseudo if the base address
16945ffd83dbSDimitry Andric           // is uniform and saddr is usable?
          // Otherwise: extract the sub0/sub1 halves of the base, add the low
          // halves with carry-out, add the high halves with carry-in, and
          // rebuild the 64-bit value with REG_SEQUENCE.
16958bcb0991SDimitry Andric           SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
16968bcb0991SDimitry Andric           SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
16978bcb0991SDimitry Andric 
1698e8d8bef9SDimitry Andric           SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1699e8d8bef9SDimitry Andric                                                 DL, MVT::i32, N0, Sub0);
1700e8d8bef9SDimitry Andric           SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1701e8d8bef9SDimitry Andric                                                 DL, MVT::i32, N0, Sub1);
17028bcb0991SDimitry Andric 
17035ffd83dbSDimitry Andric           SDValue AddOffsetHi =
17045ffd83dbSDimitry Andric               getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
17058bcb0991SDimitry Andric 
          // Result 0 is the 32-bit sum, result 1 is the i1 carry.
17068bcb0991SDimitry Andric           SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
17078bcb0991SDimitry Andric 
17085ffd83dbSDimitry Andric           SDNode *Add =
1709e8d8bef9SDimitry Andric               CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
17108bcb0991SDimitry Andric                                      {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
17118bcb0991SDimitry Andric 
17128bcb0991SDimitry Andric           SDNode *Addc = CurDAG->getMachineNode(
17138bcb0991SDimitry Andric               AMDGPU::V_ADDC_U32_e64, DL, VTs,
17148bcb0991SDimitry Andric               {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
17158bcb0991SDimitry Andric 
17168bcb0991SDimitry Andric           SDValue RegSequenceArgs[] = {
17178bcb0991SDimitry Andric               CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
17185ffd83dbSDimitry Andric               SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
17198bcb0991SDimitry Andric 
17208bcb0991SDimitry Andric           Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
17215ffd83dbSDimitry Andric                                                 MVT::i64, RegSequenceArgs),
17225ffd83dbSDimitry Andric                          0);
17235ffd83dbSDimitry Andric         }
17248bcb0991SDimitry Andric       }
17258bcb0991SDimitry Andric     }
1726e8d8bef9SDimitry Andric   }
17278bcb0991SDimitry Andric 
  // Addr is now one of: the original address, the base N0, or N0 plus the
  // remainder that did not fit the immediate field.
17288bcb0991SDimitry Andric   VAddr = Addr;
17295f757f3fSDimitry Andric   Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
17308bcb0991SDimitry Andric   return true;
17310b57cec5SDimitry Andric }
17320b57cec5SDimitry Andric 
SelectFlatOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1733fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
1734fe6060f1SDimitry Andric                                           SDValue &VAddr,
1735fe6060f1SDimitry Andric                                           SDValue &Offset) const {
1736fe6060f1SDimitry Andric   return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
1737fe6060f1SDimitry Andric }
1738fe6060f1SDimitry Andric 
SelectGlobalOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1739fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
1740fe6060f1SDimitry Andric                                             SDValue &VAddr,
1741fe6060f1SDimitry Andric                                             SDValue &Offset) const {
1742fe6060f1SDimitry Andric   return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
1743fe6060f1SDimitry Andric }
1744fe6060f1SDimitry Andric 
SelectScratchOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1745fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
1746fe6060f1SDimitry Andric                                              SDValue &VAddr,
1747fe6060f1SDimitry Andric                                              SDValue &Offset) const {
1748fe6060f1SDimitry Andric   return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1749fe6060f1SDimitry Andric                               SIInstrFlags::FlatScratch);
1750fe6060f1SDimitry Andric }
1751fe6060f1SDimitry Andric 
1752e8d8bef9SDimitry Andric // If this matches zero_extend i32:x, return x
matchZExtFromI32(SDValue Op)1753e8d8bef9SDimitry Andric static SDValue matchZExtFromI32(SDValue Op) {
1754e8d8bef9SDimitry Andric   if (Op.getOpcode() != ISD::ZERO_EXTEND)
1755e8d8bef9SDimitry Andric     return SDValue();
1756e8d8bef9SDimitry Andric 
1757e8d8bef9SDimitry Andric   SDValue ExtSrc = Op.getOperand(0);
1758e8d8bef9SDimitry Andric   return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
17590b57cec5SDimitry Andric }
17600b57cec5SDimitry Andric 
1761e8d8bef9SDimitry Andric // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
// On success SAddr is the uniform base, VOffset the 32-bit variable offset
// (a V_MOV_B32 of a constant when the address has no variable part) and
// Offset the immediate. Returns false for divergent, undef or constant
// addresses that match none of the patterns, and when adding the constant
// with VALU instructions is judged cheaper.
SelectGlobalSAddr(SDNode * N,SDValue Addr,SDValue & SAddr,SDValue & VOffset,SDValue & Offset) const1762e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
17630b57cec5SDimitry Andric                                            SDValue Addr,
1764e8d8bef9SDimitry Andric                                            SDValue &SAddr,
1765e8d8bef9SDimitry Andric                                            SDValue &VOffset,
1766e8d8bef9SDimitry Andric                                            SDValue &Offset) const {
1767e8d8bef9SDimitry Andric   int64_t ImmOffset = 0;
1768e8d8bef9SDimitry Andric 
1769e8d8bef9SDimitry Andric   // Match the immediate offset first, which canonically is moved as low as
1770e8d8bef9SDimitry Andric   // possible.
1771e8d8bef9SDimitry Andric 
1772e8d8bef9SDimitry Andric   SDValue LHS, RHS;
1773e8d8bef9SDimitry Andric   if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1774e8d8bef9SDimitry Andric     int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1775e8d8bef9SDimitry Andric     const SIInstrInfo *TII = Subtarget->getInstrInfo();
1776e8d8bef9SDimitry Andric 
1777fe6060f1SDimitry Andric     if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1778fe6060f1SDimitry Andric                                SIInstrFlags::FlatGlobal)) {
      // The constant fits the immediate field; continue matching on the
      // base.
1779e8d8bef9SDimitry Andric       Addr = LHS;
1780e8d8bef9SDimitry Andric       ImmOffset = COffsetVal;
1781fe6060f1SDimitry Andric     } else if (!LHS->isDivergent()) {
      // Uniform base with a constant too large for the immediate field.
1782fe6060f1SDimitry Andric       if (COffsetVal > 0) {
1783e8d8bef9SDimitry Andric         SDLoc SL(N);
1784fe6060f1SDimitry Andric         // saddr + large_offset -> saddr +
1785fe6060f1SDimitry Andric         //                         (voffset = large_offset & ~MaxOffset) +
1786e8d8bef9SDimitry Andric         //                         (large_offset & MaxOffset);
1787e8d8bef9SDimitry Andric         int64_t SplitImmOffset, RemainderOffset;
1788fe6060f1SDimitry Andric         std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1789fe6060f1SDimitry Andric             COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1790e8d8bef9SDimitry Andric 
        // The remainder can only be used as voffset if it fits in an
        // unsigned 32-bit value.
1791e8d8bef9SDimitry Andric         if (isUInt<32>(RemainderOffset)) {
1792e8d8bef9SDimitry Andric           SDNode *VMov = CurDAG->getMachineNode(
1793e8d8bef9SDimitry Andric               AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1794e8d8bef9SDimitry Andric               CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1795e8d8bef9SDimitry Andric           VOffset = SDValue(VMov, 0);
1796e8d8bef9SDimitry Andric           SAddr = LHS;
17975f757f3fSDimitry Andric           Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
1798e8d8bef9SDimitry Andric           return true;
1799e8d8bef9SDimitry Andric         }
1800e8d8bef9SDimitry Andric       }
1801fe6060f1SDimitry Andric 
1802fe6060f1SDimitry Andric       // We are adding a 64 bit SGPR and a constant. If constant bus limit
1803fe6060f1SDimitry Andric       // is 1 we would need to perform 1 or 2 extra moves for each half of
1804fe6060f1SDimitry Andric       // the constant and it is better to do a scalar add and then issue a
1805fe6060f1SDimitry Andric       // single VALU instruction to materialize zero. Otherwise it is less
1806fe6060f1SDimitry Andric       // instructions to perform VALU adds with immediates or inline literals.
      // NumLiterals counts how many 32-bit halves of the constant are not
      // inline constants (0, 1 or 2).
1807fe6060f1SDimitry Andric       unsigned NumLiterals =
1808fe6060f1SDimitry Andric           !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1809fe6060f1SDimitry Andric           !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1810fe6060f1SDimitry Andric       if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1811fe6060f1SDimitry Andric         return false;
1812fe6060f1SDimitry Andric     }
1813e8d8bef9SDimitry Andric   }
1814e8d8bef9SDimitry Andric 
1815e8d8bef9SDimitry Andric   // Match the variable offset.
  // Both operand orders of the add are tried; the second is only attempted
  // when the first did not set SAddr.
1816fe6060f1SDimitry Andric   if (Addr.getOpcode() == ISD::ADD) {
1817e8d8bef9SDimitry Andric     LHS = Addr.getOperand(0);
1818e8d8bef9SDimitry Andric     RHS = Addr.getOperand(1);
1819e8d8bef9SDimitry Andric 
1820e8d8bef9SDimitry Andric     if (!LHS->isDivergent()) {
1821e8d8bef9SDimitry Andric       // add (i64 sgpr), (zero_extend (i32 vgpr))
1822e8d8bef9SDimitry Andric       if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1823e8d8bef9SDimitry Andric         SAddr = LHS;
1824e8d8bef9SDimitry Andric         VOffset = ZextRHS;
1825e8d8bef9SDimitry Andric       }
1826e8d8bef9SDimitry Andric     }
1827e8d8bef9SDimitry Andric 
1828e8d8bef9SDimitry Andric     if (!SAddr && !RHS->isDivergent()) {
1829e8d8bef9SDimitry Andric       // add (zero_extend (i32 vgpr)), (i64 sgpr)
1830e8d8bef9SDimitry Andric       if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1831e8d8bef9SDimitry Andric         SAddr = RHS;
1832e8d8bef9SDimitry Andric         VOffset = ZextLHS;
1833e8d8bef9SDimitry Andric       }
1834e8d8bef9SDimitry Andric     }
1835e8d8bef9SDimitry Andric 
1836fe6060f1SDimitry Andric     if (SAddr) {
18375f757f3fSDimitry Andric       Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
1838fe6060f1SDimitry Andric       return true;
1839fe6060f1SDimitry Andric     }
1840fe6060f1SDimitry Andric   }
1841fe6060f1SDimitry Andric 
  // No variable offset was matched. The address is used as saddr only if it
  // is uniform and is neither undef nor a constant.
1842fe6060f1SDimitry Andric   if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1843fe6060f1SDimitry Andric       isa<ConstantSDNode>(Addr))
1844e8d8bef9SDimitry Andric     return false;
1845e8d8bef9SDimitry Andric 
1846fe6060f1SDimitry Andric   // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1847fe6060f1SDimitry Andric   // moves required to copy a 64-bit SGPR to VGPR.
1848fe6060f1SDimitry Andric   SAddr = Addr;
1849fe6060f1SDimitry Andric   SDNode *VMov =
1850fe6060f1SDimitry Andric       CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1851fe6060f1SDimitry Andric                              CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1852fe6060f1SDimitry Andric   VOffset = SDValue(VMov, 0);
18535f757f3fSDimitry Andric   Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
1854e8d8bef9SDimitry Andric   return true;
1855e8d8bef9SDimitry Andric }
1856e8d8bef9SDimitry Andric 
SelectSAddrFI(SelectionDAG * CurDAG,SDValue SAddr)1857fe6060f1SDimitry Andric static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1858e8d8bef9SDimitry Andric   if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1859e8d8bef9SDimitry Andric     SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1860e8d8bef9SDimitry Andric   } else if (SAddr.getOpcode() == ISD::ADD &&
1861e8d8bef9SDimitry Andric              isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1862e8d8bef9SDimitry Andric     // Materialize this into a scalar move for scalar address to avoid
1863e8d8bef9SDimitry Andric     // readfirstlane.
1864e8d8bef9SDimitry Andric     auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1865e8d8bef9SDimitry Andric     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1866e8d8bef9SDimitry Andric                                               FI->getValueType(0));
1867fe6060f1SDimitry Andric     SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1868e8d8bef9SDimitry Andric                                            MVT::i32, TFI, SAddr.getOperand(1)),
1869e8d8bef9SDimitry Andric                     0);
1870e8d8bef9SDimitry Andric   }
1871e8d8bef9SDimitry Andric 
1872fe6060f1SDimitry Andric   return SAddr;
1873e8d8bef9SDimitry Andric }
1874e8d8bef9SDimitry Andric 
1875fe6060f1SDimitry Andric // Match (32-bit SGPR base) + sext(imm offset)
SelectScratchSAddr(SDNode * Parent,SDValue Addr,SDValue & SAddr,SDValue & Offset) const1876fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
1877fe6060f1SDimitry Andric                                             SDValue &SAddr,
1878fe6060f1SDimitry Andric                                             SDValue &Offset) const {
1879fe6060f1SDimitry Andric   if (Addr->isDivergent())
1880fe6060f1SDimitry Andric     return false;
1881fe6060f1SDimitry Andric 
1882fe6060f1SDimitry Andric   SDLoc DL(Addr);
1883fe6060f1SDimitry Andric 
1884fe6060f1SDimitry Andric   int64_t COffsetVal = 0;
1885fe6060f1SDimitry Andric 
18865f757f3fSDimitry Andric   if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
1887fe6060f1SDimitry Andric     COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1888fe6060f1SDimitry Andric     SAddr = Addr.getOperand(0);
1889fe6060f1SDimitry Andric   } else {
1890fe6060f1SDimitry Andric     SAddr = Addr;
1891fe6060f1SDimitry Andric   }
1892fe6060f1SDimitry Andric 
1893fe6060f1SDimitry Andric   SAddr = SelectSAddrFI(CurDAG, SAddr);
1894fe6060f1SDimitry Andric 
1895fe6060f1SDimitry Andric   const SIInstrInfo *TII = Subtarget->getInstrInfo();
1896fe6060f1SDimitry Andric 
1897fe6060f1SDimitry Andric   if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1898fe6060f1SDimitry Andric                               SIInstrFlags::FlatScratch)) {
1899fe6060f1SDimitry Andric     int64_t SplitImmOffset, RemainderOffset;
1900fe6060f1SDimitry Andric     std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1901fe6060f1SDimitry Andric         COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1902fe6060f1SDimitry Andric 
1903fe6060f1SDimitry Andric     COffsetVal = SplitImmOffset;
1904fe6060f1SDimitry Andric 
1905fe6060f1SDimitry Andric     SDValue AddOffset =
1906fe6060f1SDimitry Andric         SAddr.getOpcode() == ISD::TargetFrameIndex
1907fe6060f1SDimitry Andric             ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1908fe6060f1SDimitry Andric             : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
1909fe6060f1SDimitry Andric     SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
1910fe6060f1SDimitry Andric                                            SAddr, AddOffset),
1911fe6060f1SDimitry Andric                     0);
1912fe6060f1SDimitry Andric   }
1913fe6060f1SDimitry Andric 
1914*6e516c87SDimitry Andric   Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
1915e8d8bef9SDimitry Andric 
1916e8d8bef9SDimitry Andric   return true;
19170b57cec5SDimitry Andric }
19180b57cec5SDimitry Andric 
191981ad6265SDimitry Andric // Check whether the flat scratch SVS swizzle bug affects this access.
checkFlatScratchSVSSwizzleBug(SDValue VAddr,SDValue SAddr,uint64_t ImmOffset) const192081ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
192181ad6265SDimitry Andric     SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
192281ad6265SDimitry Andric   if (!Subtarget->hasFlatScratchSVSSwizzleBug())
192381ad6265SDimitry Andric     return false;
192481ad6265SDimitry Andric 
192581ad6265SDimitry Andric   // The bug affects the swizzling of SVS accesses if there is any carry out
192681ad6265SDimitry Andric   // from the two low order bits (i.e. from bit 1 into bit 2) when adding
192781ad6265SDimitry Andric   // voffset to (soffset + inst_offset).
192881ad6265SDimitry Andric   KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
192981ad6265SDimitry Andric   KnownBits SKnown = KnownBits::computeForAddSub(
19300fca6ea1SDimitry Andric       /*Add=*/true, /*NSW=*/false, /*NUW=*/false,
19310fca6ea1SDimitry Andric       CurDAG->computeKnownBits(SAddr),
193281ad6265SDimitry Andric       KnownBits::makeConstant(APInt(32, ImmOffset)));
193381ad6265SDimitry Andric   uint64_t VMax = VKnown.getMaxValue().getZExtValue();
193481ad6265SDimitry Andric   uint64_t SMax = SKnown.getMaxValue().getZExtValue();
193581ad6265SDimitry Andric   return (VMax & 3) + (SMax & 3) >= 4;
193681ad6265SDimitry Andric }
193781ad6265SDimitry Andric 
SelectScratchSVAddr(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & SAddr,SDValue & Offset) const193881ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
193981ad6265SDimitry Andric                                              SDValue &VAddr, SDValue &SAddr,
194081ad6265SDimitry Andric                                              SDValue &Offset) const  {
194181ad6265SDimitry Andric   int64_t ImmOffset = 0;
194281ad6265SDimitry Andric 
194381ad6265SDimitry Andric   SDValue LHS, RHS;
19445f757f3fSDimitry Andric   SDValue OrigAddr = Addr;
194581ad6265SDimitry Andric   if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
194681ad6265SDimitry Andric     int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
194781ad6265SDimitry Andric     const SIInstrInfo *TII = Subtarget->getInstrInfo();
194881ad6265SDimitry Andric 
194981ad6265SDimitry Andric     if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
195081ad6265SDimitry Andric       Addr = LHS;
195181ad6265SDimitry Andric       ImmOffset = COffsetVal;
195281ad6265SDimitry Andric     } else if (!LHS->isDivergent() && COffsetVal > 0) {
195381ad6265SDimitry Andric       SDLoc SL(N);
195481ad6265SDimitry Andric       // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
195581ad6265SDimitry Andric       //                         (large_offset & MaxOffset);
195681ad6265SDimitry Andric       int64_t SplitImmOffset, RemainderOffset;
195781ad6265SDimitry Andric       std::tie(SplitImmOffset, RemainderOffset)
195881ad6265SDimitry Andric         = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
195981ad6265SDimitry Andric 
196081ad6265SDimitry Andric       if (isUInt<32>(RemainderOffset)) {
196181ad6265SDimitry Andric         SDNode *VMov = CurDAG->getMachineNode(
196281ad6265SDimitry Andric           AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
196381ad6265SDimitry Andric           CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
196481ad6265SDimitry Andric         VAddr = SDValue(VMov, 0);
196581ad6265SDimitry Andric         SAddr = LHS;
19665f757f3fSDimitry Andric         if (!isFlatScratchBaseLegal(Addr))
196706c3fb27SDimitry Andric           return false;
196881ad6265SDimitry Andric         if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
196981ad6265SDimitry Andric           return false;
1970*6e516c87SDimitry Andric         Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
197181ad6265SDimitry Andric         return true;
197281ad6265SDimitry Andric       }
197381ad6265SDimitry Andric     }
197481ad6265SDimitry Andric   }
197581ad6265SDimitry Andric 
197681ad6265SDimitry Andric   if (Addr.getOpcode() != ISD::ADD)
197781ad6265SDimitry Andric     return false;
197881ad6265SDimitry Andric 
197981ad6265SDimitry Andric   LHS = Addr.getOperand(0);
198081ad6265SDimitry Andric   RHS = Addr.getOperand(1);
198181ad6265SDimitry Andric 
198281ad6265SDimitry Andric   if (!LHS->isDivergent() && RHS->isDivergent()) {
198381ad6265SDimitry Andric     SAddr = LHS;
198481ad6265SDimitry Andric     VAddr = RHS;
198581ad6265SDimitry Andric   } else if (!RHS->isDivergent() && LHS->isDivergent()) {
198681ad6265SDimitry Andric     SAddr = RHS;
198781ad6265SDimitry Andric     VAddr = LHS;
198881ad6265SDimitry Andric   } else {
198981ad6265SDimitry Andric     return false;
199081ad6265SDimitry Andric   }
199181ad6265SDimitry Andric 
19925f757f3fSDimitry Andric   if (OrigAddr != Addr) {
19935f757f3fSDimitry Andric     if (!isFlatScratchBaseLegalSVImm(OrigAddr))
199406c3fb27SDimitry Andric       return false;
19955f757f3fSDimitry Andric   } else {
19965f757f3fSDimitry Andric     if (!isFlatScratchBaseLegalSV(OrigAddr))
19975f757f3fSDimitry Andric       return false;
19985f757f3fSDimitry Andric   }
199906c3fb27SDimitry Andric 
200081ad6265SDimitry Andric   if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
200181ad6265SDimitry Andric     return false;
200281ad6265SDimitry Andric   SAddr = SelectSAddrFI(CurDAG, SAddr);
2003*6e516c87SDimitry Andric   Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
200481ad6265SDimitry Andric   return true;
200581ad6265SDimitry Andric }
200681ad6265SDimitry Andric 
20070fca6ea1SDimitry Andric // For unbuffered smem loads, it is illegal for the Immediate Offset to be
20080fca6ea1SDimitry Andric // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
20090fca6ea1SDimitry Andric // Handle the case where the Immediate Offset + SOffset is negative.
isSOffsetLegalWithImmOffset(SDValue * SOffset,bool Imm32Only,bool IsBuffer,int64_t ImmOffset) const20100fca6ea1SDimitry Andric bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
20110fca6ea1SDimitry Andric                                                      bool Imm32Only,
20120fca6ea1SDimitry Andric                                                      bool IsBuffer,
20130fca6ea1SDimitry Andric                                                      int64_t ImmOffset) const {
20140fca6ea1SDimitry Andric   if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
20150fca6ea1SDimitry Andric       AMDGPU::hasSMRDSignedImmOffset(*Subtarget)) {
20160fca6ea1SDimitry Andric     KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
20170fca6ea1SDimitry Andric     if (ImmOffset + SKnown.getMinValue().getSExtValue() < 0)
20180fca6ea1SDimitry Andric       return false;
20190fca6ea1SDimitry Andric   }
20200fca6ea1SDimitry Andric 
20210fca6ea1SDimitry Andric   return true;
20220fca6ea1SDimitry Andric }
20230fca6ea1SDimitry Andric 
2024bdd1243dSDimitry Andric // Match an immediate (if Offset is not null) or an SGPR (if SOffset is
2025bdd1243dSDimitry Andric // not null) offset. If Imm32Only is true, match only 32-bit immediate
2026bdd1243dSDimitry Andric // offsets available on CI.
SelectSMRDOffset(SDValue ByteOffsetNode,SDValue * SOffset,SDValue * Offset,bool Imm32Only,bool IsBuffer,bool HasSOffset,int64_t ImmOffset) const2027bdd1243dSDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
2028fcaf7f86SDimitry Andric                                           SDValue *SOffset, SDValue *Offset,
20290fca6ea1SDimitry Andric                                           bool Imm32Only, bool IsBuffer,
20300fca6ea1SDimitry Andric                                           bool HasSOffset,
20310fca6ea1SDimitry Andric                                           int64_t ImmOffset) const {
2032bdd1243dSDimitry Andric   assert((!SOffset || !Offset) &&
2033bdd1243dSDimitry Andric          "Cannot match both soffset and offset at the same time!");
2034bdd1243dSDimitry Andric 
20350b57cec5SDimitry Andric   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
20365ffd83dbSDimitry Andric   if (!C) {
2037fcaf7f86SDimitry Andric     if (!SOffset)
2038753f127fSDimitry Andric       return false;
20390fca6ea1SDimitry Andric 
20405ffd83dbSDimitry Andric     if (ByteOffsetNode.getValueType().isScalarInteger() &&
20415ffd83dbSDimitry Andric         ByteOffsetNode.getValueType().getSizeInBits() == 32) {
2042fcaf7f86SDimitry Andric       *SOffset = ByteOffsetNode;
20430fca6ea1SDimitry Andric       return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
20440fca6ea1SDimitry Andric                                          ImmOffset);
20455ffd83dbSDimitry Andric     }
20465ffd83dbSDimitry Andric     if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
20475ffd83dbSDimitry Andric       if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
2048fcaf7f86SDimitry Andric         *SOffset = ByteOffsetNode.getOperand(0);
20490fca6ea1SDimitry Andric         return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
20500fca6ea1SDimitry Andric                                            ImmOffset);
20515ffd83dbSDimitry Andric       }
20525ffd83dbSDimitry Andric     }
20530b57cec5SDimitry Andric     return false;
20545ffd83dbSDimitry Andric   }
20550b57cec5SDimitry Andric 
20560b57cec5SDimitry Andric   SDLoc SL(ByteOffsetNode);
2057bdd1243dSDimitry Andric 
2058bdd1243dSDimitry Andric   // GFX9 and GFX10 have signed byte immediate offsets. The immediate
2059bdd1243dSDimitry Andric   // offset for S_BUFFER instructions is unsigned.
2060bdd1243dSDimitry Andric   int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
20610fca6ea1SDimitry Andric   std::optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(
20620fca6ea1SDimitry Andric       *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2063fcaf7f86SDimitry Andric   if (EncodedOffset && Offset && !Imm32Only) {
2064fcaf7f86SDimitry Andric     *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
20650b57cec5SDimitry Andric     return true;
20660b57cec5SDimitry Andric   }
20670b57cec5SDimitry Andric 
20685ffd83dbSDimitry Andric   // SGPR and literal offsets are unsigned.
20695ffd83dbSDimitry Andric   if (ByteOffset < 0)
20700b57cec5SDimitry Andric     return false;
20710b57cec5SDimitry Andric 
20725ffd83dbSDimitry Andric   EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
2073fcaf7f86SDimitry Andric   if (EncodedOffset && Offset && Imm32Only) {
2074fcaf7f86SDimitry Andric     *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
20755ffd83dbSDimitry Andric     return true;
20760b57cec5SDimitry Andric   }
20775ffd83dbSDimitry Andric 
20785ffd83dbSDimitry Andric   if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
20795ffd83dbSDimitry Andric     return false;
20805ffd83dbSDimitry Andric 
2081fcaf7f86SDimitry Andric   if (SOffset) {
20825ffd83dbSDimitry Andric     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2083fcaf7f86SDimitry Andric     *SOffset = SDValue(
20845ffd83dbSDimitry Andric         CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
20850b57cec5SDimitry Andric     return true;
20860b57cec5SDimitry Andric   }
20870b57cec5SDimitry Andric 
2088753f127fSDimitry Andric   return false;
2089753f127fSDimitry Andric }
2090753f127fSDimitry Andric 
Expand32BitAddress(SDValue Addr) const20910b57cec5SDimitry Andric SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
20920b57cec5SDimitry Andric   if (Addr.getValueType() != MVT::i32)
20930b57cec5SDimitry Andric     return Addr;
20940b57cec5SDimitry Andric 
20950b57cec5SDimitry Andric   // Zero-extend a 32-bit address.
20960b57cec5SDimitry Andric   SDLoc SL(Addr);
20970b57cec5SDimitry Andric 
20980b57cec5SDimitry Andric   const MachineFunction &MF = CurDAG->getMachineFunction();
20990b57cec5SDimitry Andric   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
21000b57cec5SDimitry Andric   unsigned AddrHiVal = Info->get32BitAddressHighBits();
21010b57cec5SDimitry Andric   SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
21020b57cec5SDimitry Andric 
21030b57cec5SDimitry Andric   const SDValue Ops[] = {
21040b57cec5SDimitry Andric     CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
21050b57cec5SDimitry Andric     Addr,
21060b57cec5SDimitry Andric     CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
21070b57cec5SDimitry Andric     SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
21080b57cec5SDimitry Andric             0),
21090b57cec5SDimitry Andric     CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
21100b57cec5SDimitry Andric   };
21110b57cec5SDimitry Andric 
21120b57cec5SDimitry Andric   return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
21130b57cec5SDimitry Andric                                         Ops), 0);
21140b57cec5SDimitry Andric }
21150b57cec5SDimitry Andric 
2116bdd1243dSDimitry Andric // Match a base and an immediate (if Offset is not null) or an SGPR (if
2117bdd1243dSDimitry Andric // SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
2118bdd1243dSDimitry Andric // true, match only 32-bit immediate offsets available on CI.
SelectSMRDBaseOffset(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only,bool IsBuffer,bool HasSOffset,int64_t ImmOffset) const2119fcaf7f86SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
2120fcaf7f86SDimitry Andric                                               SDValue *SOffset, SDValue *Offset,
21210fca6ea1SDimitry Andric                                               bool Imm32Only, bool IsBuffer,
21220fca6ea1SDimitry Andric                                               bool HasSOffset,
21230fca6ea1SDimitry Andric                                               int64_t ImmOffset) const {
2124fcaf7f86SDimitry Andric   if (SOffset && Offset) {
2125bdd1243dSDimitry Andric     assert(!Imm32Only && !IsBuffer);
2126fcaf7f86SDimitry Andric     SDValue B;
21270fca6ea1SDimitry Andric 
21280fca6ea1SDimitry Andric     if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
21290fca6ea1SDimitry Andric       return false;
21300fca6ea1SDimitry Andric 
21310fca6ea1SDimitry Andric     int64_t ImmOff = 0;
21320fca6ea1SDimitry Andric     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset))
21330fca6ea1SDimitry Andric       ImmOff = C->getSExtValue();
21340fca6ea1SDimitry Andric 
21350fca6ea1SDimitry Andric     return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true,
21360fca6ea1SDimitry Andric                                 ImmOff);
2137fcaf7f86SDimitry Andric   }
2138fcaf7f86SDimitry Andric 
21390b57cec5SDimitry Andric   // A 32-bit (address + offset) should not cause unsigned 32-bit integer
21400b57cec5SDimitry Andric   // wraparound, because s_load instructions perform the addition in 64 bits.
2141bdd1243dSDimitry Andric   if (Addr.getValueType() == MVT::i32 && Addr.getOpcode() == ISD::ADD &&
2142bdd1243dSDimitry Andric       !Addr->getFlags().hasNoUnsignedWrap())
2143bdd1243dSDimitry Andric     return false;
2144bdd1243dSDimitry Andric 
21455ffd83dbSDimitry Andric   SDValue N0, N1;
21465ffd83dbSDimitry Andric   // Extract the base and offset if possible.
2147bdd1243dSDimitry Andric   if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
21485ffd83dbSDimitry Andric     N0 = Addr.getOperand(0);
21495ffd83dbSDimitry Andric     N1 = Addr.getOperand(1);
21505ffd83dbSDimitry Andric   } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
21515ffd83dbSDimitry Andric     assert(N0 && N1 && isa<ConstantSDNode>(N1));
21525ffd83dbSDimitry Andric   }
2153bdd1243dSDimitry Andric   if (!N0 || !N1)
2154bdd1243dSDimitry Andric     return false;
21550fca6ea1SDimitry Andric 
21560fca6ea1SDimitry Andric   if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
21570fca6ea1SDimitry Andric                        ImmOffset)) {
2158fcaf7f86SDimitry Andric     SBase = N0;
2159fcaf7f86SDimitry Andric     return true;
2160fcaf7f86SDimitry Andric   }
21610fca6ea1SDimitry Andric   if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
21620fca6ea1SDimitry Andric                        ImmOffset)) {
2163fcaf7f86SDimitry Andric     SBase = N1;
21640b57cec5SDimitry Andric     return true;
21650b57cec5SDimitry Andric   }
2166753f127fSDimitry Andric   return false;
2167fcaf7f86SDimitry Andric }
2168fcaf7f86SDimitry Andric 
SelectSMRD(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only) const2169fcaf7f86SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2170fcaf7f86SDimitry Andric                                     SDValue *SOffset, SDValue *Offset,
2171fcaf7f86SDimitry Andric                                     bool Imm32Only) const {
2172bdd1243dSDimitry Andric   if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
2173fcaf7f86SDimitry Andric     SBase = Expand32BitAddress(SBase);
21740b57cec5SDimitry Andric     return true;
21750b57cec5SDimitry Andric   }
21760b57cec5SDimitry Andric 
2177bdd1243dSDimitry Andric   if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
2178bdd1243dSDimitry Andric     SBase = Expand32BitAddress(Addr);
2179bdd1243dSDimitry Andric     *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2180bdd1243dSDimitry Andric     return true;
2181bdd1243dSDimitry Andric   }
2182bdd1243dSDimitry Andric 
2183bdd1243dSDimitry Andric   return false;
2184bdd1243dSDimitry Andric }
2185bdd1243dSDimitry Andric 
SelectSMRDImm(SDValue Addr,SDValue & SBase,SDValue & Offset) const21860b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
21870b57cec5SDimitry Andric                                        SDValue &Offset) const {
2188fcaf7f86SDimitry Andric   return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
21890b57cec5SDimitry Andric }
21900b57cec5SDimitry Andric 
SelectSMRDImm32(SDValue Addr,SDValue & SBase,SDValue & Offset) const21910b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
21920b57cec5SDimitry Andric                                          SDValue &Offset) const {
21935ffd83dbSDimitry Andric   assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2194fcaf7f86SDimitry Andric   return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
2195fcaf7f86SDimitry Andric                     /* Imm32Only */ true);
21960b57cec5SDimitry Andric }
21970b57cec5SDimitry Andric 
SelectSMRDSgpr(SDValue Addr,SDValue & SBase,SDValue & SOffset) const21980b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2199fcaf7f86SDimitry Andric                                         SDValue &SOffset) const {
2200fcaf7f86SDimitry Andric   return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
2201fcaf7f86SDimitry Andric }
2202fcaf7f86SDimitry Andric 
SelectSMRDSgprImm(SDValue Addr,SDValue & SBase,SDValue & SOffset,SDValue & Offset) const2203fcaf7f86SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
2204fcaf7f86SDimitry Andric                                            SDValue &SOffset,
22050b57cec5SDimitry Andric                                            SDValue &Offset) const {
2206fcaf7f86SDimitry Andric   return SelectSMRD(Addr, SBase, &SOffset, &Offset);
22070b57cec5SDimitry Andric }
22080b57cec5SDimitry Andric 
SelectSMRDBufferImm(SDValue N,SDValue & Offset) const2209bdd1243dSDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
2210bdd1243dSDimitry Andric   return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2211bdd1243dSDimitry Andric                           /* Imm32Only */ false, /* IsBuffer */ true);
22125ffd83dbSDimitry Andric }
22135ffd83dbSDimitry Andric 
SelectSMRDBufferImm32(SDValue N,SDValue & Offset) const2214bdd1243dSDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
22150b57cec5SDimitry Andric                                                SDValue &Offset) const {
22165ffd83dbSDimitry Andric   assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2217bdd1243dSDimitry Andric   return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2218bdd1243dSDimitry Andric                           /* Imm32Only */ true, /* IsBuffer */ true);
22195ffd83dbSDimitry Andric }
22200b57cec5SDimitry Andric 
SelectSMRDBufferSgprImm(SDValue N,SDValue & SOffset,SDValue & Offset) const2221bdd1243dSDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
2222bdd1243dSDimitry Andric                                                  SDValue &Offset) const {
2223bdd1243dSDimitry Andric   // Match the (soffset + offset) pair as a 32-bit register base and
2224bdd1243dSDimitry Andric   // an immediate offset.
2225bdd1243dSDimitry Andric   return N.getValueType() == MVT::i32 &&
2226bdd1243dSDimitry Andric          SelectSMRDBaseOffset(N, /* SBase */ SOffset, /* SOffset*/ nullptr,
2227bdd1243dSDimitry Andric                               &Offset, /* Imm32Only */ false,
2228bdd1243dSDimitry Andric                               /* IsBuffer */ true);
22290b57cec5SDimitry Andric }
22300b57cec5SDimitry Andric 
SelectMOVRELOffset(SDValue Index,SDValue & Base,SDValue & Offset) const22310b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
22320b57cec5SDimitry Andric                                             SDValue &Base,
22330b57cec5SDimitry Andric                                             SDValue &Offset) const {
22340b57cec5SDimitry Andric   SDLoc DL(Index);
22350b57cec5SDimitry Andric 
22360b57cec5SDimitry Andric   if (CurDAG->isBaseWithConstantOffset(Index)) {
22370b57cec5SDimitry Andric     SDValue N0 = Index.getOperand(0);
22380b57cec5SDimitry Andric     SDValue N1 = Index.getOperand(1);
22390b57cec5SDimitry Andric     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
22400b57cec5SDimitry Andric 
22410b57cec5SDimitry Andric     // (add n0, c0)
22420b57cec5SDimitry Andric     // Don't peel off the offset (c0) if doing so could possibly lead
22430b57cec5SDimitry Andric     // the base (n0) to be negative.
22445ffd83dbSDimitry Andric     // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
22455ffd83dbSDimitry Andric     if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
22465ffd83dbSDimitry Andric         (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
22470b57cec5SDimitry Andric       Base = N0;
22480b57cec5SDimitry Andric       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
22490b57cec5SDimitry Andric       return true;
22500b57cec5SDimitry Andric     }
22510b57cec5SDimitry Andric   }
22520b57cec5SDimitry Andric 
22530b57cec5SDimitry Andric   if (isa<ConstantSDNode>(Index))
22540b57cec5SDimitry Andric     return false;
22550b57cec5SDimitry Andric 
22560b57cec5SDimitry Andric   Base = Index;
22570b57cec5SDimitry Andric   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
22580b57cec5SDimitry Andric   return true;
22590b57cec5SDimitry Andric }
22600b57cec5SDimitry Andric 
getBFE32(bool IsSigned,const SDLoc & DL,SDValue Val,uint32_t Offset,uint32_t Width)2261349cc55cSDimitry Andric SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
22620b57cec5SDimitry Andric                                      SDValue Val, uint32_t Offset,
22630b57cec5SDimitry Andric                                      uint32_t Width) {
2264349cc55cSDimitry Andric   if (Val->isDivergent()) {
2265349cc55cSDimitry Andric     unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2266349cc55cSDimitry Andric     SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
2267349cc55cSDimitry Andric     SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
2268349cc55cSDimitry Andric 
2269349cc55cSDimitry Andric     return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2270349cc55cSDimitry Andric   }
2271349cc55cSDimitry Andric   unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
22720b57cec5SDimitry Andric   // Transformation function, pack the offset and width of a BFE into
22730b57cec5SDimitry Andric   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
22740b57cec5SDimitry Andric   // source, bits [5:0] contain the offset and bits [22:16] the width.
22750b57cec5SDimitry Andric   uint32_t PackedVal = Offset | (Width << 16);
22760b57cec5SDimitry Andric   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
22770b57cec5SDimitry Andric 
22780b57cec5SDimitry Andric   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
22790b57cec5SDimitry Andric }
22800b57cec5SDimitry Andric 
SelectS_BFEFromShifts(SDNode * N)22810b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
22820b57cec5SDimitry Andric   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
22830b57cec5SDimitry Andric   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
22840b57cec5SDimitry Andric   // Predicate: 0 < b <= c < 32
22850b57cec5SDimitry Andric 
22860b57cec5SDimitry Andric   const SDValue &Shl = N->getOperand(0);
22870b57cec5SDimitry Andric   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
22880b57cec5SDimitry Andric   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
22890b57cec5SDimitry Andric 
22900b57cec5SDimitry Andric   if (B && C) {
22910b57cec5SDimitry Andric     uint32_t BVal = B->getZExtValue();
22920b57cec5SDimitry Andric     uint32_t CVal = C->getZExtValue();
22930b57cec5SDimitry Andric 
22940b57cec5SDimitry Andric     if (0 < BVal && BVal <= CVal && CVal < 32) {
22950b57cec5SDimitry Andric       bool Signed = N->getOpcode() == ISD::SRA;
2296349cc55cSDimitry Andric       ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
22970b57cec5SDimitry Andric                   32 - CVal));
22980b57cec5SDimitry Andric       return;
22990b57cec5SDimitry Andric     }
23000b57cec5SDimitry Andric   }
23010b57cec5SDimitry Andric   SelectCode(N);
23020b57cec5SDimitry Andric }
23030b57cec5SDimitry Andric 
SelectS_BFE(SDNode * N)23040b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
23050b57cec5SDimitry Andric   switch (N->getOpcode()) {
23060b57cec5SDimitry Andric   case ISD::AND:
23070b57cec5SDimitry Andric     if (N->getOperand(0).getOpcode() == ISD::SRL) {
23080b57cec5SDimitry Andric       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
23090b57cec5SDimitry Andric       // Predicate: isMask(mask)
23100b57cec5SDimitry Andric       const SDValue &Srl = N->getOperand(0);
23110b57cec5SDimitry Andric       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
23120b57cec5SDimitry Andric       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
23130b57cec5SDimitry Andric 
23140b57cec5SDimitry Andric       if (Shift && Mask) {
23150b57cec5SDimitry Andric         uint32_t ShiftVal = Shift->getZExtValue();
23160b57cec5SDimitry Andric         uint32_t MaskVal = Mask->getZExtValue();
23170b57cec5SDimitry Andric 
23180b57cec5SDimitry Andric         if (isMask_32(MaskVal)) {
2319bdd1243dSDimitry Andric           uint32_t WidthVal = llvm::popcount(MaskVal);
2320349cc55cSDimitry Andric           ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
2321349cc55cSDimitry Andric                                   WidthVal));
23220b57cec5SDimitry Andric           return;
23230b57cec5SDimitry Andric         }
23240b57cec5SDimitry Andric       }
23250b57cec5SDimitry Andric     }
23260b57cec5SDimitry Andric     break;
23270b57cec5SDimitry Andric   case ISD::SRL:
23280b57cec5SDimitry Andric     if (N->getOperand(0).getOpcode() == ISD::AND) {
23290b57cec5SDimitry Andric       // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
23300b57cec5SDimitry Andric       // Predicate: isMask(mask >> b)
23310b57cec5SDimitry Andric       const SDValue &And = N->getOperand(0);
23320b57cec5SDimitry Andric       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
23330b57cec5SDimitry Andric       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
23340b57cec5SDimitry Andric 
23350b57cec5SDimitry Andric       if (Shift && Mask) {
23360b57cec5SDimitry Andric         uint32_t ShiftVal = Shift->getZExtValue();
23370b57cec5SDimitry Andric         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
23380b57cec5SDimitry Andric 
23390b57cec5SDimitry Andric         if (isMask_32(MaskVal)) {
2340bdd1243dSDimitry Andric           uint32_t WidthVal = llvm::popcount(MaskVal);
2341349cc55cSDimitry Andric           ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
2342349cc55cSDimitry Andric                       WidthVal));
23430b57cec5SDimitry Andric           return;
23440b57cec5SDimitry Andric         }
23450b57cec5SDimitry Andric       }
23460b57cec5SDimitry Andric     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
23470b57cec5SDimitry Andric       SelectS_BFEFromShifts(N);
23480b57cec5SDimitry Andric       return;
23490b57cec5SDimitry Andric     }
23500b57cec5SDimitry Andric     break;
23510b57cec5SDimitry Andric   case ISD::SRA:
23520b57cec5SDimitry Andric     if (N->getOperand(0).getOpcode() == ISD::SHL) {
23530b57cec5SDimitry Andric       SelectS_BFEFromShifts(N);
23540b57cec5SDimitry Andric       return;
23550b57cec5SDimitry Andric     }
23560b57cec5SDimitry Andric     break;
23570b57cec5SDimitry Andric 
23580b57cec5SDimitry Andric   case ISD::SIGN_EXTEND_INREG: {
23590b57cec5SDimitry Andric     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
23600b57cec5SDimitry Andric     SDValue Src = N->getOperand(0);
23610b57cec5SDimitry Andric     if (Src.getOpcode() != ISD::SRL)
23620b57cec5SDimitry Andric       break;
23630b57cec5SDimitry Andric 
23640b57cec5SDimitry Andric     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
23650b57cec5SDimitry Andric     if (!Amt)
23660b57cec5SDimitry Andric       break;
23670b57cec5SDimitry Andric 
23680b57cec5SDimitry Andric     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2369349cc55cSDimitry Andric     ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
23700b57cec5SDimitry Andric                             Amt->getZExtValue(), Width));
23710b57cec5SDimitry Andric     return;
23720b57cec5SDimitry Andric   }
23730b57cec5SDimitry Andric   }
23740b57cec5SDimitry Andric 
23750b57cec5SDimitry Andric   SelectCode(N);
23760b57cec5SDimitry Andric }
23770b57cec5SDimitry Andric 
isCBranchSCC(const SDNode * N) const23780b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
23790b57cec5SDimitry Andric   assert(N->getOpcode() == ISD::BRCOND);
23800b57cec5SDimitry Andric   if (!N->hasOneUse())
23810b57cec5SDimitry Andric     return false;
23820b57cec5SDimitry Andric 
23830b57cec5SDimitry Andric   SDValue Cond = N->getOperand(1);
23840b57cec5SDimitry Andric   if (Cond.getOpcode() == ISD::CopyToReg)
23850b57cec5SDimitry Andric     Cond = Cond.getOperand(2);
23860b57cec5SDimitry Andric 
23870b57cec5SDimitry Andric   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
23880b57cec5SDimitry Andric     return false;
23890b57cec5SDimitry Andric 
23900b57cec5SDimitry Andric   MVT VT = Cond.getOperand(0).getSimpleValueType();
23910b57cec5SDimitry Andric   if (VT == MVT::i32)
23920b57cec5SDimitry Andric     return true;
23930b57cec5SDimitry Andric 
23940b57cec5SDimitry Andric   if (VT == MVT::i64) {
23950b57cec5SDimitry Andric     auto ST = static_cast<const GCNSubtarget *>(Subtarget);
23960b57cec5SDimitry Andric 
23970b57cec5SDimitry Andric     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
23980b57cec5SDimitry Andric     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
23990b57cec5SDimitry Andric   }
24000b57cec5SDimitry Andric 
24010b57cec5SDimitry Andric   return false;
24020b57cec5SDimitry Andric }
24030b57cec5SDimitry Andric 
combineBallotPattern(SDValue VCMP,bool & Negate)24045f757f3fSDimitry Andric static SDValue combineBallotPattern(SDValue VCMP, bool &Negate) {
24055f757f3fSDimitry Andric   assert(VCMP->getOpcode() == AMDGPUISD::SETCC);
24065f757f3fSDimitry Andric   // Special case for amdgcn.ballot:
24075f757f3fSDimitry Andric   // %Cond = i1 (and/or combination of i1 ISD::SETCCs)
24085f757f3fSDimitry Andric   // %VCMP = i(WaveSize) AMDGPUISD::SETCC (ext %Cond), 0, setne/seteq
24095f757f3fSDimitry Andric   // =>
24105f757f3fSDimitry Andric   // Use i1 %Cond value instead of i(WaveSize) %VCMP.
24115f757f3fSDimitry Andric   // This is possible because divergent ISD::SETCC is selected as V_CMP and
24125f757f3fSDimitry Andric   // Cond becomes a i(WaveSize) full mask value.
24135f757f3fSDimitry Andric   // Note that ballot doesn't use SETEQ condition but its easy to support it
24145f757f3fSDimitry Andric   // here for completeness, so in this case Negate is set true on return.
24155f757f3fSDimitry Andric   auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
24165f757f3fSDimitry Andric   if ((VCMP_CC == ISD::SETEQ || VCMP_CC == ISD::SETNE) &&
24175f757f3fSDimitry Andric       isNullConstant(VCMP.getOperand(1))) {
24185f757f3fSDimitry Andric 
24195f757f3fSDimitry Andric     auto Cond = VCMP.getOperand(0);
24205f757f3fSDimitry Andric     if (ISD::isExtOpcode(Cond->getOpcode())) // Skip extension.
24215f757f3fSDimitry Andric       Cond = Cond.getOperand(0);
24225f757f3fSDimitry Andric 
24235f757f3fSDimitry Andric     if (isBoolSGPR(Cond)) {
24245f757f3fSDimitry Andric       Negate = VCMP_CC == ISD::SETEQ;
24255f757f3fSDimitry Andric       return Cond;
24265f757f3fSDimitry Andric     }
24275f757f3fSDimitry Andric   }
24285f757f3fSDimitry Andric   return SDValue();
24295f757f3fSDimitry Andric }
24305f757f3fSDimitry Andric 
SelectBRCOND(SDNode * N)24310b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
24320b57cec5SDimitry Andric   SDValue Cond = N->getOperand(1);
24330b57cec5SDimitry Andric 
24340b57cec5SDimitry Andric   if (Cond.isUndef()) {
24350b57cec5SDimitry Andric     CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
24360b57cec5SDimitry Andric                          N->getOperand(2), N->getOperand(0));
24370b57cec5SDimitry Andric     return;
24380b57cec5SDimitry Andric   }
24390b57cec5SDimitry Andric 
24400b57cec5SDimitry Andric   const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
24410b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST->getRegisterInfo();
24420b57cec5SDimitry Andric 
24430b57cec5SDimitry Andric   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
24445f757f3fSDimitry Andric   bool AndExec = !UseSCCBr;
24455f757f3fSDimitry Andric   bool Negate = false;
24465f757f3fSDimitry Andric 
24475f757f3fSDimitry Andric   if (Cond.getOpcode() == ISD::SETCC &&
24485f757f3fSDimitry Andric       Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
24495f757f3fSDimitry Andric     SDValue VCMP = Cond->getOperand(0);
24505f757f3fSDimitry Andric     auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
24515f757f3fSDimitry Andric     if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
24525f757f3fSDimitry Andric         isNullConstant(Cond->getOperand(1)) &&
24530fca6ea1SDimitry Andric         // We may encounter ballot.i64 in wave32 mode on -O0.
24545f757f3fSDimitry Andric         VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
24555f757f3fSDimitry Andric       // %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
24565f757f3fSDimitry Andric       // %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
24575f757f3fSDimitry Andric       // BRCOND i1 %C, %BB
24585f757f3fSDimitry Andric       // =>
24595f757f3fSDimitry Andric       // %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
24605f757f3fSDimitry Andric       // VCC = COPY i(WaveSize) %VCMP
24615f757f3fSDimitry Andric       // S_CBRANCH_VCCNZ/VCCZ %BB
24625f757f3fSDimitry Andric       Negate = CC == ISD::SETEQ;
24635f757f3fSDimitry Andric       bool NegatedBallot = false;
24645f757f3fSDimitry Andric       if (auto BallotCond = combineBallotPattern(VCMP, NegatedBallot)) {
24655f757f3fSDimitry Andric         Cond = BallotCond;
24665f757f3fSDimitry Andric         UseSCCBr = !BallotCond->isDivergent();
24675f757f3fSDimitry Andric         Negate = Negate ^ NegatedBallot;
24685f757f3fSDimitry Andric       } else {
24695f757f3fSDimitry Andric         // TODO: don't use SCC here assuming that AMDGPUISD::SETCC is always
24705f757f3fSDimitry Andric         // selected as V_CMP, but this may change for uniform condition.
24715f757f3fSDimitry Andric         Cond = VCMP;
24725f757f3fSDimitry Andric         UseSCCBr = false;
24735f757f3fSDimitry Andric       }
24745f757f3fSDimitry Andric     }
24755f757f3fSDimitry Andric     // Cond is either V_CMP resulted from AMDGPUISD::SETCC or a combination of
24765f757f3fSDimitry Andric     // V_CMPs resulted from ballot or ballot has uniform condition and SCC is
24775f757f3fSDimitry Andric     // used.
24785f757f3fSDimitry Andric     AndExec = false;
24795f757f3fSDimitry Andric   }
24805f757f3fSDimitry Andric 
24815f757f3fSDimitry Andric   unsigned BrOp =
24825f757f3fSDimitry Andric       UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
24835f757f3fSDimitry Andric                : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
24845ffd83dbSDimitry Andric   Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
24850b57cec5SDimitry Andric   SDLoc SL(N);
24860b57cec5SDimitry Andric 
24875f757f3fSDimitry Andric   if (AndExec) {
24880b57cec5SDimitry Andric     // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
24890b57cec5SDimitry Andric     // analyzed what generates the vcc value, so we do not know whether vcc
24900b57cec5SDimitry Andric     // bits for disabled lanes are 0.  Thus we need to mask out bits for
24910b57cec5SDimitry Andric     // disabled lanes.
24920b57cec5SDimitry Andric     //
24930b57cec5SDimitry Andric     // For the case that we select S_CBRANCH_SCC1 and it gets
24940b57cec5SDimitry Andric     // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
24950b57cec5SDimitry Andric     // SIInstrInfo::moveToVALU which inserts the S_AND).
24960b57cec5SDimitry Andric     //
24970b57cec5SDimitry Andric     // We could add an analysis of what generates the vcc value here and omit
24980b57cec5SDimitry Andric     // the S_AND when is unnecessary. But it would be better to add a separate
24990b57cec5SDimitry Andric     // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
25000b57cec5SDimitry Andric     // catches both cases.
25010b57cec5SDimitry Andric     Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
25020b57cec5SDimitry Andric                                                          : AMDGPU::S_AND_B64,
25030b57cec5SDimitry Andric                      SL, MVT::i1,
25040b57cec5SDimitry Andric                      CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
25050b57cec5SDimitry Andric                                                         : AMDGPU::EXEC,
25060b57cec5SDimitry Andric                                          MVT::i1),
25070b57cec5SDimitry Andric                     Cond),
25080b57cec5SDimitry Andric                    0);
25090b57cec5SDimitry Andric   }
25100b57cec5SDimitry Andric 
25110b57cec5SDimitry Andric   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
25120b57cec5SDimitry Andric   CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
25130b57cec5SDimitry Andric                        N->getOperand(2), // Basic Block
25140b57cec5SDimitry Andric                        VCC.getValue(0));
25150b57cec5SDimitry Andric }
25160b57cec5SDimitry Andric 
SelectFP_EXTEND(SDNode * N)25175f757f3fSDimitry Andric void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
25185f757f3fSDimitry Andric   if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&
25195f757f3fSDimitry Andric       !N->isDivergent()) {
25205f757f3fSDimitry Andric     SDValue Src = N->getOperand(0);
25215f757f3fSDimitry Andric     if (Src.getValueType() == MVT::f16) {
25225f757f3fSDimitry Andric       if (isExtractHiElt(Src, Src)) {
25235f757f3fSDimitry Andric         CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),
25245f757f3fSDimitry Andric                              {Src});
25255f757f3fSDimitry Andric         return;
25265f757f3fSDimitry Andric       }
25275f757f3fSDimitry Andric     }
25285f757f3fSDimitry Andric   }
25295f757f3fSDimitry Andric 
25305f757f3fSDimitry Andric   SelectCode(N);
25315f757f3fSDimitry Andric }
25325f757f3fSDimitry Andric 
SelectDSAppendConsume(SDNode * N,unsigned IntrID)25330b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
25340b57cec5SDimitry Andric   // The address is assumed to be uniform, so if it ends up in a VGPR, it will
25350b57cec5SDimitry Andric   // be copied to an SGPR with readfirstlane.
25360b57cec5SDimitry Andric   unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
25370b57cec5SDimitry Andric     AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
25380b57cec5SDimitry Andric 
25390b57cec5SDimitry Andric   SDValue Chain = N->getOperand(0);
25400b57cec5SDimitry Andric   SDValue Ptr = N->getOperand(2);
25410b57cec5SDimitry Andric   MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
25420b57cec5SDimitry Andric   MachineMemOperand *MMO = M->getMemOperand();
25430b57cec5SDimitry Andric   bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
25440b57cec5SDimitry Andric 
25450b57cec5SDimitry Andric   SDValue Offset;
25460b57cec5SDimitry Andric   if (CurDAG->isBaseWithConstantOffset(Ptr)) {
25470b57cec5SDimitry Andric     SDValue PtrBase = Ptr.getOperand(0);
25480b57cec5SDimitry Andric     SDValue PtrOffset = Ptr.getOperand(1);
25490b57cec5SDimitry Andric 
2550297eecfbSDimitry Andric     const APInt &OffsetVal = PtrOffset->getAsAPIntVal();
2551e8d8bef9SDimitry Andric     if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
25520b57cec5SDimitry Andric       N = glueCopyToM0(N, PtrBase);
25530b57cec5SDimitry Andric       Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
25540b57cec5SDimitry Andric     }
25550b57cec5SDimitry Andric   }
25560b57cec5SDimitry Andric 
25570b57cec5SDimitry Andric   if (!Offset) {
25580b57cec5SDimitry Andric     N = glueCopyToM0(N, Ptr);
25590b57cec5SDimitry Andric     Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
25600b57cec5SDimitry Andric   }
25610b57cec5SDimitry Andric 
25620b57cec5SDimitry Andric   SDValue Ops[] = {
25630b57cec5SDimitry Andric     Offset,
25640b57cec5SDimitry Andric     CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
25650b57cec5SDimitry Andric     Chain,
25660b57cec5SDimitry Andric     N->getOperand(N->getNumOperands() - 1) // New glue
25670b57cec5SDimitry Andric   };
25680b57cec5SDimitry Andric 
25690b57cec5SDimitry Andric   SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
25700b57cec5SDimitry Andric   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
25710b57cec5SDimitry Andric }
25720b57cec5SDimitry Andric 
2573bdd1243dSDimitry Andric // We need to handle this here because tablegen doesn't support matching
2574bdd1243dSDimitry Andric // instructions with multiple outputs.
SelectDSBvhStackIntrinsic(SDNode * N)2575bdd1243dSDimitry Andric void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
2576bdd1243dSDimitry Andric   unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2577bdd1243dSDimitry Andric   SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
2578bdd1243dSDimitry Andric                    N->getOperand(5), N->getOperand(0)};
2579bdd1243dSDimitry Andric 
2580bdd1243dSDimitry Andric   MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2581bdd1243dSDimitry Andric   MachineMemOperand *MMO = M->getMemOperand();
2582bdd1243dSDimitry Andric   SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2583bdd1243dSDimitry Andric   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2584bdd1243dSDimitry Andric }
2585bdd1243dSDimitry Andric 
gwsIntrinToOpcode(unsigned IntrID)25860b57cec5SDimitry Andric static unsigned gwsIntrinToOpcode(unsigned IntrID) {
25870b57cec5SDimitry Andric   switch (IntrID) {
25880b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_init:
25890b57cec5SDimitry Andric     return AMDGPU::DS_GWS_INIT;
25900b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_barrier:
25910b57cec5SDimitry Andric     return AMDGPU::DS_GWS_BARRIER;
25920b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_v:
25930b57cec5SDimitry Andric     return AMDGPU::DS_GWS_SEMA_V;
25940b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_br:
25950b57cec5SDimitry Andric     return AMDGPU::DS_GWS_SEMA_BR;
25960b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_p:
25970b57cec5SDimitry Andric     return AMDGPU::DS_GWS_SEMA_P;
25980b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_release_all:
25990b57cec5SDimitry Andric     return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
26000b57cec5SDimitry Andric   default:
26010b57cec5SDimitry Andric     llvm_unreachable("not a gws intrinsic");
26020b57cec5SDimitry Andric   }
26030b57cec5SDimitry Andric }
26040b57cec5SDimitry Andric 
SelectDS_GWS(SDNode * N,unsigned IntrID)26050b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
26065f757f3fSDimitry Andric   if (!Subtarget->hasGWS() ||
26075f757f3fSDimitry Andric       (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
26085f757f3fSDimitry Andric        !Subtarget->hasGWSSemaReleaseAll())) {
26090b57cec5SDimitry Andric     // Let this error.
26100b57cec5SDimitry Andric     SelectCode(N);
26110b57cec5SDimitry Andric     return;
26120b57cec5SDimitry Andric   }
26130b57cec5SDimitry Andric 
26140b57cec5SDimitry Andric   // Chain, intrinsic ID, vsrc, offset
26150b57cec5SDimitry Andric   const bool HasVSrc = N->getNumOperands() == 4;
26160b57cec5SDimitry Andric   assert(HasVSrc || N->getNumOperands() == 3);
26170b57cec5SDimitry Andric 
26180b57cec5SDimitry Andric   SDLoc SL(N);
26190b57cec5SDimitry Andric   SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
26200b57cec5SDimitry Andric   int ImmOffset = 0;
26210b57cec5SDimitry Andric   MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
26220b57cec5SDimitry Andric   MachineMemOperand *MMO = M->getMemOperand();
26230b57cec5SDimitry Andric 
26240b57cec5SDimitry Andric   // Don't worry if the offset ends up in a VGPR. Only one lane will have
26250b57cec5SDimitry Andric   // effect, so SIFixSGPRCopies will validly insert readfirstlane.
26260b57cec5SDimitry Andric 
26270b57cec5SDimitry Andric   // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
26280b57cec5SDimitry Andric   // offset field) % 64. Some versions of the programming guide omit the m0
26290b57cec5SDimitry Andric   // part, or claim it's from offset 0.
26300b57cec5SDimitry Andric   if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
26318bcb0991SDimitry Andric     // If we have a constant offset, try to use the 0 in m0 as the base.
26328bcb0991SDimitry Andric     // TODO: Look into changing the default m0 initialization value. If the
26338bcb0991SDimitry Andric     // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
26348bcb0991SDimitry Andric     // the immediate offset.
26358bcb0991SDimitry Andric     glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
26368bcb0991SDimitry Andric     ImmOffset = ConstOffset->getZExtValue();
26370b57cec5SDimitry Andric   } else {
26380b57cec5SDimitry Andric     if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
26390b57cec5SDimitry Andric       ImmOffset = BaseOffset.getConstantOperandVal(1);
26400b57cec5SDimitry Andric       BaseOffset = BaseOffset.getOperand(0);
26410b57cec5SDimitry Andric     }
26420b57cec5SDimitry Andric 
26430b57cec5SDimitry Andric     // Prefer to do the shift in an SGPR since it should be possible to use m0
26440b57cec5SDimitry Andric     // as the result directly. If it's already an SGPR, it will be eliminated
26450b57cec5SDimitry Andric     // later.
26460b57cec5SDimitry Andric     SDNode *SGPROffset
26470b57cec5SDimitry Andric       = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
26480b57cec5SDimitry Andric                                BaseOffset);
26490b57cec5SDimitry Andric     // Shift to offset in m0
26500b57cec5SDimitry Andric     SDNode *M0Base
26510b57cec5SDimitry Andric       = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
26520b57cec5SDimitry Andric                                SDValue(SGPROffset, 0),
26530b57cec5SDimitry Andric                                CurDAG->getTargetConstant(16, SL, MVT::i32));
26540b57cec5SDimitry Andric     glueCopyToM0(N, SDValue(M0Base, 0));
26550b57cec5SDimitry Andric   }
26560b57cec5SDimitry Andric 
26570b57cec5SDimitry Andric   SDValue Chain = N->getOperand(0);
26580b57cec5SDimitry Andric   SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
26590b57cec5SDimitry Andric 
26600b57cec5SDimitry Andric   const unsigned Opc = gwsIntrinToOpcode(IntrID);
26610b57cec5SDimitry Andric   SmallVector<SDValue, 5> Ops;
26620b57cec5SDimitry Andric   if (HasVSrc)
26638bcb0991SDimitry Andric     Ops.push_back(N->getOperand(2));
26640b57cec5SDimitry Andric   Ops.push_back(OffsetField);
26650b57cec5SDimitry Andric   Ops.push_back(Chain);
26660b57cec5SDimitry Andric 
26670b57cec5SDimitry Andric   SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
26680b57cec5SDimitry Andric   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
26690b57cec5SDimitry Andric }
26700b57cec5SDimitry Andric 
SelectInterpP1F16(SDNode * N)26715ffd83dbSDimitry Andric void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
26725ffd83dbSDimitry Andric   if (Subtarget->getLDSBankCount() != 16) {
26735ffd83dbSDimitry Andric     // This is a single instruction with a pattern.
26745ffd83dbSDimitry Andric     SelectCode(N);
26755ffd83dbSDimitry Andric     return;
26765ffd83dbSDimitry Andric   }
26775ffd83dbSDimitry Andric 
26785ffd83dbSDimitry Andric   SDLoc DL(N);
26795ffd83dbSDimitry Andric 
26805ffd83dbSDimitry Andric   // This requires 2 instructions. It is possible to write a pattern to support
26815ffd83dbSDimitry Andric   // this, but the generated isel emitter doesn't correctly deal with multiple
26825ffd83dbSDimitry Andric   // output instructions using the same physical register input. The copy to m0
26835ffd83dbSDimitry Andric   // is incorrectly placed before the second instruction.
26845ffd83dbSDimitry Andric   //
26855ffd83dbSDimitry Andric   // TODO: Match source modifiers.
26865ffd83dbSDimitry Andric   //
26875ffd83dbSDimitry Andric   // def : Pat <
26885ffd83dbSDimitry Andric   //   (int_amdgcn_interp_p1_f16
26895ffd83dbSDimitry Andric   //    (VOP3Mods f32:$src0, i32:$src0_modifiers),
26905ffd83dbSDimitry Andric   //                             (i32 timm:$attrchan), (i32 timm:$attr),
26915ffd83dbSDimitry Andric   //                             (i1 timm:$high), M0),
26925ffd83dbSDimitry Andric   //   (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
26935ffd83dbSDimitry Andric   //       timm:$attrchan, 0,
26945ffd83dbSDimitry Andric   //       (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
26955ffd83dbSDimitry Andric   //   let Predicates = [has16BankLDS];
26965ffd83dbSDimitry Andric   // }
26975ffd83dbSDimitry Andric 
26985ffd83dbSDimitry Andric   // 16 bank LDS
26995ffd83dbSDimitry Andric   SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
27005ffd83dbSDimitry Andric                                       N->getOperand(5), SDValue());
27015ffd83dbSDimitry Andric 
27025ffd83dbSDimitry Andric   SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
27035ffd83dbSDimitry Andric 
27045ffd83dbSDimitry Andric   SDNode *InterpMov =
27055ffd83dbSDimitry Andric     CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
27065ffd83dbSDimitry Andric         CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
27075ffd83dbSDimitry Andric         N->getOperand(3),  // Attr
27085ffd83dbSDimitry Andric         N->getOperand(2),  // Attrchan
27095ffd83dbSDimitry Andric         ToM0.getValue(1) // In glue
27105ffd83dbSDimitry Andric   });
27115ffd83dbSDimitry Andric 
27125ffd83dbSDimitry Andric   SDNode *InterpP1LV =
27135ffd83dbSDimitry Andric     CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
27145ffd83dbSDimitry Andric         CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
27155ffd83dbSDimitry Andric         N->getOperand(1), // Src0
27165ffd83dbSDimitry Andric         N->getOperand(3), // Attr
27175ffd83dbSDimitry Andric         N->getOperand(2), // Attrchan
27185ffd83dbSDimitry Andric         CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
27195ffd83dbSDimitry Andric         SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
27205ffd83dbSDimitry Andric         N->getOperand(4), // high
27215ffd83dbSDimitry Andric         CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
27225ffd83dbSDimitry Andric         CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
27235ffd83dbSDimitry Andric         SDValue(InterpMov, 1)
27245ffd83dbSDimitry Andric   });
27255ffd83dbSDimitry Andric 
27265ffd83dbSDimitry Andric   CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
27275ffd83dbSDimitry Andric }
27285ffd83dbSDimitry Andric 
SelectINTRINSIC_W_CHAIN(SDNode * N)27290b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2730647cbc5dSDimitry Andric   unsigned IntrID = N->getConstantOperandVal(1);
27310b57cec5SDimitry Andric   switch (IntrID) {
27320b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_append:
27330b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_consume: {
27340b57cec5SDimitry Andric     if (N->getValueType(0) != MVT::i32)
27350b57cec5SDimitry Andric       break;
27360b57cec5SDimitry Andric     SelectDSAppendConsume(N, IntrID);
27370b57cec5SDimitry Andric     return;
27380b57cec5SDimitry Andric   }
2739bdd1243dSDimitry Andric   case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2740bdd1243dSDimitry Andric     SelectDSBvhStackIntrinsic(N);
2741bdd1243dSDimitry Andric     return;
27420b57cec5SDimitry Andric   }
27430b57cec5SDimitry Andric 
27440b57cec5SDimitry Andric   SelectCode(N);
27450b57cec5SDimitry Andric }
27460b57cec5SDimitry Andric 
SelectINTRINSIC_WO_CHAIN(SDNode * N)27478bcb0991SDimitry Andric void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2748647cbc5dSDimitry Andric   unsigned IntrID = N->getConstantOperandVal(0);
27490fca6ea1SDimitry Andric   unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
27500fca6ea1SDimitry Andric   SDNode *ConvGlueNode = N->getGluedNode();
27510fca6ea1SDimitry Andric   if (ConvGlueNode) {
27520fca6ea1SDimitry Andric     // FIXME: Possibly iterate over multiple glue nodes?
27530fca6ea1SDimitry Andric     assert(ConvGlueNode->getOpcode() == ISD::CONVERGENCECTRL_GLUE);
27540fca6ea1SDimitry Andric     ConvGlueNode = ConvGlueNode->getOperand(0).getNode();
27550fca6ea1SDimitry Andric     ConvGlueNode =
27560fca6ea1SDimitry Andric         CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
27570fca6ea1SDimitry Andric                                MVT::Glue, SDValue(ConvGlueNode, 0));
27580fca6ea1SDimitry Andric   } else {
27590fca6ea1SDimitry Andric     ConvGlueNode = nullptr;
27600fca6ea1SDimitry Andric   }
27618bcb0991SDimitry Andric   switch (IntrID) {
27628bcb0991SDimitry Andric   case Intrinsic::amdgcn_wqm:
27638bcb0991SDimitry Andric     Opcode = AMDGPU::WQM;
27648bcb0991SDimitry Andric     break;
27658bcb0991SDimitry Andric   case Intrinsic::amdgcn_softwqm:
27668bcb0991SDimitry Andric     Opcode = AMDGPU::SOFT_WQM;
27678bcb0991SDimitry Andric     break;
27688bcb0991SDimitry Andric   case Intrinsic::amdgcn_wwm:
2769fe6060f1SDimitry Andric   case Intrinsic::amdgcn_strict_wwm:
2770fe6060f1SDimitry Andric     Opcode = AMDGPU::STRICT_WWM;
2771fe6060f1SDimitry Andric     break;
2772fe6060f1SDimitry Andric   case Intrinsic::amdgcn_strict_wqm:
2773fe6060f1SDimitry Andric     Opcode = AMDGPU::STRICT_WQM;
27748bcb0991SDimitry Andric     break;
27755ffd83dbSDimitry Andric   case Intrinsic::amdgcn_interp_p1_f16:
27765ffd83dbSDimitry Andric     SelectInterpP1F16(N);
27775ffd83dbSDimitry Andric     return;
27788bcb0991SDimitry Andric   default:
27798bcb0991SDimitry Andric     SelectCode(N);
27800fca6ea1SDimitry Andric     break;
27818bcb0991SDimitry Andric   }
27828bcb0991SDimitry Andric 
27830fca6ea1SDimitry Andric   if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
27848bcb0991SDimitry Andric     SDValue Src = N->getOperand(1);
27858bcb0991SDimitry Andric     CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
27868bcb0991SDimitry Andric   }
27878bcb0991SDimitry Andric 
27880fca6ea1SDimitry Andric   if (ConvGlueNode) {
27890fca6ea1SDimitry Andric     SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
27900fca6ea1SDimitry Andric     NewOps.push_back(SDValue(ConvGlueNode, 0));
27910fca6ea1SDimitry Andric     CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps);
27920fca6ea1SDimitry Andric   }
27930fca6ea1SDimitry Andric }
27940fca6ea1SDimitry Andric 
SelectINTRINSIC_VOID(SDNode * N)27950b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2796647cbc5dSDimitry Andric   unsigned IntrID = N->getConstantOperandVal(1);
27970b57cec5SDimitry Andric   switch (IntrID) {
27980b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_init:
27990b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_barrier:
28000b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_v:
28010b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_br:
28020b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_p:
28030b57cec5SDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_release_all:
28040b57cec5SDimitry Andric     SelectDS_GWS(N, IntrID);
28050b57cec5SDimitry Andric     return;
28060b57cec5SDimitry Andric   default:
28070b57cec5SDimitry Andric     break;
28080b57cec5SDimitry Andric   }
28090b57cec5SDimitry Andric 
28100b57cec5SDimitry Andric   SelectCode(N);
28110b57cec5SDimitry Andric }
28120b57cec5SDimitry Andric 
SelectWAVE_ADDRESS(SDNode * N)28135f757f3fSDimitry Andric void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
28145f757f3fSDimitry Andric   SDValue Log2WaveSize =
28155f757f3fSDimitry Andric     CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(N), MVT::i32);
28165f757f3fSDimitry Andric   CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(),
28175f757f3fSDimitry Andric                        {N->getOperand(0), Log2WaveSize});
28185f757f3fSDimitry Andric }
28195f757f3fSDimitry Andric 
SelectSTACKRESTORE(SDNode * N)28205f757f3fSDimitry Andric void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
28215f757f3fSDimitry Andric   SDValue SrcVal = N->getOperand(1);
28225f757f3fSDimitry Andric   if (SrcVal.getValueType() != MVT::i32) {
28235f757f3fSDimitry Andric     SelectCode(N); // Emit default error
28245f757f3fSDimitry Andric     return;
28255f757f3fSDimitry Andric   }
28265f757f3fSDimitry Andric 
28275f757f3fSDimitry Andric   SDValue CopyVal;
28285f757f3fSDimitry Andric   Register SP = TLI->getStackPointerRegisterToSaveRestore();
28295f757f3fSDimitry Andric   SDLoc SL(N);
28305f757f3fSDimitry Andric 
28315f757f3fSDimitry Andric   if (SrcVal.getOpcode() == AMDGPUISD::WAVE_ADDRESS) {
28325f757f3fSDimitry Andric     CopyVal = SrcVal.getOperand(0);
28335f757f3fSDimitry Andric   } else {
28345f757f3fSDimitry Andric     SDValue Log2WaveSize = CurDAG->getTargetConstant(
28355f757f3fSDimitry Andric         Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
28365f757f3fSDimitry Andric 
28375f757f3fSDimitry Andric     if (N->isDivergent()) {
28385f757f3fSDimitry Andric       SrcVal = SDValue(CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
28395f757f3fSDimitry Andric                                               MVT::i32, SrcVal),
28405f757f3fSDimitry Andric                        0);
28415f757f3fSDimitry Andric     }
28425f757f3fSDimitry Andric 
28435f757f3fSDimitry Andric     CopyVal = SDValue(CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
28445f757f3fSDimitry Andric                                              {SrcVal, Log2WaveSize}),
28455f757f3fSDimitry Andric                       0);
28465f757f3fSDimitry Andric   }
28475f757f3fSDimitry Andric 
28485f757f3fSDimitry Andric   SDValue CopyToSP = CurDAG->getCopyToReg(N->getOperand(0), SL, SP, CopyVal);
28495f757f3fSDimitry Andric   CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyToSP);
28505f757f3fSDimitry Andric }
28515f757f3fSDimitry Andric 
SelectVOP3ModsImpl(SDValue In,SDValue & Src,unsigned & Mods,bool IsCanonicalizing,bool AllowAbs) const28520b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2853e8d8bef9SDimitry Andric                                             unsigned &Mods,
285406c3fb27SDimitry Andric                                             bool IsCanonicalizing,
2855e8d8bef9SDimitry Andric                                             bool AllowAbs) const {
285606c3fb27SDimitry Andric   Mods = SISrcMods::NONE;
28570b57cec5SDimitry Andric   Src = In;
28580b57cec5SDimitry Andric 
28590b57cec5SDimitry Andric   if (Src.getOpcode() == ISD::FNEG) {
28600b57cec5SDimitry Andric     Mods |= SISrcMods::NEG;
28610b57cec5SDimitry Andric     Src = Src.getOperand(0);
286206c3fb27SDimitry Andric   } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
286306c3fb27SDimitry Andric     // Fold fsub [+-]0 into fneg. This may not have folded depending on the
286406c3fb27SDimitry Andric     // denormal mode, but we're implicitly canonicalizing in a source operand.
286506c3fb27SDimitry Andric     auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
286606c3fb27SDimitry Andric     if (LHS && LHS->isZero()) {
286706c3fb27SDimitry Andric       Mods |= SISrcMods::NEG;
286806c3fb27SDimitry Andric       Src = Src.getOperand(1);
286906c3fb27SDimitry Andric     }
28700b57cec5SDimitry Andric   }
28710b57cec5SDimitry Andric 
2872e8d8bef9SDimitry Andric   if (AllowAbs && Src.getOpcode() == ISD::FABS) {
28730b57cec5SDimitry Andric     Mods |= SISrcMods::ABS;
28740b57cec5SDimitry Andric     Src = Src.getOperand(0);
28750b57cec5SDimitry Andric   }
28760b57cec5SDimitry Andric 
28770b57cec5SDimitry Andric   return true;
28780b57cec5SDimitry Andric }
28790b57cec5SDimitry Andric 
SelectVOP3Mods(SDValue In,SDValue & Src,SDValue & SrcMods) const28800b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
28810b57cec5SDimitry Andric                                         SDValue &SrcMods) const {
28820b57cec5SDimitry Andric   unsigned Mods;
288306c3fb27SDimitry Andric   if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/true,
288406c3fb27SDimitry Andric                          /*AllowAbs=*/true)) {
288506c3fb27SDimitry Andric     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
288606c3fb27SDimitry Andric     return true;
288706c3fb27SDimitry Andric   }
288806c3fb27SDimitry Andric 
288906c3fb27SDimitry Andric   return false;
289006c3fb27SDimitry Andric }
289106c3fb27SDimitry Andric 
SelectVOP3ModsNonCanonicalizing(SDValue In,SDValue & Src,SDValue & SrcMods) const289206c3fb27SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
289306c3fb27SDimitry Andric     SDValue In, SDValue &Src, SDValue &SrcMods) const {
289406c3fb27SDimitry Andric   unsigned Mods;
289506c3fb27SDimitry Andric   if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/false,
289606c3fb27SDimitry Andric                          /*AllowAbs=*/true)) {
28970b57cec5SDimitry Andric     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
28980b57cec5SDimitry Andric     return true;
28990b57cec5SDimitry Andric   }
29000b57cec5SDimitry Andric 
29010b57cec5SDimitry Andric   return false;
29020b57cec5SDimitry Andric }
29030b57cec5SDimitry Andric 
SelectVOP3BMods(SDValue In,SDValue & Src,SDValue & SrcMods) const2904e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2905e8d8bef9SDimitry Andric                                          SDValue &SrcMods) const {
2906e8d8bef9SDimitry Andric   unsigned Mods;
290706c3fb27SDimitry Andric   if (SelectVOP3ModsImpl(In, Src, Mods,
290806c3fb27SDimitry Andric                          /*IsCanonicalizing=*/true,
290906c3fb27SDimitry Andric                          /*AllowAbs=*/false)) {
2910e8d8bef9SDimitry Andric     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2911e8d8bef9SDimitry Andric     return true;
2912e8d8bef9SDimitry Andric   }
2913e8d8bef9SDimitry Andric 
2914e8d8bef9SDimitry Andric   return false;
2915e8d8bef9SDimitry Andric }
2916e8d8bef9SDimitry Andric 
SelectVOP3NoMods(SDValue In,SDValue & Src) const29170b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
29180b57cec5SDimitry Andric   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
29190b57cec5SDimitry Andric     return false;
29200b57cec5SDimitry Andric 
29210b57cec5SDimitry Andric   Src = In;
29220b57cec5SDimitry Andric   return true;
29230b57cec5SDimitry Andric }
29240b57cec5SDimitry Andric 
SelectVINTERPModsImpl(SDValue In,SDValue & Src,SDValue & SrcMods,bool OpSel) const292581ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
292681ad6265SDimitry Andric                                                SDValue &SrcMods,
292781ad6265SDimitry Andric                                                bool OpSel) const {
292881ad6265SDimitry Andric   unsigned Mods;
292906c3fb27SDimitry Andric   if (SelectVOP3ModsImpl(In, Src, Mods,
293006c3fb27SDimitry Andric                          /*IsCanonicalizing=*/true,
293106c3fb27SDimitry Andric                          /*AllowAbs=*/false)) {
293281ad6265SDimitry Andric     if (OpSel)
293381ad6265SDimitry Andric       Mods |= SISrcMods::OP_SEL_0;
293481ad6265SDimitry Andric     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
293581ad6265SDimitry Andric     return true;
293681ad6265SDimitry Andric   }
293781ad6265SDimitry Andric 
293881ad6265SDimitry Andric   return false;
293981ad6265SDimitry Andric }
294081ad6265SDimitry Andric 
SelectVINTERPMods(SDValue In,SDValue & Src,SDValue & SrcMods) const294181ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
294281ad6265SDimitry Andric                                            SDValue &SrcMods) const {
294381ad6265SDimitry Andric   return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false);
294481ad6265SDimitry Andric }
294581ad6265SDimitry Andric 
SelectVINTERPModsHi(SDValue In,SDValue & Src,SDValue & SrcMods) const294681ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
294781ad6265SDimitry Andric                                              SDValue &SrcMods) const {
294881ad6265SDimitry Andric   return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true);
294981ad6265SDimitry Andric }
295081ad6265SDimitry Andric 
SelectVOP3Mods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const29510b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
29520b57cec5SDimitry Andric                                          SDValue &SrcMods, SDValue &Clamp,
29530b57cec5SDimitry Andric                                          SDValue &Omod) const {
29540b57cec5SDimitry Andric   SDLoc DL(In);
29550b57cec5SDimitry Andric   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
29560b57cec5SDimitry Andric   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
29570b57cec5SDimitry Andric 
29580b57cec5SDimitry Andric   return SelectVOP3Mods(In, Src, SrcMods);
29590b57cec5SDimitry Andric }
29600b57cec5SDimitry Andric 
SelectVOP3BMods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const2961e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2962e8d8bef9SDimitry Andric                                           SDValue &SrcMods, SDValue &Clamp,
2963e8d8bef9SDimitry Andric                                           SDValue &Omod) const {
2964e8d8bef9SDimitry Andric   SDLoc DL(In);
2965e8d8bef9SDimitry Andric   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2966e8d8bef9SDimitry Andric   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2967e8d8bef9SDimitry Andric 
2968e8d8bef9SDimitry Andric   return SelectVOP3BMods(In, Src, SrcMods);
2969e8d8bef9SDimitry Andric }
2970e8d8bef9SDimitry Andric 
SelectVOP3OMods(SDValue In,SDValue & Src,SDValue & Clamp,SDValue & Omod) const29710b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
29720b57cec5SDimitry Andric                                          SDValue &Clamp, SDValue &Omod) const {
29730b57cec5SDimitry Andric   Src = In;
29740b57cec5SDimitry Andric 
29750b57cec5SDimitry Andric   SDLoc DL(In);
29760b57cec5SDimitry Andric   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
29770b57cec5SDimitry Andric   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
29780b57cec5SDimitry Andric 
29790b57cec5SDimitry Andric   return true;
29800b57cec5SDimitry Andric }
29810b57cec5SDimitry Andric 
SelectVOP3PMods(SDValue In,SDValue & Src,SDValue & SrcMods,bool IsDOT) const29820b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
298381ad6265SDimitry Andric                                          SDValue &SrcMods, bool IsDOT) const {
298406c3fb27SDimitry Andric   unsigned Mods = SISrcMods::NONE;
29850b57cec5SDimitry Andric   Src = In;
29860b57cec5SDimitry Andric 
298706c3fb27SDimitry Andric   // TODO: Handle G_FSUB 0 as fneg
29880b57cec5SDimitry Andric   if (Src.getOpcode() == ISD::FNEG) {
29890b57cec5SDimitry Andric     Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
29900b57cec5SDimitry Andric     Src = Src.getOperand(0);
29910b57cec5SDimitry Andric   }
29920b57cec5SDimitry Andric 
2993bdd1243dSDimitry Andric   if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 &&
299481ad6265SDimitry Andric       (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
29950b57cec5SDimitry Andric     unsigned VecMods = Mods;
29960b57cec5SDimitry Andric 
29970b57cec5SDimitry Andric     SDValue Lo = stripBitcast(Src.getOperand(0));
29980b57cec5SDimitry Andric     SDValue Hi = stripBitcast(Src.getOperand(1));
29990b57cec5SDimitry Andric 
30000b57cec5SDimitry Andric     if (Lo.getOpcode() == ISD::FNEG) {
30010b57cec5SDimitry Andric       Lo = stripBitcast(Lo.getOperand(0));
30020b57cec5SDimitry Andric       Mods ^= SISrcMods::NEG;
30030b57cec5SDimitry Andric     }
30040b57cec5SDimitry Andric 
30050b57cec5SDimitry Andric     if (Hi.getOpcode() == ISD::FNEG) {
30060b57cec5SDimitry Andric       Hi = stripBitcast(Hi.getOperand(0));
30070b57cec5SDimitry Andric       Mods ^= SISrcMods::NEG_HI;
30080b57cec5SDimitry Andric     }
30090b57cec5SDimitry Andric 
30100b57cec5SDimitry Andric     if (isExtractHiElt(Lo, Lo))
30110b57cec5SDimitry Andric       Mods |= SISrcMods::OP_SEL_0;
30120b57cec5SDimitry Andric 
30130b57cec5SDimitry Andric     if (isExtractHiElt(Hi, Hi))
30140b57cec5SDimitry Andric       Mods |= SISrcMods::OP_SEL_1;
30150b57cec5SDimitry Andric 
3016fe6060f1SDimitry Andric     unsigned VecSize = Src.getValueSizeInBits();
30170b57cec5SDimitry Andric     Lo = stripExtractLoElt(Lo);
30180b57cec5SDimitry Andric     Hi = stripExtractLoElt(Hi);
30190b57cec5SDimitry Andric 
3020fe6060f1SDimitry Andric     if (Lo.getValueSizeInBits() > VecSize) {
3021fe6060f1SDimitry Andric       Lo = CurDAG->getTargetExtractSubreg(
3022fe6060f1SDimitry Andric         (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3023fe6060f1SDimitry Andric         MVT::getIntegerVT(VecSize), Lo);
3024fe6060f1SDimitry Andric     }
3025fe6060f1SDimitry Andric 
3026fe6060f1SDimitry Andric     if (Hi.getValueSizeInBits() > VecSize) {
3027fe6060f1SDimitry Andric       Hi = CurDAG->getTargetExtractSubreg(
3028fe6060f1SDimitry Andric         (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3029fe6060f1SDimitry Andric         MVT::getIntegerVT(VecSize), Hi);
3030fe6060f1SDimitry Andric     }
3031fe6060f1SDimitry Andric 
3032fe6060f1SDimitry Andric     assert(Lo.getValueSizeInBits() <= VecSize &&
3033fe6060f1SDimitry Andric            Hi.getValueSizeInBits() <= VecSize);
3034fe6060f1SDimitry Andric 
30350b57cec5SDimitry Andric     if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
30360b57cec5SDimitry Andric       // Really a scalar input. Just select from the low half of the register to
30370b57cec5SDimitry Andric       // avoid packing.
30380b57cec5SDimitry Andric 
3039fe6060f1SDimitry Andric       if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
30400b57cec5SDimitry Andric         Src = Lo;
3041fe6060f1SDimitry Andric       } else {
3042fe6060f1SDimitry Andric         assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
3043fe6060f1SDimitry Andric 
3044fe6060f1SDimitry Andric         SDLoc SL(In);
3045fe6060f1SDimitry Andric         SDValue Undef = SDValue(
3046fe6060f1SDimitry Andric           CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
3047fe6060f1SDimitry Andric                                  Lo.getValueType()), 0);
3048fe6060f1SDimitry Andric         auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3049fe6060f1SDimitry Andric                                     : AMDGPU::SReg_64RegClassID;
3050fe6060f1SDimitry Andric         const SDValue Ops[] = {
3051fe6060f1SDimitry Andric           CurDAG->getTargetConstant(RC, SL, MVT::i32),
3052fe6060f1SDimitry Andric           Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3053fe6060f1SDimitry Andric           Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
3054fe6060f1SDimitry Andric 
3055fe6060f1SDimitry Andric         Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
3056fe6060f1SDimitry Andric                                              Src.getValueType(), Ops), 0);
3057fe6060f1SDimitry Andric       }
30580b57cec5SDimitry Andric       SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
30590b57cec5SDimitry Andric       return true;
30600b57cec5SDimitry Andric     }
30610b57cec5SDimitry Andric 
3062fe6060f1SDimitry Andric     if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
3063fe6060f1SDimitry Andric       uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
3064fe6060f1SDimitry Andric                       .bitcastToAPInt().getZExtValue();
3065fe6060f1SDimitry Andric       if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
306606c3fb27SDimitry Andric         Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
3067fe6060f1SDimitry Andric         SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3068fe6060f1SDimitry Andric         return true;
3069fe6060f1SDimitry Andric       }
3070fe6060f1SDimitry Andric     }
3071fe6060f1SDimitry Andric 
30720b57cec5SDimitry Andric     Mods = VecMods;
30730b57cec5SDimitry Andric   }
30740b57cec5SDimitry Andric 
30750b57cec5SDimitry Andric   // Packed instructions do not have abs modifiers.
30760b57cec5SDimitry Andric   Mods |= SISrcMods::OP_SEL_1;
30770b57cec5SDimitry Andric 
30780b57cec5SDimitry Andric   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
30790b57cec5SDimitry Andric   return true;
30800b57cec5SDimitry Andric }
30810b57cec5SDimitry Andric 
SelectVOP3PModsDOT(SDValue In,SDValue & Src,SDValue & SrcMods) const308281ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
308381ad6265SDimitry Andric                                             SDValue &SrcMods) const {
308481ad6265SDimitry Andric   return SelectVOP3PMods(In, Src, SrcMods, true);
308581ad6265SDimitry Andric }
308681ad6265SDimitry Andric 
SelectVOP3PModsNeg(SDValue In,SDValue & Src) const30877a6dacacSDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
308881ad6265SDimitry Andric   const ConstantSDNode *C = cast<ConstantSDNode>(In);
308981ad6265SDimitry Andric   // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
309081ad6265SDimitry Andric   // 1 promotes packed values to signed, 0 treats them as unsigned.
309181ad6265SDimitry Andric   assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
309281ad6265SDimitry Andric 
309381ad6265SDimitry Andric   unsigned Mods = SISrcMods::OP_SEL_1;
309406c3fb27SDimitry Andric   unsigned SrcSign = C->getZExtValue();
309581ad6265SDimitry Andric   if (SrcSign == 1)
309681ad6265SDimitry Andric     Mods ^= SISrcMods::NEG;
309781ad6265SDimitry Andric 
309881ad6265SDimitry Andric   Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
309981ad6265SDimitry Andric   return true;
310081ad6265SDimitry Andric }
310181ad6265SDimitry Andric 
SelectWMMAOpSelVOP3PMods(SDValue In,SDValue & Src) const310281ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
310381ad6265SDimitry Andric                                                   SDValue &Src) const {
310481ad6265SDimitry Andric   const ConstantSDNode *C = cast<ConstantSDNode>(In);
310581ad6265SDimitry Andric   assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
310681ad6265SDimitry Andric 
310781ad6265SDimitry Andric   unsigned Mods = SISrcMods::OP_SEL_1;
310806c3fb27SDimitry Andric   unsigned SrcVal = C->getZExtValue();
310981ad6265SDimitry Andric   if (SrcVal == 1)
311081ad6265SDimitry Andric     Mods |= SISrcMods::OP_SEL_0;
311181ad6265SDimitry Andric 
311281ad6265SDimitry Andric   Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
311381ad6265SDimitry Andric   return true;
311481ad6265SDimitry Andric }
311581ad6265SDimitry Andric 
buildRegSequence32(SmallVectorImpl<SDValue> & Elts,llvm::SelectionDAG * CurDAG,const SDLoc & DL)3116b3edf446SDimitry Andric static MachineSDNode *buildRegSequence32(SmallVectorImpl<SDValue> &Elts,
3117b3edf446SDimitry Andric                                          llvm::SelectionDAG *CurDAG,
3118b3edf446SDimitry Andric                                          const SDLoc &DL) {
3119b3edf446SDimitry Andric   unsigned DstRegClass;
3120b3edf446SDimitry Andric   EVT DstTy;
3121b3edf446SDimitry Andric   switch (Elts.size()) {
3122b3edf446SDimitry Andric   case 8:
3123b3edf446SDimitry Andric     DstRegClass = AMDGPU::VReg_256RegClassID;
3124b3edf446SDimitry Andric     DstTy = MVT::v8i32;
3125b3edf446SDimitry Andric     break;
3126b3edf446SDimitry Andric   case 4:
3127b3edf446SDimitry Andric     DstRegClass = AMDGPU::VReg_128RegClassID;
3128b3edf446SDimitry Andric     DstTy = MVT::v4i32;
3129b3edf446SDimitry Andric     break;
3130b3edf446SDimitry Andric   case 2:
3131b3edf446SDimitry Andric     DstRegClass = AMDGPU::VReg_64RegClassID;
3132b3edf446SDimitry Andric     DstTy = MVT::v2i32;
3133b3edf446SDimitry Andric     break;
3134b3edf446SDimitry Andric   default:
3135b3edf446SDimitry Andric     llvm_unreachable("unhandled Reg sequence size");
3136b3edf446SDimitry Andric   }
3137b3edf446SDimitry Andric 
3138b3edf446SDimitry Andric   SmallVector<SDValue, 17> Ops;
3139b3edf446SDimitry Andric   Ops.push_back(CurDAG->getTargetConstant(DstRegClass, DL, MVT::i32));
3140b3edf446SDimitry Andric   for (unsigned i = 0; i < Elts.size(); ++i) {
3141b3edf446SDimitry Andric     Ops.push_back(Elts[i]);
3142b3edf446SDimitry Andric     Ops.push_back(CurDAG->getTargetConstant(
3143b3edf446SDimitry Andric         SIRegisterInfo::getSubRegFromChannel(i), DL, MVT::i32));
3144b3edf446SDimitry Andric   }
3145b3edf446SDimitry Andric   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, DstTy, Ops);
3146b3edf446SDimitry Andric }
3147b3edf446SDimitry Andric 
buildRegSequence16(SmallVectorImpl<SDValue> & Elts,llvm::SelectionDAG * CurDAG,const SDLoc & DL)3148b3edf446SDimitry Andric static MachineSDNode *buildRegSequence16(SmallVectorImpl<SDValue> &Elts,
3149b3edf446SDimitry Andric                                          llvm::SelectionDAG *CurDAG,
3150b3edf446SDimitry Andric                                          const SDLoc &DL) {
3151b3edf446SDimitry Andric   SmallVector<SDValue, 8> PackedElts;
3152b3edf446SDimitry Andric   assert("unhandled Reg sequence size" &&
3153b3edf446SDimitry Andric          (Elts.size() == 8 || Elts.size() == 16));
3154b3edf446SDimitry Andric 
3155b3edf446SDimitry Andric   // Pack 16-bit elements in pairs into 32-bit register. If both elements are
3156b3edf446SDimitry Andric   // unpacked from 32-bit source use it, otherwise pack them using v_perm.
3157b3edf446SDimitry Andric   for (unsigned i = 0; i < Elts.size(); i += 2) {
3158b3edf446SDimitry Andric     SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3159b3edf446SDimitry Andric     SDValue HiSrc;
3160b3edf446SDimitry Andric     if (isExtractHiElt(Elts[i + 1], HiSrc) && LoSrc == HiSrc) {
3161b3edf446SDimitry Andric       PackedElts.push_back(HiSrc);
3162b3edf446SDimitry Andric     } else {
3163b3edf446SDimitry Andric       SDValue PackLoLo = CurDAG->getTargetConstant(0x05040100, DL, MVT::i32);
3164b3edf446SDimitry Andric       MachineSDNode *Packed =
3165b3edf446SDimitry Andric           CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64, DL, MVT::i32,
3166b3edf446SDimitry Andric                                  {Elts[i + 1], Elts[i], PackLoLo});
3167b3edf446SDimitry Andric       PackedElts.push_back(SDValue(Packed, 0));
3168b3edf446SDimitry Andric     }
3169b3edf446SDimitry Andric   }
3170b3edf446SDimitry Andric 
3171b3edf446SDimitry Andric   return buildRegSequence32(PackedElts, CurDAG, DL);
3172b3edf446SDimitry Andric }
3173b3edf446SDimitry Andric 
buildRegSequence(SmallVectorImpl<SDValue> & Elts,llvm::SelectionDAG * CurDAG,const SDLoc & DL,unsigned ElementSize)3174b3edf446SDimitry Andric static MachineSDNode *buildRegSequence(SmallVectorImpl<SDValue> &Elts,
3175b3edf446SDimitry Andric                                        llvm::SelectionDAG *CurDAG,
3176b3edf446SDimitry Andric                                        const SDLoc &DL, unsigned ElementSize) {
3177b3edf446SDimitry Andric   if (ElementSize == 16)
3178b3edf446SDimitry Andric     return buildRegSequence16(Elts, CurDAG, DL);
3179b3edf446SDimitry Andric   if (ElementSize == 32)
3180b3edf446SDimitry Andric     return buildRegSequence32(Elts, CurDAG, DL);
3181b3edf446SDimitry Andric   llvm_unreachable("Unhandled element size");
3182b3edf446SDimitry Andric }
3183b3edf446SDimitry Andric 
selectWMMAModsNegAbs(unsigned ModOpcode,unsigned & Mods,SmallVectorImpl<SDValue> & Elts,SDValue & Src,llvm::SelectionDAG * CurDAG,const SDLoc & DL,unsigned ElementSize)3184b3edf446SDimitry Andric static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
3185b3edf446SDimitry Andric                                  SmallVectorImpl<SDValue> &Elts, SDValue &Src,
3186b3edf446SDimitry Andric                                  llvm::SelectionDAG *CurDAG, const SDLoc &DL,
3187b3edf446SDimitry Andric                                  unsigned ElementSize) {
3188b3edf446SDimitry Andric   if (ModOpcode == ISD::FNEG) {
3189b3edf446SDimitry Andric     Mods |= SISrcMods::NEG;
3190b3edf446SDimitry Andric     // Check if all elements also have abs modifier
3191b3edf446SDimitry Andric     SmallVector<SDValue, 8> NegAbsElts;
3192b3edf446SDimitry Andric     for (auto El : Elts) {
3193b3edf446SDimitry Andric       if (El.getOpcode() != ISD::FABS)
3194b3edf446SDimitry Andric         break;
3195b3edf446SDimitry Andric       NegAbsElts.push_back(El->getOperand(0));
3196b3edf446SDimitry Andric     }
3197b3edf446SDimitry Andric     if (Elts.size() != NegAbsElts.size()) {
3198b3edf446SDimitry Andric       // Neg
3199b3edf446SDimitry Andric       Src = SDValue(buildRegSequence(Elts, CurDAG, DL, ElementSize), 0);
3200b3edf446SDimitry Andric     } else {
3201b3edf446SDimitry Andric       // Neg and Abs
3202b3edf446SDimitry Andric       Mods |= SISrcMods::NEG_HI;
3203b3edf446SDimitry Andric       Src = SDValue(buildRegSequence(NegAbsElts, CurDAG, DL, ElementSize), 0);
3204b3edf446SDimitry Andric     }
3205b3edf446SDimitry Andric   } else {
3206b3edf446SDimitry Andric     assert(ModOpcode == ISD::FABS);
3207b3edf446SDimitry Andric     // Abs
3208b3edf446SDimitry Andric     Mods |= SISrcMods::NEG_HI;
3209b3edf446SDimitry Andric     Src = SDValue(buildRegSequence(Elts, CurDAG, DL, ElementSize), 0);
3210b3edf446SDimitry Andric   }
3211b3edf446SDimitry Andric }
3212b3edf446SDimitry Andric 
3213b3edf446SDimitry Andric // Check all f16 elements for modifiers while looking through b32 and v2b16
3214b3edf446SDimitry Andric // build vector, stop if element does not satisfy ModifierCheck.
3215b3edf446SDimitry Andric static void
checkWMMAElementsModifiersF16(BuildVectorSDNode * BV,std::function<bool (SDValue)> ModifierCheck)3216b3edf446SDimitry Andric checkWMMAElementsModifiersF16(BuildVectorSDNode *BV,
3217b3edf446SDimitry Andric                               std::function<bool(SDValue)> ModifierCheck) {
3218b3edf446SDimitry Andric   for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3219b3edf446SDimitry Andric     if (auto *F16Pair =
3220b3edf446SDimitry Andric             dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
3221b3edf446SDimitry Andric       for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3222b3edf446SDimitry Andric         SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3223b3edf446SDimitry Andric         if (!ModifierCheck(ElF16))
3224b3edf446SDimitry Andric           break;
3225b3edf446SDimitry Andric       }
3226b3edf446SDimitry Andric     }
3227b3edf446SDimitry Andric   }
3228b3edf446SDimitry Andric }
3229b3edf446SDimitry Andric 
SelectWMMAModsF16Neg(SDValue In,SDValue & Src,SDValue & SrcMods) const3230b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
3231b3edf446SDimitry Andric                                               SDValue &SrcMods) const {
3232b3edf446SDimitry Andric   Src = In;
3233b3edf446SDimitry Andric   unsigned Mods = SISrcMods::OP_SEL_1;
3234b3edf446SDimitry Andric 
3235b3edf446SDimitry Andric   // mods are on f16 elements
3236b3edf446SDimitry Andric   if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3237b3edf446SDimitry Andric     SmallVector<SDValue, 8> EltsF16;
3238b3edf446SDimitry Andric 
3239b3edf446SDimitry Andric     checkWMMAElementsModifiersF16(BV, [&](SDValue Element) -> bool {
3240b3edf446SDimitry Andric       if (Element.getOpcode() != ISD::FNEG)
3241b3edf446SDimitry Andric         return false;
3242b3edf446SDimitry Andric       EltsF16.push_back(Element.getOperand(0));
3243b3edf446SDimitry Andric       return true;
3244b3edf446SDimitry Andric     });
3245b3edf446SDimitry Andric 
3246b3edf446SDimitry Andric     // All elements have neg modifier
3247b3edf446SDimitry Andric     if (BV->getNumOperands() * 2 == EltsF16.size()) {
3248b3edf446SDimitry Andric       Src = SDValue(buildRegSequence16(EltsF16, CurDAG, SDLoc(In)), 0);
3249b3edf446SDimitry Andric       Mods |= SISrcMods::NEG;
3250b3edf446SDimitry Andric       Mods |= SISrcMods::NEG_HI;
3251b3edf446SDimitry Andric     }
3252b3edf446SDimitry Andric   }
3253b3edf446SDimitry Andric 
3254b3edf446SDimitry Andric   // mods are on v2f16 elements
3255b3edf446SDimitry Andric   if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3256b3edf446SDimitry Andric     SmallVector<SDValue, 8> EltsV2F16;
3257b3edf446SDimitry Andric     for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3258b3edf446SDimitry Andric       SDValue ElV2f16 = stripBitcast(BV->getOperand(i));
3259b3edf446SDimitry Andric       // Based on first element decide which mod we match, neg or abs
3260b3edf446SDimitry Andric       if (ElV2f16.getOpcode() != ISD::FNEG)
3261b3edf446SDimitry Andric         break;
3262b3edf446SDimitry Andric       EltsV2F16.push_back(ElV2f16.getOperand(0));
3263b3edf446SDimitry Andric     }
3264b3edf446SDimitry Andric 
3265b3edf446SDimitry Andric     // All pairs of elements have neg modifier
3266b3edf446SDimitry Andric     if (BV->getNumOperands() == EltsV2F16.size()) {
3267b3edf446SDimitry Andric       Src = SDValue(buildRegSequence32(EltsV2F16, CurDAG, SDLoc(In)), 0);
3268b3edf446SDimitry Andric       Mods |= SISrcMods::NEG;
3269b3edf446SDimitry Andric       Mods |= SISrcMods::NEG_HI;
3270b3edf446SDimitry Andric     }
3271b3edf446SDimitry Andric   }
3272b3edf446SDimitry Andric 
3273b3edf446SDimitry Andric   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3274b3edf446SDimitry Andric   return true;
3275b3edf446SDimitry Andric }
3276b3edf446SDimitry Andric 
SelectWMMAModsF16NegAbs(SDValue In,SDValue & Src,SDValue & SrcMods) const3277b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
3278b3edf446SDimitry Andric                                                  SDValue &SrcMods) const {
3279b3edf446SDimitry Andric   Src = In;
3280b3edf446SDimitry Andric   unsigned Mods = SISrcMods::OP_SEL_1;
3281b3edf446SDimitry Andric   unsigned ModOpcode;
3282b3edf446SDimitry Andric 
3283b3edf446SDimitry Andric   // mods are on f16 elements
3284b3edf446SDimitry Andric   if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3285b3edf446SDimitry Andric     SmallVector<SDValue, 8> EltsF16;
3286b3edf446SDimitry Andric     checkWMMAElementsModifiersF16(BV, [&](SDValue ElF16) -> bool {
3287b3edf446SDimitry Andric       // Based on first element decide which mod we match, neg or abs
3288b3edf446SDimitry Andric       if (EltsF16.empty())
3289b3edf446SDimitry Andric         ModOpcode = (ElF16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
3290b3edf446SDimitry Andric       if (ElF16.getOpcode() != ModOpcode)
3291b3edf446SDimitry Andric         return false;
3292b3edf446SDimitry Andric       EltsF16.push_back(ElF16.getOperand(0));
3293b3edf446SDimitry Andric       return true;
3294b3edf446SDimitry Andric     });
3295b3edf446SDimitry Andric 
3296b3edf446SDimitry Andric     // All elements have ModOpcode modifier
3297b3edf446SDimitry Andric     if (BV->getNumOperands() * 2 == EltsF16.size())
3298b3edf446SDimitry Andric       selectWMMAModsNegAbs(ModOpcode, Mods, EltsF16, Src, CurDAG, SDLoc(In),
3299b3edf446SDimitry Andric                            16);
3300b3edf446SDimitry Andric   }
3301b3edf446SDimitry Andric 
3302b3edf446SDimitry Andric   // mods are on v2f16 elements
3303b3edf446SDimitry Andric   if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3304b3edf446SDimitry Andric     SmallVector<SDValue, 8> EltsV2F16;
3305b3edf446SDimitry Andric 
3306b3edf446SDimitry Andric     for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3307b3edf446SDimitry Andric       SDValue ElV2f16 = stripBitcast(BV->getOperand(i));
3308b3edf446SDimitry Andric       // Based on first element decide which mod we match, neg or abs
3309b3edf446SDimitry Andric       if (EltsV2F16.empty())
3310b3edf446SDimitry Andric         ModOpcode = (ElV2f16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
3311b3edf446SDimitry Andric       if (ElV2f16->getOpcode() != ModOpcode)
3312b3edf446SDimitry Andric         break;
3313b3edf446SDimitry Andric       EltsV2F16.push_back(ElV2f16->getOperand(0));
3314b3edf446SDimitry Andric     }
3315b3edf446SDimitry Andric 
3316b3edf446SDimitry Andric     // All elements have ModOpcode modifier
3317b3edf446SDimitry Andric     if (BV->getNumOperands() == EltsV2F16.size())
3318b3edf446SDimitry Andric       selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, CurDAG, SDLoc(In),
3319b3edf446SDimitry Andric                            32);
3320b3edf446SDimitry Andric   }
3321b3edf446SDimitry Andric 
3322b3edf446SDimitry Andric   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3323b3edf446SDimitry Andric   return true;
3324b3edf446SDimitry Andric }
3325b3edf446SDimitry Andric 
SelectWMMAModsF32NegAbs(SDValue In,SDValue & Src,SDValue & SrcMods) const3326b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
3327b3edf446SDimitry Andric                                                  SDValue &SrcMods) const {
3328b3edf446SDimitry Andric   Src = In;
3329b3edf446SDimitry Andric   unsigned Mods = SISrcMods::OP_SEL_1;
3330b3edf446SDimitry Andric   SmallVector<SDValue, 8> EltsF32;
3331b3edf446SDimitry Andric 
3332b3edf446SDimitry Andric   if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
33330fca6ea1SDimitry Andric     assert(BV->getNumOperands() > 0);
33340fca6ea1SDimitry Andric     // Based on first element decide which mod we match, neg or abs
33350fca6ea1SDimitry Andric     SDValue ElF32 = stripBitcast(BV->getOperand(0));
33360fca6ea1SDimitry Andric     unsigned ModOpcode =
33370fca6ea1SDimitry Andric         (ElF32.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
3338b3edf446SDimitry Andric     for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3339b3edf446SDimitry Andric       SDValue ElF32 = stripBitcast(BV->getOperand(i));
3340b3edf446SDimitry Andric       if (ElF32.getOpcode() != ModOpcode)
3341b3edf446SDimitry Andric         break;
3342b3edf446SDimitry Andric       EltsF32.push_back(ElF32.getOperand(0));
3343b3edf446SDimitry Andric     }
3344b3edf446SDimitry Andric 
3345b3edf446SDimitry Andric     // All elements had ModOpcode modifier
3346b3edf446SDimitry Andric     if (BV->getNumOperands() == EltsF32.size())
3347b3edf446SDimitry Andric       selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, CurDAG, SDLoc(In),
3348b3edf446SDimitry Andric                            32);
3349b3edf446SDimitry Andric   }
3350b3edf446SDimitry Andric 
3351b3edf446SDimitry Andric   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3352b3edf446SDimitry Andric   return true;
3353b3edf446SDimitry Andric }
3354b3edf446SDimitry Andric 
SelectWMMAVISrc(SDValue In,SDValue & Src) const3355b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
3356b3edf446SDimitry Andric   if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
3357b3edf446SDimitry Andric     BitVector UndefElements;
3358b3edf446SDimitry Andric     if (SDValue Splat = BV->getSplatValue(&UndefElements))
3359b3edf446SDimitry Andric       if (isInlineImmediate(Splat.getNode())) {
3360b3edf446SDimitry Andric         if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat)) {
3361b3edf446SDimitry Andric           unsigned Imm = C->getAPIntValue().getSExtValue();
3362b3edf446SDimitry Andric           Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
3363b3edf446SDimitry Andric           return true;
3364b3edf446SDimitry Andric         }
3365b3edf446SDimitry Andric         if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Splat)) {
3366b3edf446SDimitry Andric           unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
3367b3edf446SDimitry Andric           Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
3368b3edf446SDimitry Andric           return true;
3369b3edf446SDimitry Andric         }
3370b3edf446SDimitry Andric         llvm_unreachable("unhandled Constant node");
3371b3edf446SDimitry Andric       }
3372b3edf446SDimitry Andric   }
3373b3edf446SDimitry Andric 
3374b3edf446SDimitry Andric   // 16 bit splat
3375b3edf446SDimitry Andric   SDValue SplatSrc32 = stripBitcast(In);
33760fca6ea1SDimitry Andric   if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
3377b3edf446SDimitry Andric     if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
3378b3edf446SDimitry Andric       SDValue SplatSrc16 = stripBitcast(Splat32);
33790fca6ea1SDimitry Andric       if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
3380b3edf446SDimitry Andric         if (SDValue Splat = SplatSrc16BV->getSplatValue()) {
3381b3edf446SDimitry Andric           const SIInstrInfo *TII = Subtarget->getInstrInfo();
33820fca6ea1SDimitry Andric           std::optional<APInt> RawValue;
33830fca6ea1SDimitry Andric           if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Splat))
33840fca6ea1SDimitry Andric             RawValue = C->getValueAPF().bitcastToAPInt();
33850fca6ea1SDimitry Andric           else if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat))
33860fca6ea1SDimitry Andric             RawValue = C->getAPIntValue();
33870fca6ea1SDimitry Andric 
33880fca6ea1SDimitry Andric           if (RawValue.has_value()) {
33890fca6ea1SDimitry Andric             EVT VT = In.getValueType().getScalarType();
33900fca6ea1SDimitry Andric             if (VT.getSimpleVT() == MVT::f16 || VT.getSimpleVT() == MVT::bf16) {
33910fca6ea1SDimitry Andric               APFloat FloatVal(VT.getSimpleVT() == MVT::f16
33920fca6ea1SDimitry Andric                                    ? APFloatBase::IEEEhalf()
33930fca6ea1SDimitry Andric                                    : APFloatBase::BFloat(),
33940fca6ea1SDimitry Andric                                RawValue.value());
33950fca6ea1SDimitry Andric               if (TII->isInlineConstant(FloatVal)) {
33960fca6ea1SDimitry Andric                 Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
33970fca6ea1SDimitry Andric                                                 MVT::i16);
3398b3edf446SDimitry Andric                 return true;
3399b3edf446SDimitry Andric               }
34000fca6ea1SDimitry Andric             } else if (VT.getSimpleVT() == MVT::i16) {
34010fca6ea1SDimitry Andric               if (TII->isInlineConstant(RawValue.value())) {
34020fca6ea1SDimitry Andric                 Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
34030fca6ea1SDimitry Andric                                                 MVT::i16);
34040fca6ea1SDimitry Andric                 return true;
3405b3edf446SDimitry Andric               }
34060fca6ea1SDimitry Andric             } else
34070fca6ea1SDimitry Andric               llvm_unreachable("unknown 16-bit type");
3408b3edf446SDimitry Andric           }
3409b3edf446SDimitry Andric         }
3410b3edf446SDimitry Andric     }
3411b3edf446SDimitry Andric 
3412b3edf446SDimitry Andric   return false;
3413b3edf446SDimitry Andric }
3414b3edf446SDimitry Andric 
SelectSWMMACIndex8(SDValue In,SDValue & Src,SDValue & IndexKey) const3415b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
3416b3edf446SDimitry Andric                                             SDValue &IndexKey) const {
3417b3edf446SDimitry Andric   unsigned Key = 0;
3418b3edf446SDimitry Andric   Src = In;
3419b3edf446SDimitry Andric 
3420b3edf446SDimitry Andric   if (In.getOpcode() == ISD::SRL) {
3421b3edf446SDimitry Andric     const llvm::SDValue &ShiftSrc = In.getOperand(0);
3422b3edf446SDimitry Andric     ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
3423b3edf446SDimitry Andric     if (ShiftSrc.getValueType().getSizeInBits() == 32 && ShiftAmt &&
3424b3edf446SDimitry Andric         ShiftAmt->getZExtValue() % 8 == 0) {
3425b3edf446SDimitry Andric       Key = ShiftAmt->getZExtValue() / 8;
3426b3edf446SDimitry Andric       Src = ShiftSrc;
3427b3edf446SDimitry Andric     }
3428b3edf446SDimitry Andric   }
3429b3edf446SDimitry Andric 
3430b3edf446SDimitry Andric   IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
3431b3edf446SDimitry Andric   return true;
3432b3edf446SDimitry Andric }
3433b3edf446SDimitry Andric 
SelectSWMMACIndex16(SDValue In,SDValue & Src,SDValue & IndexKey) const3434b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
3435b3edf446SDimitry Andric                                              SDValue &IndexKey) const {
3436b3edf446SDimitry Andric   unsigned Key = 0;
3437b3edf446SDimitry Andric   Src = In;
3438b3edf446SDimitry Andric 
3439b3edf446SDimitry Andric   if (In.getOpcode() == ISD::SRL) {
3440b3edf446SDimitry Andric     const llvm::SDValue &ShiftSrc = In.getOperand(0);
3441b3edf446SDimitry Andric     ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
3442b3edf446SDimitry Andric     if (ShiftSrc.getValueType().getSizeInBits() == 32 && ShiftAmt &&
3443b3edf446SDimitry Andric         ShiftAmt->getZExtValue() == 16) {
3444b3edf446SDimitry Andric       Key = 1;
3445b3edf446SDimitry Andric       Src = ShiftSrc;
3446b3edf446SDimitry Andric     }
3447b3edf446SDimitry Andric   }
3448b3edf446SDimitry Andric 
3449b3edf446SDimitry Andric   IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
3450b3edf446SDimitry Andric   return true;
3451b3edf446SDimitry Andric }
3452b3edf446SDimitry Andric 
SelectVOP3OpSel(SDValue In,SDValue & Src,SDValue & SrcMods) const34530b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
34540b57cec5SDimitry Andric                                          SDValue &SrcMods) const {
34550b57cec5SDimitry Andric   Src = In;
34560b57cec5SDimitry Andric   // FIXME: Handle op_sel
34570b57cec5SDimitry Andric   SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
34580b57cec5SDimitry Andric   return true;
34590b57cec5SDimitry Andric }
34600b57cec5SDimitry Andric 
SelectVOP3OpSelMods(SDValue In,SDValue & Src,SDValue & SrcMods) const34610b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
34620b57cec5SDimitry Andric                                              SDValue &SrcMods) const {
34630b57cec5SDimitry Andric   // FIXME: Handle op_sel
34640b57cec5SDimitry Andric   return SelectVOP3Mods(In, Src, SrcMods);
34650b57cec5SDimitry Andric }
34660b57cec5SDimitry Andric 
34670b57cec5SDimitry Andric // The return value is not whether the match is possible (which it always is),
34680b57cec5SDimitry Andric // but whether or not it a conversion is really used.
SelectVOP3PMadMixModsImpl(SDValue In,SDValue & Src,unsigned & Mods) const34690b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
34700b57cec5SDimitry Andric                                                    unsigned &Mods) const {
34710b57cec5SDimitry Andric   Mods = 0;
34720b57cec5SDimitry Andric   SelectVOP3ModsImpl(In, Src, Mods);
34730b57cec5SDimitry Andric 
34740b57cec5SDimitry Andric   if (Src.getOpcode() == ISD::FP_EXTEND) {
34750b57cec5SDimitry Andric     Src = Src.getOperand(0);
34760b57cec5SDimitry Andric     assert(Src.getValueType() == MVT::f16);
34770b57cec5SDimitry Andric     Src = stripBitcast(Src);
34780b57cec5SDimitry Andric 
34790b57cec5SDimitry Andric     // Be careful about folding modifiers if we already have an abs. fneg is
34800b57cec5SDimitry Andric     // applied last, so we don't want to apply an earlier fneg.
34810b57cec5SDimitry Andric     if ((Mods & SISrcMods::ABS) == 0) {
34820b57cec5SDimitry Andric       unsigned ModsTmp;
34830b57cec5SDimitry Andric       SelectVOP3ModsImpl(Src, Src, ModsTmp);
34840b57cec5SDimitry Andric 
34850b57cec5SDimitry Andric       if ((ModsTmp & SISrcMods::NEG) != 0)
34860b57cec5SDimitry Andric         Mods ^= SISrcMods::NEG;
34870b57cec5SDimitry Andric 
34880b57cec5SDimitry Andric       if ((ModsTmp & SISrcMods::ABS) != 0)
34890b57cec5SDimitry Andric         Mods |= SISrcMods::ABS;
34900b57cec5SDimitry Andric     }
34910b57cec5SDimitry Andric 
34920b57cec5SDimitry Andric     // op_sel/op_sel_hi decide the source type and source.
34930b57cec5SDimitry Andric     // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
34940b57cec5SDimitry Andric     // If the sources's op_sel is set, it picks the high half of the source
34950b57cec5SDimitry Andric     // register.
34960b57cec5SDimitry Andric 
34970b57cec5SDimitry Andric     Mods |= SISrcMods::OP_SEL_1;
34980b57cec5SDimitry Andric     if (isExtractHiElt(Src, Src)) {
34990b57cec5SDimitry Andric       Mods |= SISrcMods::OP_SEL_0;
35000b57cec5SDimitry Andric 
35010b57cec5SDimitry Andric       // TODO: Should we try to look for neg/abs here?
35020b57cec5SDimitry Andric     }
35030b57cec5SDimitry Andric 
35040b57cec5SDimitry Andric     return true;
35050b57cec5SDimitry Andric   }
35060b57cec5SDimitry Andric 
35070b57cec5SDimitry Andric   return false;
35080b57cec5SDimitry Andric }
35090b57cec5SDimitry Andric 
SelectVOP3PMadMixModsExt(SDValue In,SDValue & Src,SDValue & SrcMods) const351006c3fb27SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
351106c3fb27SDimitry Andric                                                   SDValue &SrcMods) const {
351206c3fb27SDimitry Andric   unsigned Mods = 0;
351306c3fb27SDimitry Andric   if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))
351406c3fb27SDimitry Andric     return false;
351506c3fb27SDimitry Andric   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
351606c3fb27SDimitry Andric   return true;
351706c3fb27SDimitry Andric }
351806c3fb27SDimitry Andric 
SelectVOP3PMadMixMods(SDValue In,SDValue & Src,SDValue & SrcMods) const35190b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
35200b57cec5SDimitry Andric                                                SDValue &SrcMods) const {
35210b57cec5SDimitry Andric   unsigned Mods = 0;
35220b57cec5SDimitry Andric   SelectVOP3PMadMixModsImpl(In, Src, Mods);
35230b57cec5SDimitry Andric   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
35240b57cec5SDimitry Andric   return true;
35250b57cec5SDimitry Andric }
35260b57cec5SDimitry Andric 
getHi16Elt(SDValue In) const35270b57cec5SDimitry Andric SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
35280b57cec5SDimitry Andric   if (In.isUndef())
35290b57cec5SDimitry Andric     return CurDAG->getUNDEF(MVT::i32);
35300b57cec5SDimitry Andric 
35310b57cec5SDimitry Andric   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
35320b57cec5SDimitry Andric     SDLoc SL(In);
35330b57cec5SDimitry Andric     return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
35340b57cec5SDimitry Andric   }
35350b57cec5SDimitry Andric 
35360b57cec5SDimitry Andric   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
35370b57cec5SDimitry Andric     SDLoc SL(In);
35380b57cec5SDimitry Andric     return CurDAG->getConstant(
35390b57cec5SDimitry Andric       C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
35400b57cec5SDimitry Andric   }
35410b57cec5SDimitry Andric 
35420b57cec5SDimitry Andric   SDValue Src;
35430b57cec5SDimitry Andric   if (isExtractHiElt(In, Src))
35440b57cec5SDimitry Andric     return Src;
35450b57cec5SDimitry Andric 
35460b57cec5SDimitry Andric   return SDValue();
35470b57cec5SDimitry Andric }
35480b57cec5SDimitry Andric 
isVGPRImm(const SDNode * N) const35490b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
35500b57cec5SDimitry Andric   assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
35510b57cec5SDimitry Andric 
35520b57cec5SDimitry Andric   const SIRegisterInfo *SIRI =
35530b57cec5SDimitry Andric     static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
35540b57cec5SDimitry Andric   const SIInstrInfo * SII =
35550b57cec5SDimitry Andric     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
35560b57cec5SDimitry Andric 
35570b57cec5SDimitry Andric   unsigned Limit = 0;
35580b57cec5SDimitry Andric   bool AllUsesAcceptSReg = true;
35590b57cec5SDimitry Andric   for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
35600b57cec5SDimitry Andric     Limit < 10 && U != E; ++U, ++Limit) {
35610b57cec5SDimitry Andric     const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
35620b57cec5SDimitry Andric 
35630b57cec5SDimitry Andric     // If the register class is unknown, it could be an unknown
35640b57cec5SDimitry Andric     // register class that needs to be an SGPR, e.g. an inline asm
35650b57cec5SDimitry Andric     // constraint
35660b57cec5SDimitry Andric     if (!RC || SIRI->isSGPRClass(RC))
35670b57cec5SDimitry Andric       return false;
35680b57cec5SDimitry Andric 
35695f757f3fSDimitry Andric     if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
35700b57cec5SDimitry Andric       AllUsesAcceptSReg = false;
35710b57cec5SDimitry Andric       SDNode * User = *U;
35720b57cec5SDimitry Andric       if (User->isMachineOpcode()) {
35730b57cec5SDimitry Andric         unsigned Opc = User->getMachineOpcode();
3574bdd1243dSDimitry Andric         const MCInstrDesc &Desc = SII->get(Opc);
35750b57cec5SDimitry Andric         if (Desc.isCommutable()) {
35760b57cec5SDimitry Andric           unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
35770b57cec5SDimitry Andric           unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
35780b57cec5SDimitry Andric           if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
35790b57cec5SDimitry Andric             unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
35800b57cec5SDimitry Andric             const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
35815f757f3fSDimitry Andric             if (CommutedRC == &AMDGPU::VS_32RegClass ||
35825f757f3fSDimitry Andric                 CommutedRC == &AMDGPU::VS_64RegClass)
35830b57cec5SDimitry Andric               AllUsesAcceptSReg = true;
35840b57cec5SDimitry Andric           }
35850b57cec5SDimitry Andric         }
35860b57cec5SDimitry Andric       }
358781ad6265SDimitry Andric       // If "AllUsesAcceptSReg == false" so far we haven't succeeded
35880b57cec5SDimitry Andric       // commuting current user. This means have at least one use
35890b57cec5SDimitry Andric       // that strictly require VGPR. Thus, we will not attempt to commute
35900b57cec5SDimitry Andric       // other user instructions.
35910b57cec5SDimitry Andric       if (!AllUsesAcceptSReg)
35920b57cec5SDimitry Andric         break;
35930b57cec5SDimitry Andric     }
35940b57cec5SDimitry Andric   }
35950b57cec5SDimitry Andric   return !AllUsesAcceptSReg && (Limit < 10);
35960b57cec5SDimitry Andric }
35970b57cec5SDimitry Andric 
isUniformLoad(const SDNode * N) const35980b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
35990b57cec5SDimitry Andric   auto Ld = cast<LoadSDNode>(N);
36000b57cec5SDimitry Andric 
36017a6dacacSDimitry Andric   const MachineMemOperand *MMO = Ld->getMemOperand();
36027a6dacacSDimitry Andric   if (N->isDivergent() && !AMDGPUInstrInfo::isUniformMMO(MMO))
3603bdd1243dSDimitry Andric     return false;
3604bdd1243dSDimitry Andric 
36050fca6ea1SDimitry Andric   return MMO->getSize().hasValue() &&
36060fca6ea1SDimitry Andric          Ld->getAlign() >=
36070fca6ea1SDimitry Andric              Align(std::min(MMO->getSize().getValue().getKnownMinValue(),
36080fca6ea1SDimitry Andric                             uint64_t(4))) &&
3609bdd1243dSDimitry Andric          ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
3610bdd1243dSDimitry Andric            Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
361181ad6265SDimitry Andric           (Subtarget->getScalarizeGlobalBehavior() &&
36120b57cec5SDimitry Andric            Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
3613bdd1243dSDimitry Andric            Ld->isSimple() &&
361481ad6265SDimitry Andric            static_cast<const SITargetLowering *>(getTargetLowering())
361581ad6265SDimitry Andric                ->isMemOpHasNoClobberedMemOperand(N)));
36160b57cec5SDimitry Andric }
36170b57cec5SDimitry Andric 
PostprocessISelDAG()36180b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
36190b57cec5SDimitry Andric   const AMDGPUTargetLowering& Lowering =
36200b57cec5SDimitry Andric     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
36210b57cec5SDimitry Andric   bool IsModified = false;
36220b57cec5SDimitry Andric   do {
36230b57cec5SDimitry Andric     IsModified = false;
36240b57cec5SDimitry Andric 
36250b57cec5SDimitry Andric     // Go over all selected nodes and try to fold them a bit more
36260b57cec5SDimitry Andric     SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
36270b57cec5SDimitry Andric     while (Position != CurDAG->allnodes_end()) {
36280b57cec5SDimitry Andric       SDNode *Node = &*Position++;
36290b57cec5SDimitry Andric       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
36300b57cec5SDimitry Andric       if (!MachineNode)
36310b57cec5SDimitry Andric         continue;
36320b57cec5SDimitry Andric 
36330b57cec5SDimitry Andric       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
36340b57cec5SDimitry Andric       if (ResNode != Node) {
36350b57cec5SDimitry Andric         if (ResNode)
36360b57cec5SDimitry Andric           ReplaceUses(Node, ResNode);
36370b57cec5SDimitry Andric         IsModified = true;
36380b57cec5SDimitry Andric       }
36390b57cec5SDimitry Andric     }
36400b57cec5SDimitry Andric     CurDAG->RemoveDeadNodes();
36410b57cec5SDimitry Andric   } while (IsModified);
36420b57cec5SDimitry Andric }
3643bdd1243dSDimitry Andric 
AMDGPUDAGToDAGISelLegacy(TargetMachine & TM,CodeGenOptLevel OptLevel)36440fca6ea1SDimitry Andric AMDGPUDAGToDAGISelLegacy::AMDGPUDAGToDAGISelLegacy(TargetMachine &TM,
36450fca6ea1SDimitry Andric                                                    CodeGenOptLevel OptLevel)
36460fca6ea1SDimitry Andric     : SelectionDAGISelLegacy(
36470fca6ea1SDimitry Andric           ID, std::make_unique<AMDGPUDAGToDAGISel>(TM, OptLevel)) {}
36480fca6ea1SDimitry Andric 
36490fca6ea1SDimitry Andric char AMDGPUDAGToDAGISelLegacy::ID = 0;
3650