10b57cec5SDimitry Andric //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //==-----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// Defines an instruction selector for the AMDGPU target.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric
14349cc55cSDimitry Andric #include "AMDGPUISelDAGToDAG.h"
150b57cec5SDimitry Andric #include "AMDGPU.h"
16bdd1243dSDimitry Andric #include "AMDGPUInstrInfo.h"
1781ad6265SDimitry Andric #include "AMDGPUSubtarget.h"
180b57cec5SDimitry Andric #include "AMDGPUTargetMachine.h"
1981ad6265SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20349cc55cSDimitry Andric #include "MCTargetDesc/R600MCTargetDesc.h"
21349cc55cSDimitry Andric #include "R600RegisterInfo.h"
225f757f3fSDimitry Andric #include "SIISelLowering.h"
230b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
2406c3fb27SDimitry Andric #include "llvm/Analysis/UniformityAnalysis.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/FunctionLoweringInfo.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAG.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGISel.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGNodes.h"
30e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
31480093f4SDimitry Andric #include "llvm/InitializePasses.h"
3206c3fb27SDimitry Andric #include "llvm/Support/ErrorHandling.h"
33e8d8bef9SDimitry Andric
340b57cec5SDimitry Andric #ifdef EXPENSIVE_CHECKS
35e8d8bef9SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
360b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
370b57cec5SDimitry Andric #endif
380b57cec5SDimitry Andric
39fcaf7f86SDimitry Andric #define DEBUG_TYPE "amdgpu-isel"
400b57cec5SDimitry Andric
410b57cec5SDimitry Andric using namespace llvm;
420b57cec5SDimitry Andric
430b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
440b57cec5SDimitry Andric // Instruction Selector Implementation
450b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
460b57cec5SDimitry Andric
470b57cec5SDimitry Andric namespace {
stripBitcast(SDValue Val)480b57cec5SDimitry Andric static SDValue stripBitcast(SDValue Val) {
490b57cec5SDimitry Andric return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
500b57cec5SDimitry Andric }
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric // Figure out if this is really an extract of the high 16-bits of a dword.
isExtractHiElt(SDValue In,SDValue & Out)530b57cec5SDimitry Andric static bool isExtractHiElt(SDValue In, SDValue &Out) {
540b57cec5SDimitry Andric In = stripBitcast(In);
55fe6060f1SDimitry Andric
56fe6060f1SDimitry Andric if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
57fe6060f1SDimitry Andric if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
58fe6060f1SDimitry Andric if (!Idx->isOne())
59fe6060f1SDimitry Andric return false;
60fe6060f1SDimitry Andric Out = In.getOperand(0);
61fe6060f1SDimitry Andric return true;
62fe6060f1SDimitry Andric }
63fe6060f1SDimitry Andric }
64fe6060f1SDimitry Andric
650b57cec5SDimitry Andric if (In.getOpcode() != ISD::TRUNCATE)
660b57cec5SDimitry Andric return false;
670b57cec5SDimitry Andric
680b57cec5SDimitry Andric SDValue Srl = In.getOperand(0);
690b57cec5SDimitry Andric if (Srl.getOpcode() == ISD::SRL) {
700b57cec5SDimitry Andric if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
710b57cec5SDimitry Andric if (ShiftAmt->getZExtValue() == 16) {
720b57cec5SDimitry Andric Out = stripBitcast(Srl.getOperand(0));
730b57cec5SDimitry Andric return true;
740b57cec5SDimitry Andric }
750b57cec5SDimitry Andric }
760b57cec5SDimitry Andric }
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric return false;
790b57cec5SDimitry Andric }
800b57cec5SDimitry Andric
810b57cec5SDimitry Andric // Look through operations that obscure just looking at the low 16-bits of the
820b57cec5SDimitry Andric // same register.
stripExtractLoElt(SDValue In)830b57cec5SDimitry Andric static SDValue stripExtractLoElt(SDValue In) {
84fe6060f1SDimitry Andric if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
855f757f3fSDimitry Andric SDValue Idx = In.getOperand(1);
865f757f3fSDimitry Andric if (isNullConstant(Idx) && In.getValueSizeInBits() <= 32)
87fe6060f1SDimitry Andric return In.getOperand(0);
88fe6060f1SDimitry Andric }
89fe6060f1SDimitry Andric
900b57cec5SDimitry Andric if (In.getOpcode() == ISD::TRUNCATE) {
910b57cec5SDimitry Andric SDValue Src = In.getOperand(0);
920b57cec5SDimitry Andric if (Src.getValueType().getSizeInBits() == 32)
930b57cec5SDimitry Andric return stripBitcast(Src);
940b57cec5SDimitry Andric }
950b57cec5SDimitry Andric
960b57cec5SDimitry Andric return In;
970b57cec5SDimitry Andric }
980b57cec5SDimitry Andric
990b57cec5SDimitry Andric } // end anonymous namespace
1000b57cec5SDimitry Andric
1010fca6ea1SDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
1020fca6ea1SDimitry Andric "AMDGPU DAG->DAG Pattern Instruction Selection", false,
1030fca6ea1SDimitry Andric false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)1040b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
1050b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
10606c3fb27SDimitry Andric INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
1070b57cec5SDimitry Andric #ifdef EXPENSIVE_CHECKS
1080b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1090b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1100b57cec5SDimitry Andric #endif
1110fca6ea1SDimitry Andric INITIALIZE_PASS_END(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
1120fca6ea1SDimitry Andric "AMDGPU DAG->DAG Pattern Instruction Selection", false,
1130fca6ea1SDimitry Andric false)
1140b57cec5SDimitry Andric
1150b57cec5SDimitry Andric /// This pass converts a legalized DAG into a AMDGPU-specific
1160b57cec5SDimitry Andric // DAG, ready for instruction scheduling.
117bdd1243dSDimitry Andric FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
1185f757f3fSDimitry Andric CodeGenOptLevel OptLevel) {
1190fca6ea1SDimitry Andric return new AMDGPUDAGToDAGISelLegacy(TM, OptLevel);
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric
AMDGPUDAGToDAGISel(TargetMachine & TM,CodeGenOptLevel OptLevel)122bdd1243dSDimitry Andric AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM,
1235f757f3fSDimitry Andric CodeGenOptLevel OptLevel)
1240fca6ea1SDimitry Andric : SelectionDAGISel(TM, OptLevel) {
125349cc55cSDimitry Andric EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
1260b57cec5SDimitry Andric }
1270b57cec5SDimitry Andric
runOnMachineFunction(MachineFunction & MF)1280b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
1290b57cec5SDimitry Andric Subtarget = &MF.getSubtarget<GCNSubtarget>();
1300fca6ea1SDimitry Andric Subtarget->checkSubtargetFeatures(MF.getFunction());
1315f757f3fSDimitry Andric Mode = SIModeRegisterDefaults(MF.getFunction(), *Subtarget);
1320b57cec5SDimitry Andric return SelectionDAGISel::runOnMachineFunction(MF);
1330b57cec5SDimitry Andric }
1340b57cec5SDimitry Andric
fp16SrcZerosHighBits(unsigned Opc) const135fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
136fe6060f1SDimitry Andric // XXX - only need to list legal operations.
137fe6060f1SDimitry Andric switch (Opc) {
138fe6060f1SDimitry Andric case ISD::FADD:
139fe6060f1SDimitry Andric case ISD::FSUB:
140fe6060f1SDimitry Andric case ISD::FMUL:
141fe6060f1SDimitry Andric case ISD::FDIV:
142fe6060f1SDimitry Andric case ISD::FREM:
143fe6060f1SDimitry Andric case ISD::FCANONICALIZE:
144fe6060f1SDimitry Andric case ISD::UINT_TO_FP:
145fe6060f1SDimitry Andric case ISD::SINT_TO_FP:
146fe6060f1SDimitry Andric case ISD::FABS:
147fe6060f1SDimitry Andric // Fabs is lowered to a bit operation, but it's an and which will clear the
148fe6060f1SDimitry Andric // high bits anyway.
149fe6060f1SDimitry Andric case ISD::FSQRT:
150fe6060f1SDimitry Andric case ISD::FSIN:
151fe6060f1SDimitry Andric case ISD::FCOS:
152fe6060f1SDimitry Andric case ISD::FPOWI:
153fe6060f1SDimitry Andric case ISD::FPOW:
154fe6060f1SDimitry Andric case ISD::FLOG:
155fe6060f1SDimitry Andric case ISD::FLOG2:
156fe6060f1SDimitry Andric case ISD::FLOG10:
157fe6060f1SDimitry Andric case ISD::FEXP:
158fe6060f1SDimitry Andric case ISD::FEXP2:
159fe6060f1SDimitry Andric case ISD::FCEIL:
160fe6060f1SDimitry Andric case ISD::FTRUNC:
161fe6060f1SDimitry Andric case ISD::FRINT:
162fe6060f1SDimitry Andric case ISD::FNEARBYINT:
1635f757f3fSDimitry Andric case ISD::FROUNDEVEN:
164fe6060f1SDimitry Andric case ISD::FROUND:
165fe6060f1SDimitry Andric case ISD::FFLOOR:
166fe6060f1SDimitry Andric case ISD::FMINNUM:
167fe6060f1SDimitry Andric case ISD::FMAXNUM:
16806c3fb27SDimitry Andric case ISD::FLDEXP:
169fe6060f1SDimitry Andric case AMDGPUISD::FRACT:
170fe6060f1SDimitry Andric case AMDGPUISD::CLAMP:
171fe6060f1SDimitry Andric case AMDGPUISD::COS_HW:
172fe6060f1SDimitry Andric case AMDGPUISD::SIN_HW:
173fe6060f1SDimitry Andric case AMDGPUISD::FMIN3:
174fe6060f1SDimitry Andric case AMDGPUISD::FMAX3:
175fe6060f1SDimitry Andric case AMDGPUISD::FMED3:
176fe6060f1SDimitry Andric case AMDGPUISD::FMAD_FTZ:
177fe6060f1SDimitry Andric case AMDGPUISD::RCP:
178fe6060f1SDimitry Andric case AMDGPUISD::RSQ:
179fe6060f1SDimitry Andric case AMDGPUISD::RCP_IFLAG:
180fe6060f1SDimitry Andric // On gfx10, all 16-bit instructions preserve the high bits.
181fe6060f1SDimitry Andric return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
182fe6060f1SDimitry Andric case ISD::FP_ROUND:
183fe6060f1SDimitry Andric // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
184fe6060f1SDimitry Andric // high bits on gfx9.
185fe6060f1SDimitry Andric // TODO: If we had the source node we could see if the source was fma/mad
186fe6060f1SDimitry Andric return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
187fe6060f1SDimitry Andric case ISD::FMA:
188fe6060f1SDimitry Andric case ISD::FMAD:
189fe6060f1SDimitry Andric case AMDGPUISD::DIV_FIXUP:
190fe6060f1SDimitry Andric return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
191fe6060f1SDimitry Andric default:
192fe6060f1SDimitry Andric // fcopysign, select and others may be lowered to 32-bit bit operations
193fe6060f1SDimitry Andric // which don't zero the high bits.
194fe6060f1SDimitry Andric return false;
195fe6060f1SDimitry Andric }
196fe6060f1SDimitry Andric }
197fe6060f1SDimitry Andric
runOnMachineFunction(MachineFunction & MF)1980fca6ea1SDimitry Andric bool AMDGPUDAGToDAGISelLegacy::runOnMachineFunction(MachineFunction &MF) {
1990fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS
2000fca6ea1SDimitry Andric DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2010fca6ea1SDimitry Andric LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2020fca6ea1SDimitry Andric for (auto &L : LI->getLoopsInPreorder()) {
2030fca6ea1SDimitry Andric assert(L->isLCSSAForm(DT));
2040fca6ea1SDimitry Andric }
2050fca6ea1SDimitry Andric #endif
2060fca6ea1SDimitry Andric return SelectionDAGISelLegacy::runOnMachineFunction(MF);
2070fca6ea1SDimitry Andric }
2080fca6ea1SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const2090fca6ea1SDimitry Andric void AMDGPUDAGToDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
210349cc55cSDimitry Andric AU.addRequired<AMDGPUArgumentUsageInfo>();
21106c3fb27SDimitry Andric AU.addRequired<UniformityInfoWrapperPass>();
212349cc55cSDimitry Andric #ifdef EXPENSIVE_CHECKS
213349cc55cSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>();
214349cc55cSDimitry Andric AU.addRequired<LoopInfoWrapperPass>();
215349cc55cSDimitry Andric #endif
2160fca6ea1SDimitry Andric SelectionDAGISelLegacy::getAnalysisUsage(AU);
217349cc55cSDimitry Andric }
218349cc55cSDimitry Andric
matchLoadD16FromBuildVector(SDNode * N) const2190b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
2200b57cec5SDimitry Andric assert(Subtarget->d16PreservesUnusedBits());
2210b57cec5SDimitry Andric MVT VT = N->getValueType(0).getSimpleVT();
2220b57cec5SDimitry Andric if (VT != MVT::v2i16 && VT != MVT::v2f16)
2230b57cec5SDimitry Andric return false;
2240b57cec5SDimitry Andric
2250b57cec5SDimitry Andric SDValue Lo = N->getOperand(0);
2260b57cec5SDimitry Andric SDValue Hi = N->getOperand(1);
2270b57cec5SDimitry Andric
2280b57cec5SDimitry Andric LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
2290b57cec5SDimitry Andric
2300b57cec5SDimitry Andric // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
2310b57cec5SDimitry Andric // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
2320b57cec5SDimitry Andric // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
2330b57cec5SDimitry Andric
2340b57cec5SDimitry Andric // Need to check for possible indirect dependencies on the other half of the
2350b57cec5SDimitry Andric // vector to avoid introducing a cycle.
2360b57cec5SDimitry Andric if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
2370b57cec5SDimitry Andric SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
2380b57cec5SDimitry Andric
2390b57cec5SDimitry Andric SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
2400b57cec5SDimitry Andric SDValue Ops[] = {
2410b57cec5SDimitry Andric LdHi->getChain(), LdHi->getBasePtr(), TiedIn
2420b57cec5SDimitry Andric };
2430b57cec5SDimitry Andric
2440b57cec5SDimitry Andric unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
2450b57cec5SDimitry Andric if (LdHi->getMemoryVT() == MVT::i8) {
2460b57cec5SDimitry Andric LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
2470b57cec5SDimitry Andric AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
2480b57cec5SDimitry Andric } else {
2490b57cec5SDimitry Andric assert(LdHi->getMemoryVT() == MVT::i16);
2500b57cec5SDimitry Andric }
2510b57cec5SDimitry Andric
2520b57cec5SDimitry Andric SDValue NewLoadHi =
2530b57cec5SDimitry Andric CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
2540b57cec5SDimitry Andric Ops, LdHi->getMemoryVT(),
2550b57cec5SDimitry Andric LdHi->getMemOperand());
2560b57cec5SDimitry Andric
2570b57cec5SDimitry Andric CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
2580b57cec5SDimitry Andric CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
2590b57cec5SDimitry Andric return true;
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric
2620b57cec5SDimitry Andric // build_vector (load ptr), hi -> load_d16_lo ptr, hi
2630b57cec5SDimitry Andric // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
2640b57cec5SDimitry Andric // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
2650b57cec5SDimitry Andric LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
2660b57cec5SDimitry Andric if (LdLo && Lo.hasOneUse()) {
2670b57cec5SDimitry Andric SDValue TiedIn = getHi16Elt(Hi);
2680b57cec5SDimitry Andric if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
2690b57cec5SDimitry Andric return false;
2700b57cec5SDimitry Andric
2710b57cec5SDimitry Andric SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
2720b57cec5SDimitry Andric unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
2730b57cec5SDimitry Andric if (LdLo->getMemoryVT() == MVT::i8) {
2740b57cec5SDimitry Andric LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
2750b57cec5SDimitry Andric AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
2760b57cec5SDimitry Andric } else {
2770b57cec5SDimitry Andric assert(LdLo->getMemoryVT() == MVT::i16);
2780b57cec5SDimitry Andric }
2790b57cec5SDimitry Andric
2800b57cec5SDimitry Andric TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
2810b57cec5SDimitry Andric
2820b57cec5SDimitry Andric SDValue Ops[] = {
2830b57cec5SDimitry Andric LdLo->getChain(), LdLo->getBasePtr(), TiedIn
2840b57cec5SDimitry Andric };
2850b57cec5SDimitry Andric
2860b57cec5SDimitry Andric SDValue NewLoadLo =
2870b57cec5SDimitry Andric CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
2880b57cec5SDimitry Andric Ops, LdLo->getMemoryVT(),
2890b57cec5SDimitry Andric LdLo->getMemOperand());
2900b57cec5SDimitry Andric
2910b57cec5SDimitry Andric CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
2920b57cec5SDimitry Andric CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
2930b57cec5SDimitry Andric return true;
2940b57cec5SDimitry Andric }
2950b57cec5SDimitry Andric
2960b57cec5SDimitry Andric return false;
2970b57cec5SDimitry Andric }
2980b57cec5SDimitry Andric
PreprocessISelDAG()2990b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
3000b57cec5SDimitry Andric if (!Subtarget->d16PreservesUnusedBits())
3010b57cec5SDimitry Andric return;
3020b57cec5SDimitry Andric
3030b57cec5SDimitry Andric SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3040b57cec5SDimitry Andric
3050b57cec5SDimitry Andric bool MadeChange = false;
3060b57cec5SDimitry Andric while (Position != CurDAG->allnodes_begin()) {
3070b57cec5SDimitry Andric SDNode *N = &*--Position;
3080b57cec5SDimitry Andric if (N->use_empty())
3090b57cec5SDimitry Andric continue;
3100b57cec5SDimitry Andric
3110b57cec5SDimitry Andric switch (N->getOpcode()) {
3120b57cec5SDimitry Andric case ISD::BUILD_VECTOR:
3131db9f3b2SDimitry Andric // TODO: Match load d16 from shl (extload:i16), 16
3140b57cec5SDimitry Andric MadeChange |= matchLoadD16FromBuildVector(N);
3150b57cec5SDimitry Andric break;
3160b57cec5SDimitry Andric default:
3170b57cec5SDimitry Andric break;
3180b57cec5SDimitry Andric }
3190b57cec5SDimitry Andric }
3200b57cec5SDimitry Andric
3210b57cec5SDimitry Andric if (MadeChange) {
3220b57cec5SDimitry Andric CurDAG->RemoveDeadNodes();
3230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "After PreProcess:\n";
3240b57cec5SDimitry Andric CurDAG->dump(););
3250b57cec5SDimitry Andric }
3260b57cec5SDimitry Andric }
3270b57cec5SDimitry Andric
isInlineImmediate(const SDNode * N) const3281db9f3b2SDimitry Andric bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
3290b57cec5SDimitry Andric if (N->isUndef())
3300b57cec5SDimitry Andric return true;
3310b57cec5SDimitry Andric
3320b57cec5SDimitry Andric const SIInstrInfo *TII = Subtarget->getInstrInfo();
3330b57cec5SDimitry Andric if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
3340b57cec5SDimitry Andric return TII->isInlineConstant(C->getAPIntValue());
3350b57cec5SDimitry Andric
3360b57cec5SDimitry Andric if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
3370fca6ea1SDimitry Andric return TII->isInlineConstant(C->getValueAPF());
3380b57cec5SDimitry Andric
3390b57cec5SDimitry Andric return false;
3400b57cec5SDimitry Andric }
3410b57cec5SDimitry Andric
3420b57cec5SDimitry Andric /// Determine the register class for \p OpNo
3430b57cec5SDimitry Andric /// \returns The register class of the virtual register that will be used for
3440b57cec5SDimitry Andric /// the given operand number \OpNo or NULL if the register class cannot be
3450b57cec5SDimitry Andric /// determined.
getOperandRegClass(SDNode * N,unsigned OpNo) const3460b57cec5SDimitry Andric const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
3470b57cec5SDimitry Andric unsigned OpNo) const {
3480b57cec5SDimitry Andric if (!N->isMachineOpcode()) {
3490b57cec5SDimitry Andric if (N->getOpcode() == ISD::CopyToReg) {
350e8d8bef9SDimitry Andric Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
351e8d8bef9SDimitry Andric if (Reg.isVirtual()) {
3520b57cec5SDimitry Andric MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
3530b57cec5SDimitry Andric return MRI.getRegClass(Reg);
3540b57cec5SDimitry Andric }
3550b57cec5SDimitry Andric
3560b57cec5SDimitry Andric const SIRegisterInfo *TRI
3570b57cec5SDimitry Andric = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
358bdd1243dSDimitry Andric return TRI->getPhysRegBaseClass(Reg);
3590b57cec5SDimitry Andric }
3600b57cec5SDimitry Andric
3610b57cec5SDimitry Andric return nullptr;
3620b57cec5SDimitry Andric }
3630b57cec5SDimitry Andric
3640b57cec5SDimitry Andric switch (N->getMachineOpcode()) {
3650b57cec5SDimitry Andric default: {
3660b57cec5SDimitry Andric const MCInstrDesc &Desc =
3670b57cec5SDimitry Andric Subtarget->getInstrInfo()->get(N->getMachineOpcode());
3680b57cec5SDimitry Andric unsigned OpIdx = Desc.getNumDefs() + OpNo;
3690b57cec5SDimitry Andric if (OpIdx >= Desc.getNumOperands())
3700b57cec5SDimitry Andric return nullptr;
371bdd1243dSDimitry Andric int RegClass = Desc.operands()[OpIdx].RegClass;
3720b57cec5SDimitry Andric if (RegClass == -1)
3730b57cec5SDimitry Andric return nullptr;
3740b57cec5SDimitry Andric
3750b57cec5SDimitry Andric return Subtarget->getRegisterInfo()->getRegClass(RegClass);
3760b57cec5SDimitry Andric }
3770b57cec5SDimitry Andric case AMDGPU::REG_SEQUENCE: {
378647cbc5dSDimitry Andric unsigned RCID = N->getConstantOperandVal(0);
3790b57cec5SDimitry Andric const TargetRegisterClass *SuperRC =
3800b57cec5SDimitry Andric Subtarget->getRegisterInfo()->getRegClass(RCID);
3810b57cec5SDimitry Andric
3820b57cec5SDimitry Andric SDValue SubRegOp = N->getOperand(OpNo + 1);
3831db9f3b2SDimitry Andric unsigned SubRegIdx = SubRegOp->getAsZExtVal();
3840b57cec5SDimitry Andric return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
3850b57cec5SDimitry Andric SubRegIdx);
3860b57cec5SDimitry Andric }
3870b57cec5SDimitry Andric }
3880b57cec5SDimitry Andric }
3890b57cec5SDimitry Andric
glueCopyToOp(SDNode * N,SDValue NewChain,SDValue Glue) const3908bcb0991SDimitry Andric SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
3918bcb0991SDimitry Andric SDValue Glue) const {
3928bcb0991SDimitry Andric SmallVector <SDValue, 8> Ops;
3938bcb0991SDimitry Andric Ops.push_back(NewChain); // Replace the chain.
3948bcb0991SDimitry Andric for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
3958bcb0991SDimitry Andric Ops.push_back(N->getOperand(i));
3968bcb0991SDimitry Andric
3978bcb0991SDimitry Andric Ops.push_back(Glue);
3988bcb0991SDimitry Andric return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
3998bcb0991SDimitry Andric }
4008bcb0991SDimitry Andric
glueCopyToM0(SDNode * N,SDValue Val) const4010b57cec5SDimitry Andric SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
4020b57cec5SDimitry Andric const SITargetLowering& Lowering =
4030b57cec5SDimitry Andric *static_cast<const SITargetLowering*>(getTargetLowering());
4040b57cec5SDimitry Andric
4050b57cec5SDimitry Andric assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
4060b57cec5SDimitry Andric
4078bcb0991SDimitry Andric SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
4088bcb0991SDimitry Andric return glueCopyToOp(N, M0, M0.getValue(1));
4090b57cec5SDimitry Andric }
4100b57cec5SDimitry Andric
glueCopyToM0LDSInit(SDNode * N) const4110b57cec5SDimitry Andric SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
4120b57cec5SDimitry Andric unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
4130b57cec5SDimitry Andric if (AS == AMDGPUAS::LOCAL_ADDRESS) {
4140b57cec5SDimitry Andric if (Subtarget->ldsRequiresM0Init())
4150b57cec5SDimitry Andric return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
4160b57cec5SDimitry Andric } else if (AS == AMDGPUAS::REGION_ADDRESS) {
4170b57cec5SDimitry Andric MachineFunction &MF = CurDAG->getMachineFunction();
4180b57cec5SDimitry Andric unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
4190b57cec5SDimitry Andric return
4200b57cec5SDimitry Andric glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
4210b57cec5SDimitry Andric }
4220b57cec5SDimitry Andric return N;
4230b57cec5SDimitry Andric }
4240b57cec5SDimitry Andric
buildSMovImm64(SDLoc & DL,uint64_t Imm,EVT VT) const4250b57cec5SDimitry Andric MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
4260b57cec5SDimitry Andric EVT VT) const {
4270b57cec5SDimitry Andric SDNode *Lo = CurDAG->getMachineNode(
4280b57cec5SDimitry Andric AMDGPU::S_MOV_B32, DL, MVT::i32,
4290b57cec5SDimitry Andric CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
4300b57cec5SDimitry Andric SDNode *Hi =
4310b57cec5SDimitry Andric CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
4320b57cec5SDimitry Andric CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
4330b57cec5SDimitry Andric const SDValue Ops[] = {
4340b57cec5SDimitry Andric CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
4350b57cec5SDimitry Andric SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
4360b57cec5SDimitry Andric SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
4370b57cec5SDimitry Andric
4380b57cec5SDimitry Andric return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
4390b57cec5SDimitry Andric }
4400b57cec5SDimitry Andric
SelectBuildVector(SDNode * N,unsigned RegClassID)4410b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
4420b57cec5SDimitry Andric EVT VT = N->getValueType(0);
4430b57cec5SDimitry Andric unsigned NumVectorElts = VT.getVectorNumElements();
4440b57cec5SDimitry Andric EVT EltVT = VT.getVectorElementType();
4450b57cec5SDimitry Andric SDLoc DL(N);
4460b57cec5SDimitry Andric SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
4470b57cec5SDimitry Andric
4480b57cec5SDimitry Andric if (NumVectorElts == 1) {
4490b57cec5SDimitry Andric CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
4500b57cec5SDimitry Andric RegClass);
4510b57cec5SDimitry Andric return;
4520b57cec5SDimitry Andric }
4530b57cec5SDimitry Andric
4540b57cec5SDimitry Andric assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
4550b57cec5SDimitry Andric "supported yet");
4560b57cec5SDimitry Andric // 32 = Max Num Vector Elements
4570b57cec5SDimitry Andric // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
4580b57cec5SDimitry Andric // 1 = Vector Register Class
4590b57cec5SDimitry Andric SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
4600b57cec5SDimitry Andric
4615ffd83dbSDimitry Andric bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
4625ffd83dbSDimitry Andric Triple::amdgcn;
4630b57cec5SDimitry Andric RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
4640b57cec5SDimitry Andric bool IsRegSeq = true;
4650b57cec5SDimitry Andric unsigned NOps = N->getNumOperands();
4660b57cec5SDimitry Andric for (unsigned i = 0; i < NOps; i++) {
4670b57cec5SDimitry Andric // XXX: Why is this here?
4680b57cec5SDimitry Andric if (isa<RegisterSDNode>(N->getOperand(i))) {
4690b57cec5SDimitry Andric IsRegSeq = false;
4700b57cec5SDimitry Andric break;
4710b57cec5SDimitry Andric }
4725ffd83dbSDimitry Andric unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
4735ffd83dbSDimitry Andric : R600RegisterInfo::getSubRegFromChannel(i);
4740b57cec5SDimitry Andric RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
4750b57cec5SDimitry Andric RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
4760b57cec5SDimitry Andric }
4770b57cec5SDimitry Andric if (NOps != NumVectorElts) {
4780b57cec5SDimitry Andric // Fill in the missing undef elements if this was a scalar_to_vector.
4790b57cec5SDimitry Andric assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
4800b57cec5SDimitry Andric MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
4810b57cec5SDimitry Andric DL, EltVT);
4820b57cec5SDimitry Andric for (unsigned i = NOps; i < NumVectorElts; ++i) {
4835ffd83dbSDimitry Andric unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
4845ffd83dbSDimitry Andric : R600RegisterInfo::getSubRegFromChannel(i);
4850b57cec5SDimitry Andric RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
4860b57cec5SDimitry Andric RegSeqArgs[1 + (2 * i) + 1] =
4870b57cec5SDimitry Andric CurDAG->getTargetConstant(Sub, DL, MVT::i32);
4880b57cec5SDimitry Andric }
4890b57cec5SDimitry Andric }
4900b57cec5SDimitry Andric
4910b57cec5SDimitry Andric if (!IsRegSeq)
4920b57cec5SDimitry Andric SelectCode(N);
4930b57cec5SDimitry Andric CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
4940b57cec5SDimitry Andric }
4950b57cec5SDimitry Andric
Select(SDNode * N)4960b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::Select(SDNode *N) {
4970b57cec5SDimitry Andric unsigned int Opc = N->getOpcode();
4980b57cec5SDimitry Andric if (N->isMachineOpcode()) {
4990b57cec5SDimitry Andric N->setNodeId(-1);
5000b57cec5SDimitry Andric return; // Already selected.
5010b57cec5SDimitry Andric }
5020b57cec5SDimitry Andric
5038bcb0991SDimitry Andric // isa<MemSDNode> almost works but is slightly too permissive for some DS
5048bcb0991SDimitry Andric // intrinsics.
5050fca6ea1SDimitry Andric if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N)) {
5060b57cec5SDimitry Andric N = glueCopyToM0LDSInit(N);
5078bcb0991SDimitry Andric SelectCode(N);
5088bcb0991SDimitry Andric return;
5098bcb0991SDimitry Andric }
5100b57cec5SDimitry Andric
5110b57cec5SDimitry Andric switch (Opc) {
5120b57cec5SDimitry Andric default:
5130b57cec5SDimitry Andric break;
5140b57cec5SDimitry Andric // We are selecting i64 ADD here instead of custom lower it during
5150b57cec5SDimitry Andric // DAG legalization, so we can fold some i64 ADDs used for address
5160b57cec5SDimitry Andric // calculation into the LOAD and STORE instructions.
5170b57cec5SDimitry Andric case ISD::ADDC:
5180b57cec5SDimitry Andric case ISD::ADDE:
5190b57cec5SDimitry Andric case ISD::SUBC:
5200b57cec5SDimitry Andric case ISD::SUBE: {
5210b57cec5SDimitry Andric if (N->getValueType(0) != MVT::i64)
5220b57cec5SDimitry Andric break;
5230b57cec5SDimitry Andric
5240b57cec5SDimitry Andric SelectADD_SUB_I64(N);
5250b57cec5SDimitry Andric return;
5260b57cec5SDimitry Andric }
52706c3fb27SDimitry Andric case ISD::UADDO_CARRY:
52806c3fb27SDimitry Andric case ISD::USUBO_CARRY:
5290b57cec5SDimitry Andric if (N->getValueType(0) != MVT::i32)
5300b57cec5SDimitry Andric break;
5310b57cec5SDimitry Andric
5320b57cec5SDimitry Andric SelectAddcSubb(N);
5330b57cec5SDimitry Andric return;
5340b57cec5SDimitry Andric case ISD::UADDO:
5350b57cec5SDimitry Andric case ISD::USUBO: {
5360b57cec5SDimitry Andric SelectUADDO_USUBO(N);
5370b57cec5SDimitry Andric return;
5380b57cec5SDimitry Andric }
5390b57cec5SDimitry Andric case AMDGPUISD::FMUL_W_CHAIN: {
5400b57cec5SDimitry Andric SelectFMUL_W_CHAIN(N);
5410b57cec5SDimitry Andric return;
5420b57cec5SDimitry Andric }
5430b57cec5SDimitry Andric case AMDGPUISD::FMA_W_CHAIN: {
5440b57cec5SDimitry Andric SelectFMA_W_CHAIN(N);
5450b57cec5SDimitry Andric return;
5460b57cec5SDimitry Andric }
5470b57cec5SDimitry Andric
5480b57cec5SDimitry Andric case ISD::SCALAR_TO_VECTOR:
5490b57cec5SDimitry Andric case ISD::BUILD_VECTOR: {
5500b57cec5SDimitry Andric EVT VT = N->getValueType(0);
5510b57cec5SDimitry Andric unsigned NumVectorElts = VT.getVectorNumElements();
5520b57cec5SDimitry Andric if (VT.getScalarSizeInBits() == 16) {
5530b57cec5SDimitry Andric if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
5540b57cec5SDimitry Andric if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
5550b57cec5SDimitry Andric ReplaceNode(N, Packed);
5560b57cec5SDimitry Andric return;
5570b57cec5SDimitry Andric }
5580b57cec5SDimitry Andric }
5590b57cec5SDimitry Andric
5600b57cec5SDimitry Andric break;
5610b57cec5SDimitry Andric }
5620b57cec5SDimitry Andric
5630b57cec5SDimitry Andric assert(VT.getVectorElementType().bitsEq(MVT::i32));
5645ffd83dbSDimitry Andric unsigned RegClassID =
5655ffd83dbSDimitry Andric SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
5660b57cec5SDimitry Andric SelectBuildVector(N, RegClassID);
5670b57cec5SDimitry Andric return;
5680b57cec5SDimitry Andric }
5690b57cec5SDimitry Andric case ISD::BUILD_PAIR: {
5700b57cec5SDimitry Andric SDValue RC, SubReg0, SubReg1;
5710b57cec5SDimitry Andric SDLoc DL(N);
5720b57cec5SDimitry Andric if (N->getValueType(0) == MVT::i128) {
5738bcb0991SDimitry Andric RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
5740b57cec5SDimitry Andric SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
5750b57cec5SDimitry Andric SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
5760b57cec5SDimitry Andric } else if (N->getValueType(0) == MVT::i64) {
5770b57cec5SDimitry Andric RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
5780b57cec5SDimitry Andric SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
5790b57cec5SDimitry Andric SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
5800b57cec5SDimitry Andric } else {
5810b57cec5SDimitry Andric llvm_unreachable("Unhandled value type for BUILD_PAIR");
5820b57cec5SDimitry Andric }
5830b57cec5SDimitry Andric const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
5840b57cec5SDimitry Andric N->getOperand(1), SubReg1 };
5850b57cec5SDimitry Andric ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
5860b57cec5SDimitry Andric N->getValueType(0), Ops));
5870b57cec5SDimitry Andric return;
5880b57cec5SDimitry Andric }
5890b57cec5SDimitry Andric
5900b57cec5SDimitry Andric case ISD::Constant:
5910b57cec5SDimitry Andric case ISD::ConstantFP: {
5920b57cec5SDimitry Andric if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
5930b57cec5SDimitry Andric break;
5940b57cec5SDimitry Andric
5950b57cec5SDimitry Andric uint64_t Imm;
5965f757f3fSDimitry Andric if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) {
5970b57cec5SDimitry Andric Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
5985f757f3fSDimitry Andric if (AMDGPU::isValid32BitLiteral(Imm, true))
5995f757f3fSDimitry Andric break;
6005f757f3fSDimitry Andric } else {
6010b57cec5SDimitry Andric ConstantSDNode *C = cast<ConstantSDNode>(N);
6020b57cec5SDimitry Andric Imm = C->getZExtValue();
6035f757f3fSDimitry Andric if (AMDGPU::isValid32BitLiteral(Imm, false))
6045f757f3fSDimitry Andric break;
6050b57cec5SDimitry Andric }
6060b57cec5SDimitry Andric
6070b57cec5SDimitry Andric SDLoc DL(N);
6080b57cec5SDimitry Andric ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
6090b57cec5SDimitry Andric return;
6100b57cec5SDimitry Andric }
6110b57cec5SDimitry Andric case AMDGPUISD::BFE_I32:
6120b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: {
6130b57cec5SDimitry Andric // There is a scalar version available, but unlike the vector version which
6140b57cec5SDimitry Andric // has a separate operand for the offset and width, the scalar version packs
6150b57cec5SDimitry Andric // the width and offset into a single operand. Try to move to the scalar
6160b57cec5SDimitry Andric // version if the offsets are constant, so that we can try to keep extended
6170b57cec5SDimitry Andric // loads of kernel arguments in SGPRs.
6180b57cec5SDimitry Andric
6190b57cec5SDimitry Andric // TODO: Technically we could try to pattern match scalar bitshifts of
6200b57cec5SDimitry Andric // dynamic values, but it's probably not useful.
6210b57cec5SDimitry Andric ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
6220b57cec5SDimitry Andric if (!Offset)
6230b57cec5SDimitry Andric break;
6240b57cec5SDimitry Andric
6250b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
6260b57cec5SDimitry Andric if (!Width)
6270b57cec5SDimitry Andric break;
6280b57cec5SDimitry Andric
6290b57cec5SDimitry Andric bool Signed = Opc == AMDGPUISD::BFE_I32;
6300b57cec5SDimitry Andric
6310b57cec5SDimitry Andric uint32_t OffsetVal = Offset->getZExtValue();
6320b57cec5SDimitry Andric uint32_t WidthVal = Width->getZExtValue();
6330b57cec5SDimitry Andric
634349cc55cSDimitry Andric ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
635349cc55cSDimitry Andric WidthVal));
6360b57cec5SDimitry Andric return;
6370b57cec5SDimitry Andric }
6380b57cec5SDimitry Andric case AMDGPUISD::DIV_SCALE: {
6390b57cec5SDimitry Andric SelectDIV_SCALE(N);
6400b57cec5SDimitry Andric return;
6410b57cec5SDimitry Andric }
6420b57cec5SDimitry Andric case AMDGPUISD::MAD_I64_I32:
6430b57cec5SDimitry Andric case AMDGPUISD::MAD_U64_U32: {
6440b57cec5SDimitry Andric SelectMAD_64_32(N);
6450b57cec5SDimitry Andric return;
6460b57cec5SDimitry Andric }
6474824e7fdSDimitry Andric case ISD::SMUL_LOHI:
6484824e7fdSDimitry Andric case ISD::UMUL_LOHI:
6494824e7fdSDimitry Andric return SelectMUL_LOHI(N);
6500b57cec5SDimitry Andric case ISD::CopyToReg: {
6510b57cec5SDimitry Andric const SITargetLowering& Lowering =
6520b57cec5SDimitry Andric *static_cast<const SITargetLowering*>(getTargetLowering());
6530b57cec5SDimitry Andric N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
6540b57cec5SDimitry Andric break;
6550b57cec5SDimitry Andric }
6560b57cec5SDimitry Andric case ISD::AND:
6570b57cec5SDimitry Andric case ISD::SRL:
6580b57cec5SDimitry Andric case ISD::SRA:
6590b57cec5SDimitry Andric case ISD::SIGN_EXTEND_INREG:
6600b57cec5SDimitry Andric if (N->getValueType(0) != MVT::i32)
6610b57cec5SDimitry Andric break;
6620b57cec5SDimitry Andric
6630b57cec5SDimitry Andric SelectS_BFE(N);
6640b57cec5SDimitry Andric return;
6650b57cec5SDimitry Andric case ISD::BRCOND:
6660b57cec5SDimitry Andric SelectBRCOND(N);
6670b57cec5SDimitry Andric return;
6685f757f3fSDimitry Andric case ISD::FP_EXTEND:
6695f757f3fSDimitry Andric SelectFP_EXTEND(N);
6705f757f3fSDimitry Andric return;
6710b57cec5SDimitry Andric case AMDGPUISD::CVT_PKRTZ_F16_F32:
6720b57cec5SDimitry Andric case AMDGPUISD::CVT_PKNORM_I16_F32:
6730b57cec5SDimitry Andric case AMDGPUISD::CVT_PKNORM_U16_F32:
6740b57cec5SDimitry Andric case AMDGPUISD::CVT_PK_U16_U32:
6750b57cec5SDimitry Andric case AMDGPUISD::CVT_PK_I16_I32: {
6760b57cec5SDimitry Andric // Hack around using a legal type if f16 is illegal.
6770b57cec5SDimitry Andric if (N->getValueType(0) == MVT::i32) {
6780b57cec5SDimitry Andric MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
6790b57cec5SDimitry Andric N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
6800b57cec5SDimitry Andric { N->getOperand(0), N->getOperand(1) });
6810b57cec5SDimitry Andric SelectCode(N);
6820b57cec5SDimitry Andric return;
6830b57cec5SDimitry Andric }
6840b57cec5SDimitry Andric
6850b57cec5SDimitry Andric break;
6860b57cec5SDimitry Andric }
6870b57cec5SDimitry Andric case ISD::INTRINSIC_W_CHAIN: {
6880b57cec5SDimitry Andric SelectINTRINSIC_W_CHAIN(N);
6890b57cec5SDimitry Andric return;
6900b57cec5SDimitry Andric }
6918bcb0991SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: {
6928bcb0991SDimitry Andric SelectINTRINSIC_WO_CHAIN(N);
6938bcb0991SDimitry Andric return;
6948bcb0991SDimitry Andric }
6950b57cec5SDimitry Andric case ISD::INTRINSIC_VOID: {
6960b57cec5SDimitry Andric SelectINTRINSIC_VOID(N);
6970b57cec5SDimitry Andric return;
6980b57cec5SDimitry Andric }
6995f757f3fSDimitry Andric case AMDGPUISD::WAVE_ADDRESS: {
7005f757f3fSDimitry Andric SelectWAVE_ADDRESS(N);
7015f757f3fSDimitry Andric return;
7025f757f3fSDimitry Andric }
7035f757f3fSDimitry Andric case ISD::STACKRESTORE: {
7045f757f3fSDimitry Andric SelectSTACKRESTORE(N);
7055f757f3fSDimitry Andric return;
7065f757f3fSDimitry Andric }
7070b57cec5SDimitry Andric }
7080b57cec5SDimitry Andric
7090b57cec5SDimitry Andric SelectCode(N);
7100b57cec5SDimitry Andric }
7110b57cec5SDimitry Andric
isUniformBr(const SDNode * N) const7120b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
7130b57cec5SDimitry Andric const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
7140b57cec5SDimitry Andric const Instruction *Term = BB->getTerminator();
7150b57cec5SDimitry Andric return Term->getMetadata("amdgpu.uniform") ||
7160b57cec5SDimitry Andric Term->getMetadata("structurizecfg.uniform");
7170b57cec5SDimitry Andric }
7180b57cec5SDimitry Andric
isUnneededShiftMask(const SDNode * N,unsigned ShAmtBits) const7194824e7fdSDimitry Andric bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
7204824e7fdSDimitry Andric unsigned ShAmtBits) const {
7214824e7fdSDimitry Andric assert(N->getOpcode() == ISD::AND);
7224824e7fdSDimitry Andric
723647cbc5dSDimitry Andric const APInt &RHS = N->getConstantOperandAPInt(1);
72406c3fb27SDimitry Andric if (RHS.countr_one() >= ShAmtBits)
7254824e7fdSDimitry Andric return true;
7264824e7fdSDimitry Andric
7274824e7fdSDimitry Andric const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
72806c3fb27SDimitry Andric return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
7294824e7fdSDimitry Andric }
7304824e7fdSDimitry Andric
getBaseWithOffsetUsingSplitOR(SelectionDAG & DAG,SDValue Addr,SDValue & N0,SDValue & N1)731e8d8bef9SDimitry Andric static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
732e8d8bef9SDimitry Andric SDValue &N0, SDValue &N1) {
733e8d8bef9SDimitry Andric if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
734e8d8bef9SDimitry Andric Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
735e8d8bef9SDimitry Andric // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
736e8d8bef9SDimitry Andric // (i64 (bitcast (v2i32 (build_vector
737e8d8bef9SDimitry Andric // (or (extract_vector_elt V, 0), OFFSET),
738e8d8bef9SDimitry Andric // (extract_vector_elt V, 1)))))
739e8d8bef9SDimitry Andric SDValue Lo = Addr.getOperand(0).getOperand(0);
740e8d8bef9SDimitry Andric if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
741e8d8bef9SDimitry Andric SDValue BaseLo = Lo.getOperand(0);
742e8d8bef9SDimitry Andric SDValue BaseHi = Addr.getOperand(0).getOperand(1);
743e8d8bef9SDimitry Andric // Check that split base (Lo and Hi) are extracted from the same one.
744e8d8bef9SDimitry Andric if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
745e8d8bef9SDimitry Andric BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
746e8d8bef9SDimitry Andric BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
747e8d8bef9SDimitry Andric // Lo is statically extracted from index 0.
748e8d8bef9SDimitry Andric isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
749e8d8bef9SDimitry Andric BaseLo.getConstantOperandVal(1) == 0 &&
750e8d8bef9SDimitry Andric // Hi is statically extracted from index 0.
751e8d8bef9SDimitry Andric isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
752e8d8bef9SDimitry Andric BaseHi.getConstantOperandVal(1) == 1) {
753e8d8bef9SDimitry Andric N0 = BaseLo.getOperand(0).getOperand(0);
754e8d8bef9SDimitry Andric N1 = Lo.getOperand(1);
755e8d8bef9SDimitry Andric return true;
756e8d8bef9SDimitry Andric }
757e8d8bef9SDimitry Andric }
758e8d8bef9SDimitry Andric }
759e8d8bef9SDimitry Andric return false;
760e8d8bef9SDimitry Andric }
761e8d8bef9SDimitry Andric
isBaseWithConstantOffset64(SDValue Addr,SDValue & LHS,SDValue & RHS) const762e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
763e8d8bef9SDimitry Andric SDValue &RHS) const {
764e8d8bef9SDimitry Andric if (CurDAG->isBaseWithConstantOffset(Addr)) {
765e8d8bef9SDimitry Andric LHS = Addr.getOperand(0);
766e8d8bef9SDimitry Andric RHS = Addr.getOperand(1);
767e8d8bef9SDimitry Andric return true;
768e8d8bef9SDimitry Andric }
769e8d8bef9SDimitry Andric
770e8d8bef9SDimitry Andric if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
771e8d8bef9SDimitry Andric assert(LHS && RHS && isa<ConstantSDNode>(RHS));
772e8d8bef9SDimitry Andric return true;
773e8d8bef9SDimitry Andric }
774e8d8bef9SDimitry Andric
775e8d8bef9SDimitry Andric return false;
776e8d8bef9SDimitry Andric }
777e8d8bef9SDimitry Andric
getPassName() const7780fca6ea1SDimitry Andric StringRef AMDGPUDAGToDAGISelLegacy::getPassName() const {
7790b57cec5SDimitry Andric return "AMDGPU DAG->DAG Pattern Instruction Selection";
7800b57cec5SDimitry Andric }
7810b57cec5SDimitry Andric
AMDGPUISelDAGToDAGPass(TargetMachine & TM)7820fca6ea1SDimitry Andric AMDGPUISelDAGToDAGPass::AMDGPUISelDAGToDAGPass(TargetMachine &TM)
7830fca6ea1SDimitry Andric : SelectionDAGISelPass(
7840fca6ea1SDimitry Andric std::make_unique<AMDGPUDAGToDAGISel>(TM, TM.getOptLevel())) {}
7850fca6ea1SDimitry Andric
7860fca6ea1SDimitry Andric PreservedAnalyses
run(MachineFunction & MF,MachineFunctionAnalysisManager & MFAM)7870fca6ea1SDimitry Andric AMDGPUISelDAGToDAGPass::run(MachineFunction &MF,
7880fca6ea1SDimitry Andric MachineFunctionAnalysisManager &MFAM) {
7890fca6ea1SDimitry Andric #ifdef EXPENSIVE_CHECKS
7900fca6ea1SDimitry Andric auto &FAM = MFAM.getResult<FunctionAnalysisManagerMachineFunctionProxy>(MF)
7910fca6ea1SDimitry Andric .getManager();
7920fca6ea1SDimitry Andric auto &F = MF.getFunction();
7930fca6ea1SDimitry Andric DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
7940fca6ea1SDimitry Andric LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
7950fca6ea1SDimitry Andric for (auto &L : LI.getLoopsInPreorder())
7960fca6ea1SDimitry Andric assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
7970fca6ea1SDimitry Andric #endif
7980fca6ea1SDimitry Andric return SelectionDAGISelPass::run(MF, MFAM);
7990fca6ea1SDimitry Andric }
8000fca6ea1SDimitry Andric
8010b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8020b57cec5SDimitry Andric // Complex Patterns
8030b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8040b57cec5SDimitry Andric
SelectADDRVTX_READ(SDValue Addr,SDValue & Base,SDValue & Offset)8050b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
8060b57cec5SDimitry Andric SDValue &Offset) {
8070b57cec5SDimitry Andric return false;
8080b57cec5SDimitry Andric }
8090b57cec5SDimitry Andric
SelectADDRIndirect(SDValue Addr,SDValue & Base,SDValue & Offset)8100b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
8110b57cec5SDimitry Andric SDValue &Offset) {
8120b57cec5SDimitry Andric ConstantSDNode *C;
8130b57cec5SDimitry Andric SDLoc DL(Addr);
8140b57cec5SDimitry Andric
8150b57cec5SDimitry Andric if ((C = dyn_cast<ConstantSDNode>(Addr))) {
8160b57cec5SDimitry Andric Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
8170b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
8180b57cec5SDimitry Andric } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
8190b57cec5SDimitry Andric (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
8200b57cec5SDimitry Andric Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
8210b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
8220b57cec5SDimitry Andric } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
8230b57cec5SDimitry Andric (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
8240b57cec5SDimitry Andric Base = Addr.getOperand(0);
8250b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
8260b57cec5SDimitry Andric } else {
8270b57cec5SDimitry Andric Base = Addr;
8280b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
8290b57cec5SDimitry Andric }
8300b57cec5SDimitry Andric
8310b57cec5SDimitry Andric return true;
8320b57cec5SDimitry Andric }
8330b57cec5SDimitry Andric
getMaterializedScalarImm32(int64_t Val,const SDLoc & DL) const8348bcb0991SDimitry Andric SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
8358bcb0991SDimitry Andric const SDLoc &DL) const {
8368bcb0991SDimitry Andric SDNode *Mov = CurDAG->getMachineNode(
8378bcb0991SDimitry Andric AMDGPU::S_MOV_B32, DL, MVT::i32,
8388bcb0991SDimitry Andric CurDAG->getTargetConstant(Val, DL, MVT::i32));
8398bcb0991SDimitry Andric return SDValue(Mov, 0);
8408bcb0991SDimitry Andric }
8418bcb0991SDimitry Andric
84206c3fb27SDimitry Andric // FIXME: Should only handle uaddo_carry/usubo_carry
SelectADD_SUB_I64(SDNode * N)8430b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
8440b57cec5SDimitry Andric SDLoc DL(N);
8450b57cec5SDimitry Andric SDValue LHS = N->getOperand(0);
8460b57cec5SDimitry Andric SDValue RHS = N->getOperand(1);
8470b57cec5SDimitry Andric
8480b57cec5SDimitry Andric unsigned Opcode = N->getOpcode();
8490b57cec5SDimitry Andric bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
8500b57cec5SDimitry Andric bool ProduceCarry =
8510b57cec5SDimitry Andric ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
8520b57cec5SDimitry Andric bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
8530b57cec5SDimitry Andric
8540b57cec5SDimitry Andric SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
8550b57cec5SDimitry Andric SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
8560b57cec5SDimitry Andric
8570b57cec5SDimitry Andric SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
8580b57cec5SDimitry Andric DL, MVT::i32, LHS, Sub0);
8590b57cec5SDimitry Andric SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
8600b57cec5SDimitry Andric DL, MVT::i32, LHS, Sub1);
8610b57cec5SDimitry Andric
8620b57cec5SDimitry Andric SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
8630b57cec5SDimitry Andric DL, MVT::i32, RHS, Sub0);
8640b57cec5SDimitry Andric SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
8650b57cec5SDimitry Andric DL, MVT::i32, RHS, Sub1);
8660b57cec5SDimitry Andric
8670b57cec5SDimitry Andric SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
8680b57cec5SDimitry Andric
8695ffd83dbSDimitry Andric static const unsigned OpcMap[2][2][2] = {
8705ffd83dbSDimitry Andric {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
871e8d8bef9SDimitry Andric {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
8725ffd83dbSDimitry Andric {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
8735ffd83dbSDimitry Andric {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
8745ffd83dbSDimitry Andric
8755ffd83dbSDimitry Andric unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
8765ffd83dbSDimitry Andric unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
8770b57cec5SDimitry Andric
8780b57cec5SDimitry Andric SDNode *AddLo;
8790b57cec5SDimitry Andric if (!ConsumeCarry) {
8800b57cec5SDimitry Andric SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
8810b57cec5SDimitry Andric AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
8820b57cec5SDimitry Andric } else {
8830b57cec5SDimitry Andric SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
8840b57cec5SDimitry Andric AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
8850b57cec5SDimitry Andric }
8860b57cec5SDimitry Andric SDValue AddHiArgs[] = {
8870b57cec5SDimitry Andric SDValue(Hi0, 0),
8880b57cec5SDimitry Andric SDValue(Hi1, 0),
8890b57cec5SDimitry Andric SDValue(AddLo, 1)
8900b57cec5SDimitry Andric };
8910b57cec5SDimitry Andric SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
8920b57cec5SDimitry Andric
8930b57cec5SDimitry Andric SDValue RegSequenceArgs[] = {
8940b57cec5SDimitry Andric CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
8950b57cec5SDimitry Andric SDValue(AddLo,0),
8960b57cec5SDimitry Andric Sub0,
8970b57cec5SDimitry Andric SDValue(AddHi,0),
8980b57cec5SDimitry Andric Sub1,
8990b57cec5SDimitry Andric };
9000b57cec5SDimitry Andric SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
9010b57cec5SDimitry Andric MVT::i64, RegSequenceArgs);
9020b57cec5SDimitry Andric
9030b57cec5SDimitry Andric if (ProduceCarry) {
9040b57cec5SDimitry Andric // Replace the carry-use
9050b57cec5SDimitry Andric ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
9060b57cec5SDimitry Andric }
9070b57cec5SDimitry Andric
9080b57cec5SDimitry Andric // Replace the remaining uses.
9090b57cec5SDimitry Andric ReplaceNode(N, RegSequence);
9100b57cec5SDimitry Andric }
9110b57cec5SDimitry Andric
SelectAddcSubb(SDNode * N)9120b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
9130b57cec5SDimitry Andric SDLoc DL(N);
9140b57cec5SDimitry Andric SDValue LHS = N->getOperand(0);
9150b57cec5SDimitry Andric SDValue RHS = N->getOperand(1);
9160b57cec5SDimitry Andric SDValue CI = N->getOperand(2);
9170b57cec5SDimitry Andric
9185ffd83dbSDimitry Andric if (N->isDivergent()) {
91906c3fb27SDimitry Andric unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY ? AMDGPU::V_ADDC_U32_e64
9200b57cec5SDimitry Andric : AMDGPU::V_SUBB_U32_e64;
9210b57cec5SDimitry Andric CurDAG->SelectNodeTo(
9220b57cec5SDimitry Andric N, Opc, N->getVTList(),
9235ffd83dbSDimitry Andric {LHS, RHS, CI,
9245ffd83dbSDimitry Andric CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
9255ffd83dbSDimitry Andric } else {
92606c3fb27SDimitry Andric unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY ? AMDGPU::S_ADD_CO_PSEUDO
9275ffd83dbSDimitry Andric : AMDGPU::S_SUB_CO_PSEUDO;
9285ffd83dbSDimitry Andric CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
9295ffd83dbSDimitry Andric }
9300b57cec5SDimitry Andric }
9310b57cec5SDimitry Andric
SelectUADDO_USUBO(SDNode * N)9320b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
9330b57cec5SDimitry Andric // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
9340b57cec5SDimitry Andric // carry out despite the _i32 name. These were renamed in VI to _U32.
9350b57cec5SDimitry Andric // FIXME: We should probably rename the opcodes here.
9365ffd83dbSDimitry Andric bool IsAdd = N->getOpcode() == ISD::UADDO;
9375ffd83dbSDimitry Andric bool IsVALU = N->isDivergent();
9385ffd83dbSDimitry Andric
9395ffd83dbSDimitry Andric for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
9405ffd83dbSDimitry Andric ++UI)
9415ffd83dbSDimitry Andric if (UI.getUse().getResNo() == 1) {
94206c3fb27SDimitry Andric if ((IsAdd && (UI->getOpcode() != ISD::UADDO_CARRY)) ||
94306c3fb27SDimitry Andric (!IsAdd && (UI->getOpcode() != ISD::USUBO_CARRY))) {
9445ffd83dbSDimitry Andric IsVALU = true;
9455ffd83dbSDimitry Andric break;
9465ffd83dbSDimitry Andric }
9475ffd83dbSDimitry Andric }
9485ffd83dbSDimitry Andric
9495ffd83dbSDimitry Andric if (IsVALU) {
950e8d8bef9SDimitry Andric unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
9510b57cec5SDimitry Andric
9520b57cec5SDimitry Andric CurDAG->SelectNodeTo(
9530b57cec5SDimitry Andric N, Opc, N->getVTList(),
9540b57cec5SDimitry Andric {N->getOperand(0), N->getOperand(1),
9550b57cec5SDimitry Andric CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
9565ffd83dbSDimitry Andric } else {
9575ffd83dbSDimitry Andric unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
9585ffd83dbSDimitry Andric : AMDGPU::S_USUBO_PSEUDO;
9595ffd83dbSDimitry Andric
9605ffd83dbSDimitry Andric CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
9615ffd83dbSDimitry Andric {N->getOperand(0), N->getOperand(1)});
9625ffd83dbSDimitry Andric }
9630b57cec5SDimitry Andric }
9640b57cec5SDimitry Andric
SelectFMA_W_CHAIN(SDNode * N)9650b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
9660b57cec5SDimitry Andric SDLoc SL(N);
9670b57cec5SDimitry Andric // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
9680b57cec5SDimitry Andric SDValue Ops[10];
9690b57cec5SDimitry Andric
9700b57cec5SDimitry Andric SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
9710b57cec5SDimitry Andric SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
9720b57cec5SDimitry Andric SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
9730b57cec5SDimitry Andric Ops[8] = N->getOperand(0);
9740b57cec5SDimitry Andric Ops[9] = N->getOperand(4);
9750b57cec5SDimitry Andric
976349cc55cSDimitry Andric // If there are no source modifiers, prefer fmac over fma because it can use
977349cc55cSDimitry Andric // the smaller VOP2 encoding.
978349cc55cSDimitry Andric bool UseFMAC = Subtarget->hasDLInsts() &&
979349cc55cSDimitry Andric cast<ConstantSDNode>(Ops[0])->isZero() &&
980349cc55cSDimitry Andric cast<ConstantSDNode>(Ops[2])->isZero() &&
981349cc55cSDimitry Andric cast<ConstantSDNode>(Ops[4])->isZero();
982349cc55cSDimitry Andric unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
983349cc55cSDimitry Andric CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
9840b57cec5SDimitry Andric }
9850b57cec5SDimitry Andric
SelectFMUL_W_CHAIN(SDNode * N)9860b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
9870b57cec5SDimitry Andric SDLoc SL(N);
9880b57cec5SDimitry Andric // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
9890b57cec5SDimitry Andric SDValue Ops[8];
9900b57cec5SDimitry Andric
9910b57cec5SDimitry Andric SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
9920b57cec5SDimitry Andric SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
9930b57cec5SDimitry Andric Ops[6] = N->getOperand(0);
9940b57cec5SDimitry Andric Ops[7] = N->getOperand(3);
9950b57cec5SDimitry Andric
9960b57cec5SDimitry Andric CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
9970b57cec5SDimitry Andric }
9980b57cec5SDimitry Andric
9990b57cec5SDimitry Andric // We need to handle this here because tablegen doesn't support matching
10000b57cec5SDimitry Andric // instructions with multiple outputs.
SelectDIV_SCALE(SDNode * N)10010b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
10020b57cec5SDimitry Andric SDLoc SL(N);
10030b57cec5SDimitry Andric EVT VT = N->getValueType(0);
10040b57cec5SDimitry Andric
10050b57cec5SDimitry Andric assert(VT == MVT::f32 || VT == MVT::f64);
10060b57cec5SDimitry Andric
10070b57cec5SDimitry Andric unsigned Opc
1008e8d8bef9SDimitry Andric = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
10090b57cec5SDimitry Andric
1010e8d8bef9SDimitry Andric // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
1011e8d8bef9SDimitry Andric // omod
1012e8d8bef9SDimitry Andric SDValue Ops[8];
1013e8d8bef9SDimitry Andric SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1014e8d8bef9SDimitry Andric SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
1015e8d8bef9SDimitry Andric SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
10160b57cec5SDimitry Andric CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
10170b57cec5SDimitry Andric }
10180b57cec5SDimitry Andric
10190b57cec5SDimitry Andric // We need to handle this here because tablegen doesn't support matching
10200b57cec5SDimitry Andric // instructions with multiple outputs.
SelectMAD_64_32(SDNode * N)10210b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
10220b57cec5SDimitry Andric SDLoc SL(N);
10230b57cec5SDimitry Andric bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
102481ad6265SDimitry Andric unsigned Opc;
1025bdd1243dSDimitry Andric if (Subtarget->hasMADIntraFwdBug())
102681ad6265SDimitry Andric Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
102781ad6265SDimitry Andric : AMDGPU::V_MAD_U64_U32_gfx11_e64;
102881ad6265SDimitry Andric else
102981ad6265SDimitry Andric Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
10300b57cec5SDimitry Andric
10310b57cec5SDimitry Andric SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
10320b57cec5SDimitry Andric SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
10330b57cec5SDimitry Andric Clamp };
10340b57cec5SDimitry Andric CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
10350b57cec5SDimitry Andric }
10360b57cec5SDimitry Andric
10374824e7fdSDimitry Andric // We need to handle this here because tablegen doesn't support matching
10384824e7fdSDimitry Andric // instructions with multiple outputs.
SelectMUL_LOHI(SDNode * N)10394824e7fdSDimitry Andric void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
10404824e7fdSDimitry Andric SDLoc SL(N);
10414824e7fdSDimitry Andric bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
104281ad6265SDimitry Andric unsigned Opc;
1043bdd1243dSDimitry Andric if (Subtarget->hasMADIntraFwdBug())
104481ad6265SDimitry Andric Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
104581ad6265SDimitry Andric : AMDGPU::V_MAD_U64_U32_gfx11_e64;
104681ad6265SDimitry Andric else
104781ad6265SDimitry Andric Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
10484824e7fdSDimitry Andric
10494824e7fdSDimitry Andric SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
10504824e7fdSDimitry Andric SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
10514824e7fdSDimitry Andric SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
10524824e7fdSDimitry Andric SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
10534824e7fdSDimitry Andric if (!SDValue(N, 0).use_empty()) {
10544824e7fdSDimitry Andric SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
10554824e7fdSDimitry Andric SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
10564824e7fdSDimitry Andric MVT::i32, SDValue(Mad, 0), Sub0);
10574824e7fdSDimitry Andric ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
10584824e7fdSDimitry Andric }
10594824e7fdSDimitry Andric if (!SDValue(N, 1).use_empty()) {
10604824e7fdSDimitry Andric SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
10614824e7fdSDimitry Andric SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
10624824e7fdSDimitry Andric MVT::i32, SDValue(Mad, 0), Sub1);
10634824e7fdSDimitry Andric ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
10644824e7fdSDimitry Andric }
10654824e7fdSDimitry Andric CurDAG->RemoveDeadNode(N);
10664824e7fdSDimitry Andric }
10674824e7fdSDimitry Andric
isDSOffsetLegal(SDValue Base,unsigned Offset) const1068e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
1069e8d8bef9SDimitry Andric if (!isUInt<16>(Offset))
10700b57cec5SDimitry Andric return false;
10710b57cec5SDimitry Andric
1072e8d8bef9SDimitry Andric if (!Base || Subtarget->hasUsableDSOffset() ||
10730b57cec5SDimitry Andric Subtarget->unsafeDSOffsetFoldingEnabled())
10740b57cec5SDimitry Andric return true;
10750b57cec5SDimitry Andric
10760b57cec5SDimitry Andric // On Southern Islands instruction with a negative base value and an offset
10770b57cec5SDimitry Andric // don't seem to work.
10780b57cec5SDimitry Andric return CurDAG->SignBitIsZero(Base);
10790b57cec5SDimitry Andric }
10800b57cec5SDimitry Andric
SelectDS1Addr1Offset(SDValue Addr,SDValue & Base,SDValue & Offset) const10810b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
10820b57cec5SDimitry Andric SDValue &Offset) const {
10830b57cec5SDimitry Andric SDLoc DL(Addr);
10840b57cec5SDimitry Andric if (CurDAG->isBaseWithConstantOffset(Addr)) {
10850b57cec5SDimitry Andric SDValue N0 = Addr.getOperand(0);
10860b57cec5SDimitry Andric SDValue N1 = Addr.getOperand(1);
10870b57cec5SDimitry Andric ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1088e8d8bef9SDimitry Andric if (isDSOffsetLegal(N0, C1->getSExtValue())) {
10890b57cec5SDimitry Andric // (add n0, c0)
10900b57cec5SDimitry Andric Base = N0;
10910b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
10920b57cec5SDimitry Andric return true;
10930b57cec5SDimitry Andric }
10940b57cec5SDimitry Andric } else if (Addr.getOpcode() == ISD::SUB) {
10950b57cec5SDimitry Andric // sub C, x -> add (sub 0, x), C
10960b57cec5SDimitry Andric if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
10970b57cec5SDimitry Andric int64_t ByteOffset = C->getSExtValue();
1098e8d8bef9SDimitry Andric if (isDSOffsetLegal(SDValue(), ByteOffset)) {
10990b57cec5SDimitry Andric SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
11000b57cec5SDimitry Andric
11010b57cec5SDimitry Andric // XXX - This is kind of hacky. Create a dummy sub node so we can check
11020b57cec5SDimitry Andric // the known bits in isDSOffsetLegal. We need to emit the selected node
11030b57cec5SDimitry Andric // here, so this is thrown away.
11040b57cec5SDimitry Andric SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
11050b57cec5SDimitry Andric Zero, Addr.getOperand(1));
11060b57cec5SDimitry Andric
1107e8d8bef9SDimitry Andric if (isDSOffsetLegal(Sub, ByteOffset)) {
11080b57cec5SDimitry Andric SmallVector<SDValue, 3> Opnds;
11090b57cec5SDimitry Andric Opnds.push_back(Zero);
11100b57cec5SDimitry Andric Opnds.push_back(Addr.getOperand(1));
11110b57cec5SDimitry Andric
11120b57cec5SDimitry Andric // FIXME: Select to VOP3 version for with-carry.
1113e8d8bef9SDimitry Andric unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
11140b57cec5SDimitry Andric if (Subtarget->hasAddNoCarry()) {
11150b57cec5SDimitry Andric SubOp = AMDGPU::V_SUB_U32_e64;
11160b57cec5SDimitry Andric Opnds.push_back(
11170b57cec5SDimitry Andric CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
11180b57cec5SDimitry Andric }
11190b57cec5SDimitry Andric
11200b57cec5SDimitry Andric MachineSDNode *MachineSub =
11210b57cec5SDimitry Andric CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
11220b57cec5SDimitry Andric
11230b57cec5SDimitry Andric Base = SDValue(MachineSub, 0);
11240b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
11250b57cec5SDimitry Andric return true;
11260b57cec5SDimitry Andric }
11270b57cec5SDimitry Andric }
11280b57cec5SDimitry Andric }
11290b57cec5SDimitry Andric } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
11300b57cec5SDimitry Andric // If we have a constant address, prefer to put the constant into the
11310b57cec5SDimitry Andric // offset. This can save moves to load the constant address since multiple
11320b57cec5SDimitry Andric // operations can share the zero base address register, and enables merging
11330b57cec5SDimitry Andric // into read2 / write2 instructions.
11340b57cec5SDimitry Andric
11350b57cec5SDimitry Andric SDLoc DL(Addr);
11360b57cec5SDimitry Andric
1137e8d8bef9SDimitry Andric if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
11380b57cec5SDimitry Andric SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
11390b57cec5SDimitry Andric MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
11400b57cec5SDimitry Andric DL, MVT::i32, Zero);
11410b57cec5SDimitry Andric Base = SDValue(MovZero, 0);
11420b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
11430b57cec5SDimitry Andric return true;
11440b57cec5SDimitry Andric }
11450b57cec5SDimitry Andric }
11460b57cec5SDimitry Andric
11470b57cec5SDimitry Andric // default case
11480b57cec5SDimitry Andric Base = Addr;
11490b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
11500b57cec5SDimitry Andric return true;
11510b57cec5SDimitry Andric }
11520b57cec5SDimitry Andric
isDSOffset2Legal(SDValue Base,unsigned Offset0,unsigned Offset1,unsigned Size) const1153e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1154e8d8bef9SDimitry Andric unsigned Offset1,
1155e8d8bef9SDimitry Andric unsigned Size) const {
1156e8d8bef9SDimitry Andric if (Offset0 % Size != 0 || Offset1 % Size != 0)
1157e8d8bef9SDimitry Andric return false;
1158e8d8bef9SDimitry Andric if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
1159e8d8bef9SDimitry Andric return false;
1160e8d8bef9SDimitry Andric
1161e8d8bef9SDimitry Andric if (!Base || Subtarget->hasUsableDSOffset() ||
1162e8d8bef9SDimitry Andric Subtarget->unsafeDSOffsetFoldingEnabled())
1163e8d8bef9SDimitry Andric return true;
1164e8d8bef9SDimitry Andric
1165e8d8bef9SDimitry Andric // On Southern Islands instruction with a negative base value and an offset
1166e8d8bef9SDimitry Andric // don't seem to work.
1167e8d8bef9SDimitry Andric return CurDAG->SignBitIsZero(Base);
1168e8d8bef9SDimitry Andric }
1169e8d8bef9SDimitry Andric
11705f757f3fSDimitry Andric // Return whether the operation has NoUnsignedWrap property.
isNoUnsignedWrap(SDValue Addr)11715f757f3fSDimitry Andric static bool isNoUnsignedWrap(SDValue Addr) {
11725f757f3fSDimitry Andric return (Addr.getOpcode() == ISD::ADD &&
11735f757f3fSDimitry Andric Addr->getFlags().hasNoUnsignedWrap()) ||
11745f757f3fSDimitry Andric Addr->getOpcode() == ISD::OR;
11755f757f3fSDimitry Andric }
11765f757f3fSDimitry Andric
11775f757f3fSDimitry Andric // Check that the base address of flat scratch load/store in the form of `base +
11785f757f3fSDimitry Andric // offset` is legal to be put in SGPR/VGPR (i.e. unsigned per hardware
11795f757f3fSDimitry Andric // requirement). We always treat the first operand as the base address here.
isFlatScratchBaseLegal(SDValue Addr) const11805f757f3fSDimitry Andric bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
11815f757f3fSDimitry Andric if (isNoUnsignedWrap(Addr))
118206c3fb27SDimitry Andric return true;
11835f757f3fSDimitry Andric
11845f757f3fSDimitry Andric // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
11855f757f3fSDimitry Andric // values.
11867a6dacacSDimitry Andric if (Subtarget->hasSignedScratchOffsets())
11875f757f3fSDimitry Andric return true;
11885f757f3fSDimitry Andric
11895f757f3fSDimitry Andric auto LHS = Addr.getOperand(0);
11905f757f3fSDimitry Andric auto RHS = Addr.getOperand(1);
11915f757f3fSDimitry Andric
11925f757f3fSDimitry Andric // If the immediate offset is negative and within certain range, the base
11935f757f3fSDimitry Andric // address cannot also be negative. If the base is also negative, the sum
11945f757f3fSDimitry Andric // would be either negative or much larger than the valid range of scratch
11955f757f3fSDimitry Andric // memory a thread can access.
11965f757f3fSDimitry Andric ConstantSDNode *ImmOp = nullptr;
11975f757f3fSDimitry Andric if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
11985f757f3fSDimitry Andric if (ImmOp->getSExtValue() < 0 && ImmOp->getSExtValue() > -0x40000000)
11995f757f3fSDimitry Andric return true;
12005f757f3fSDimitry Andric }
12015f757f3fSDimitry Andric
12025f757f3fSDimitry Andric return CurDAG->SignBitIsZero(LHS);
12035f757f3fSDimitry Andric }
12045f757f3fSDimitry Andric
12055f757f3fSDimitry Andric // Check address value in SGPR/VGPR are legal for flat scratch in the form
12065f757f3fSDimitry Andric // of: SGPR + VGPR.
isFlatScratchBaseLegalSV(SDValue Addr) const12075f757f3fSDimitry Andric bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
12085f757f3fSDimitry Andric if (isNoUnsignedWrap(Addr))
12095f757f3fSDimitry Andric return true;
12105f757f3fSDimitry Andric
12117a6dacacSDimitry Andric // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
12127a6dacacSDimitry Andric // values.
12137a6dacacSDimitry Andric if (Subtarget->hasSignedScratchOffsets())
12147a6dacacSDimitry Andric return true;
12157a6dacacSDimitry Andric
12165f757f3fSDimitry Andric auto LHS = Addr.getOperand(0);
12175f757f3fSDimitry Andric auto RHS = Addr.getOperand(1);
12185f757f3fSDimitry Andric return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS);
12195f757f3fSDimitry Andric }
12205f757f3fSDimitry Andric
12215f757f3fSDimitry Andric // Check address value in SGPR/VGPR are legal for flat scratch in the form
12225f757f3fSDimitry Andric // of: SGPR + VGPR + Imm.
isFlatScratchBaseLegalSVImm(SDValue Addr) const12235f757f3fSDimitry Andric bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
12247a6dacacSDimitry Andric // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
12257a6dacacSDimitry Andric // values.
12267a6dacacSDimitry Andric if (AMDGPU::isGFX12Plus(*Subtarget))
12277a6dacacSDimitry Andric return true;
12287a6dacacSDimitry Andric
12295f757f3fSDimitry Andric auto Base = Addr.getOperand(0);
12305f757f3fSDimitry Andric auto *RHSImm = cast<ConstantSDNode>(Addr.getOperand(1));
12315f757f3fSDimitry Andric // If the immediate offset is negative and within certain range, the base
12325f757f3fSDimitry Andric // address cannot also be negative. If the base is also negative, the sum
12335f757f3fSDimitry Andric // would be either negative or much larger than the valid range of scratch
12345f757f3fSDimitry Andric // memory a thread can access.
12355f757f3fSDimitry Andric if (isNoUnsignedWrap(Base) &&
12365f757f3fSDimitry Andric (isNoUnsignedWrap(Addr) ||
12375f757f3fSDimitry Andric (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
12385f757f3fSDimitry Andric return true;
12395f757f3fSDimitry Andric
12405f757f3fSDimitry Andric auto LHS = Base.getOperand(0);
12415f757f3fSDimitry Andric auto RHS = Base.getOperand(1);
12425f757f3fSDimitry Andric return CurDAG->SignBitIsZero(RHS) && CurDAG->SignBitIsZero(LHS);
124306c3fb27SDimitry Andric }
124406c3fb27SDimitry Andric
12450b57cec5SDimitry Andric // TODO: If offset is too big, put low 16-bit into offset.
SelectDS64Bit4ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const12460b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
12470b57cec5SDimitry Andric SDValue &Offset0,
12480b57cec5SDimitry Andric SDValue &Offset1) const {
1249e8d8bef9SDimitry Andric return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1250e8d8bef9SDimitry Andric }
1251e8d8bef9SDimitry Andric
SelectDS128Bit8ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const1252e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
1253e8d8bef9SDimitry Andric SDValue &Offset0,
1254e8d8bef9SDimitry Andric SDValue &Offset1) const {
1255e8d8bef9SDimitry Andric return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1256e8d8bef9SDimitry Andric }
1257e8d8bef9SDimitry Andric
SelectDSReadWrite2(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1,unsigned Size) const1258e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
1259e8d8bef9SDimitry Andric SDValue &Offset0, SDValue &Offset1,
1260e8d8bef9SDimitry Andric unsigned Size) const {
12610b57cec5SDimitry Andric SDLoc DL(Addr);
12620b57cec5SDimitry Andric
12630b57cec5SDimitry Andric if (CurDAG->isBaseWithConstantOffset(Addr)) {
12640b57cec5SDimitry Andric SDValue N0 = Addr.getOperand(0);
12650b57cec5SDimitry Andric SDValue N1 = Addr.getOperand(1);
12660b57cec5SDimitry Andric ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1267e8d8bef9SDimitry Andric unsigned OffsetValue0 = C1->getZExtValue();
1268e8d8bef9SDimitry Andric unsigned OffsetValue1 = OffsetValue0 + Size;
1269e8d8bef9SDimitry Andric
12700b57cec5SDimitry Andric // (add n0, c0)
1271e8d8bef9SDimitry Andric if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
12720b57cec5SDimitry Andric Base = N0;
1273e8d8bef9SDimitry Andric Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1274e8d8bef9SDimitry Andric Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
12750b57cec5SDimitry Andric return true;
12760b57cec5SDimitry Andric }
12770b57cec5SDimitry Andric } else if (Addr.getOpcode() == ISD::SUB) {
12780b57cec5SDimitry Andric // sub C, x -> add (sub 0, x), C
1279e8d8bef9SDimitry Andric if (const ConstantSDNode *C =
1280e8d8bef9SDimitry Andric dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1281e8d8bef9SDimitry Andric unsigned OffsetValue0 = C->getZExtValue();
1282e8d8bef9SDimitry Andric unsigned OffsetValue1 = OffsetValue0 + Size;
12830b57cec5SDimitry Andric
1284e8d8bef9SDimitry Andric if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
12850b57cec5SDimitry Andric SDLoc DL(Addr);
12860b57cec5SDimitry Andric SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
12870b57cec5SDimitry Andric
12880b57cec5SDimitry Andric // XXX - This is kind of hacky. Create a dummy sub node so we can check
12890b57cec5SDimitry Andric // the known bits in isDSOffsetLegal. We need to emit the selected node
12900b57cec5SDimitry Andric // here, so this is thrown away.
1291e8d8bef9SDimitry Andric SDValue Sub =
1292e8d8bef9SDimitry Andric CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
12930b57cec5SDimitry Andric
1294e8d8bef9SDimitry Andric if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
12950b57cec5SDimitry Andric SmallVector<SDValue, 3> Opnds;
12960b57cec5SDimitry Andric Opnds.push_back(Zero);
12970b57cec5SDimitry Andric Opnds.push_back(Addr.getOperand(1));
1298e8d8bef9SDimitry Andric unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
12990b57cec5SDimitry Andric if (Subtarget->hasAddNoCarry()) {
13000b57cec5SDimitry Andric SubOp = AMDGPU::V_SUB_U32_e64;
13010b57cec5SDimitry Andric Opnds.push_back(
13020b57cec5SDimitry Andric CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
13030b57cec5SDimitry Andric }
13040b57cec5SDimitry Andric
1305e8d8bef9SDimitry Andric MachineSDNode *MachineSub = CurDAG->getMachineNode(
1306e8d8bef9SDimitry Andric SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
13070b57cec5SDimitry Andric
13080b57cec5SDimitry Andric Base = SDValue(MachineSub, 0);
1309e8d8bef9SDimitry Andric Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1310e8d8bef9SDimitry Andric Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
13110b57cec5SDimitry Andric return true;
13120b57cec5SDimitry Andric }
13130b57cec5SDimitry Andric }
13140b57cec5SDimitry Andric }
13150b57cec5SDimitry Andric } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1316e8d8bef9SDimitry Andric unsigned OffsetValue0 = CAddr->getZExtValue();
1317e8d8bef9SDimitry Andric unsigned OffsetValue1 = OffsetValue0 + Size;
13180b57cec5SDimitry Andric
1319e8d8bef9SDimitry Andric if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
13200b57cec5SDimitry Andric SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1321e8d8bef9SDimitry Andric MachineSDNode *MovZero =
1322e8d8bef9SDimitry Andric CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
13230b57cec5SDimitry Andric Base = SDValue(MovZero, 0);
1324e8d8bef9SDimitry Andric Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1325e8d8bef9SDimitry Andric Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
13260b57cec5SDimitry Andric return true;
13270b57cec5SDimitry Andric }
13280b57cec5SDimitry Andric }
13290b57cec5SDimitry Andric
13300b57cec5SDimitry Andric // default case
13310b57cec5SDimitry Andric
13320b57cec5SDimitry Andric Base = Addr;
13330b57cec5SDimitry Andric Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
13340b57cec5SDimitry Andric Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
13350b57cec5SDimitry Andric return true;
13360b57cec5SDimitry Andric }
13370b57cec5SDimitry Andric
SelectMUBUF(SDValue Addr,SDValue & Ptr,SDValue & VAddr,SDValue & SOffset,SDValue & Offset,SDValue & Offen,SDValue & Idxen,SDValue & Addr64) const1338fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
1339fe6060f1SDimitry Andric SDValue &SOffset, SDValue &Offset,
1340fe6060f1SDimitry Andric SDValue &Offen, SDValue &Idxen,
1341fe6060f1SDimitry Andric SDValue &Addr64) const {
13420b57cec5SDimitry Andric // Subtarget prefers to use flat instruction
13435ffd83dbSDimitry Andric // FIXME: This should be a pattern predicate and not reach here
13440b57cec5SDimitry Andric if (Subtarget->useFlatForGlobal())
13450b57cec5SDimitry Andric return false;
13460b57cec5SDimitry Andric
13470b57cec5SDimitry Andric SDLoc DL(Addr);
13480b57cec5SDimitry Andric
13490b57cec5SDimitry Andric Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
13500b57cec5SDimitry Andric Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
13510b57cec5SDimitry Andric Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
13525f757f3fSDimitry Andric SOffset = Subtarget->hasRestrictedSOffset()
13535f757f3fSDimitry Andric ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
13545f757f3fSDimitry Andric : CurDAG->getTargetConstant(0, DL, MVT::i32);
13550b57cec5SDimitry Andric
13560b57cec5SDimitry Andric ConstantSDNode *C1 = nullptr;
13570b57cec5SDimitry Andric SDValue N0 = Addr;
13580b57cec5SDimitry Andric if (CurDAG->isBaseWithConstantOffset(Addr)) {
13590b57cec5SDimitry Andric C1 = cast<ConstantSDNode>(Addr.getOperand(1));
13600b57cec5SDimitry Andric if (isUInt<32>(C1->getZExtValue()))
13610b57cec5SDimitry Andric N0 = Addr.getOperand(0);
13620b57cec5SDimitry Andric else
13630b57cec5SDimitry Andric C1 = nullptr;
13640b57cec5SDimitry Andric }
13650b57cec5SDimitry Andric
13660b57cec5SDimitry Andric if (N0.getOpcode() == ISD::ADD) {
13670b57cec5SDimitry Andric // (add N2, N3) -> addr64, or
13680b57cec5SDimitry Andric // (add (add N2, N3), C1) -> addr64
13690b57cec5SDimitry Andric SDValue N2 = N0.getOperand(0);
13700b57cec5SDimitry Andric SDValue N3 = N0.getOperand(1);
13710b57cec5SDimitry Andric Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
13720b57cec5SDimitry Andric
13730b57cec5SDimitry Andric if (N2->isDivergent()) {
13740b57cec5SDimitry Andric if (N3->isDivergent()) {
13750b57cec5SDimitry Andric // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
13760b57cec5SDimitry Andric // addr64, and construct the resource from a 0 address.
13770b57cec5SDimitry Andric Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
13780b57cec5SDimitry Andric VAddr = N0;
13790b57cec5SDimitry Andric } else {
13800b57cec5SDimitry Andric // N2 is divergent, N3 is not.
13810b57cec5SDimitry Andric Ptr = N3;
13820b57cec5SDimitry Andric VAddr = N2;
13830b57cec5SDimitry Andric }
13840b57cec5SDimitry Andric } else {
13850b57cec5SDimitry Andric // N2 is not divergent.
13860b57cec5SDimitry Andric Ptr = N2;
13870b57cec5SDimitry Andric VAddr = N3;
13880b57cec5SDimitry Andric }
138906c3fb27SDimitry Andric Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
13900b57cec5SDimitry Andric } else if (N0->isDivergent()) {
13910b57cec5SDimitry Andric // N0 is divergent. Use it as the addr64, and construct the resource from a
13920b57cec5SDimitry Andric // 0 address.
13930b57cec5SDimitry Andric Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
13940b57cec5SDimitry Andric VAddr = N0;
13950b57cec5SDimitry Andric Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
13960b57cec5SDimitry Andric } else {
13970b57cec5SDimitry Andric // N0 -> offset, or
13980b57cec5SDimitry Andric // (N0 + C1) -> offset
13990b57cec5SDimitry Andric VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
14000b57cec5SDimitry Andric Ptr = N0;
14010b57cec5SDimitry Andric }
14020b57cec5SDimitry Andric
14030b57cec5SDimitry Andric if (!C1) {
14040b57cec5SDimitry Andric // No offset.
140506c3fb27SDimitry Andric Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
14060b57cec5SDimitry Andric return true;
14070b57cec5SDimitry Andric }
14080b57cec5SDimitry Andric
14095f757f3fSDimitry Andric const SIInstrInfo *TII = Subtarget->getInstrInfo();
14105f757f3fSDimitry Andric if (TII->isLegalMUBUFImmOffset(C1->getZExtValue())) {
14110b57cec5SDimitry Andric // Legal offset for instruction.
141206c3fb27SDimitry Andric Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
14130b57cec5SDimitry Andric return true;
14140b57cec5SDimitry Andric }
14150b57cec5SDimitry Andric
14160b57cec5SDimitry Andric // Illegal offset, store it in soffset.
141706c3fb27SDimitry Andric Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
14180b57cec5SDimitry Andric SOffset =
14190b57cec5SDimitry Andric SDValue(CurDAG->getMachineNode(
14200b57cec5SDimitry Andric AMDGPU::S_MOV_B32, DL, MVT::i32,
14210b57cec5SDimitry Andric CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
14220b57cec5SDimitry Andric 0);
14230b57cec5SDimitry Andric return true;
14240b57cec5SDimitry Andric }
14250b57cec5SDimitry Andric
SelectMUBUFAddr64(SDValue Addr,SDValue & SRsrc,SDValue & VAddr,SDValue & SOffset,SDValue & Offset) const14260b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
14270b57cec5SDimitry Andric SDValue &VAddr, SDValue &SOffset,
1428fe6060f1SDimitry Andric SDValue &Offset) const {
14290b57cec5SDimitry Andric SDValue Ptr, Offen, Idxen, Addr64;
14300b57cec5SDimitry Andric
14310b57cec5SDimitry Andric // addr64 bit was removed for volcanic islands.
14325ffd83dbSDimitry Andric // FIXME: This should be a pattern predicate and not reach here
14330b57cec5SDimitry Andric if (!Subtarget->hasAddr64())
14340b57cec5SDimitry Andric return false;
14350b57cec5SDimitry Andric
1436fe6060f1SDimitry Andric if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
14370b57cec5SDimitry Andric return false;
14380b57cec5SDimitry Andric
14390b57cec5SDimitry Andric ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
14400b57cec5SDimitry Andric if (C->getSExtValue()) {
14410b57cec5SDimitry Andric SDLoc DL(Addr);
14420b57cec5SDimitry Andric
14430b57cec5SDimitry Andric const SITargetLowering& Lowering =
14440b57cec5SDimitry Andric *static_cast<const SITargetLowering*>(getTargetLowering());
14450b57cec5SDimitry Andric
14460b57cec5SDimitry Andric SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
14470b57cec5SDimitry Andric return true;
14480b57cec5SDimitry Andric }
14490b57cec5SDimitry Andric
14500b57cec5SDimitry Andric return false;
14510b57cec5SDimitry Andric }
14520b57cec5SDimitry Andric
foldFrameIndex(SDValue N) const14530b57cec5SDimitry Andric std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
14545ffd83dbSDimitry Andric SDLoc DL(N);
14550b57cec5SDimitry Andric
1456e8d8bef9SDimitry Andric auto *FI = dyn_cast<FrameIndexSDNode>(N);
1457e8d8bef9SDimitry Andric SDValue TFI =
1458e8d8bef9SDimitry Andric FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
14590b57cec5SDimitry Andric
1460e8d8bef9SDimitry Andric // We rebase the base address into an absolute stack address and hence
1461e8d8bef9SDimitry Andric // use constant 0 for soffset. This value must be retained until
1462e8d8bef9SDimitry Andric // frame elimination and eliminateFrameIndex will choose the appropriate
1463e8d8bef9SDimitry Andric // frame register if need be.
1464bdd1243dSDimitry Andric return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
14650b57cec5SDimitry Andric }
14660b57cec5SDimitry Andric
SelectMUBUFScratchOffen(SDNode * Parent,SDValue Addr,SDValue & Rsrc,SDValue & VAddr,SDValue & SOffset,SDValue & ImmOffset) const14670b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
14680b57cec5SDimitry Andric SDValue Addr, SDValue &Rsrc,
14690b57cec5SDimitry Andric SDValue &VAddr, SDValue &SOffset,
14700b57cec5SDimitry Andric SDValue &ImmOffset) const {
14710b57cec5SDimitry Andric
14720b57cec5SDimitry Andric SDLoc DL(Addr);
14730b57cec5SDimitry Andric MachineFunction &MF = CurDAG->getMachineFunction();
14740b57cec5SDimitry Andric const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
14750b57cec5SDimitry Andric
14760b57cec5SDimitry Andric Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
14770b57cec5SDimitry Andric
14780b57cec5SDimitry Andric if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
14795ffd83dbSDimitry Andric int64_t Imm = CAddr->getSExtValue();
14805ffd83dbSDimitry Andric const int64_t NullPtr =
14815ffd83dbSDimitry Andric AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
14825ffd83dbSDimitry Andric // Don't fold null pointer.
14835ffd83dbSDimitry Andric if (Imm != NullPtr) {
14845f757f3fSDimitry Andric const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
148506c3fb27SDimitry Andric SDValue HighBits =
148606c3fb27SDimitry Andric CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
14875ffd83dbSDimitry Andric MachineSDNode *MovHighBits = CurDAG->getMachineNode(
14885ffd83dbSDimitry Andric AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
14890b57cec5SDimitry Andric VAddr = SDValue(MovHighBits, 0);
14900b57cec5SDimitry Andric
1491fe6060f1SDimitry Andric SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
149206c3fb27SDimitry Andric ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32);
14930b57cec5SDimitry Andric return true;
14940b57cec5SDimitry Andric }
14955ffd83dbSDimitry Andric }
14960b57cec5SDimitry Andric
14970b57cec5SDimitry Andric if (CurDAG->isBaseWithConstantOffset(Addr)) {
14980b57cec5SDimitry Andric // (add n0, c1)
14990b57cec5SDimitry Andric
15000b57cec5SDimitry Andric SDValue N0 = Addr.getOperand(0);
15010fca6ea1SDimitry Andric uint64_t C1 = Addr.getConstantOperandVal(1);
15020b57cec5SDimitry Andric
15030b57cec5SDimitry Andric // Offsets in vaddr must be positive if range checking is enabled.
15040b57cec5SDimitry Andric //
15050b57cec5SDimitry Andric // The total computation of vaddr + soffset + offset must not overflow. If
15060b57cec5SDimitry Andric // vaddr is negative, even if offset is 0 the sgpr offset add will end up
15070b57cec5SDimitry Andric // overflowing.
15080b57cec5SDimitry Andric //
15090b57cec5SDimitry Andric // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
15100b57cec5SDimitry Andric // always perform a range check. If a negative vaddr base index was used,
15110b57cec5SDimitry Andric // this would fail the range check. The overall address computation would
15120b57cec5SDimitry Andric // compute a valid address, but this doesn't happen due to the range
15130b57cec5SDimitry Andric // check. For out-of-bounds MUBUF loads, a 0 is returned.
15140b57cec5SDimitry Andric //
15150b57cec5SDimitry Andric // Therefore it should be safe to fold any VGPR offset on gfx9 into the
15160b57cec5SDimitry Andric // MUBUF vaddr, but not on older subtargets which can only do this if the
15170b57cec5SDimitry Andric // sign bit is known 0.
15185f757f3fSDimitry Andric const SIInstrInfo *TII = Subtarget->getInstrInfo();
15190fca6ea1SDimitry Andric if (TII->isLegalMUBUFImmOffset(C1) &&
15200b57cec5SDimitry Andric (!Subtarget->privateMemoryResourceIsRangeChecked() ||
15210b57cec5SDimitry Andric CurDAG->SignBitIsZero(N0))) {
15220b57cec5SDimitry Andric std::tie(VAddr, SOffset) = foldFrameIndex(N0);
15230fca6ea1SDimitry Andric ImmOffset = CurDAG->getTargetConstant(C1, DL, MVT::i32);
15240b57cec5SDimitry Andric return true;
15250b57cec5SDimitry Andric }
15260b57cec5SDimitry Andric }
15270b57cec5SDimitry Andric
15280b57cec5SDimitry Andric // (node)
15290b57cec5SDimitry Andric std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
153006c3fb27SDimitry Andric ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
15310b57cec5SDimitry Andric return true;
15320b57cec5SDimitry Andric }
15330b57cec5SDimitry Andric
IsCopyFromSGPR(const SIRegisterInfo & TRI,SDValue Val)1534fe6060f1SDimitry Andric static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
1535fe6060f1SDimitry Andric if (Val.getOpcode() != ISD::CopyFromReg)
1536fe6060f1SDimitry Andric return false;
1537bdd1243dSDimitry Andric auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
1538bdd1243dSDimitry Andric if (!Reg.isPhysical())
1539bdd1243dSDimitry Andric return false;
1540bdd1243dSDimitry Andric auto RC = TRI.getPhysRegBaseClass(Reg);
1541fe6060f1SDimitry Andric return RC && TRI.isSGPRClass(RC);
1542fe6060f1SDimitry Andric }
1543fe6060f1SDimitry Andric
SelectMUBUFScratchOffset(SDNode * Parent,SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const15440b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
15450b57cec5SDimitry Andric SDValue Addr,
15460b57cec5SDimitry Andric SDValue &SRsrc,
15470b57cec5SDimitry Andric SDValue &SOffset,
15480b57cec5SDimitry Andric SDValue &Offset) const {
1549fe6060f1SDimitry Andric const SIRegisterInfo *TRI =
1550fe6060f1SDimitry Andric static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
15515f757f3fSDimitry Andric const SIInstrInfo *TII = Subtarget->getInstrInfo();
15520b57cec5SDimitry Andric MachineFunction &MF = CurDAG->getMachineFunction();
15530b57cec5SDimitry Andric const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1554fe6060f1SDimitry Andric SDLoc DL(Addr);
1555fe6060f1SDimitry Andric
1556fe6060f1SDimitry Andric // CopyFromReg <sgpr>
1557fe6060f1SDimitry Andric if (IsCopyFromSGPR(*TRI, Addr)) {
1558fe6060f1SDimitry Andric SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1559fe6060f1SDimitry Andric SOffset = Addr;
156006c3fb27SDimitry Andric Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1561fe6060f1SDimitry Andric return true;
1562fe6060f1SDimitry Andric }
1563fe6060f1SDimitry Andric
1564fe6060f1SDimitry Andric ConstantSDNode *CAddr;
1565fe6060f1SDimitry Andric if (Addr.getOpcode() == ISD::ADD) {
1566fe6060f1SDimitry Andric // Add (CopyFromReg <sgpr>) <constant>
1567fe6060f1SDimitry Andric CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
15685f757f3fSDimitry Andric if (!CAddr || !TII->isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1569fe6060f1SDimitry Andric return false;
1570fe6060f1SDimitry Andric if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
1571fe6060f1SDimitry Andric return false;
1572fe6060f1SDimitry Andric
1573fe6060f1SDimitry Andric SOffset = Addr.getOperand(0);
1574fe6060f1SDimitry Andric } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
15755f757f3fSDimitry Andric TII->isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
1576fe6060f1SDimitry Andric // <constant>
1577fe6060f1SDimitry Andric SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1578fe6060f1SDimitry Andric } else {
1579fe6060f1SDimitry Andric return false;
1580fe6060f1SDimitry Andric }
15810b57cec5SDimitry Andric
15820b57cec5SDimitry Andric SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
15830b57cec5SDimitry Andric
158406c3fb27SDimitry Andric Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i32);
15850b57cec5SDimitry Andric return true;
15860b57cec5SDimitry Andric }
15870b57cec5SDimitry Andric
SelectMUBUFOffset(SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const15880b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1589fe6060f1SDimitry Andric SDValue &SOffset, SDValue &Offset
1590fe6060f1SDimitry Andric ) const {
15910b57cec5SDimitry Andric SDValue Ptr, VAddr, Offen, Idxen, Addr64;
15925f757f3fSDimitry Andric const SIInstrInfo *TII = Subtarget->getInstrInfo();
15930b57cec5SDimitry Andric
1594fe6060f1SDimitry Andric if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
15950b57cec5SDimitry Andric return false;
15960b57cec5SDimitry Andric
15970b57cec5SDimitry Andric if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
15980b57cec5SDimitry Andric !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
15990b57cec5SDimitry Andric !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
16000b57cec5SDimitry Andric uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1601349cc55cSDimitry Andric APInt::getAllOnes(32).getZExtValue(); // Size
16020b57cec5SDimitry Andric SDLoc DL(Addr);
16030b57cec5SDimitry Andric
16040b57cec5SDimitry Andric const SITargetLowering& Lowering =
16050b57cec5SDimitry Andric *static_cast<const SITargetLowering*>(getTargetLowering());
16060b57cec5SDimitry Andric
16070b57cec5SDimitry Andric SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
16080b57cec5SDimitry Andric return true;
16090b57cec5SDimitry Andric }
16100b57cec5SDimitry Andric return false;
16110b57cec5SDimitry Andric }
16120b57cec5SDimitry Andric
SelectBUFSOffset(SDValue ByteOffsetNode,SDValue & SOffset) const16135f757f3fSDimitry Andric bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
16145f757f3fSDimitry Andric SDValue &SOffset) const {
1615297eecfbSDimitry Andric if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {
16165f757f3fSDimitry Andric SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
16175f757f3fSDimitry Andric return true;
16185f757f3fSDimitry Andric }
16195f757f3fSDimitry Andric
16205f757f3fSDimitry Andric SOffset = ByteOffsetNode;
16215f757f3fSDimitry Andric return true;
16225f757f3fSDimitry Andric }
16235f757f3fSDimitry Andric
16248bcb0991SDimitry Andric // Find a load or store from corresponding pattern root.
16258bcb0991SDimitry Andric // Roots may be build_vector, bitconvert or their combinations.
findMemSDNode(SDNode * N)16268bcb0991SDimitry Andric static MemSDNode* findMemSDNode(SDNode *N) {
16278bcb0991SDimitry Andric N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
16288bcb0991SDimitry Andric if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
16298bcb0991SDimitry Andric return MN;
16308bcb0991SDimitry Andric assert(isa<BuildVectorSDNode>(N));
16318bcb0991SDimitry Andric for (SDValue V : N->op_values())
16328bcb0991SDimitry Andric if (MemSDNode *MN =
16338bcb0991SDimitry Andric dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
16348bcb0991SDimitry Andric return MN;
16358bcb0991SDimitry Andric llvm_unreachable("cannot find MemSDNode in the pattern!");
16360b57cec5SDimitry Andric }
16370b57cec5SDimitry Andric
SelectFlatOffsetImpl(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset,uint64_t FlatVariant) const1638fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
1639fe6060f1SDimitry Andric SDValue &VAddr, SDValue &Offset,
1640fe6060f1SDimitry Andric uint64_t FlatVariant) const {
16418bcb0991SDimitry Andric int64_t OffsetVal = 0;
16428bcb0991SDimitry Andric
1643e8d8bef9SDimitry Andric unsigned AS = findMemSDNode(N)->getAddressSpace();
1644e8d8bef9SDimitry Andric
1645fe6060f1SDimitry Andric bool CanHaveFlatSegmentOffsetBug =
1646fe6060f1SDimitry Andric Subtarget->hasFlatSegmentOffsetBug() &&
1647fe6060f1SDimitry Andric FlatVariant == SIInstrFlags::FLAT &&
1648fe6060f1SDimitry Andric (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);
1649fe6060f1SDimitry Andric
1650fe6060f1SDimitry Andric if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
16515ffd83dbSDimitry Andric SDValue N0, N1;
165206c3fb27SDimitry Andric if (isBaseWithConstantOffset64(Addr, N0, N1) &&
16535f757f3fSDimitry Andric (FlatVariant != SIInstrFlags::FlatScratch ||
16545f757f3fSDimitry Andric isFlatScratchBaseLegal(Addr))) {
1655fe6060f1SDimitry Andric int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
16568bcb0991SDimitry Andric
16578bcb0991SDimitry Andric const SIInstrInfo *TII = Subtarget->getInstrInfo();
1658fe6060f1SDimitry Andric if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
16598bcb0991SDimitry Andric Addr = N0;
16608bcb0991SDimitry Andric OffsetVal = COffsetVal;
16618bcb0991SDimitry Andric } else {
16628bcb0991SDimitry Andric // If the offset doesn't fit, put the low bits into the offset field and
16638bcb0991SDimitry Andric // add the rest.
1664e8d8bef9SDimitry Andric //
1665e8d8bef9SDimitry Andric // For a FLAT instruction the hardware decides whether to access
1666e8d8bef9SDimitry Andric // global/scratch/shared memory based on the high bits of vaddr,
1667e8d8bef9SDimitry Andric // ignoring the offset field, so we have to ensure that when we add
1668e8d8bef9SDimitry Andric // remainder to vaddr it still points into the same underlying object.
1669e8d8bef9SDimitry Andric // The easiest way to do that is to make sure that we split the offset
1670e8d8bef9SDimitry Andric // into two pieces that are both >= 0 or both <= 0.
16718bcb0991SDimitry Andric
16728bcb0991SDimitry Andric SDLoc DL(N);
1673e8d8bef9SDimitry Andric uint64_t RemainderOffset;
16748bcb0991SDimitry Andric
1675fe6060f1SDimitry Andric std::tie(OffsetVal, RemainderOffset) =
1676fe6060f1SDimitry Andric TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
16778bcb0991SDimitry Andric
1678e8d8bef9SDimitry Andric SDValue AddOffsetLo =
1679e8d8bef9SDimitry Andric getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1680e8d8bef9SDimitry Andric SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1681e8d8bef9SDimitry Andric
1682e8d8bef9SDimitry Andric if (Addr.getValueType().getSizeInBits() == 32) {
1683e8d8bef9SDimitry Andric SmallVector<SDValue, 3> Opnds;
1684e8d8bef9SDimitry Andric Opnds.push_back(N0);
1685e8d8bef9SDimitry Andric Opnds.push_back(AddOffsetLo);
1686e8d8bef9SDimitry Andric unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1687e8d8bef9SDimitry Andric if (Subtarget->hasAddNoCarry()) {
1688e8d8bef9SDimitry Andric AddOp = AMDGPU::V_ADD_U32_e64;
1689e8d8bef9SDimitry Andric Opnds.push_back(Clamp);
16908bcb0991SDimitry Andric }
1691e8d8bef9SDimitry Andric Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
16928bcb0991SDimitry Andric } else {
16935ffd83dbSDimitry Andric // TODO: Should this try to use a scalar add pseudo if the base address
16945ffd83dbSDimitry Andric // is uniform and saddr is usable?
16958bcb0991SDimitry Andric SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
16968bcb0991SDimitry Andric SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
16978bcb0991SDimitry Andric
1698e8d8bef9SDimitry Andric SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1699e8d8bef9SDimitry Andric DL, MVT::i32, N0, Sub0);
1700e8d8bef9SDimitry Andric SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1701e8d8bef9SDimitry Andric DL, MVT::i32, N0, Sub1);
17028bcb0991SDimitry Andric
17035ffd83dbSDimitry Andric SDValue AddOffsetHi =
17045ffd83dbSDimitry Andric getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
17058bcb0991SDimitry Andric
17068bcb0991SDimitry Andric SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
17078bcb0991SDimitry Andric
17085ffd83dbSDimitry Andric SDNode *Add =
1709e8d8bef9SDimitry Andric CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
17108bcb0991SDimitry Andric {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
17118bcb0991SDimitry Andric
17128bcb0991SDimitry Andric SDNode *Addc = CurDAG->getMachineNode(
17138bcb0991SDimitry Andric AMDGPU::V_ADDC_U32_e64, DL, VTs,
17148bcb0991SDimitry Andric {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
17158bcb0991SDimitry Andric
17168bcb0991SDimitry Andric SDValue RegSequenceArgs[] = {
17178bcb0991SDimitry Andric CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
17185ffd83dbSDimitry Andric SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
17198bcb0991SDimitry Andric
17208bcb0991SDimitry Andric Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
17215ffd83dbSDimitry Andric MVT::i64, RegSequenceArgs),
17225ffd83dbSDimitry Andric 0);
17235ffd83dbSDimitry Andric }
17248bcb0991SDimitry Andric }
17258bcb0991SDimitry Andric }
1726e8d8bef9SDimitry Andric }
17278bcb0991SDimitry Andric
17288bcb0991SDimitry Andric VAddr = Addr;
17295f757f3fSDimitry Andric Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
17308bcb0991SDimitry Andric return true;
17310b57cec5SDimitry Andric }
17320b57cec5SDimitry Andric
SelectFlatOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1733fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
1734fe6060f1SDimitry Andric SDValue &VAddr,
1735fe6060f1SDimitry Andric SDValue &Offset) const {
1736fe6060f1SDimitry Andric return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
1737fe6060f1SDimitry Andric }
1738fe6060f1SDimitry Andric
SelectGlobalOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1739fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
1740fe6060f1SDimitry Andric SDValue &VAddr,
1741fe6060f1SDimitry Andric SDValue &Offset) const {
1742fe6060f1SDimitry Andric return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
1743fe6060f1SDimitry Andric }
1744fe6060f1SDimitry Andric
SelectScratchOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1745fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
1746fe6060f1SDimitry Andric SDValue &VAddr,
1747fe6060f1SDimitry Andric SDValue &Offset) const {
1748fe6060f1SDimitry Andric return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1749fe6060f1SDimitry Andric SIInstrFlags::FlatScratch);
1750fe6060f1SDimitry Andric }
1751fe6060f1SDimitry Andric
1752e8d8bef9SDimitry Andric // If this matches zero_extend i32:x, return x
matchZExtFromI32(SDValue Op)1753e8d8bef9SDimitry Andric static SDValue matchZExtFromI32(SDValue Op) {
1754e8d8bef9SDimitry Andric if (Op.getOpcode() != ISD::ZERO_EXTEND)
1755e8d8bef9SDimitry Andric return SDValue();
1756e8d8bef9SDimitry Andric
1757e8d8bef9SDimitry Andric SDValue ExtSrc = Op.getOperand(0);
1758e8d8bef9SDimitry Andric return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
17590b57cec5SDimitry Andric }
17600b57cec5SDimitry Andric
1761e8d8bef9SDimitry Andric // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
SelectGlobalSAddr(SDNode * N,SDValue Addr,SDValue & SAddr,SDValue & VOffset,SDValue & Offset) const1762e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
17630b57cec5SDimitry Andric SDValue Addr,
1764e8d8bef9SDimitry Andric SDValue &SAddr,
1765e8d8bef9SDimitry Andric SDValue &VOffset,
1766e8d8bef9SDimitry Andric SDValue &Offset) const {
1767e8d8bef9SDimitry Andric int64_t ImmOffset = 0;
1768e8d8bef9SDimitry Andric
1769e8d8bef9SDimitry Andric // Match the immediate offset first, which canonically is moved as low as
1770e8d8bef9SDimitry Andric // possible.
1771e8d8bef9SDimitry Andric
1772e8d8bef9SDimitry Andric SDValue LHS, RHS;
1773e8d8bef9SDimitry Andric if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1774e8d8bef9SDimitry Andric int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1775e8d8bef9SDimitry Andric const SIInstrInfo *TII = Subtarget->getInstrInfo();
1776e8d8bef9SDimitry Andric
1777fe6060f1SDimitry Andric if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1778fe6060f1SDimitry Andric SIInstrFlags::FlatGlobal)) {
1779e8d8bef9SDimitry Andric Addr = LHS;
1780e8d8bef9SDimitry Andric ImmOffset = COffsetVal;
1781fe6060f1SDimitry Andric } else if (!LHS->isDivergent()) {
1782fe6060f1SDimitry Andric if (COffsetVal > 0) {
1783e8d8bef9SDimitry Andric SDLoc SL(N);
1784fe6060f1SDimitry Andric // saddr + large_offset -> saddr +
1785fe6060f1SDimitry Andric // (voffset = large_offset & ~MaxOffset) +
1786e8d8bef9SDimitry Andric // (large_offset & MaxOffset);
1787e8d8bef9SDimitry Andric int64_t SplitImmOffset, RemainderOffset;
1788fe6060f1SDimitry Andric std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1789fe6060f1SDimitry Andric COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1790e8d8bef9SDimitry Andric
1791e8d8bef9SDimitry Andric if (isUInt<32>(RemainderOffset)) {
1792e8d8bef9SDimitry Andric SDNode *VMov = CurDAG->getMachineNode(
1793e8d8bef9SDimitry Andric AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1794e8d8bef9SDimitry Andric CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1795e8d8bef9SDimitry Andric VOffset = SDValue(VMov, 0);
1796e8d8bef9SDimitry Andric SAddr = LHS;
17975f757f3fSDimitry Andric Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
1798e8d8bef9SDimitry Andric return true;
1799e8d8bef9SDimitry Andric }
1800e8d8bef9SDimitry Andric }
1801fe6060f1SDimitry Andric
1802fe6060f1SDimitry Andric // We are adding a 64 bit SGPR and a constant. If constant bus limit
1803fe6060f1SDimitry Andric // is 1 we would need to perform 1 or 2 extra moves for each half of
1804fe6060f1SDimitry Andric // the constant and it is better to do a scalar add and then issue a
1805fe6060f1SDimitry Andric // single VALU instruction to materialize zero. Otherwise it is less
1806fe6060f1SDimitry Andric // instructions to perform VALU adds with immediates or inline literals.
1807fe6060f1SDimitry Andric unsigned NumLiterals =
1808fe6060f1SDimitry Andric !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1809fe6060f1SDimitry Andric !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1810fe6060f1SDimitry Andric if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1811fe6060f1SDimitry Andric return false;
1812fe6060f1SDimitry Andric }
1813e8d8bef9SDimitry Andric }
1814e8d8bef9SDimitry Andric
1815e8d8bef9SDimitry Andric // Match the variable offset.
1816fe6060f1SDimitry Andric if (Addr.getOpcode() == ISD::ADD) {
1817e8d8bef9SDimitry Andric LHS = Addr.getOperand(0);
1818e8d8bef9SDimitry Andric RHS = Addr.getOperand(1);
1819e8d8bef9SDimitry Andric
1820e8d8bef9SDimitry Andric if (!LHS->isDivergent()) {
1821e8d8bef9SDimitry Andric // add (i64 sgpr), (zero_extend (i32 vgpr))
1822e8d8bef9SDimitry Andric if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1823e8d8bef9SDimitry Andric SAddr = LHS;
1824e8d8bef9SDimitry Andric VOffset = ZextRHS;
1825e8d8bef9SDimitry Andric }
1826e8d8bef9SDimitry Andric }
1827e8d8bef9SDimitry Andric
1828e8d8bef9SDimitry Andric if (!SAddr && !RHS->isDivergent()) {
1829e8d8bef9SDimitry Andric // add (zero_extend (i32 vgpr)), (i64 sgpr)
1830e8d8bef9SDimitry Andric if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1831e8d8bef9SDimitry Andric SAddr = RHS;
1832e8d8bef9SDimitry Andric VOffset = ZextLHS;
1833e8d8bef9SDimitry Andric }
1834e8d8bef9SDimitry Andric }
1835e8d8bef9SDimitry Andric
1836fe6060f1SDimitry Andric if (SAddr) {
18375f757f3fSDimitry Andric Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
1838fe6060f1SDimitry Andric return true;
1839fe6060f1SDimitry Andric }
1840fe6060f1SDimitry Andric }
1841fe6060f1SDimitry Andric
1842fe6060f1SDimitry Andric if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1843fe6060f1SDimitry Andric isa<ConstantSDNode>(Addr))
1844e8d8bef9SDimitry Andric return false;
1845e8d8bef9SDimitry Andric
1846fe6060f1SDimitry Andric // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1847fe6060f1SDimitry Andric // moves required to copy a 64-bit SGPR to VGPR.
1848fe6060f1SDimitry Andric SAddr = Addr;
1849fe6060f1SDimitry Andric SDNode *VMov =
1850fe6060f1SDimitry Andric CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1851fe6060f1SDimitry Andric CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1852fe6060f1SDimitry Andric VOffset = SDValue(VMov, 0);
18535f757f3fSDimitry Andric Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
1854e8d8bef9SDimitry Andric return true;
1855e8d8bef9SDimitry Andric }
1856e8d8bef9SDimitry Andric
SelectSAddrFI(SelectionDAG * CurDAG,SDValue SAddr)1857fe6060f1SDimitry Andric static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1858e8d8bef9SDimitry Andric if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1859e8d8bef9SDimitry Andric SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1860e8d8bef9SDimitry Andric } else if (SAddr.getOpcode() == ISD::ADD &&
1861e8d8bef9SDimitry Andric isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1862e8d8bef9SDimitry Andric // Materialize this into a scalar move for scalar address to avoid
1863e8d8bef9SDimitry Andric // readfirstlane.
1864e8d8bef9SDimitry Andric auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1865e8d8bef9SDimitry Andric SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1866e8d8bef9SDimitry Andric FI->getValueType(0));
1867fe6060f1SDimitry Andric SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1868e8d8bef9SDimitry Andric MVT::i32, TFI, SAddr.getOperand(1)),
1869e8d8bef9SDimitry Andric 0);
1870e8d8bef9SDimitry Andric }
1871e8d8bef9SDimitry Andric
1872fe6060f1SDimitry Andric return SAddr;
1873e8d8bef9SDimitry Andric }
1874e8d8bef9SDimitry Andric
1875fe6060f1SDimitry Andric // Match (32-bit SGPR base) + sext(imm offset)
SelectScratchSAddr(SDNode * Parent,SDValue Addr,SDValue & SAddr,SDValue & Offset) const1876fe6060f1SDimitry Andric bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
1877fe6060f1SDimitry Andric SDValue &SAddr,
1878fe6060f1SDimitry Andric SDValue &Offset) const {
1879fe6060f1SDimitry Andric if (Addr->isDivergent())
1880fe6060f1SDimitry Andric return false;
1881fe6060f1SDimitry Andric
1882fe6060f1SDimitry Andric SDLoc DL(Addr);
1883fe6060f1SDimitry Andric
1884fe6060f1SDimitry Andric int64_t COffsetVal = 0;
1885fe6060f1SDimitry Andric
18865f757f3fSDimitry Andric if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
1887fe6060f1SDimitry Andric COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1888fe6060f1SDimitry Andric SAddr = Addr.getOperand(0);
1889fe6060f1SDimitry Andric } else {
1890fe6060f1SDimitry Andric SAddr = Addr;
1891fe6060f1SDimitry Andric }
1892fe6060f1SDimitry Andric
1893fe6060f1SDimitry Andric SAddr = SelectSAddrFI(CurDAG, SAddr);
1894fe6060f1SDimitry Andric
1895fe6060f1SDimitry Andric const SIInstrInfo *TII = Subtarget->getInstrInfo();
1896fe6060f1SDimitry Andric
1897fe6060f1SDimitry Andric if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1898fe6060f1SDimitry Andric SIInstrFlags::FlatScratch)) {
1899fe6060f1SDimitry Andric int64_t SplitImmOffset, RemainderOffset;
1900fe6060f1SDimitry Andric std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1901fe6060f1SDimitry Andric COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1902fe6060f1SDimitry Andric
1903fe6060f1SDimitry Andric COffsetVal = SplitImmOffset;
1904fe6060f1SDimitry Andric
1905fe6060f1SDimitry Andric SDValue AddOffset =
1906fe6060f1SDimitry Andric SAddr.getOpcode() == ISD::TargetFrameIndex
1907fe6060f1SDimitry Andric ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1908fe6060f1SDimitry Andric : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
1909fe6060f1SDimitry Andric SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
1910fe6060f1SDimitry Andric SAddr, AddOffset),
1911fe6060f1SDimitry Andric 0);
1912fe6060f1SDimitry Andric }
1913fe6060f1SDimitry Andric
1914*6e516c87SDimitry Andric Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
1915e8d8bef9SDimitry Andric
1916e8d8bef9SDimitry Andric return true;
19170b57cec5SDimitry Andric }
19180b57cec5SDimitry Andric
191981ad6265SDimitry Andric // Check whether the flat scratch SVS swizzle bug affects this access.
checkFlatScratchSVSSwizzleBug(SDValue VAddr,SDValue SAddr,uint64_t ImmOffset) const192081ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
192181ad6265SDimitry Andric SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
192281ad6265SDimitry Andric if (!Subtarget->hasFlatScratchSVSSwizzleBug())
192381ad6265SDimitry Andric return false;
192481ad6265SDimitry Andric
192581ad6265SDimitry Andric // The bug affects the swizzling of SVS accesses if there is any carry out
192681ad6265SDimitry Andric // from the two low order bits (i.e. from bit 1 into bit 2) when adding
192781ad6265SDimitry Andric // voffset to (soffset + inst_offset).
192881ad6265SDimitry Andric KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
192981ad6265SDimitry Andric KnownBits SKnown = KnownBits::computeForAddSub(
19300fca6ea1SDimitry Andric /*Add=*/true, /*NSW=*/false, /*NUW=*/false,
19310fca6ea1SDimitry Andric CurDAG->computeKnownBits(SAddr),
193281ad6265SDimitry Andric KnownBits::makeConstant(APInt(32, ImmOffset)));
193381ad6265SDimitry Andric uint64_t VMax = VKnown.getMaxValue().getZExtValue();
193481ad6265SDimitry Andric uint64_t SMax = SKnown.getMaxValue().getZExtValue();
193581ad6265SDimitry Andric return (VMax & 3) + (SMax & 3) >= 4;
193681ad6265SDimitry Andric }
193781ad6265SDimitry Andric
SelectScratchSVAddr(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & SAddr,SDValue & Offset) const193881ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
193981ad6265SDimitry Andric SDValue &VAddr, SDValue &SAddr,
194081ad6265SDimitry Andric SDValue &Offset) const {
194181ad6265SDimitry Andric int64_t ImmOffset = 0;
194281ad6265SDimitry Andric
194381ad6265SDimitry Andric SDValue LHS, RHS;
19445f757f3fSDimitry Andric SDValue OrigAddr = Addr;
194581ad6265SDimitry Andric if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
194681ad6265SDimitry Andric int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
194781ad6265SDimitry Andric const SIInstrInfo *TII = Subtarget->getInstrInfo();
194881ad6265SDimitry Andric
194981ad6265SDimitry Andric if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
195081ad6265SDimitry Andric Addr = LHS;
195181ad6265SDimitry Andric ImmOffset = COffsetVal;
195281ad6265SDimitry Andric } else if (!LHS->isDivergent() && COffsetVal > 0) {
195381ad6265SDimitry Andric SDLoc SL(N);
195481ad6265SDimitry Andric // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
195581ad6265SDimitry Andric // (large_offset & MaxOffset);
195681ad6265SDimitry Andric int64_t SplitImmOffset, RemainderOffset;
195781ad6265SDimitry Andric std::tie(SplitImmOffset, RemainderOffset)
195881ad6265SDimitry Andric = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
195981ad6265SDimitry Andric
196081ad6265SDimitry Andric if (isUInt<32>(RemainderOffset)) {
196181ad6265SDimitry Andric SDNode *VMov = CurDAG->getMachineNode(
196281ad6265SDimitry Andric AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
196381ad6265SDimitry Andric CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
196481ad6265SDimitry Andric VAddr = SDValue(VMov, 0);
196581ad6265SDimitry Andric SAddr = LHS;
19665f757f3fSDimitry Andric if (!isFlatScratchBaseLegal(Addr))
196706c3fb27SDimitry Andric return false;
196881ad6265SDimitry Andric if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
196981ad6265SDimitry Andric return false;
1970*6e516c87SDimitry Andric Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
197181ad6265SDimitry Andric return true;
197281ad6265SDimitry Andric }
197381ad6265SDimitry Andric }
197481ad6265SDimitry Andric }
197581ad6265SDimitry Andric
197681ad6265SDimitry Andric if (Addr.getOpcode() != ISD::ADD)
197781ad6265SDimitry Andric return false;
197881ad6265SDimitry Andric
197981ad6265SDimitry Andric LHS = Addr.getOperand(0);
198081ad6265SDimitry Andric RHS = Addr.getOperand(1);
198181ad6265SDimitry Andric
198281ad6265SDimitry Andric if (!LHS->isDivergent() && RHS->isDivergent()) {
198381ad6265SDimitry Andric SAddr = LHS;
198481ad6265SDimitry Andric VAddr = RHS;
198581ad6265SDimitry Andric } else if (!RHS->isDivergent() && LHS->isDivergent()) {
198681ad6265SDimitry Andric SAddr = RHS;
198781ad6265SDimitry Andric VAddr = LHS;
198881ad6265SDimitry Andric } else {
198981ad6265SDimitry Andric return false;
199081ad6265SDimitry Andric }
199181ad6265SDimitry Andric
19925f757f3fSDimitry Andric if (OrigAddr != Addr) {
19935f757f3fSDimitry Andric if (!isFlatScratchBaseLegalSVImm(OrigAddr))
199406c3fb27SDimitry Andric return false;
19955f757f3fSDimitry Andric } else {
19965f757f3fSDimitry Andric if (!isFlatScratchBaseLegalSV(OrigAddr))
19975f757f3fSDimitry Andric return false;
19985f757f3fSDimitry Andric }
199906c3fb27SDimitry Andric
200081ad6265SDimitry Andric if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
200181ad6265SDimitry Andric return false;
200281ad6265SDimitry Andric SAddr = SelectSAddrFI(CurDAG, SAddr);
2003*6e516c87SDimitry Andric Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
200481ad6265SDimitry Andric return true;
200581ad6265SDimitry Andric }
200681ad6265SDimitry Andric
20070fca6ea1SDimitry Andric // For unbuffered smem loads, it is illegal for the Immediate Offset to be
20080fca6ea1SDimitry Andric // negative if the resulting (Offset + (M0 or SOffset or zero) is negative.
20090fca6ea1SDimitry Andric // Handle the case where the Immediate Offset + SOffset is negative.
isSOffsetLegalWithImmOffset(SDValue * SOffset,bool Imm32Only,bool IsBuffer,int64_t ImmOffset) const20100fca6ea1SDimitry Andric bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
20110fca6ea1SDimitry Andric bool Imm32Only,
20120fca6ea1SDimitry Andric bool IsBuffer,
20130fca6ea1SDimitry Andric int64_t ImmOffset) const {
20140fca6ea1SDimitry Andric if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
20150fca6ea1SDimitry Andric AMDGPU::hasSMRDSignedImmOffset(*Subtarget)) {
20160fca6ea1SDimitry Andric KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
20170fca6ea1SDimitry Andric if (ImmOffset + SKnown.getMinValue().getSExtValue() < 0)
20180fca6ea1SDimitry Andric return false;
20190fca6ea1SDimitry Andric }
20200fca6ea1SDimitry Andric
20210fca6ea1SDimitry Andric return true;
20220fca6ea1SDimitry Andric }
20230fca6ea1SDimitry Andric
2024bdd1243dSDimitry Andric // Match an immediate (if Offset is not null) or an SGPR (if SOffset is
2025bdd1243dSDimitry Andric // not null) offset. If Imm32Only is true, match only 32-bit immediate
2026bdd1243dSDimitry Andric // offsets available on CI.
SelectSMRDOffset(SDValue ByteOffsetNode,SDValue * SOffset,SDValue * Offset,bool Imm32Only,bool IsBuffer,bool HasSOffset,int64_t ImmOffset) const2027bdd1243dSDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
2028fcaf7f86SDimitry Andric SDValue *SOffset, SDValue *Offset,
20290fca6ea1SDimitry Andric bool Imm32Only, bool IsBuffer,
20300fca6ea1SDimitry Andric bool HasSOffset,
20310fca6ea1SDimitry Andric int64_t ImmOffset) const {
2032bdd1243dSDimitry Andric assert((!SOffset || !Offset) &&
2033bdd1243dSDimitry Andric "Cannot match both soffset and offset at the same time!");
2034bdd1243dSDimitry Andric
20350b57cec5SDimitry Andric ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
20365ffd83dbSDimitry Andric if (!C) {
2037fcaf7f86SDimitry Andric if (!SOffset)
2038753f127fSDimitry Andric return false;
20390fca6ea1SDimitry Andric
20405ffd83dbSDimitry Andric if (ByteOffsetNode.getValueType().isScalarInteger() &&
20415ffd83dbSDimitry Andric ByteOffsetNode.getValueType().getSizeInBits() == 32) {
2042fcaf7f86SDimitry Andric *SOffset = ByteOffsetNode;
20430fca6ea1SDimitry Andric return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
20440fca6ea1SDimitry Andric ImmOffset);
20455ffd83dbSDimitry Andric }
20465ffd83dbSDimitry Andric if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
20475ffd83dbSDimitry Andric if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
2048fcaf7f86SDimitry Andric *SOffset = ByteOffsetNode.getOperand(0);
20490fca6ea1SDimitry Andric return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
20500fca6ea1SDimitry Andric ImmOffset);
20515ffd83dbSDimitry Andric }
20525ffd83dbSDimitry Andric }
20530b57cec5SDimitry Andric return false;
20545ffd83dbSDimitry Andric }
20550b57cec5SDimitry Andric
20560b57cec5SDimitry Andric SDLoc SL(ByteOffsetNode);
2057bdd1243dSDimitry Andric
2058bdd1243dSDimitry Andric // GFX9 and GFX10 have signed byte immediate offsets. The immediate
2059bdd1243dSDimitry Andric // offset for S_BUFFER instructions is unsigned.
2060bdd1243dSDimitry Andric int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
20610fca6ea1SDimitry Andric std::optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(
20620fca6ea1SDimitry Andric *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2063fcaf7f86SDimitry Andric if (EncodedOffset && Offset && !Imm32Only) {
2064fcaf7f86SDimitry Andric *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
20650b57cec5SDimitry Andric return true;
20660b57cec5SDimitry Andric }
20670b57cec5SDimitry Andric
20685ffd83dbSDimitry Andric // SGPR and literal offsets are unsigned.
20695ffd83dbSDimitry Andric if (ByteOffset < 0)
20700b57cec5SDimitry Andric return false;
20710b57cec5SDimitry Andric
20725ffd83dbSDimitry Andric EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
2073fcaf7f86SDimitry Andric if (EncodedOffset && Offset && Imm32Only) {
2074fcaf7f86SDimitry Andric *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
20755ffd83dbSDimitry Andric return true;
20760b57cec5SDimitry Andric }
20775ffd83dbSDimitry Andric
20785ffd83dbSDimitry Andric if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
20795ffd83dbSDimitry Andric return false;
20805ffd83dbSDimitry Andric
2081fcaf7f86SDimitry Andric if (SOffset) {
20825ffd83dbSDimitry Andric SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2083fcaf7f86SDimitry Andric *SOffset = SDValue(
20845ffd83dbSDimitry Andric CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
20850b57cec5SDimitry Andric return true;
20860b57cec5SDimitry Andric }
20870b57cec5SDimitry Andric
2088753f127fSDimitry Andric return false;
2089753f127fSDimitry Andric }
2090753f127fSDimitry Andric
Expand32BitAddress(SDValue Addr) const20910b57cec5SDimitry Andric SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
20920b57cec5SDimitry Andric if (Addr.getValueType() != MVT::i32)
20930b57cec5SDimitry Andric return Addr;
20940b57cec5SDimitry Andric
20950b57cec5SDimitry Andric // Zero-extend a 32-bit address.
20960b57cec5SDimitry Andric SDLoc SL(Addr);
20970b57cec5SDimitry Andric
20980b57cec5SDimitry Andric const MachineFunction &MF = CurDAG->getMachineFunction();
20990b57cec5SDimitry Andric const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
21000b57cec5SDimitry Andric unsigned AddrHiVal = Info->get32BitAddressHighBits();
21010b57cec5SDimitry Andric SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
21020b57cec5SDimitry Andric
21030b57cec5SDimitry Andric const SDValue Ops[] = {
21040b57cec5SDimitry Andric CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
21050b57cec5SDimitry Andric Addr,
21060b57cec5SDimitry Andric CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
21070b57cec5SDimitry Andric SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
21080b57cec5SDimitry Andric 0),
21090b57cec5SDimitry Andric CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
21100b57cec5SDimitry Andric };
21110b57cec5SDimitry Andric
21120b57cec5SDimitry Andric return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
21130b57cec5SDimitry Andric Ops), 0);
21140b57cec5SDimitry Andric }
21150b57cec5SDimitry Andric
2116bdd1243dSDimitry Andric // Match a base and an immediate (if Offset is not null) or an SGPR (if
2117bdd1243dSDimitry Andric // SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
2118bdd1243dSDimitry Andric // true, match only 32-bit immediate offsets available on CI.
SelectSMRDBaseOffset(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only,bool IsBuffer,bool HasSOffset,int64_t ImmOffset) const2119fcaf7f86SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
2120fcaf7f86SDimitry Andric SDValue *SOffset, SDValue *Offset,
21210fca6ea1SDimitry Andric bool Imm32Only, bool IsBuffer,
21220fca6ea1SDimitry Andric bool HasSOffset,
21230fca6ea1SDimitry Andric int64_t ImmOffset) const {
2124fcaf7f86SDimitry Andric if (SOffset && Offset) {
2125bdd1243dSDimitry Andric assert(!Imm32Only && !IsBuffer);
2126fcaf7f86SDimitry Andric SDValue B;
21270fca6ea1SDimitry Andric
21280fca6ea1SDimitry Andric if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true))
21290fca6ea1SDimitry Andric return false;
21300fca6ea1SDimitry Andric
21310fca6ea1SDimitry Andric int64_t ImmOff = 0;
21320fca6ea1SDimitry Andric if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset))
21330fca6ea1SDimitry Andric ImmOff = C->getSExtValue();
21340fca6ea1SDimitry Andric
21350fca6ea1SDimitry Andric return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true,
21360fca6ea1SDimitry Andric ImmOff);
2137fcaf7f86SDimitry Andric }
2138fcaf7f86SDimitry Andric
21390b57cec5SDimitry Andric // A 32-bit (address + offset) should not cause unsigned 32-bit integer
21400b57cec5SDimitry Andric // wraparound, because s_load instructions perform the addition in 64 bits.
2141bdd1243dSDimitry Andric if (Addr.getValueType() == MVT::i32 && Addr.getOpcode() == ISD::ADD &&
2142bdd1243dSDimitry Andric !Addr->getFlags().hasNoUnsignedWrap())
2143bdd1243dSDimitry Andric return false;
2144bdd1243dSDimitry Andric
21455ffd83dbSDimitry Andric SDValue N0, N1;
21465ffd83dbSDimitry Andric // Extract the base and offset if possible.
2147bdd1243dSDimitry Andric if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
21485ffd83dbSDimitry Andric N0 = Addr.getOperand(0);
21495ffd83dbSDimitry Andric N1 = Addr.getOperand(1);
21505ffd83dbSDimitry Andric } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
21515ffd83dbSDimitry Andric assert(N0 && N1 && isa<ConstantSDNode>(N1));
21525ffd83dbSDimitry Andric }
2153bdd1243dSDimitry Andric if (!N0 || !N1)
2154bdd1243dSDimitry Andric return false;
21550fca6ea1SDimitry Andric
21560fca6ea1SDimitry Andric if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
21570fca6ea1SDimitry Andric ImmOffset)) {
2158fcaf7f86SDimitry Andric SBase = N0;
2159fcaf7f86SDimitry Andric return true;
2160fcaf7f86SDimitry Andric }
21610fca6ea1SDimitry Andric if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
21620fca6ea1SDimitry Andric ImmOffset)) {
2163fcaf7f86SDimitry Andric SBase = N1;
21640b57cec5SDimitry Andric return true;
21650b57cec5SDimitry Andric }
2166753f127fSDimitry Andric return false;
2167fcaf7f86SDimitry Andric }
2168fcaf7f86SDimitry Andric
SelectSMRD(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only) const2169fcaf7f86SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2170fcaf7f86SDimitry Andric SDValue *SOffset, SDValue *Offset,
2171fcaf7f86SDimitry Andric bool Imm32Only) const {
2172bdd1243dSDimitry Andric if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
2173fcaf7f86SDimitry Andric SBase = Expand32BitAddress(SBase);
21740b57cec5SDimitry Andric return true;
21750b57cec5SDimitry Andric }
21760b57cec5SDimitry Andric
2177bdd1243dSDimitry Andric if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
2178bdd1243dSDimitry Andric SBase = Expand32BitAddress(Addr);
2179bdd1243dSDimitry Andric *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2180bdd1243dSDimitry Andric return true;
2181bdd1243dSDimitry Andric }
2182bdd1243dSDimitry Andric
2183bdd1243dSDimitry Andric return false;
2184bdd1243dSDimitry Andric }
2185bdd1243dSDimitry Andric
SelectSMRDImm(SDValue Addr,SDValue & SBase,SDValue & Offset) const21860b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
21870b57cec5SDimitry Andric SDValue &Offset) const {
2188fcaf7f86SDimitry Andric return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
21890b57cec5SDimitry Andric }
21900b57cec5SDimitry Andric
SelectSMRDImm32(SDValue Addr,SDValue & SBase,SDValue & Offset) const21910b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
21920b57cec5SDimitry Andric SDValue &Offset) const {
21935ffd83dbSDimitry Andric assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2194fcaf7f86SDimitry Andric return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
2195fcaf7f86SDimitry Andric /* Imm32Only */ true);
21960b57cec5SDimitry Andric }
21970b57cec5SDimitry Andric
SelectSMRDSgpr(SDValue Addr,SDValue & SBase,SDValue & SOffset) const21980b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2199fcaf7f86SDimitry Andric SDValue &SOffset) const {
2200fcaf7f86SDimitry Andric return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
2201fcaf7f86SDimitry Andric }
2202fcaf7f86SDimitry Andric
SelectSMRDSgprImm(SDValue Addr,SDValue & SBase,SDValue & SOffset,SDValue & Offset) const2203fcaf7f86SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
2204fcaf7f86SDimitry Andric SDValue &SOffset,
22050b57cec5SDimitry Andric SDValue &Offset) const {
2206fcaf7f86SDimitry Andric return SelectSMRD(Addr, SBase, &SOffset, &Offset);
22070b57cec5SDimitry Andric }
22080b57cec5SDimitry Andric
SelectSMRDBufferImm(SDValue N,SDValue & Offset) const2209bdd1243dSDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
2210bdd1243dSDimitry Andric return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2211bdd1243dSDimitry Andric /* Imm32Only */ false, /* IsBuffer */ true);
22125ffd83dbSDimitry Andric }
22135ffd83dbSDimitry Andric
SelectSMRDBufferImm32(SDValue N,SDValue & Offset) const2214bdd1243dSDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
22150b57cec5SDimitry Andric SDValue &Offset) const {
22165ffd83dbSDimitry Andric assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2217bdd1243dSDimitry Andric return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2218bdd1243dSDimitry Andric /* Imm32Only */ true, /* IsBuffer */ true);
22195ffd83dbSDimitry Andric }
22200b57cec5SDimitry Andric
SelectSMRDBufferSgprImm(SDValue N,SDValue & SOffset,SDValue & Offset) const2221bdd1243dSDimitry Andric bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
2222bdd1243dSDimitry Andric SDValue &Offset) const {
2223bdd1243dSDimitry Andric // Match the (soffset + offset) pair as a 32-bit register base and
2224bdd1243dSDimitry Andric // an immediate offset.
2225bdd1243dSDimitry Andric return N.getValueType() == MVT::i32 &&
2226bdd1243dSDimitry Andric SelectSMRDBaseOffset(N, /* SBase */ SOffset, /* SOffset*/ nullptr,
2227bdd1243dSDimitry Andric &Offset, /* Imm32Only */ false,
2228bdd1243dSDimitry Andric /* IsBuffer */ true);
22290b57cec5SDimitry Andric }
22300b57cec5SDimitry Andric
SelectMOVRELOffset(SDValue Index,SDValue & Base,SDValue & Offset) const22310b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
22320b57cec5SDimitry Andric SDValue &Base,
22330b57cec5SDimitry Andric SDValue &Offset) const {
22340b57cec5SDimitry Andric SDLoc DL(Index);
22350b57cec5SDimitry Andric
22360b57cec5SDimitry Andric if (CurDAG->isBaseWithConstantOffset(Index)) {
22370b57cec5SDimitry Andric SDValue N0 = Index.getOperand(0);
22380b57cec5SDimitry Andric SDValue N1 = Index.getOperand(1);
22390b57cec5SDimitry Andric ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
22400b57cec5SDimitry Andric
22410b57cec5SDimitry Andric // (add n0, c0)
22420b57cec5SDimitry Andric // Don't peel off the offset (c0) if doing so could possibly lead
22430b57cec5SDimitry Andric // the base (n0) to be negative.
22445ffd83dbSDimitry Andric // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
22455ffd83dbSDimitry Andric if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
22465ffd83dbSDimitry Andric (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
22470b57cec5SDimitry Andric Base = N0;
22480b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
22490b57cec5SDimitry Andric return true;
22500b57cec5SDimitry Andric }
22510b57cec5SDimitry Andric }
22520b57cec5SDimitry Andric
22530b57cec5SDimitry Andric if (isa<ConstantSDNode>(Index))
22540b57cec5SDimitry Andric return false;
22550b57cec5SDimitry Andric
22560b57cec5SDimitry Andric Base = Index;
22570b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
22580b57cec5SDimitry Andric return true;
22590b57cec5SDimitry Andric }
22600b57cec5SDimitry Andric
getBFE32(bool IsSigned,const SDLoc & DL,SDValue Val,uint32_t Offset,uint32_t Width)2261349cc55cSDimitry Andric SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
22620b57cec5SDimitry Andric SDValue Val, uint32_t Offset,
22630b57cec5SDimitry Andric uint32_t Width) {
2264349cc55cSDimitry Andric if (Val->isDivergent()) {
2265349cc55cSDimitry Andric unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2266349cc55cSDimitry Andric SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
2267349cc55cSDimitry Andric SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
2268349cc55cSDimitry Andric
2269349cc55cSDimitry Andric return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2270349cc55cSDimitry Andric }
2271349cc55cSDimitry Andric unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
22720b57cec5SDimitry Andric // Transformation function, pack the offset and width of a BFE into
22730b57cec5SDimitry Andric // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
22740b57cec5SDimitry Andric // source, bits [5:0] contain the offset and bits [22:16] the width.
22750b57cec5SDimitry Andric uint32_t PackedVal = Offset | (Width << 16);
22760b57cec5SDimitry Andric SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
22770b57cec5SDimitry Andric
22780b57cec5SDimitry Andric return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
22790b57cec5SDimitry Andric }
22800b57cec5SDimitry Andric
SelectS_BFEFromShifts(SDNode * N)22810b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
22820b57cec5SDimitry Andric // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
22830b57cec5SDimitry Andric // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
22840b57cec5SDimitry Andric // Predicate: 0 < b <= c < 32
22850b57cec5SDimitry Andric
22860b57cec5SDimitry Andric const SDValue &Shl = N->getOperand(0);
22870b57cec5SDimitry Andric ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
22880b57cec5SDimitry Andric ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
22890b57cec5SDimitry Andric
22900b57cec5SDimitry Andric if (B && C) {
22910b57cec5SDimitry Andric uint32_t BVal = B->getZExtValue();
22920b57cec5SDimitry Andric uint32_t CVal = C->getZExtValue();
22930b57cec5SDimitry Andric
22940b57cec5SDimitry Andric if (0 < BVal && BVal <= CVal && CVal < 32) {
22950b57cec5SDimitry Andric bool Signed = N->getOpcode() == ISD::SRA;
2296349cc55cSDimitry Andric ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
22970b57cec5SDimitry Andric 32 - CVal));
22980b57cec5SDimitry Andric return;
22990b57cec5SDimitry Andric }
23000b57cec5SDimitry Andric }
23010b57cec5SDimitry Andric SelectCode(N);
23020b57cec5SDimitry Andric }
23030b57cec5SDimitry Andric
SelectS_BFE(SDNode * N)23040b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
23050b57cec5SDimitry Andric switch (N->getOpcode()) {
23060b57cec5SDimitry Andric case ISD::AND:
23070b57cec5SDimitry Andric if (N->getOperand(0).getOpcode() == ISD::SRL) {
23080b57cec5SDimitry Andric // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
23090b57cec5SDimitry Andric // Predicate: isMask(mask)
23100b57cec5SDimitry Andric const SDValue &Srl = N->getOperand(0);
23110b57cec5SDimitry Andric ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
23120b57cec5SDimitry Andric ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
23130b57cec5SDimitry Andric
23140b57cec5SDimitry Andric if (Shift && Mask) {
23150b57cec5SDimitry Andric uint32_t ShiftVal = Shift->getZExtValue();
23160b57cec5SDimitry Andric uint32_t MaskVal = Mask->getZExtValue();
23170b57cec5SDimitry Andric
23180b57cec5SDimitry Andric if (isMask_32(MaskVal)) {
2319bdd1243dSDimitry Andric uint32_t WidthVal = llvm::popcount(MaskVal);
2320349cc55cSDimitry Andric ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
2321349cc55cSDimitry Andric WidthVal));
23220b57cec5SDimitry Andric return;
23230b57cec5SDimitry Andric }
23240b57cec5SDimitry Andric }
23250b57cec5SDimitry Andric }
23260b57cec5SDimitry Andric break;
23270b57cec5SDimitry Andric case ISD::SRL:
23280b57cec5SDimitry Andric if (N->getOperand(0).getOpcode() == ISD::AND) {
23290b57cec5SDimitry Andric // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
23300b57cec5SDimitry Andric // Predicate: isMask(mask >> b)
23310b57cec5SDimitry Andric const SDValue &And = N->getOperand(0);
23320b57cec5SDimitry Andric ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
23330b57cec5SDimitry Andric ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
23340b57cec5SDimitry Andric
23350b57cec5SDimitry Andric if (Shift && Mask) {
23360b57cec5SDimitry Andric uint32_t ShiftVal = Shift->getZExtValue();
23370b57cec5SDimitry Andric uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
23380b57cec5SDimitry Andric
23390b57cec5SDimitry Andric if (isMask_32(MaskVal)) {
2340bdd1243dSDimitry Andric uint32_t WidthVal = llvm::popcount(MaskVal);
2341349cc55cSDimitry Andric ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
2342349cc55cSDimitry Andric WidthVal));
23430b57cec5SDimitry Andric return;
23440b57cec5SDimitry Andric }
23450b57cec5SDimitry Andric }
23460b57cec5SDimitry Andric } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
23470b57cec5SDimitry Andric SelectS_BFEFromShifts(N);
23480b57cec5SDimitry Andric return;
23490b57cec5SDimitry Andric }
23500b57cec5SDimitry Andric break;
23510b57cec5SDimitry Andric case ISD::SRA:
23520b57cec5SDimitry Andric if (N->getOperand(0).getOpcode() == ISD::SHL) {
23530b57cec5SDimitry Andric SelectS_BFEFromShifts(N);
23540b57cec5SDimitry Andric return;
23550b57cec5SDimitry Andric }
23560b57cec5SDimitry Andric break;
23570b57cec5SDimitry Andric
23580b57cec5SDimitry Andric case ISD::SIGN_EXTEND_INREG: {
23590b57cec5SDimitry Andric // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
23600b57cec5SDimitry Andric SDValue Src = N->getOperand(0);
23610b57cec5SDimitry Andric if (Src.getOpcode() != ISD::SRL)
23620b57cec5SDimitry Andric break;
23630b57cec5SDimitry Andric
23640b57cec5SDimitry Andric const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
23650b57cec5SDimitry Andric if (!Amt)
23660b57cec5SDimitry Andric break;
23670b57cec5SDimitry Andric
23680b57cec5SDimitry Andric unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2369349cc55cSDimitry Andric ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
23700b57cec5SDimitry Andric Amt->getZExtValue(), Width));
23710b57cec5SDimitry Andric return;
23720b57cec5SDimitry Andric }
23730b57cec5SDimitry Andric }
23740b57cec5SDimitry Andric
23750b57cec5SDimitry Andric SelectCode(N);
23760b57cec5SDimitry Andric }
23770b57cec5SDimitry Andric
isCBranchSCC(const SDNode * N) const23780b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
23790b57cec5SDimitry Andric assert(N->getOpcode() == ISD::BRCOND);
23800b57cec5SDimitry Andric if (!N->hasOneUse())
23810b57cec5SDimitry Andric return false;
23820b57cec5SDimitry Andric
23830b57cec5SDimitry Andric SDValue Cond = N->getOperand(1);
23840b57cec5SDimitry Andric if (Cond.getOpcode() == ISD::CopyToReg)
23850b57cec5SDimitry Andric Cond = Cond.getOperand(2);
23860b57cec5SDimitry Andric
23870b57cec5SDimitry Andric if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
23880b57cec5SDimitry Andric return false;
23890b57cec5SDimitry Andric
23900b57cec5SDimitry Andric MVT VT = Cond.getOperand(0).getSimpleValueType();
23910b57cec5SDimitry Andric if (VT == MVT::i32)
23920b57cec5SDimitry Andric return true;
23930b57cec5SDimitry Andric
23940b57cec5SDimitry Andric if (VT == MVT::i64) {
23950b57cec5SDimitry Andric auto ST = static_cast<const GCNSubtarget *>(Subtarget);
23960b57cec5SDimitry Andric
23970b57cec5SDimitry Andric ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
23980b57cec5SDimitry Andric return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
23990b57cec5SDimitry Andric }
24000b57cec5SDimitry Andric
24010b57cec5SDimitry Andric return false;
24020b57cec5SDimitry Andric }
24030b57cec5SDimitry Andric
combineBallotPattern(SDValue VCMP,bool & Negate)24045f757f3fSDimitry Andric static SDValue combineBallotPattern(SDValue VCMP, bool &Negate) {
24055f757f3fSDimitry Andric assert(VCMP->getOpcode() == AMDGPUISD::SETCC);
24065f757f3fSDimitry Andric // Special case for amdgcn.ballot:
24075f757f3fSDimitry Andric // %Cond = i1 (and/or combination of i1 ISD::SETCCs)
24085f757f3fSDimitry Andric // %VCMP = i(WaveSize) AMDGPUISD::SETCC (ext %Cond), 0, setne/seteq
24095f757f3fSDimitry Andric // =>
24105f757f3fSDimitry Andric // Use i1 %Cond value instead of i(WaveSize) %VCMP.
24115f757f3fSDimitry Andric // This is possible because divergent ISD::SETCC is selected as V_CMP and
24125f757f3fSDimitry Andric // Cond becomes a i(WaveSize) full mask value.
24135f757f3fSDimitry Andric // Note that ballot doesn't use SETEQ condition but its easy to support it
24145f757f3fSDimitry Andric // here for completeness, so in this case Negate is set true on return.
24155f757f3fSDimitry Andric auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
24165f757f3fSDimitry Andric if ((VCMP_CC == ISD::SETEQ || VCMP_CC == ISD::SETNE) &&
24175f757f3fSDimitry Andric isNullConstant(VCMP.getOperand(1))) {
24185f757f3fSDimitry Andric
24195f757f3fSDimitry Andric auto Cond = VCMP.getOperand(0);
24205f757f3fSDimitry Andric if (ISD::isExtOpcode(Cond->getOpcode())) // Skip extension.
24215f757f3fSDimitry Andric Cond = Cond.getOperand(0);
24225f757f3fSDimitry Andric
24235f757f3fSDimitry Andric if (isBoolSGPR(Cond)) {
24245f757f3fSDimitry Andric Negate = VCMP_CC == ISD::SETEQ;
24255f757f3fSDimitry Andric return Cond;
24265f757f3fSDimitry Andric }
24275f757f3fSDimitry Andric }
24285f757f3fSDimitry Andric return SDValue();
24295f757f3fSDimitry Andric }
24305f757f3fSDimitry Andric
SelectBRCOND(SDNode * N)24310b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
24320b57cec5SDimitry Andric SDValue Cond = N->getOperand(1);
24330b57cec5SDimitry Andric
24340b57cec5SDimitry Andric if (Cond.isUndef()) {
24350b57cec5SDimitry Andric CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
24360b57cec5SDimitry Andric N->getOperand(2), N->getOperand(0));
24370b57cec5SDimitry Andric return;
24380b57cec5SDimitry Andric }
24390b57cec5SDimitry Andric
24400b57cec5SDimitry Andric const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
24410b57cec5SDimitry Andric const SIRegisterInfo *TRI = ST->getRegisterInfo();
24420b57cec5SDimitry Andric
24430b57cec5SDimitry Andric bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
24445f757f3fSDimitry Andric bool AndExec = !UseSCCBr;
24455f757f3fSDimitry Andric bool Negate = false;
24465f757f3fSDimitry Andric
24475f757f3fSDimitry Andric if (Cond.getOpcode() == ISD::SETCC &&
24485f757f3fSDimitry Andric Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
24495f757f3fSDimitry Andric SDValue VCMP = Cond->getOperand(0);
24505f757f3fSDimitry Andric auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
24515f757f3fSDimitry Andric if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
24525f757f3fSDimitry Andric isNullConstant(Cond->getOperand(1)) &&
24530fca6ea1SDimitry Andric // We may encounter ballot.i64 in wave32 mode on -O0.
24545f757f3fSDimitry Andric VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
24555f757f3fSDimitry Andric // %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
24565f757f3fSDimitry Andric // %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
24575f757f3fSDimitry Andric // BRCOND i1 %C, %BB
24585f757f3fSDimitry Andric // =>
24595f757f3fSDimitry Andric // %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
24605f757f3fSDimitry Andric // VCC = COPY i(WaveSize) %VCMP
24615f757f3fSDimitry Andric // S_CBRANCH_VCCNZ/VCCZ %BB
24625f757f3fSDimitry Andric Negate = CC == ISD::SETEQ;
24635f757f3fSDimitry Andric bool NegatedBallot = false;
24645f757f3fSDimitry Andric if (auto BallotCond = combineBallotPattern(VCMP, NegatedBallot)) {
24655f757f3fSDimitry Andric Cond = BallotCond;
24665f757f3fSDimitry Andric UseSCCBr = !BallotCond->isDivergent();
24675f757f3fSDimitry Andric Negate = Negate ^ NegatedBallot;
24685f757f3fSDimitry Andric } else {
24695f757f3fSDimitry Andric // TODO: don't use SCC here assuming that AMDGPUISD::SETCC is always
24705f757f3fSDimitry Andric // selected as V_CMP, but this may change for uniform condition.
24715f757f3fSDimitry Andric Cond = VCMP;
24725f757f3fSDimitry Andric UseSCCBr = false;
24735f757f3fSDimitry Andric }
24745f757f3fSDimitry Andric }
24755f757f3fSDimitry Andric // Cond is either V_CMP resulted from AMDGPUISD::SETCC or a combination of
24765f757f3fSDimitry Andric // V_CMPs resulted from ballot or ballot has uniform condition and SCC is
24775f757f3fSDimitry Andric // used.
24785f757f3fSDimitry Andric AndExec = false;
24795f757f3fSDimitry Andric }
24805f757f3fSDimitry Andric
24815f757f3fSDimitry Andric unsigned BrOp =
24825f757f3fSDimitry Andric UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
24835f757f3fSDimitry Andric : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
24845ffd83dbSDimitry Andric Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
24850b57cec5SDimitry Andric SDLoc SL(N);
24860b57cec5SDimitry Andric
24875f757f3fSDimitry Andric if (AndExec) {
24880b57cec5SDimitry Andric // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
24890b57cec5SDimitry Andric // analyzed what generates the vcc value, so we do not know whether vcc
24900b57cec5SDimitry Andric // bits for disabled lanes are 0. Thus we need to mask out bits for
24910b57cec5SDimitry Andric // disabled lanes.
24920b57cec5SDimitry Andric //
24930b57cec5SDimitry Andric // For the case that we select S_CBRANCH_SCC1 and it gets
24940b57cec5SDimitry Andric // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
24950b57cec5SDimitry Andric // SIInstrInfo::moveToVALU which inserts the S_AND).
24960b57cec5SDimitry Andric //
24970b57cec5SDimitry Andric // We could add an analysis of what generates the vcc value here and omit
24980b57cec5SDimitry Andric // the S_AND when is unnecessary. But it would be better to add a separate
24990b57cec5SDimitry Andric // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
25000b57cec5SDimitry Andric // catches both cases.
25010b57cec5SDimitry Andric Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
25020b57cec5SDimitry Andric : AMDGPU::S_AND_B64,
25030b57cec5SDimitry Andric SL, MVT::i1,
25040b57cec5SDimitry Andric CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
25050b57cec5SDimitry Andric : AMDGPU::EXEC,
25060b57cec5SDimitry Andric MVT::i1),
25070b57cec5SDimitry Andric Cond),
25080b57cec5SDimitry Andric 0);
25090b57cec5SDimitry Andric }
25100b57cec5SDimitry Andric
25110b57cec5SDimitry Andric SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
25120b57cec5SDimitry Andric CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
25130b57cec5SDimitry Andric N->getOperand(2), // Basic Block
25140b57cec5SDimitry Andric VCC.getValue(0));
25150b57cec5SDimitry Andric }
25160b57cec5SDimitry Andric
SelectFP_EXTEND(SDNode * N)25175f757f3fSDimitry Andric void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
25185f757f3fSDimitry Andric if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&
25195f757f3fSDimitry Andric !N->isDivergent()) {
25205f757f3fSDimitry Andric SDValue Src = N->getOperand(0);
25215f757f3fSDimitry Andric if (Src.getValueType() == MVT::f16) {
25225f757f3fSDimitry Andric if (isExtractHiElt(Src, Src)) {
25235f757f3fSDimitry Andric CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),
25245f757f3fSDimitry Andric {Src});
25255f757f3fSDimitry Andric return;
25265f757f3fSDimitry Andric }
25275f757f3fSDimitry Andric }
25285f757f3fSDimitry Andric }
25295f757f3fSDimitry Andric
25305f757f3fSDimitry Andric SelectCode(N);
25315f757f3fSDimitry Andric }
25325f757f3fSDimitry Andric
SelectDSAppendConsume(SDNode * N,unsigned IntrID)25330b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
25340b57cec5SDimitry Andric // The address is assumed to be uniform, so if it ends up in a VGPR, it will
25350b57cec5SDimitry Andric // be copied to an SGPR with readfirstlane.
25360b57cec5SDimitry Andric unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
25370b57cec5SDimitry Andric AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
25380b57cec5SDimitry Andric
25390b57cec5SDimitry Andric SDValue Chain = N->getOperand(0);
25400b57cec5SDimitry Andric SDValue Ptr = N->getOperand(2);
25410b57cec5SDimitry Andric MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
25420b57cec5SDimitry Andric MachineMemOperand *MMO = M->getMemOperand();
25430b57cec5SDimitry Andric bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
25440b57cec5SDimitry Andric
25450b57cec5SDimitry Andric SDValue Offset;
25460b57cec5SDimitry Andric if (CurDAG->isBaseWithConstantOffset(Ptr)) {
25470b57cec5SDimitry Andric SDValue PtrBase = Ptr.getOperand(0);
25480b57cec5SDimitry Andric SDValue PtrOffset = Ptr.getOperand(1);
25490b57cec5SDimitry Andric
2550297eecfbSDimitry Andric const APInt &OffsetVal = PtrOffset->getAsAPIntVal();
2551e8d8bef9SDimitry Andric if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
25520b57cec5SDimitry Andric N = glueCopyToM0(N, PtrBase);
25530b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
25540b57cec5SDimitry Andric }
25550b57cec5SDimitry Andric }
25560b57cec5SDimitry Andric
25570b57cec5SDimitry Andric if (!Offset) {
25580b57cec5SDimitry Andric N = glueCopyToM0(N, Ptr);
25590b57cec5SDimitry Andric Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
25600b57cec5SDimitry Andric }
25610b57cec5SDimitry Andric
25620b57cec5SDimitry Andric SDValue Ops[] = {
25630b57cec5SDimitry Andric Offset,
25640b57cec5SDimitry Andric CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
25650b57cec5SDimitry Andric Chain,
25660b57cec5SDimitry Andric N->getOperand(N->getNumOperands() - 1) // New glue
25670b57cec5SDimitry Andric };
25680b57cec5SDimitry Andric
25690b57cec5SDimitry Andric SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
25700b57cec5SDimitry Andric CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
25710b57cec5SDimitry Andric }
25720b57cec5SDimitry Andric
2573bdd1243dSDimitry Andric // We need to handle this here because tablegen doesn't support matching
2574bdd1243dSDimitry Andric // instructions with multiple outputs.
SelectDSBvhStackIntrinsic(SDNode * N)2575bdd1243dSDimitry Andric void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
2576bdd1243dSDimitry Andric unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2577bdd1243dSDimitry Andric SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
2578bdd1243dSDimitry Andric N->getOperand(5), N->getOperand(0)};
2579bdd1243dSDimitry Andric
2580bdd1243dSDimitry Andric MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2581bdd1243dSDimitry Andric MachineMemOperand *MMO = M->getMemOperand();
2582bdd1243dSDimitry Andric SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2583bdd1243dSDimitry Andric CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2584bdd1243dSDimitry Andric }
2585bdd1243dSDimitry Andric
gwsIntrinToOpcode(unsigned IntrID)25860b57cec5SDimitry Andric static unsigned gwsIntrinToOpcode(unsigned IntrID) {
25870b57cec5SDimitry Andric switch (IntrID) {
25880b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_init:
25890b57cec5SDimitry Andric return AMDGPU::DS_GWS_INIT;
25900b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_barrier:
25910b57cec5SDimitry Andric return AMDGPU::DS_GWS_BARRIER;
25920b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_v:
25930b57cec5SDimitry Andric return AMDGPU::DS_GWS_SEMA_V;
25940b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_br:
25950b57cec5SDimitry Andric return AMDGPU::DS_GWS_SEMA_BR;
25960b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_p:
25970b57cec5SDimitry Andric return AMDGPU::DS_GWS_SEMA_P;
25980b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_release_all:
25990b57cec5SDimitry Andric return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
26000b57cec5SDimitry Andric default:
26010b57cec5SDimitry Andric llvm_unreachable("not a gws intrinsic");
26020b57cec5SDimitry Andric }
26030b57cec5SDimitry Andric }
26040b57cec5SDimitry Andric
SelectDS_GWS(SDNode * N,unsigned IntrID)26050b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
26065f757f3fSDimitry Andric if (!Subtarget->hasGWS() ||
26075f757f3fSDimitry Andric (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
26085f757f3fSDimitry Andric !Subtarget->hasGWSSemaReleaseAll())) {
26090b57cec5SDimitry Andric // Let this error.
26100b57cec5SDimitry Andric SelectCode(N);
26110b57cec5SDimitry Andric return;
26120b57cec5SDimitry Andric }
26130b57cec5SDimitry Andric
26140b57cec5SDimitry Andric // Chain, intrinsic ID, vsrc, offset
26150b57cec5SDimitry Andric const bool HasVSrc = N->getNumOperands() == 4;
26160b57cec5SDimitry Andric assert(HasVSrc || N->getNumOperands() == 3);
26170b57cec5SDimitry Andric
26180b57cec5SDimitry Andric SDLoc SL(N);
26190b57cec5SDimitry Andric SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
26200b57cec5SDimitry Andric int ImmOffset = 0;
26210b57cec5SDimitry Andric MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
26220b57cec5SDimitry Andric MachineMemOperand *MMO = M->getMemOperand();
26230b57cec5SDimitry Andric
26240b57cec5SDimitry Andric // Don't worry if the offset ends up in a VGPR. Only one lane will have
26250b57cec5SDimitry Andric // effect, so SIFixSGPRCopies will validly insert readfirstlane.
26260b57cec5SDimitry Andric
26270b57cec5SDimitry Andric // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
26280b57cec5SDimitry Andric // offset field) % 64. Some versions of the programming guide omit the m0
26290b57cec5SDimitry Andric // part, or claim it's from offset 0.
26300b57cec5SDimitry Andric if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
26318bcb0991SDimitry Andric // If we have a constant offset, try to use the 0 in m0 as the base.
26328bcb0991SDimitry Andric // TODO: Look into changing the default m0 initialization value. If the
26338bcb0991SDimitry Andric // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
26348bcb0991SDimitry Andric // the immediate offset.
26358bcb0991SDimitry Andric glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
26368bcb0991SDimitry Andric ImmOffset = ConstOffset->getZExtValue();
26370b57cec5SDimitry Andric } else {
26380b57cec5SDimitry Andric if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
26390b57cec5SDimitry Andric ImmOffset = BaseOffset.getConstantOperandVal(1);
26400b57cec5SDimitry Andric BaseOffset = BaseOffset.getOperand(0);
26410b57cec5SDimitry Andric }
26420b57cec5SDimitry Andric
26430b57cec5SDimitry Andric // Prefer to do the shift in an SGPR since it should be possible to use m0
26440b57cec5SDimitry Andric // as the result directly. If it's already an SGPR, it will be eliminated
26450b57cec5SDimitry Andric // later.
26460b57cec5SDimitry Andric SDNode *SGPROffset
26470b57cec5SDimitry Andric = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
26480b57cec5SDimitry Andric BaseOffset);
26490b57cec5SDimitry Andric // Shift to offset in m0
26500b57cec5SDimitry Andric SDNode *M0Base
26510b57cec5SDimitry Andric = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
26520b57cec5SDimitry Andric SDValue(SGPROffset, 0),
26530b57cec5SDimitry Andric CurDAG->getTargetConstant(16, SL, MVT::i32));
26540b57cec5SDimitry Andric glueCopyToM0(N, SDValue(M0Base, 0));
26550b57cec5SDimitry Andric }
26560b57cec5SDimitry Andric
26570b57cec5SDimitry Andric SDValue Chain = N->getOperand(0);
26580b57cec5SDimitry Andric SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
26590b57cec5SDimitry Andric
26600b57cec5SDimitry Andric const unsigned Opc = gwsIntrinToOpcode(IntrID);
26610b57cec5SDimitry Andric SmallVector<SDValue, 5> Ops;
26620b57cec5SDimitry Andric if (HasVSrc)
26638bcb0991SDimitry Andric Ops.push_back(N->getOperand(2));
26640b57cec5SDimitry Andric Ops.push_back(OffsetField);
26650b57cec5SDimitry Andric Ops.push_back(Chain);
26660b57cec5SDimitry Andric
26670b57cec5SDimitry Andric SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
26680b57cec5SDimitry Andric CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
26690b57cec5SDimitry Andric }
26700b57cec5SDimitry Andric
SelectInterpP1F16(SDNode * N)26715ffd83dbSDimitry Andric void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
26725ffd83dbSDimitry Andric if (Subtarget->getLDSBankCount() != 16) {
26735ffd83dbSDimitry Andric // This is a single instruction with a pattern.
26745ffd83dbSDimitry Andric SelectCode(N);
26755ffd83dbSDimitry Andric return;
26765ffd83dbSDimitry Andric }
26775ffd83dbSDimitry Andric
26785ffd83dbSDimitry Andric SDLoc DL(N);
26795ffd83dbSDimitry Andric
26805ffd83dbSDimitry Andric // This requires 2 instructions. It is possible to write a pattern to support
26815ffd83dbSDimitry Andric // this, but the generated isel emitter doesn't correctly deal with multiple
26825ffd83dbSDimitry Andric // output instructions using the same physical register input. The copy to m0
26835ffd83dbSDimitry Andric // is incorrectly placed before the second instruction.
26845ffd83dbSDimitry Andric //
26855ffd83dbSDimitry Andric // TODO: Match source modifiers.
26865ffd83dbSDimitry Andric //
26875ffd83dbSDimitry Andric // def : Pat <
26885ffd83dbSDimitry Andric // (int_amdgcn_interp_p1_f16
26895ffd83dbSDimitry Andric // (VOP3Mods f32:$src0, i32:$src0_modifiers),
26905ffd83dbSDimitry Andric // (i32 timm:$attrchan), (i32 timm:$attr),
26915ffd83dbSDimitry Andric // (i1 timm:$high), M0),
26925ffd83dbSDimitry Andric // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
26935ffd83dbSDimitry Andric // timm:$attrchan, 0,
26945ffd83dbSDimitry Andric // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
26955ffd83dbSDimitry Andric // let Predicates = [has16BankLDS];
26965ffd83dbSDimitry Andric // }
26975ffd83dbSDimitry Andric
26985ffd83dbSDimitry Andric // 16 bank LDS
26995ffd83dbSDimitry Andric SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
27005ffd83dbSDimitry Andric N->getOperand(5), SDValue());
27015ffd83dbSDimitry Andric
27025ffd83dbSDimitry Andric SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
27035ffd83dbSDimitry Andric
27045ffd83dbSDimitry Andric SDNode *InterpMov =
27055ffd83dbSDimitry Andric CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
27065ffd83dbSDimitry Andric CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
27075ffd83dbSDimitry Andric N->getOperand(3), // Attr
27085ffd83dbSDimitry Andric N->getOperand(2), // Attrchan
27095ffd83dbSDimitry Andric ToM0.getValue(1) // In glue
27105ffd83dbSDimitry Andric });
27115ffd83dbSDimitry Andric
27125ffd83dbSDimitry Andric SDNode *InterpP1LV =
27135ffd83dbSDimitry Andric CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
27145ffd83dbSDimitry Andric CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
27155ffd83dbSDimitry Andric N->getOperand(1), // Src0
27165ffd83dbSDimitry Andric N->getOperand(3), // Attr
27175ffd83dbSDimitry Andric N->getOperand(2), // Attrchan
27185ffd83dbSDimitry Andric CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
27195ffd83dbSDimitry Andric SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
27205ffd83dbSDimitry Andric N->getOperand(4), // high
27215ffd83dbSDimitry Andric CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
27225ffd83dbSDimitry Andric CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
27235ffd83dbSDimitry Andric SDValue(InterpMov, 1)
27245ffd83dbSDimitry Andric });
27255ffd83dbSDimitry Andric
27265ffd83dbSDimitry Andric CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
27275ffd83dbSDimitry Andric }
27285ffd83dbSDimitry Andric
SelectINTRINSIC_W_CHAIN(SDNode * N)27290b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2730647cbc5dSDimitry Andric unsigned IntrID = N->getConstantOperandVal(1);
27310b57cec5SDimitry Andric switch (IntrID) {
27320b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_append:
27330b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_consume: {
27340b57cec5SDimitry Andric if (N->getValueType(0) != MVT::i32)
27350b57cec5SDimitry Andric break;
27360b57cec5SDimitry Andric SelectDSAppendConsume(N, IntrID);
27370b57cec5SDimitry Andric return;
27380b57cec5SDimitry Andric }
2739bdd1243dSDimitry Andric case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2740bdd1243dSDimitry Andric SelectDSBvhStackIntrinsic(N);
2741bdd1243dSDimitry Andric return;
27420b57cec5SDimitry Andric }
27430b57cec5SDimitry Andric
27440b57cec5SDimitry Andric SelectCode(N);
27450b57cec5SDimitry Andric }
27460b57cec5SDimitry Andric
SelectINTRINSIC_WO_CHAIN(SDNode * N)27478bcb0991SDimitry Andric void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
2748647cbc5dSDimitry Andric unsigned IntrID = N->getConstantOperandVal(0);
27490fca6ea1SDimitry Andric unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
27500fca6ea1SDimitry Andric SDNode *ConvGlueNode = N->getGluedNode();
27510fca6ea1SDimitry Andric if (ConvGlueNode) {
27520fca6ea1SDimitry Andric // FIXME: Possibly iterate over multiple glue nodes?
27530fca6ea1SDimitry Andric assert(ConvGlueNode->getOpcode() == ISD::CONVERGENCECTRL_GLUE);
27540fca6ea1SDimitry Andric ConvGlueNode = ConvGlueNode->getOperand(0).getNode();
27550fca6ea1SDimitry Andric ConvGlueNode =
27560fca6ea1SDimitry Andric CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
27570fca6ea1SDimitry Andric MVT::Glue, SDValue(ConvGlueNode, 0));
27580fca6ea1SDimitry Andric } else {
27590fca6ea1SDimitry Andric ConvGlueNode = nullptr;
27600fca6ea1SDimitry Andric }
27618bcb0991SDimitry Andric switch (IntrID) {
27628bcb0991SDimitry Andric case Intrinsic::amdgcn_wqm:
27638bcb0991SDimitry Andric Opcode = AMDGPU::WQM;
27648bcb0991SDimitry Andric break;
27658bcb0991SDimitry Andric case Intrinsic::amdgcn_softwqm:
27668bcb0991SDimitry Andric Opcode = AMDGPU::SOFT_WQM;
27678bcb0991SDimitry Andric break;
27688bcb0991SDimitry Andric case Intrinsic::amdgcn_wwm:
2769fe6060f1SDimitry Andric case Intrinsic::amdgcn_strict_wwm:
2770fe6060f1SDimitry Andric Opcode = AMDGPU::STRICT_WWM;
2771fe6060f1SDimitry Andric break;
2772fe6060f1SDimitry Andric case Intrinsic::amdgcn_strict_wqm:
2773fe6060f1SDimitry Andric Opcode = AMDGPU::STRICT_WQM;
27748bcb0991SDimitry Andric break;
27755ffd83dbSDimitry Andric case Intrinsic::amdgcn_interp_p1_f16:
27765ffd83dbSDimitry Andric SelectInterpP1F16(N);
27775ffd83dbSDimitry Andric return;
27788bcb0991SDimitry Andric default:
27798bcb0991SDimitry Andric SelectCode(N);
27800fca6ea1SDimitry Andric break;
27818bcb0991SDimitry Andric }
27828bcb0991SDimitry Andric
27830fca6ea1SDimitry Andric if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
27848bcb0991SDimitry Andric SDValue Src = N->getOperand(1);
27858bcb0991SDimitry Andric CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
27868bcb0991SDimitry Andric }
27878bcb0991SDimitry Andric
27880fca6ea1SDimitry Andric if (ConvGlueNode) {
27890fca6ea1SDimitry Andric SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
27900fca6ea1SDimitry Andric NewOps.push_back(SDValue(ConvGlueNode, 0));
27910fca6ea1SDimitry Andric CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps);
27920fca6ea1SDimitry Andric }
27930fca6ea1SDimitry Andric }
27940fca6ea1SDimitry Andric
SelectINTRINSIC_VOID(SDNode * N)27950b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
2796647cbc5dSDimitry Andric unsigned IntrID = N->getConstantOperandVal(1);
27970b57cec5SDimitry Andric switch (IntrID) {
27980b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_init:
27990b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_barrier:
28000b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_v:
28010b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_br:
28020b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_p:
28030b57cec5SDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_release_all:
28040b57cec5SDimitry Andric SelectDS_GWS(N, IntrID);
28050b57cec5SDimitry Andric return;
28060b57cec5SDimitry Andric default:
28070b57cec5SDimitry Andric break;
28080b57cec5SDimitry Andric }
28090b57cec5SDimitry Andric
28100b57cec5SDimitry Andric SelectCode(N);
28110b57cec5SDimitry Andric }
28120b57cec5SDimitry Andric
SelectWAVE_ADDRESS(SDNode * N)28135f757f3fSDimitry Andric void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
28145f757f3fSDimitry Andric SDValue Log2WaveSize =
28155f757f3fSDimitry Andric CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(N), MVT::i32);
28165f757f3fSDimitry Andric CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(),
28175f757f3fSDimitry Andric {N->getOperand(0), Log2WaveSize});
28185f757f3fSDimitry Andric }
28195f757f3fSDimitry Andric
SelectSTACKRESTORE(SDNode * N)28205f757f3fSDimitry Andric void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
28215f757f3fSDimitry Andric SDValue SrcVal = N->getOperand(1);
28225f757f3fSDimitry Andric if (SrcVal.getValueType() != MVT::i32) {
28235f757f3fSDimitry Andric SelectCode(N); // Emit default error
28245f757f3fSDimitry Andric return;
28255f757f3fSDimitry Andric }
28265f757f3fSDimitry Andric
28275f757f3fSDimitry Andric SDValue CopyVal;
28285f757f3fSDimitry Andric Register SP = TLI->getStackPointerRegisterToSaveRestore();
28295f757f3fSDimitry Andric SDLoc SL(N);
28305f757f3fSDimitry Andric
28315f757f3fSDimitry Andric if (SrcVal.getOpcode() == AMDGPUISD::WAVE_ADDRESS) {
28325f757f3fSDimitry Andric CopyVal = SrcVal.getOperand(0);
28335f757f3fSDimitry Andric } else {
28345f757f3fSDimitry Andric SDValue Log2WaveSize = CurDAG->getTargetConstant(
28355f757f3fSDimitry Andric Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
28365f757f3fSDimitry Andric
28375f757f3fSDimitry Andric if (N->isDivergent()) {
28385f757f3fSDimitry Andric SrcVal = SDValue(CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
28395f757f3fSDimitry Andric MVT::i32, SrcVal),
28405f757f3fSDimitry Andric 0);
28415f757f3fSDimitry Andric }
28425f757f3fSDimitry Andric
28435f757f3fSDimitry Andric CopyVal = SDValue(CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
28445f757f3fSDimitry Andric {SrcVal, Log2WaveSize}),
28455f757f3fSDimitry Andric 0);
28465f757f3fSDimitry Andric }
28475f757f3fSDimitry Andric
28485f757f3fSDimitry Andric SDValue CopyToSP = CurDAG->getCopyToReg(N->getOperand(0), SL, SP, CopyVal);
28495f757f3fSDimitry Andric CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyToSP);
28505f757f3fSDimitry Andric }
28515f757f3fSDimitry Andric
SelectVOP3ModsImpl(SDValue In,SDValue & Src,unsigned & Mods,bool IsCanonicalizing,bool AllowAbs) const28520b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
2853e8d8bef9SDimitry Andric unsigned &Mods,
285406c3fb27SDimitry Andric bool IsCanonicalizing,
2855e8d8bef9SDimitry Andric bool AllowAbs) const {
285606c3fb27SDimitry Andric Mods = SISrcMods::NONE;
28570b57cec5SDimitry Andric Src = In;
28580b57cec5SDimitry Andric
28590b57cec5SDimitry Andric if (Src.getOpcode() == ISD::FNEG) {
28600b57cec5SDimitry Andric Mods |= SISrcMods::NEG;
28610b57cec5SDimitry Andric Src = Src.getOperand(0);
286206c3fb27SDimitry Andric } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
286306c3fb27SDimitry Andric // Fold fsub [+-]0 into fneg. This may not have folded depending on the
286406c3fb27SDimitry Andric // denormal mode, but we're implicitly canonicalizing in a source operand.
286506c3fb27SDimitry Andric auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
286606c3fb27SDimitry Andric if (LHS && LHS->isZero()) {
286706c3fb27SDimitry Andric Mods |= SISrcMods::NEG;
286806c3fb27SDimitry Andric Src = Src.getOperand(1);
286906c3fb27SDimitry Andric }
28700b57cec5SDimitry Andric }
28710b57cec5SDimitry Andric
2872e8d8bef9SDimitry Andric if (AllowAbs && Src.getOpcode() == ISD::FABS) {
28730b57cec5SDimitry Andric Mods |= SISrcMods::ABS;
28740b57cec5SDimitry Andric Src = Src.getOperand(0);
28750b57cec5SDimitry Andric }
28760b57cec5SDimitry Andric
28770b57cec5SDimitry Andric return true;
28780b57cec5SDimitry Andric }
28790b57cec5SDimitry Andric
SelectVOP3Mods(SDValue In,SDValue & Src,SDValue & SrcMods) const28800b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
28810b57cec5SDimitry Andric SDValue &SrcMods) const {
28820b57cec5SDimitry Andric unsigned Mods;
288306c3fb27SDimitry Andric if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/true,
288406c3fb27SDimitry Andric /*AllowAbs=*/true)) {
288506c3fb27SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
288606c3fb27SDimitry Andric return true;
288706c3fb27SDimitry Andric }
288806c3fb27SDimitry Andric
288906c3fb27SDimitry Andric return false;
289006c3fb27SDimitry Andric }
289106c3fb27SDimitry Andric
SelectVOP3ModsNonCanonicalizing(SDValue In,SDValue & Src,SDValue & SrcMods) const289206c3fb27SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
289306c3fb27SDimitry Andric SDValue In, SDValue &Src, SDValue &SrcMods) const {
289406c3fb27SDimitry Andric unsigned Mods;
289506c3fb27SDimitry Andric if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/false,
289606c3fb27SDimitry Andric /*AllowAbs=*/true)) {
28970b57cec5SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
28980b57cec5SDimitry Andric return true;
28990b57cec5SDimitry Andric }
29000b57cec5SDimitry Andric
29010b57cec5SDimitry Andric return false;
29020b57cec5SDimitry Andric }
29030b57cec5SDimitry Andric
SelectVOP3BMods(SDValue In,SDValue & Src,SDValue & SrcMods) const2904e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
2905e8d8bef9SDimitry Andric SDValue &SrcMods) const {
2906e8d8bef9SDimitry Andric unsigned Mods;
290706c3fb27SDimitry Andric if (SelectVOP3ModsImpl(In, Src, Mods,
290806c3fb27SDimitry Andric /*IsCanonicalizing=*/true,
290906c3fb27SDimitry Andric /*AllowAbs=*/false)) {
2910e8d8bef9SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2911e8d8bef9SDimitry Andric return true;
2912e8d8bef9SDimitry Andric }
2913e8d8bef9SDimitry Andric
2914e8d8bef9SDimitry Andric return false;
2915e8d8bef9SDimitry Andric }
2916e8d8bef9SDimitry Andric
SelectVOP3NoMods(SDValue In,SDValue & Src) const29170b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
29180b57cec5SDimitry Andric if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
29190b57cec5SDimitry Andric return false;
29200b57cec5SDimitry Andric
29210b57cec5SDimitry Andric Src = In;
29220b57cec5SDimitry Andric return true;
29230b57cec5SDimitry Andric }
29240b57cec5SDimitry Andric
SelectVINTERPModsImpl(SDValue In,SDValue & Src,SDValue & SrcMods,bool OpSel) const292581ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
292681ad6265SDimitry Andric SDValue &SrcMods,
292781ad6265SDimitry Andric bool OpSel) const {
292881ad6265SDimitry Andric unsigned Mods;
292906c3fb27SDimitry Andric if (SelectVOP3ModsImpl(In, Src, Mods,
293006c3fb27SDimitry Andric /*IsCanonicalizing=*/true,
293106c3fb27SDimitry Andric /*AllowAbs=*/false)) {
293281ad6265SDimitry Andric if (OpSel)
293381ad6265SDimitry Andric Mods |= SISrcMods::OP_SEL_0;
293481ad6265SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
293581ad6265SDimitry Andric return true;
293681ad6265SDimitry Andric }
293781ad6265SDimitry Andric
293881ad6265SDimitry Andric return false;
293981ad6265SDimitry Andric }
294081ad6265SDimitry Andric
SelectVINTERPMods(SDValue In,SDValue & Src,SDValue & SrcMods) const294181ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
294281ad6265SDimitry Andric SDValue &SrcMods) const {
294381ad6265SDimitry Andric return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false);
294481ad6265SDimitry Andric }
294581ad6265SDimitry Andric
SelectVINTERPModsHi(SDValue In,SDValue & Src,SDValue & SrcMods) const294681ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
294781ad6265SDimitry Andric SDValue &SrcMods) const {
294881ad6265SDimitry Andric return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true);
294981ad6265SDimitry Andric }
295081ad6265SDimitry Andric
SelectVOP3Mods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const29510b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
29520b57cec5SDimitry Andric SDValue &SrcMods, SDValue &Clamp,
29530b57cec5SDimitry Andric SDValue &Omod) const {
29540b57cec5SDimitry Andric SDLoc DL(In);
29550b57cec5SDimitry Andric Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
29560b57cec5SDimitry Andric Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
29570b57cec5SDimitry Andric
29580b57cec5SDimitry Andric return SelectVOP3Mods(In, Src, SrcMods);
29590b57cec5SDimitry Andric }
29600b57cec5SDimitry Andric
SelectVOP3BMods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const2961e8d8bef9SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
2962e8d8bef9SDimitry Andric SDValue &SrcMods, SDValue &Clamp,
2963e8d8bef9SDimitry Andric SDValue &Omod) const {
2964e8d8bef9SDimitry Andric SDLoc DL(In);
2965e8d8bef9SDimitry Andric Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2966e8d8bef9SDimitry Andric Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2967e8d8bef9SDimitry Andric
2968e8d8bef9SDimitry Andric return SelectVOP3BMods(In, Src, SrcMods);
2969e8d8bef9SDimitry Andric }
2970e8d8bef9SDimitry Andric
SelectVOP3OMods(SDValue In,SDValue & Src,SDValue & Clamp,SDValue & Omod) const29710b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
29720b57cec5SDimitry Andric SDValue &Clamp, SDValue &Omod) const {
29730b57cec5SDimitry Andric Src = In;
29740b57cec5SDimitry Andric
29750b57cec5SDimitry Andric SDLoc DL(In);
29760b57cec5SDimitry Andric Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
29770b57cec5SDimitry Andric Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
29780b57cec5SDimitry Andric
29790b57cec5SDimitry Andric return true;
29800b57cec5SDimitry Andric }
29810b57cec5SDimitry Andric
SelectVOP3PMods(SDValue In,SDValue & Src,SDValue & SrcMods,bool IsDOT) const29820b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
298381ad6265SDimitry Andric SDValue &SrcMods, bool IsDOT) const {
298406c3fb27SDimitry Andric unsigned Mods = SISrcMods::NONE;
29850b57cec5SDimitry Andric Src = In;
29860b57cec5SDimitry Andric
298706c3fb27SDimitry Andric // TODO: Handle G_FSUB 0 as fneg
29880b57cec5SDimitry Andric if (Src.getOpcode() == ISD::FNEG) {
29890b57cec5SDimitry Andric Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
29900b57cec5SDimitry Andric Src = Src.getOperand(0);
29910b57cec5SDimitry Andric }
29920b57cec5SDimitry Andric
2993bdd1243dSDimitry Andric if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 &&
299481ad6265SDimitry Andric (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
29950b57cec5SDimitry Andric unsigned VecMods = Mods;
29960b57cec5SDimitry Andric
29970b57cec5SDimitry Andric SDValue Lo = stripBitcast(Src.getOperand(0));
29980b57cec5SDimitry Andric SDValue Hi = stripBitcast(Src.getOperand(1));
29990b57cec5SDimitry Andric
30000b57cec5SDimitry Andric if (Lo.getOpcode() == ISD::FNEG) {
30010b57cec5SDimitry Andric Lo = stripBitcast(Lo.getOperand(0));
30020b57cec5SDimitry Andric Mods ^= SISrcMods::NEG;
30030b57cec5SDimitry Andric }
30040b57cec5SDimitry Andric
30050b57cec5SDimitry Andric if (Hi.getOpcode() == ISD::FNEG) {
30060b57cec5SDimitry Andric Hi = stripBitcast(Hi.getOperand(0));
30070b57cec5SDimitry Andric Mods ^= SISrcMods::NEG_HI;
30080b57cec5SDimitry Andric }
30090b57cec5SDimitry Andric
30100b57cec5SDimitry Andric if (isExtractHiElt(Lo, Lo))
30110b57cec5SDimitry Andric Mods |= SISrcMods::OP_SEL_0;
30120b57cec5SDimitry Andric
30130b57cec5SDimitry Andric if (isExtractHiElt(Hi, Hi))
30140b57cec5SDimitry Andric Mods |= SISrcMods::OP_SEL_1;
30150b57cec5SDimitry Andric
3016fe6060f1SDimitry Andric unsigned VecSize = Src.getValueSizeInBits();
30170b57cec5SDimitry Andric Lo = stripExtractLoElt(Lo);
30180b57cec5SDimitry Andric Hi = stripExtractLoElt(Hi);
30190b57cec5SDimitry Andric
3020fe6060f1SDimitry Andric if (Lo.getValueSizeInBits() > VecSize) {
3021fe6060f1SDimitry Andric Lo = CurDAG->getTargetExtractSubreg(
3022fe6060f1SDimitry Andric (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3023fe6060f1SDimitry Andric MVT::getIntegerVT(VecSize), Lo);
3024fe6060f1SDimitry Andric }
3025fe6060f1SDimitry Andric
3026fe6060f1SDimitry Andric if (Hi.getValueSizeInBits() > VecSize) {
3027fe6060f1SDimitry Andric Hi = CurDAG->getTargetExtractSubreg(
3028fe6060f1SDimitry Andric (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3029fe6060f1SDimitry Andric MVT::getIntegerVT(VecSize), Hi);
3030fe6060f1SDimitry Andric }
3031fe6060f1SDimitry Andric
3032fe6060f1SDimitry Andric assert(Lo.getValueSizeInBits() <= VecSize &&
3033fe6060f1SDimitry Andric Hi.getValueSizeInBits() <= VecSize);
3034fe6060f1SDimitry Andric
30350b57cec5SDimitry Andric if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
30360b57cec5SDimitry Andric // Really a scalar input. Just select from the low half of the register to
30370b57cec5SDimitry Andric // avoid packing.
30380b57cec5SDimitry Andric
3039fe6060f1SDimitry Andric if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
30400b57cec5SDimitry Andric Src = Lo;
3041fe6060f1SDimitry Andric } else {
3042fe6060f1SDimitry Andric assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
3043fe6060f1SDimitry Andric
3044fe6060f1SDimitry Andric SDLoc SL(In);
3045fe6060f1SDimitry Andric SDValue Undef = SDValue(
3046fe6060f1SDimitry Andric CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
3047fe6060f1SDimitry Andric Lo.getValueType()), 0);
3048fe6060f1SDimitry Andric auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3049fe6060f1SDimitry Andric : AMDGPU::SReg_64RegClassID;
3050fe6060f1SDimitry Andric const SDValue Ops[] = {
3051fe6060f1SDimitry Andric CurDAG->getTargetConstant(RC, SL, MVT::i32),
3052fe6060f1SDimitry Andric Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3053fe6060f1SDimitry Andric Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
3054fe6060f1SDimitry Andric
3055fe6060f1SDimitry Andric Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
3056fe6060f1SDimitry Andric Src.getValueType(), Ops), 0);
3057fe6060f1SDimitry Andric }
30580b57cec5SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
30590b57cec5SDimitry Andric return true;
30600b57cec5SDimitry Andric }
30610b57cec5SDimitry Andric
3062fe6060f1SDimitry Andric if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
3063fe6060f1SDimitry Andric uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
3064fe6060f1SDimitry Andric .bitcastToAPInt().getZExtValue();
3065fe6060f1SDimitry Andric if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
306606c3fb27SDimitry Andric Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
3067fe6060f1SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3068fe6060f1SDimitry Andric return true;
3069fe6060f1SDimitry Andric }
3070fe6060f1SDimitry Andric }
3071fe6060f1SDimitry Andric
30720b57cec5SDimitry Andric Mods = VecMods;
30730b57cec5SDimitry Andric }
30740b57cec5SDimitry Andric
30750b57cec5SDimitry Andric // Packed instructions do not have abs modifiers.
30760b57cec5SDimitry Andric Mods |= SISrcMods::OP_SEL_1;
30770b57cec5SDimitry Andric
30780b57cec5SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
30790b57cec5SDimitry Andric return true;
30800b57cec5SDimitry Andric }
30810b57cec5SDimitry Andric
SelectVOP3PModsDOT(SDValue In,SDValue & Src,SDValue & SrcMods) const308281ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
308381ad6265SDimitry Andric SDValue &SrcMods) const {
308481ad6265SDimitry Andric return SelectVOP3PMods(In, Src, SrcMods, true);
308581ad6265SDimitry Andric }
308681ad6265SDimitry Andric
SelectVOP3PModsNeg(SDValue In,SDValue & Src) const30877a6dacacSDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
308881ad6265SDimitry Andric const ConstantSDNode *C = cast<ConstantSDNode>(In);
308981ad6265SDimitry Andric // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
309081ad6265SDimitry Andric // 1 promotes packed values to signed, 0 treats them as unsigned.
309181ad6265SDimitry Andric assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
309281ad6265SDimitry Andric
309381ad6265SDimitry Andric unsigned Mods = SISrcMods::OP_SEL_1;
309406c3fb27SDimitry Andric unsigned SrcSign = C->getZExtValue();
309581ad6265SDimitry Andric if (SrcSign == 1)
309681ad6265SDimitry Andric Mods ^= SISrcMods::NEG;
309781ad6265SDimitry Andric
309881ad6265SDimitry Andric Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
309981ad6265SDimitry Andric return true;
310081ad6265SDimitry Andric }
310181ad6265SDimitry Andric
SelectWMMAOpSelVOP3PMods(SDValue In,SDValue & Src) const310281ad6265SDimitry Andric bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
310381ad6265SDimitry Andric SDValue &Src) const {
310481ad6265SDimitry Andric const ConstantSDNode *C = cast<ConstantSDNode>(In);
310581ad6265SDimitry Andric assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
310681ad6265SDimitry Andric
310781ad6265SDimitry Andric unsigned Mods = SISrcMods::OP_SEL_1;
310806c3fb27SDimitry Andric unsigned SrcVal = C->getZExtValue();
310981ad6265SDimitry Andric if (SrcVal == 1)
311081ad6265SDimitry Andric Mods |= SISrcMods::OP_SEL_0;
311181ad6265SDimitry Andric
311281ad6265SDimitry Andric Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
311381ad6265SDimitry Andric return true;
311481ad6265SDimitry Andric }
311581ad6265SDimitry Andric
buildRegSequence32(SmallVectorImpl<SDValue> & Elts,llvm::SelectionDAG * CurDAG,const SDLoc & DL)3116b3edf446SDimitry Andric static MachineSDNode *buildRegSequence32(SmallVectorImpl<SDValue> &Elts,
3117b3edf446SDimitry Andric llvm::SelectionDAG *CurDAG,
3118b3edf446SDimitry Andric const SDLoc &DL) {
3119b3edf446SDimitry Andric unsigned DstRegClass;
3120b3edf446SDimitry Andric EVT DstTy;
3121b3edf446SDimitry Andric switch (Elts.size()) {
3122b3edf446SDimitry Andric case 8:
3123b3edf446SDimitry Andric DstRegClass = AMDGPU::VReg_256RegClassID;
3124b3edf446SDimitry Andric DstTy = MVT::v8i32;
3125b3edf446SDimitry Andric break;
3126b3edf446SDimitry Andric case 4:
3127b3edf446SDimitry Andric DstRegClass = AMDGPU::VReg_128RegClassID;
3128b3edf446SDimitry Andric DstTy = MVT::v4i32;
3129b3edf446SDimitry Andric break;
3130b3edf446SDimitry Andric case 2:
3131b3edf446SDimitry Andric DstRegClass = AMDGPU::VReg_64RegClassID;
3132b3edf446SDimitry Andric DstTy = MVT::v2i32;
3133b3edf446SDimitry Andric break;
3134b3edf446SDimitry Andric default:
3135b3edf446SDimitry Andric llvm_unreachable("unhandled Reg sequence size");
3136b3edf446SDimitry Andric }
3137b3edf446SDimitry Andric
3138b3edf446SDimitry Andric SmallVector<SDValue, 17> Ops;
3139b3edf446SDimitry Andric Ops.push_back(CurDAG->getTargetConstant(DstRegClass, DL, MVT::i32));
3140b3edf446SDimitry Andric for (unsigned i = 0; i < Elts.size(); ++i) {
3141b3edf446SDimitry Andric Ops.push_back(Elts[i]);
3142b3edf446SDimitry Andric Ops.push_back(CurDAG->getTargetConstant(
3143b3edf446SDimitry Andric SIRegisterInfo::getSubRegFromChannel(i), DL, MVT::i32));
3144b3edf446SDimitry Andric }
3145b3edf446SDimitry Andric return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, DstTy, Ops);
3146b3edf446SDimitry Andric }
3147b3edf446SDimitry Andric
buildRegSequence16(SmallVectorImpl<SDValue> & Elts,llvm::SelectionDAG * CurDAG,const SDLoc & DL)3148b3edf446SDimitry Andric static MachineSDNode *buildRegSequence16(SmallVectorImpl<SDValue> &Elts,
3149b3edf446SDimitry Andric llvm::SelectionDAG *CurDAG,
3150b3edf446SDimitry Andric const SDLoc &DL) {
3151b3edf446SDimitry Andric SmallVector<SDValue, 8> PackedElts;
3152b3edf446SDimitry Andric assert("unhandled Reg sequence size" &&
3153b3edf446SDimitry Andric (Elts.size() == 8 || Elts.size() == 16));
3154b3edf446SDimitry Andric
3155b3edf446SDimitry Andric // Pack 16-bit elements in pairs into 32-bit register. If both elements are
3156b3edf446SDimitry Andric // unpacked from 32-bit source use it, otherwise pack them using v_perm.
3157b3edf446SDimitry Andric for (unsigned i = 0; i < Elts.size(); i += 2) {
3158b3edf446SDimitry Andric SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3159b3edf446SDimitry Andric SDValue HiSrc;
3160b3edf446SDimitry Andric if (isExtractHiElt(Elts[i + 1], HiSrc) && LoSrc == HiSrc) {
3161b3edf446SDimitry Andric PackedElts.push_back(HiSrc);
3162b3edf446SDimitry Andric } else {
3163b3edf446SDimitry Andric SDValue PackLoLo = CurDAG->getTargetConstant(0x05040100, DL, MVT::i32);
3164b3edf446SDimitry Andric MachineSDNode *Packed =
3165b3edf446SDimitry Andric CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64, DL, MVT::i32,
3166b3edf446SDimitry Andric {Elts[i + 1], Elts[i], PackLoLo});
3167b3edf446SDimitry Andric PackedElts.push_back(SDValue(Packed, 0));
3168b3edf446SDimitry Andric }
3169b3edf446SDimitry Andric }
3170b3edf446SDimitry Andric
3171b3edf446SDimitry Andric return buildRegSequence32(PackedElts, CurDAG, DL);
3172b3edf446SDimitry Andric }
3173b3edf446SDimitry Andric
buildRegSequence(SmallVectorImpl<SDValue> & Elts,llvm::SelectionDAG * CurDAG,const SDLoc & DL,unsigned ElementSize)3174b3edf446SDimitry Andric static MachineSDNode *buildRegSequence(SmallVectorImpl<SDValue> &Elts,
3175b3edf446SDimitry Andric llvm::SelectionDAG *CurDAG,
3176b3edf446SDimitry Andric const SDLoc &DL, unsigned ElementSize) {
3177b3edf446SDimitry Andric if (ElementSize == 16)
3178b3edf446SDimitry Andric return buildRegSequence16(Elts, CurDAG, DL);
3179b3edf446SDimitry Andric if (ElementSize == 32)
3180b3edf446SDimitry Andric return buildRegSequence32(Elts, CurDAG, DL);
3181b3edf446SDimitry Andric llvm_unreachable("Unhandled element size");
3182b3edf446SDimitry Andric }
3183b3edf446SDimitry Andric
selectWMMAModsNegAbs(unsigned ModOpcode,unsigned & Mods,SmallVectorImpl<SDValue> & Elts,SDValue & Src,llvm::SelectionDAG * CurDAG,const SDLoc & DL,unsigned ElementSize)3184b3edf446SDimitry Andric static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
3185b3edf446SDimitry Andric SmallVectorImpl<SDValue> &Elts, SDValue &Src,
3186b3edf446SDimitry Andric llvm::SelectionDAG *CurDAG, const SDLoc &DL,
3187b3edf446SDimitry Andric unsigned ElementSize) {
3188b3edf446SDimitry Andric if (ModOpcode == ISD::FNEG) {
3189b3edf446SDimitry Andric Mods |= SISrcMods::NEG;
3190b3edf446SDimitry Andric // Check if all elements also have abs modifier
3191b3edf446SDimitry Andric SmallVector<SDValue, 8> NegAbsElts;
3192b3edf446SDimitry Andric for (auto El : Elts) {
3193b3edf446SDimitry Andric if (El.getOpcode() != ISD::FABS)
3194b3edf446SDimitry Andric break;
3195b3edf446SDimitry Andric NegAbsElts.push_back(El->getOperand(0));
3196b3edf446SDimitry Andric }
3197b3edf446SDimitry Andric if (Elts.size() != NegAbsElts.size()) {
3198b3edf446SDimitry Andric // Neg
3199b3edf446SDimitry Andric Src = SDValue(buildRegSequence(Elts, CurDAG, DL, ElementSize), 0);
3200b3edf446SDimitry Andric } else {
3201b3edf446SDimitry Andric // Neg and Abs
3202b3edf446SDimitry Andric Mods |= SISrcMods::NEG_HI;
3203b3edf446SDimitry Andric Src = SDValue(buildRegSequence(NegAbsElts, CurDAG, DL, ElementSize), 0);
3204b3edf446SDimitry Andric }
3205b3edf446SDimitry Andric } else {
3206b3edf446SDimitry Andric assert(ModOpcode == ISD::FABS);
3207b3edf446SDimitry Andric // Abs
3208b3edf446SDimitry Andric Mods |= SISrcMods::NEG_HI;
3209b3edf446SDimitry Andric Src = SDValue(buildRegSequence(Elts, CurDAG, DL, ElementSize), 0);
3210b3edf446SDimitry Andric }
3211b3edf446SDimitry Andric }
3212b3edf446SDimitry Andric
3213b3edf446SDimitry Andric // Check all f16 elements for modifiers while looking through b32 and v2b16
3214b3edf446SDimitry Andric // build vector, stop if element does not satisfy ModifierCheck.
3215b3edf446SDimitry Andric static void
checkWMMAElementsModifiersF16(BuildVectorSDNode * BV,std::function<bool (SDValue)> ModifierCheck)3216b3edf446SDimitry Andric checkWMMAElementsModifiersF16(BuildVectorSDNode *BV,
3217b3edf446SDimitry Andric std::function<bool(SDValue)> ModifierCheck) {
3218b3edf446SDimitry Andric for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3219b3edf446SDimitry Andric if (auto *F16Pair =
3220b3edf446SDimitry Andric dyn_cast<BuildVectorSDNode>(stripBitcast(BV->getOperand(i)))) {
3221b3edf446SDimitry Andric for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3222b3edf446SDimitry Andric SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3223b3edf446SDimitry Andric if (!ModifierCheck(ElF16))
3224b3edf446SDimitry Andric break;
3225b3edf446SDimitry Andric }
3226b3edf446SDimitry Andric }
3227b3edf446SDimitry Andric }
3228b3edf446SDimitry Andric }
3229b3edf446SDimitry Andric
SelectWMMAModsF16Neg(SDValue In,SDValue & Src,SDValue & SrcMods) const3230b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
3231b3edf446SDimitry Andric SDValue &SrcMods) const {
3232b3edf446SDimitry Andric Src = In;
3233b3edf446SDimitry Andric unsigned Mods = SISrcMods::OP_SEL_1;
3234b3edf446SDimitry Andric
3235b3edf446SDimitry Andric // mods are on f16 elements
3236b3edf446SDimitry Andric if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3237b3edf446SDimitry Andric SmallVector<SDValue, 8> EltsF16;
3238b3edf446SDimitry Andric
3239b3edf446SDimitry Andric checkWMMAElementsModifiersF16(BV, [&](SDValue Element) -> bool {
3240b3edf446SDimitry Andric if (Element.getOpcode() != ISD::FNEG)
3241b3edf446SDimitry Andric return false;
3242b3edf446SDimitry Andric EltsF16.push_back(Element.getOperand(0));
3243b3edf446SDimitry Andric return true;
3244b3edf446SDimitry Andric });
3245b3edf446SDimitry Andric
3246b3edf446SDimitry Andric // All elements have neg modifier
3247b3edf446SDimitry Andric if (BV->getNumOperands() * 2 == EltsF16.size()) {
3248b3edf446SDimitry Andric Src = SDValue(buildRegSequence16(EltsF16, CurDAG, SDLoc(In)), 0);
3249b3edf446SDimitry Andric Mods |= SISrcMods::NEG;
3250b3edf446SDimitry Andric Mods |= SISrcMods::NEG_HI;
3251b3edf446SDimitry Andric }
3252b3edf446SDimitry Andric }
3253b3edf446SDimitry Andric
3254b3edf446SDimitry Andric // mods are on v2f16 elements
3255b3edf446SDimitry Andric if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3256b3edf446SDimitry Andric SmallVector<SDValue, 8> EltsV2F16;
3257b3edf446SDimitry Andric for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3258b3edf446SDimitry Andric SDValue ElV2f16 = stripBitcast(BV->getOperand(i));
3259b3edf446SDimitry Andric // Based on first element decide which mod we match, neg or abs
3260b3edf446SDimitry Andric if (ElV2f16.getOpcode() != ISD::FNEG)
3261b3edf446SDimitry Andric break;
3262b3edf446SDimitry Andric EltsV2F16.push_back(ElV2f16.getOperand(0));
3263b3edf446SDimitry Andric }
3264b3edf446SDimitry Andric
3265b3edf446SDimitry Andric // All pairs of elements have neg modifier
3266b3edf446SDimitry Andric if (BV->getNumOperands() == EltsV2F16.size()) {
3267b3edf446SDimitry Andric Src = SDValue(buildRegSequence32(EltsV2F16, CurDAG, SDLoc(In)), 0);
3268b3edf446SDimitry Andric Mods |= SISrcMods::NEG;
3269b3edf446SDimitry Andric Mods |= SISrcMods::NEG_HI;
3270b3edf446SDimitry Andric }
3271b3edf446SDimitry Andric }
3272b3edf446SDimitry Andric
3273b3edf446SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3274b3edf446SDimitry Andric return true;
3275b3edf446SDimitry Andric }
3276b3edf446SDimitry Andric
SelectWMMAModsF16NegAbs(SDValue In,SDValue & Src,SDValue & SrcMods) const3277b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
3278b3edf446SDimitry Andric SDValue &SrcMods) const {
3279b3edf446SDimitry Andric Src = In;
3280b3edf446SDimitry Andric unsigned Mods = SISrcMods::OP_SEL_1;
3281b3edf446SDimitry Andric unsigned ModOpcode;
3282b3edf446SDimitry Andric
3283b3edf446SDimitry Andric // mods are on f16 elements
3284b3edf446SDimitry Andric if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3285b3edf446SDimitry Andric SmallVector<SDValue, 8> EltsF16;
3286b3edf446SDimitry Andric checkWMMAElementsModifiersF16(BV, [&](SDValue ElF16) -> bool {
3287b3edf446SDimitry Andric // Based on first element decide which mod we match, neg or abs
3288b3edf446SDimitry Andric if (EltsF16.empty())
3289b3edf446SDimitry Andric ModOpcode = (ElF16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
3290b3edf446SDimitry Andric if (ElF16.getOpcode() != ModOpcode)
3291b3edf446SDimitry Andric return false;
3292b3edf446SDimitry Andric EltsF16.push_back(ElF16.getOperand(0));
3293b3edf446SDimitry Andric return true;
3294b3edf446SDimitry Andric });
3295b3edf446SDimitry Andric
3296b3edf446SDimitry Andric // All elements have ModOpcode modifier
3297b3edf446SDimitry Andric if (BV->getNumOperands() * 2 == EltsF16.size())
3298b3edf446SDimitry Andric selectWMMAModsNegAbs(ModOpcode, Mods, EltsF16, Src, CurDAG, SDLoc(In),
3299b3edf446SDimitry Andric 16);
3300b3edf446SDimitry Andric }
3301b3edf446SDimitry Andric
3302b3edf446SDimitry Andric // mods are on v2f16 elements
3303b3edf446SDimitry Andric if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3304b3edf446SDimitry Andric SmallVector<SDValue, 8> EltsV2F16;
3305b3edf446SDimitry Andric
3306b3edf446SDimitry Andric for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3307b3edf446SDimitry Andric SDValue ElV2f16 = stripBitcast(BV->getOperand(i));
3308b3edf446SDimitry Andric // Based on first element decide which mod we match, neg or abs
3309b3edf446SDimitry Andric if (EltsV2F16.empty())
3310b3edf446SDimitry Andric ModOpcode = (ElV2f16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
3311b3edf446SDimitry Andric if (ElV2f16->getOpcode() != ModOpcode)
3312b3edf446SDimitry Andric break;
3313b3edf446SDimitry Andric EltsV2F16.push_back(ElV2f16->getOperand(0));
3314b3edf446SDimitry Andric }
3315b3edf446SDimitry Andric
3316b3edf446SDimitry Andric // All elements have ModOpcode modifier
3317b3edf446SDimitry Andric if (BV->getNumOperands() == EltsV2F16.size())
3318b3edf446SDimitry Andric selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, CurDAG, SDLoc(In),
3319b3edf446SDimitry Andric 32);
3320b3edf446SDimitry Andric }
3321b3edf446SDimitry Andric
3322b3edf446SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3323b3edf446SDimitry Andric return true;
3324b3edf446SDimitry Andric }
3325b3edf446SDimitry Andric
SelectWMMAModsF32NegAbs(SDValue In,SDValue & Src,SDValue & SrcMods) const3326b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
3327b3edf446SDimitry Andric SDValue &SrcMods) const {
3328b3edf446SDimitry Andric Src = In;
3329b3edf446SDimitry Andric unsigned Mods = SISrcMods::OP_SEL_1;
3330b3edf446SDimitry Andric SmallVector<SDValue, 8> EltsF32;
3331b3edf446SDimitry Andric
3332b3edf446SDimitry Andric if (auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
33330fca6ea1SDimitry Andric assert(BV->getNumOperands() > 0);
33340fca6ea1SDimitry Andric // Based on first element decide which mod we match, neg or abs
33350fca6ea1SDimitry Andric SDValue ElF32 = stripBitcast(BV->getOperand(0));
33360fca6ea1SDimitry Andric unsigned ModOpcode =
33370fca6ea1SDimitry Andric (ElF32.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
3338b3edf446SDimitry Andric for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
3339b3edf446SDimitry Andric SDValue ElF32 = stripBitcast(BV->getOperand(i));
3340b3edf446SDimitry Andric if (ElF32.getOpcode() != ModOpcode)
3341b3edf446SDimitry Andric break;
3342b3edf446SDimitry Andric EltsF32.push_back(ElF32.getOperand(0));
3343b3edf446SDimitry Andric }
3344b3edf446SDimitry Andric
3345b3edf446SDimitry Andric // All elements had ModOpcode modifier
3346b3edf446SDimitry Andric if (BV->getNumOperands() == EltsF32.size())
3347b3edf446SDimitry Andric selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, CurDAG, SDLoc(In),
3348b3edf446SDimitry Andric 32);
3349b3edf446SDimitry Andric }
3350b3edf446SDimitry Andric
3351b3edf446SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3352b3edf446SDimitry Andric return true;
3353b3edf446SDimitry Andric }
3354b3edf446SDimitry Andric
SelectWMMAVISrc(SDValue In,SDValue & Src) const3355b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
3356b3edf446SDimitry Andric if (auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
3357b3edf446SDimitry Andric BitVector UndefElements;
3358b3edf446SDimitry Andric if (SDValue Splat = BV->getSplatValue(&UndefElements))
3359b3edf446SDimitry Andric if (isInlineImmediate(Splat.getNode())) {
3360b3edf446SDimitry Andric if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat)) {
3361b3edf446SDimitry Andric unsigned Imm = C->getAPIntValue().getSExtValue();
3362b3edf446SDimitry Andric Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
3363b3edf446SDimitry Andric return true;
3364b3edf446SDimitry Andric }
3365b3edf446SDimitry Andric if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Splat)) {
3366b3edf446SDimitry Andric unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
3367b3edf446SDimitry Andric Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
3368b3edf446SDimitry Andric return true;
3369b3edf446SDimitry Andric }
3370b3edf446SDimitry Andric llvm_unreachable("unhandled Constant node");
3371b3edf446SDimitry Andric }
3372b3edf446SDimitry Andric }
3373b3edf446SDimitry Andric
3374b3edf446SDimitry Andric // 16 bit splat
3375b3edf446SDimitry Andric SDValue SplatSrc32 = stripBitcast(In);
33760fca6ea1SDimitry Andric if (auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
3377b3edf446SDimitry Andric if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
3378b3edf446SDimitry Andric SDValue SplatSrc16 = stripBitcast(Splat32);
33790fca6ea1SDimitry Andric if (auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
3380b3edf446SDimitry Andric if (SDValue Splat = SplatSrc16BV->getSplatValue()) {
3381b3edf446SDimitry Andric const SIInstrInfo *TII = Subtarget->getInstrInfo();
33820fca6ea1SDimitry Andric std::optional<APInt> RawValue;
33830fca6ea1SDimitry Andric if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Splat))
33840fca6ea1SDimitry Andric RawValue = C->getValueAPF().bitcastToAPInt();
33850fca6ea1SDimitry Andric else if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat))
33860fca6ea1SDimitry Andric RawValue = C->getAPIntValue();
33870fca6ea1SDimitry Andric
33880fca6ea1SDimitry Andric if (RawValue.has_value()) {
33890fca6ea1SDimitry Andric EVT VT = In.getValueType().getScalarType();
33900fca6ea1SDimitry Andric if (VT.getSimpleVT() == MVT::f16 || VT.getSimpleVT() == MVT::bf16) {
33910fca6ea1SDimitry Andric APFloat FloatVal(VT.getSimpleVT() == MVT::f16
33920fca6ea1SDimitry Andric ? APFloatBase::IEEEhalf()
33930fca6ea1SDimitry Andric : APFloatBase::BFloat(),
33940fca6ea1SDimitry Andric RawValue.value());
33950fca6ea1SDimitry Andric if (TII->isInlineConstant(FloatVal)) {
33960fca6ea1SDimitry Andric Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
33970fca6ea1SDimitry Andric MVT::i16);
3398b3edf446SDimitry Andric return true;
3399b3edf446SDimitry Andric }
34000fca6ea1SDimitry Andric } else if (VT.getSimpleVT() == MVT::i16) {
34010fca6ea1SDimitry Andric if (TII->isInlineConstant(RawValue.value())) {
34020fca6ea1SDimitry Andric Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
34030fca6ea1SDimitry Andric MVT::i16);
34040fca6ea1SDimitry Andric return true;
3405b3edf446SDimitry Andric }
34060fca6ea1SDimitry Andric } else
34070fca6ea1SDimitry Andric llvm_unreachable("unknown 16-bit type");
3408b3edf446SDimitry Andric }
3409b3edf446SDimitry Andric }
3410b3edf446SDimitry Andric }
3411b3edf446SDimitry Andric
3412b3edf446SDimitry Andric return false;
3413b3edf446SDimitry Andric }
3414b3edf446SDimitry Andric
SelectSWMMACIndex8(SDValue In,SDValue & Src,SDValue & IndexKey) const3415b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
3416b3edf446SDimitry Andric SDValue &IndexKey) const {
3417b3edf446SDimitry Andric unsigned Key = 0;
3418b3edf446SDimitry Andric Src = In;
3419b3edf446SDimitry Andric
3420b3edf446SDimitry Andric if (In.getOpcode() == ISD::SRL) {
3421b3edf446SDimitry Andric const llvm::SDValue &ShiftSrc = In.getOperand(0);
3422b3edf446SDimitry Andric ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
3423b3edf446SDimitry Andric if (ShiftSrc.getValueType().getSizeInBits() == 32 && ShiftAmt &&
3424b3edf446SDimitry Andric ShiftAmt->getZExtValue() % 8 == 0) {
3425b3edf446SDimitry Andric Key = ShiftAmt->getZExtValue() / 8;
3426b3edf446SDimitry Andric Src = ShiftSrc;
3427b3edf446SDimitry Andric }
3428b3edf446SDimitry Andric }
3429b3edf446SDimitry Andric
3430b3edf446SDimitry Andric IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
3431b3edf446SDimitry Andric return true;
3432b3edf446SDimitry Andric }
3433b3edf446SDimitry Andric
SelectSWMMACIndex16(SDValue In,SDValue & Src,SDValue & IndexKey) const3434b3edf446SDimitry Andric bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
3435b3edf446SDimitry Andric SDValue &IndexKey) const {
3436b3edf446SDimitry Andric unsigned Key = 0;
3437b3edf446SDimitry Andric Src = In;
3438b3edf446SDimitry Andric
3439b3edf446SDimitry Andric if (In.getOpcode() == ISD::SRL) {
3440b3edf446SDimitry Andric const llvm::SDValue &ShiftSrc = In.getOperand(0);
3441b3edf446SDimitry Andric ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
3442b3edf446SDimitry Andric if (ShiftSrc.getValueType().getSizeInBits() == 32 && ShiftAmt &&
3443b3edf446SDimitry Andric ShiftAmt->getZExtValue() == 16) {
3444b3edf446SDimitry Andric Key = 1;
3445b3edf446SDimitry Andric Src = ShiftSrc;
3446b3edf446SDimitry Andric }
3447b3edf446SDimitry Andric }
3448b3edf446SDimitry Andric
3449b3edf446SDimitry Andric IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
3450b3edf446SDimitry Andric return true;
3451b3edf446SDimitry Andric }
3452b3edf446SDimitry Andric
SelectVOP3OpSel(SDValue In,SDValue & Src,SDValue & SrcMods) const34530b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
34540b57cec5SDimitry Andric SDValue &SrcMods) const {
34550b57cec5SDimitry Andric Src = In;
34560b57cec5SDimitry Andric // FIXME: Handle op_sel
34570b57cec5SDimitry Andric SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
34580b57cec5SDimitry Andric return true;
34590b57cec5SDimitry Andric }
34600b57cec5SDimitry Andric
SelectVOP3OpSelMods(SDValue In,SDValue & Src,SDValue & SrcMods) const34610b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
34620b57cec5SDimitry Andric SDValue &SrcMods) const {
34630b57cec5SDimitry Andric // FIXME: Handle op_sel
34640b57cec5SDimitry Andric return SelectVOP3Mods(In, Src, SrcMods);
34650b57cec5SDimitry Andric }
34660b57cec5SDimitry Andric
34670b57cec5SDimitry Andric // The return value is not whether the match is possible (which it always is),
34680b57cec5SDimitry Andric // but whether or not it a conversion is really used.
SelectVOP3PMadMixModsImpl(SDValue In,SDValue & Src,unsigned & Mods) const34690b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
34700b57cec5SDimitry Andric unsigned &Mods) const {
34710b57cec5SDimitry Andric Mods = 0;
34720b57cec5SDimitry Andric SelectVOP3ModsImpl(In, Src, Mods);
34730b57cec5SDimitry Andric
34740b57cec5SDimitry Andric if (Src.getOpcode() == ISD::FP_EXTEND) {
34750b57cec5SDimitry Andric Src = Src.getOperand(0);
34760b57cec5SDimitry Andric assert(Src.getValueType() == MVT::f16);
34770b57cec5SDimitry Andric Src = stripBitcast(Src);
34780b57cec5SDimitry Andric
34790b57cec5SDimitry Andric // Be careful about folding modifiers if we already have an abs. fneg is
34800b57cec5SDimitry Andric // applied last, so we don't want to apply an earlier fneg.
34810b57cec5SDimitry Andric if ((Mods & SISrcMods::ABS) == 0) {
34820b57cec5SDimitry Andric unsigned ModsTmp;
34830b57cec5SDimitry Andric SelectVOP3ModsImpl(Src, Src, ModsTmp);
34840b57cec5SDimitry Andric
34850b57cec5SDimitry Andric if ((ModsTmp & SISrcMods::NEG) != 0)
34860b57cec5SDimitry Andric Mods ^= SISrcMods::NEG;
34870b57cec5SDimitry Andric
34880b57cec5SDimitry Andric if ((ModsTmp & SISrcMods::ABS) != 0)
34890b57cec5SDimitry Andric Mods |= SISrcMods::ABS;
34900b57cec5SDimitry Andric }
34910b57cec5SDimitry Andric
34920b57cec5SDimitry Andric // op_sel/op_sel_hi decide the source type and source.
34930b57cec5SDimitry Andric // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
34940b57cec5SDimitry Andric // If the sources's op_sel is set, it picks the high half of the source
34950b57cec5SDimitry Andric // register.
34960b57cec5SDimitry Andric
34970b57cec5SDimitry Andric Mods |= SISrcMods::OP_SEL_1;
34980b57cec5SDimitry Andric if (isExtractHiElt(Src, Src)) {
34990b57cec5SDimitry Andric Mods |= SISrcMods::OP_SEL_0;
35000b57cec5SDimitry Andric
35010b57cec5SDimitry Andric // TODO: Should we try to look for neg/abs here?
35020b57cec5SDimitry Andric }
35030b57cec5SDimitry Andric
35040b57cec5SDimitry Andric return true;
35050b57cec5SDimitry Andric }
35060b57cec5SDimitry Andric
35070b57cec5SDimitry Andric return false;
35080b57cec5SDimitry Andric }
35090b57cec5SDimitry Andric
SelectVOP3PMadMixModsExt(SDValue In,SDValue & Src,SDValue & SrcMods) const351006c3fb27SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
351106c3fb27SDimitry Andric SDValue &SrcMods) const {
351206c3fb27SDimitry Andric unsigned Mods = 0;
351306c3fb27SDimitry Andric if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))
351406c3fb27SDimitry Andric return false;
351506c3fb27SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
351606c3fb27SDimitry Andric return true;
351706c3fb27SDimitry Andric }
351806c3fb27SDimitry Andric
SelectVOP3PMadMixMods(SDValue In,SDValue & Src,SDValue & SrcMods) const35190b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
35200b57cec5SDimitry Andric SDValue &SrcMods) const {
35210b57cec5SDimitry Andric unsigned Mods = 0;
35220b57cec5SDimitry Andric SelectVOP3PMadMixModsImpl(In, Src, Mods);
35230b57cec5SDimitry Andric SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
35240b57cec5SDimitry Andric return true;
35250b57cec5SDimitry Andric }
35260b57cec5SDimitry Andric
getHi16Elt(SDValue In) const35270b57cec5SDimitry Andric SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
35280b57cec5SDimitry Andric if (In.isUndef())
35290b57cec5SDimitry Andric return CurDAG->getUNDEF(MVT::i32);
35300b57cec5SDimitry Andric
35310b57cec5SDimitry Andric if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
35320b57cec5SDimitry Andric SDLoc SL(In);
35330b57cec5SDimitry Andric return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
35340b57cec5SDimitry Andric }
35350b57cec5SDimitry Andric
35360b57cec5SDimitry Andric if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
35370b57cec5SDimitry Andric SDLoc SL(In);
35380b57cec5SDimitry Andric return CurDAG->getConstant(
35390b57cec5SDimitry Andric C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
35400b57cec5SDimitry Andric }
35410b57cec5SDimitry Andric
35420b57cec5SDimitry Andric SDValue Src;
35430b57cec5SDimitry Andric if (isExtractHiElt(In, Src))
35440b57cec5SDimitry Andric return Src;
35450b57cec5SDimitry Andric
35460b57cec5SDimitry Andric return SDValue();
35470b57cec5SDimitry Andric }
35480b57cec5SDimitry Andric
isVGPRImm(const SDNode * N) const35490b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
35500b57cec5SDimitry Andric assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
35510b57cec5SDimitry Andric
35520b57cec5SDimitry Andric const SIRegisterInfo *SIRI =
35530b57cec5SDimitry Andric static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
35540b57cec5SDimitry Andric const SIInstrInfo * SII =
35550b57cec5SDimitry Andric static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
35560b57cec5SDimitry Andric
35570b57cec5SDimitry Andric unsigned Limit = 0;
35580b57cec5SDimitry Andric bool AllUsesAcceptSReg = true;
35590b57cec5SDimitry Andric for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
35600b57cec5SDimitry Andric Limit < 10 && U != E; ++U, ++Limit) {
35610b57cec5SDimitry Andric const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
35620b57cec5SDimitry Andric
35630b57cec5SDimitry Andric // If the register class is unknown, it could be an unknown
35640b57cec5SDimitry Andric // register class that needs to be an SGPR, e.g. an inline asm
35650b57cec5SDimitry Andric // constraint
35660b57cec5SDimitry Andric if (!RC || SIRI->isSGPRClass(RC))
35670b57cec5SDimitry Andric return false;
35680b57cec5SDimitry Andric
35695f757f3fSDimitry Andric if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
35700b57cec5SDimitry Andric AllUsesAcceptSReg = false;
35710b57cec5SDimitry Andric SDNode * User = *U;
35720b57cec5SDimitry Andric if (User->isMachineOpcode()) {
35730b57cec5SDimitry Andric unsigned Opc = User->getMachineOpcode();
3574bdd1243dSDimitry Andric const MCInstrDesc &Desc = SII->get(Opc);
35750b57cec5SDimitry Andric if (Desc.isCommutable()) {
35760b57cec5SDimitry Andric unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
35770b57cec5SDimitry Andric unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
35780b57cec5SDimitry Andric if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
35790b57cec5SDimitry Andric unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
35800b57cec5SDimitry Andric const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
35815f757f3fSDimitry Andric if (CommutedRC == &AMDGPU::VS_32RegClass ||
35825f757f3fSDimitry Andric CommutedRC == &AMDGPU::VS_64RegClass)
35830b57cec5SDimitry Andric AllUsesAcceptSReg = true;
35840b57cec5SDimitry Andric }
35850b57cec5SDimitry Andric }
35860b57cec5SDimitry Andric }
358781ad6265SDimitry Andric // If "AllUsesAcceptSReg == false" so far we haven't succeeded
35880b57cec5SDimitry Andric // commuting current user. This means have at least one use
35890b57cec5SDimitry Andric // that strictly require VGPR. Thus, we will not attempt to commute
35900b57cec5SDimitry Andric // other user instructions.
35910b57cec5SDimitry Andric if (!AllUsesAcceptSReg)
35920b57cec5SDimitry Andric break;
35930b57cec5SDimitry Andric }
35940b57cec5SDimitry Andric }
35950b57cec5SDimitry Andric return !AllUsesAcceptSReg && (Limit < 10);
35960b57cec5SDimitry Andric }
35970b57cec5SDimitry Andric
isUniformLoad(const SDNode * N) const35980b57cec5SDimitry Andric bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
35990b57cec5SDimitry Andric auto Ld = cast<LoadSDNode>(N);
36000b57cec5SDimitry Andric
36017a6dacacSDimitry Andric const MachineMemOperand *MMO = Ld->getMemOperand();
36027a6dacacSDimitry Andric if (N->isDivergent() && !AMDGPUInstrInfo::isUniformMMO(MMO))
3603bdd1243dSDimitry Andric return false;
3604bdd1243dSDimitry Andric
36050fca6ea1SDimitry Andric return MMO->getSize().hasValue() &&
36060fca6ea1SDimitry Andric Ld->getAlign() >=
36070fca6ea1SDimitry Andric Align(std::min(MMO->getSize().getValue().getKnownMinValue(),
36080fca6ea1SDimitry Andric uint64_t(4))) &&
3609bdd1243dSDimitry Andric ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
3610bdd1243dSDimitry Andric Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
361181ad6265SDimitry Andric (Subtarget->getScalarizeGlobalBehavior() &&
36120b57cec5SDimitry Andric Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
3613bdd1243dSDimitry Andric Ld->isSimple() &&
361481ad6265SDimitry Andric static_cast<const SITargetLowering *>(getTargetLowering())
361581ad6265SDimitry Andric ->isMemOpHasNoClobberedMemOperand(N)));
36160b57cec5SDimitry Andric }
36170b57cec5SDimitry Andric
PostprocessISelDAG()36180b57cec5SDimitry Andric void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
36190b57cec5SDimitry Andric const AMDGPUTargetLowering& Lowering =
36200b57cec5SDimitry Andric *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
36210b57cec5SDimitry Andric bool IsModified = false;
36220b57cec5SDimitry Andric do {
36230b57cec5SDimitry Andric IsModified = false;
36240b57cec5SDimitry Andric
36250b57cec5SDimitry Andric // Go over all selected nodes and try to fold them a bit more
36260b57cec5SDimitry Andric SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
36270b57cec5SDimitry Andric while (Position != CurDAG->allnodes_end()) {
36280b57cec5SDimitry Andric SDNode *Node = &*Position++;
36290b57cec5SDimitry Andric MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
36300b57cec5SDimitry Andric if (!MachineNode)
36310b57cec5SDimitry Andric continue;
36320b57cec5SDimitry Andric
36330b57cec5SDimitry Andric SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
36340b57cec5SDimitry Andric if (ResNode != Node) {
36350b57cec5SDimitry Andric if (ResNode)
36360b57cec5SDimitry Andric ReplaceUses(Node, ResNode);
36370b57cec5SDimitry Andric IsModified = true;
36380b57cec5SDimitry Andric }
36390b57cec5SDimitry Andric }
36400b57cec5SDimitry Andric CurDAG->RemoveDeadNodes();
36410b57cec5SDimitry Andric } while (IsModified);
36420b57cec5SDimitry Andric }
3643bdd1243dSDimitry Andric
AMDGPUDAGToDAGISelLegacy(TargetMachine & TM,CodeGenOptLevel OptLevel)36440fca6ea1SDimitry Andric AMDGPUDAGToDAGISelLegacy::AMDGPUDAGToDAGISelLegacy(TargetMachine &TM,
36450fca6ea1SDimitry Andric CodeGenOptLevel OptLevel)
36460fca6ea1SDimitry Andric : SelectionDAGISelLegacy(
36470fca6ea1SDimitry Andric ID, std::make_unique<AMDGPUDAGToDAGISel>(TM, OptLevel)) {}
36480fca6ea1SDimitry Andric
36490fca6ea1SDimitry Andric char AMDGPUDAGToDAGISelLegacy::ID = 0;
3650