//===- AMDGPUGlobalISelUtils.cpp ---------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPURegisterBankInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

using namespace llvm;
using namespace AMDGPU;
using namespace MIPatternMatch;

std::pair<Register, unsigned>
AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg,
                                  GISelValueTracking *ValueTracking,
                                  bool CheckNUW) {
  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
  if (Def->getOpcode() == TargetOpcode::G_CONSTANT) {
    unsigned Offset;
    const MachineOperand &Op = Def->getOperand(1);
    if (Op.isImm())
      Offset = Op.getImm();
    else
      Offset = Op.getCImm()->getZExtValue();

    return std::pair(Register(), Offset);
  }

  int64_t Offset;
  if (Def->getOpcode() == TargetOpcode::G_ADD) {
    // A 32-bit (address + offset) should not cause unsigned 32-bit integer
    // wraparound, because s_load instructions perform the addition in 64 bits.
    if (CheckNUW && !Def->getFlag(MachineInstr::NoUWrap)) {
      assert(MRI.getType(Reg).getScalarSizeInBits() == 32);
      return std::pair(Reg, 0);
    }
    // TODO: Handle G_OR used for the add case.
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset)))
      return std::pair(Def->getOperand(1).getReg(), Offset);

    // FIXME: Matcher should ignore copies.
    if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))))
      return std::pair(Def->getOperand(1).getReg(), Offset);
  }

  Register Base;
  if (ValueTracking && mi_match(Reg, MRI, m_GOr(m_Reg(Base), m_ICst(Offset))) &&
      ValueTracking->maskedValueIsZero(Base,
                                       APInt(32, Offset, /*isSigned=*/true)))
    return std::pair(Base, Offset);

  // Handle the G_PTRTOINT (G_PTR_ADD base, const) case.
  if (Def->getOpcode() == TargetOpcode::G_PTRTOINT) {
    MachineInstr *Base;
    if (mi_match(Def->getOperand(1).getReg(), MRI,
                 m_GPtrAdd(m_MInstr(Base), m_ICst(Offset)))) {
      // If Base was an int converted to a pointer, simply return the int and
      // the offset.
      if (Base->getOpcode() == TargetOpcode::G_INTTOPTR)
        return std::pair(Base->getOperand(1).getReg(), Offset);

      // The register returned here will be of pointer type.
      return std::pair(Base->getOperand(0).getReg(), Offset);
    }
  }

  return std::pair(Reg, 0);
}

IntrinsicLaneMaskAnalyzer::IntrinsicLaneMaskAnalyzer(MachineFunction &MF)
    : MRI(MF.getRegInfo()) {
  initLaneMaskIntrinsics(MF);
}

bool IntrinsicLaneMaskAnalyzer::isS32S64LaneMask(Register Reg) const {
  return S32S64LaneMask.contains(Reg);
}

// Collect registers that carry wave lane masks as plain S32/S64 values: the
// incoming mask operand and the result of amdgcn.if.break, and the lane-mask
// results of the SI_IF and SI_ELSE control-flow pseudos.
void IntrinsicLaneMaskAnalyzer::initLaneMaskIntrinsics(MachineFunction &MF) {
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI);
      if (GI && GI->is(Intrinsic::amdgcn_if_break)) {
        S32S64LaneMask.insert(MI.getOperand(3).getReg());
        S32S64LaneMask.insert(MI.getOperand(0).getReg());
      }

      if (MI.getOpcode() == AMDGPU::SI_IF ||
          MI.getOpcode() == AMDGPU::SI_ELSE) {
        S32S64LaneMask.insert(MI.getOperand(0).getReg());
      }
    }
  }
}

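// Helpers for emitting G_AMDGPU_READANYLANE, which takes a 32-bit VGPR value
// and produces an SGPR value. Wider types are handled by unmerging the VGPR
// source into pieces readanylane can handle directly (32-bit scalars, or
// pairs of 16-bit elements), reading each piece into an SGPR (recursing on
// pieces wider than 32 bits), and merging the SGPR parts back together.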
static LLT getReadAnyLaneSplitTy(LLT Ty) {
  if (Ty.isVector()) {
    LLT ElTy = Ty.getElementType();
    if (ElTy.getSizeInBits() == 16)
      return LLT::fixed_vector(2, ElTy);
    // S32, S64 or pointer.
    return ElTy;
  }

  // Large scalars and 64-bit pointers.
  return LLT::scalar(32);
}

static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
                                 const RegisterBankInfo &RBI);

static void unmergeReadAnyLane(MachineIRBuilder &B,
                               SmallVectorImpl<Register> &SgprDstParts,
                               LLT UnmergeTy, Register VgprSrc,
                               const RegisterBankInfo &RBI) {
  const RegisterBank *VgprRB = &RBI.getRegBank(AMDGPU::VGPRRegBankID);
  auto Unmerge = B.buildUnmerge({VgprRB, UnmergeTy}, VgprSrc);
  // The last operand of G_UNMERGE_VALUES is the source; all others are defs.
  for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i) {
    SgprDstParts.push_back(buildReadAnyLane(B, Unmerge.getReg(i), RBI));
  }
}

static Register buildReadAnyLane(MachineIRBuilder &B, Register VgprSrc,
                                 const RegisterBankInfo &RBI) {
  LLT Ty = B.getMRI()->getType(VgprSrc);
  const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID);
  if (Ty.getSizeInBits() == 32) {
    return B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {{SgprRB, Ty}}, {VgprSrc})
        .getReg(0);
  }

  SmallVector<Register, 8> SgprDstParts;
  unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);

  return B.buildMergeLikeInstr({SgprRB, Ty}, SgprDstParts).getReg(0);
}

void AMDGPU::buildReadAnyLane(MachineIRBuilder &B, Register SgprDst,
                              Register VgprSrc, const RegisterBankInfo &RBI) {
  LLT Ty = B.getMRI()->getType(VgprSrc);
  if (Ty.getSizeInBits() == 32) {
    B.buildInstr(AMDGPU::G_AMDGPU_READANYLANE, {SgprDst}, {VgprSrc});
    return;
  }

  SmallVector<Register, 8> SgprDstParts;
  unmergeReadAnyLane(B, SgprDstParts, getReadAnyLaneSplitTy(Ty), VgprSrc, RBI);

  B.buildMergeLikeInstr(SgprDst, SgprDstParts);
}