//===-- AMDGPURegBankLegalizeHelper.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Implements the actual lowering algorithms for each ID that can be used in
/// Rule.OperandMapping. Similar to the generic LegalizerHelper, but works with
/// register banks.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeHelper.h"
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

RegBankLegalizeHelper::RegBankLegalizeHelper(
    MachineIRBuilder &B, const MachineUniformityInfo &MUI,
    const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
    : ST(B.getMF().getSubtarget<GCNSubtarget>()), B(B), MRI(*B.getMRI()),
      MUI(MUI), RBI(RBI), RBLRules(RBLRules),
      SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
      VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
      VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}

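// Look up the mapping for MI in the rule set for its opcode and apply it:
// def operands are processed with the insert point after MI, use operands
// with the insert point at MI, and the requested lowering method runs last.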
void RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
  const SetOfRulesForOpcode &RuleSet = RBLRules.getRulesForOpc(MI);
  const RegBankLLTMapping &Mapping = RuleSet.findMappingForMI(MI, MRI, MUI);

  SmallSet<Register, 4> WaterfallSgprs;
  unsigned OpIdx = 0;
  if (Mapping.DstOpMapping.size() > 0) {
    B.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
    applyMappingDst(MI, OpIdx, Mapping.DstOpMapping);
  }
  if (Mapping.SrcOpMapping.size() > 0) {
    B.setInstr(MI);
    applyMappingSrc(MI, OpIdx, Mapping.SrcOpMapping, WaterfallSgprs);
  }

  lower(MI, Mapping, WaterfallSgprs);
}

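// Split a wide load into a sequence of narrower loads, one per type in
// LLTBreakdown, each reading at the accumulated byte offset. If the parts all
// have the same type they are merged into Dst directly; otherwise MergeTy is
// the common piece type the parts are unmerged into before merging into Dst.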
void RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
                                      ArrayRef<LLT> LLTBreakdown, LLT MergeTy) {
  MachineFunction &MF = B.getMF();
  assert(MI.getNumMemOperands() == 1);
  MachineMemOperand &BaseMMO = **MI.memoperands_begin();
  Register Dst = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
  Register Base = MI.getOperand(1).getReg();
  LLT PtrTy = MRI.getType(Base);
  const RegisterBank *PtrRB = MRI.getRegBankOrNull(Base);
  LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
  SmallVector<Register, 4> LoadPartRegs;

  unsigned ByteOffset = 0;
  for (LLT PartTy : LLTBreakdown) {
    Register BasePlusOffset;
    if (ByteOffset == 0) {
      BasePlusOffset = Base;
    } else {
      auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
      BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0);
    }
    auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
    auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
    LoadPartRegs.push_back(LoadPart.getReg(0));
    ByteOffset += PartTy.getSizeInBytes();
  }

  if (!MergeTy.isValid()) {
    // Loads are all of the same size: concat or merge them together.
    B.buildMergeLikeInstr(Dst, LoadPartRegs);
  } else {
    // Loads are not all of the same size: unmerge them into smaller pieces of
    // MergeTy type, then merge the pieces into Dst.
    SmallVector<Register, 4> MergeTyParts;
    for (Register Reg : LoadPartRegs) {
      if (MRI.getType(Reg) == MergeTy) {
        MergeTyParts.push_back(Reg);
      } else {
        auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, Reg);
        for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i)
          MergeTyParts.push_back(Unmerge.getReg(i));
      }
    }
    B.buildMergeLikeInstr(Dst, MergeTyParts);
  }
  MI.eraseFromParent();
}

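// Replace a load with a single wider load of WideTy and cut the result back
// down: truncate for scalars, or unmerge into MergeTy pieces and merge the
// needed pieces into Dst for vectors.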
void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
                                      LLT MergeTy) {
  MachineFunction &MF = B.getMF();
  assert(MI.getNumMemOperands() == 1);
  MachineMemOperand &BaseMMO = **MI.memoperands_begin();
  Register Dst = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
  Register Base = MI.getOperand(1).getReg();

  MachineMemOperand *WideMMO = MF.getMachineMemOperand(&BaseMMO, 0, WideTy);
  auto WideLoad = B.buildLoad({DstRB, WideTy}, Base, *WideMMO);

  if (WideTy.isScalar()) {
    B.buildTrunc(Dst, WideLoad);
  } else {
    SmallVector<Register, 4> MergeTyParts;
    auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad);

    LLT DstTy = MRI.getType(Dst);
    unsigned NumElts = DstTy.getSizeInBits() / MergeTy.getSizeInBits();
    for (unsigned i = 0; i < NumElts; ++i) {
      MergeTyParts.push_back(Unmerge.getReg(i));
    }
    B.buildMergeLikeInstr(Dst, MergeTyParts);
  }
  MI.eraseFromParent();
}

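// Lower G_{S|Z|ANY}EXT of a vcc (lane mask) source into a vgpr select between
// the extended 'true' constant (-1 for sext, 1 otherwise) and 0. For S64 only
// a 32-bit select is built; the high half is a copy of the low half (sext),
// zero (zext) or undef (anyext).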
void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  Register Src = MI.getOperand(1).getReg();
  unsigned Opc = MI.getOpcode();
  int TrueExtCst = Opc == G_SEXT ? -1 : 1;
  if (Ty == S32 || Ty == S16) {
    auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst);
    auto False = B.buildConstant({VgprRB, Ty}, 0);
    B.buildSelect(Dst, Src, True, False);
  } else if (Ty == S64) {
    auto True = B.buildConstant({VgprRB_S32}, TrueExtCst);
    auto False = B.buildConstant({VgprRB_S32}, 0);
    auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False);
    MachineInstrBuilder Hi;
    switch (Opc) {
    case G_SEXT:
      Hi = Lo;
      break;
    case G_ZEXT:
      Hi = False;
      break;
    case G_ANYEXT:
      Hi = B.buildUndef({VgprRB_S32});
      break;
    default:
      llvm_unreachable("Opcode not supported");
    }

    B.buildMergeValues(Dst, {Lo.getReg(0), Hi.getReg(0)});
  } else {
    llvm_unreachable("Type not supported");
  }

  MI.eraseFromParent();
}

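// Unpack helpers for uniform packed 16-bit operations: bitcast the V2S16
// source to S32 and return its two halves, each {zero|sign|any}-extended to
// 32 bits.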
std::pair<Register, Register> RegBankLegalizeHelper::unpackZExt(Register Reg) {
  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
  auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
  auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
  auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
  return {Lo.getReg(0), Hi.getReg(0)};
}

std::pair<Register, Register> RegBankLegalizeHelper::unpackSExt(Register Reg) {
  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
  auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
  auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
  return {Lo.getReg(0), Hi.getReg(0)};
}

std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(Register Reg) {
  auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
  auto Lo = PackedS32;
  auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
  return {Lo.getReg(0), Hi.getReg(0)};
}

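// Lower a uniform V2S16 shift: unpack both operands into extended S32 halves,
// do two S32 shifts, then repack with G_BUILD_VECTOR_TRUNC. The extension
// matches the shift: any-extend is enough for G_SHL since bits above bit 15
// cannot affect the low 16-bit result, while G_LSHR and G_ASHR need zero- and
// sign-extended inputs so the bits shifted in from above bit 15 are correct.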
void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
  Register Lo, Hi;
  switch (MI.getOpcode()) {
  case AMDGPU::G_SHL: {
    auto [Val0, Val1] = unpackAExt(MI.getOperand(1).getReg());
    auto [Amt0, Amt1] = unpackAExt(MI.getOperand(2).getReg());
    Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
    Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
    break;
  }
  case AMDGPU::G_LSHR: {
    auto [Val0, Val1] = unpackZExt(MI.getOperand(1).getReg());
    auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());
    Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
    Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
    break;
  }
  case AMDGPU::G_ASHR: {
    auto [Val0, Val1] = unpackSExt(MI.getOperand(1).getReg());
    auto [Amt0, Amt1] = unpackSExt(MI.getOperand(2).getReg());
    Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
    Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
    break;
  }
  default:
    llvm_unreachable("Unpack lowering not implemented");
  }
  B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
  MI.eraseFromParent();
}

static bool isSignedBFE(MachineInstr &MI) {
  if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
    return (GI->is(Intrinsic::amdgcn_sbfe));

  return MI.getOpcode() == AMDGPU::G_SBFX;
}

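// Lower a divergent 64-bit bitfield extract, for which there is no single
// vgpr instruction: shift the field down to bit 0, then fill the high bits
// with 32-bit shifts or, when Width is a known constant, with one 32-bit
// G_{S|U}BFX on the relevant half.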
void RegBankLegalizeHelper::lowerV_BFE(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  assert(MRI.getType(Dst) == LLT::scalar(64));
  bool Signed = isSignedBFE(MI);
  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
  // Extract a bitfield from Src: LSBit is the least-significant bit of the
  // extraction (the field offset) and Width is the size of the bitfield.
  Register Src = MI.getOperand(FirstOpnd).getReg();
  Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
  Register Width = MI.getOperand(FirstOpnd + 2).getReg();
  // Comments trace the signed bitfield extract; unsigned is similar. In the
  // bit diagrams, x is the sign bit of Src, s the sign bit of the bitfield,
  // l its LSB and y the remaining bits of the bitfield to extract.

  // Src >> LSBit Hi|Lo: x?????syyyyyyl??? -> xxxx?????syyyyyyl
  unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
  auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});

  auto ConstWidth = getIConstantVRegValWithLookThrough(Width, MRI);

  // Expand to Src >> LSBit << (64 - Width) >> (64 - Width)
  // << (64 - Width): Hi|Lo: xxxx?????syyyyyyl -> syyyyyyl000000000
  // >> (64 - Width): Hi|Lo: syyyyyyl000000000 -> ssssssssssyyyyyyl
  if (!ConstWidth) {
    auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
    auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
    B.buildInstr(SHROpc, {Dst}, {SignBit, Amt});
    MI.eraseFromParent();
    return;
  }

  uint64_t WidthImm = ConstWidth->Value.getZExtValue();
  auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
  Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
  Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
  auto Zero = B.buildConstant({VgprRB, S32}, 0);
  unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;

  if (WidthImm <= 32) {
    // SHRSrc Hi|Lo: ????????|???syyyl -> ????????|ssssyyyl
    auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
    MachineInstrBuilder Hi;
    if (Signed) {
      // SHRSrc Hi|Lo: ????????|ssssyyyl -> ssssssss|ssssyyyl
      Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
    } else {
      // SHRSrc Hi|Lo: ????????|000syyyl -> 00000000|000syyyl
      Hi = Zero;
    }
    B.buildMergeLikeInstr(Dst, {Lo, Hi});
  } else {
    auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32);
    // SHRSrc Hi|Lo: ??????sy|yyyyyyyl -> sssssssy|yyyyyyyl
    auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
    B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
  }

  MI.eraseFromParent();
}

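// Lower a uniform bitfield extract by selecting one of the scalar S_BFE
// machine instructions directly.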
void RegBankLegalizeHelper::lowerS_BFE(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  bool Signed = isSignedBFE(MI);
  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
  Register Src = MI.getOperand(FirstOpnd).getReg();
  Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
  Register Width = MI.getOperand(FirstOpnd + 2).getReg();
  // For uniform bitfield extract there are four available instructions, but
  // LSBit (field offset) and Width (size of bitfield) need to be packed into
  // an S32, with the field offset in the low and the size in the high 16 bits.

  // Src1 Hi16|Lo16 = Size|FieldOffset
  auto Mask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
  auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
  auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
  auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
  unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
  unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
  unsigned Opc = Ty == S32 ? Opc32 : Opc64;

  // Select the machine instruction directly; since its operands are
  // constrained to register classes, insert copies between register classes
  // and register banks.
  auto S_BFE = B.buildInstr(Opc, {{SgprRB, Ty}},
                            {B.buildCopy(Ty, Src), B.buildCopy(S32, Src1)});
  if (!constrainSelectedInstRegOperands(*S_BFE, *ST.getInstrInfo(),
                                        *ST.getRegisterInfo(), RBI))
    llvm_unreachable("failed to constrain BFE");

  B.buildCopy(DstReg, S_BFE->getOperand(0).getReg());
  MI.eraseFromParent();
}

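// Lower a binary operation on 64-bit (or V4S16 / V2S32) vgpr values by
// unmerging the sources into 32-bit (or V2S16) halves, applying the operation
// to each half and merging the results back into Dst. The select variant
// below does the same with a single shared condition.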
void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64);
  LLT Ty = DstTy == V4S16 ? V2S16 : S32;
  auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg());
  auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
  unsigned Opc = MI.getOpcode();
  auto Flags = MI.getFlags();
  auto Lo =
      B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)}, Flags);
  auto Hi =
      B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)}, Flags);
  B.buildMergeLikeInstr(Dst, {Lo, Hi});
  MI.eraseFromParent();
}

void RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64 ||
         (DstTy.isPointer() && DstTy.getSizeInBits() == 64));
  LLT Ty = DstTy == V4S16 ? V2S16 : S32;
  auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
  auto Op3 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(3).getReg());
  Register Cond = MI.getOperand(1).getReg();
  auto Flags = MI.getFlags();
  auto Lo =
      B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(0), Op3.getReg(0), Flags);
  auto Hi =
      B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(1), Op3.getReg(1), Flags);

  B.buildMergeLikeInstr(Dst, {Lo, Hi});
  MI.eraseFromParent();
}

void RegBankLegalizeHelper::lowerSplitTo32SExtInReg(MachineInstr &MI) {
  auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
  int Amt = MI.getOperand(2).getImm();
  Register Lo, Hi;
  // Hi|Lo: s sign bit, ?/x bits changed/not changed by sign-extend
  if (Amt <= 32) {
    auto Freeze = B.buildFreeze(VgprRB_S32, Op1.getReg(0));
    if (Amt == 32) {
      // Hi|Lo: ????????|sxxxxxxx -> ssssssss|sxxxxxxx
      Lo = Freeze.getReg(0);
    } else {
      // Hi|Lo: ????????|???sxxxx -> ssssssss|ssssxxxx
      Lo = B.buildSExtInReg(VgprRB_S32, Freeze, Amt).getReg(0);
    }

    auto SignExtCst = B.buildConstant(SgprRB_S32, 31);
    Hi = B.buildAShr(VgprRB_S32, Lo, SignExtCst).getReg(0);
  } else {
    // Hi|Lo: ?????sxx|xxxxxxxx -> ssssssxx|xxxxxxxx
    Lo = Op1.getReg(0);
    Hi = B.buildSExtInReg(VgprRB_S32, Op1.getReg(1), Amt - 32).getReg(0);
  }

  B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
  MI.eraseFromParent();
}

void RegBankLegalizeHelper::lower(MachineInstr &MI,
                                  const RegBankLLTMapping &Mapping,
                                  SmallSet<Register, 4> &WaterfallSgprs) {

  switch (Mapping.LoweringMethod) {
  case DoNotLower:
    return;
  case VccExtToSel:
    return lowerVccExtToSel(MI);
  case UniExtToSel: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    auto True = B.buildConstant({SgprRB, Ty},
                                MI.getOpcode() == AMDGPU::G_SEXT ? -1 : 1);
    auto False = B.buildConstant({SgprRB, Ty}, 0);
    // The input to G_{Z|S}EXT is a 'Legalizer legal' S1, most commonly a
    // compare. Lower to a select; the S1 condition was already any-extended to
    // S32 and ANDed with 1 to clear the high bits by Sgpr32AExtBoolInReg.
    B.buildSelect(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), True,
                  False);
    MI.eraseFromParent();
    return;
  }
  case UnpackBitShift:
    return lowerUnpackBitShift(MI);
  case Ext32To64: {
    const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
    MachineInstrBuilder Hi;
    switch (MI.getOpcode()) {
    case AMDGPU::G_ZEXT: {
      Hi = B.buildConstant({RB, S32}, 0);
      break;
    }
    case AMDGPU::G_SEXT: {
      // Replicate sign bit from 32-bit extended part.
      auto ShiftAmt = B.buildConstant({RB, S32}, 31);
      Hi = B.buildAShr({RB, S32}, MI.getOperand(1).getReg(), ShiftAmt);
      break;
    }
    case AMDGPU::G_ANYEXT: {
      Hi = B.buildUndef({RB, S32});
      break;
    }
    default:
      llvm_unreachable("Unsupported Opcode in Ext32To64");
    }

    B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
                          {MI.getOperand(1).getReg(), Hi});
    MI.eraseFromParent();
    return;
  }
  case UniCstExt: {
    uint64_t ConstVal = MI.getOperand(1).getCImm()->getZExtValue();
    B.buildConstant(MI.getOperand(0).getReg(), ConstVal);

    MI.eraseFromParent();
    return;
  }
  case VgprToVccCopy: {
    Register Src = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(Src);
    // Take the lowest bit from each lane and put it in a lane mask. Lower via
    // compare; the high bits must be cleared first since the compare looks at
    // all bits of the register.
    Register BoolSrc = MRI.createVirtualRegister({VgprRB, Ty});
    if (Ty == S64) {
      auto Src64 = B.buildUnmerge(VgprRB_S32, Src);
      auto One = B.buildConstant(VgprRB_S32, 1);
      auto AndLo = B.buildAnd(VgprRB_S32, Src64.getReg(0), One);
      auto Zero = B.buildConstant(VgprRB_S32, 0);
      auto AndHi = B.buildAnd(VgprRB_S32, Src64.getReg(1), Zero);
      B.buildMergeLikeInstr(BoolSrc, {AndLo, AndHi});
    } else {
      assert(Ty == S32 || Ty == S16);
      auto One = B.buildConstant({VgprRB, Ty}, 1);
      B.buildAnd(BoolSrc, Src, One);
    }
    auto Zero = B.buildConstant({VgprRB, Ty}, 0);
    B.buildICmp(CmpInst::ICMP_NE, MI.getOperand(0).getReg(), BoolSrc, Zero);
    MI.eraseFromParent();
    return;
  }
  case V_BFE:
    return lowerV_BFE(MI);
  case S_BFE:
    return lowerS_BFE(MI);
  case SplitTo32:
    return lowerSplitTo32(MI);
  case SplitTo32Select:
    return lowerSplitTo32Select(MI);
  case SplitTo32SExtInReg:
    return lowerSplitTo32SExtInReg(MI);
  case SplitLoad: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = DstTy.getSizeInBits();
    // Split evenly into 128-bit loads.
    if (Size > 128) {
      LLT B128;
      if (DstTy.isVector()) {
        LLT EltTy = DstTy.getElementType();
        B128 = LLT::fixed_vector(128 / EltTy.getSizeInBits(), EltTy);
      } else {
        B128 = LLT::scalar(128);
      }
      if (Size / 128 == 2)
        splitLoad(MI, {B128, B128});
      else if (Size / 128 == 4)
        splitLoad(MI, {B128, B128, B128, B128});
      else {
        LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
        llvm_unreachable("SplitLoad type not supported for MI");
      }
    }
    // Split into a 64-bit and a 32-bit load.
    else if (DstTy == S96)
      splitLoad(MI, {S64, S32}, S32);
    else if (DstTy == V3S32)
      splitLoad(MI, {V2S32, S32}, S32);
    else if (DstTy == V6S16)
      splitLoad(MI, {V4S16, V2S16}, V2S16);
    else {
      LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
      llvm_unreachable("SplitLoad type not supported for MI");
    }
    break;
  }
  case WidenLoad: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    if (DstTy == S96)
      widenLoad(MI, S128);
    else if (DstTy == V3S32)
      widenLoad(MI, V4S32, S32);
    else if (DstTy == V6S16)
      widenLoad(MI, V8S16, V2S16);
    else {
      LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
      llvm_unreachable("WidenLoad type not supported for MI");
    }
    break;
  }
  }

  // TODO: executeInWaterfallLoop(... WaterfallSgprs)
}

LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
  switch (ID) {
  case Vcc:
  case UniInVcc:
    return LLT::scalar(1);
  case Sgpr16:
  case Vgpr16:
    return LLT::scalar(16);
  case Sgpr32:
  case Sgpr32Trunc:
  case Sgpr32AExt:
  case Sgpr32AExtBoolInReg:
  case Sgpr32SExt:
  case Sgpr32ZExt:
  case UniInVgprS32:
  case Vgpr32:
  case Vgpr32SExt:
  case Vgpr32ZExt:
    return LLT::scalar(32);
  case Sgpr64:
  case Vgpr64:
    return LLT::scalar(64);
  case Sgpr128:
  case Vgpr128:
    return LLT::scalar(128);
  case VgprP0:
    return LLT::pointer(0, 64);
  case SgprP1:
  case VgprP1:
    return LLT::pointer(1, 64);
  case SgprP3:
  case VgprP3:
    return LLT::pointer(3, 32);
  case SgprP4:
  case VgprP4:
    return LLT::pointer(4, 64);
  case SgprP5:
  case VgprP5:
    return LLT::pointer(5, 32);
  case SgprV2S16:
  case VgprV2S16:
  case UniInVgprV2S16:
    return LLT::fixed_vector(2, 16);
  case SgprV2S32:
  case VgprV2S32:
    return LLT::fixed_vector(2, 32);
  case SgprV4S32:
  case VgprV4S32:
  case UniInVgprV4S32:
    return LLT::fixed_vector(4, 32);
  default:
    return LLT();
  }
}

LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) {
  switch (ID) {
  case SgprB32:
  case VgprB32:
  case UniInVgprB32:
    if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
        isAnyPtr(Ty, 32))
      return Ty;
    return LLT();
  case SgprPtr32:
  case VgprPtr32:
    return isAnyPtr(Ty, 32) ? Ty : LLT();
  case SgprPtr64:
  case VgprPtr64:
    return isAnyPtr(Ty, 64) ? Ty : LLT();
  case SgprPtr128:
  case VgprPtr128:
    return isAnyPtr(Ty, 128) ? Ty : LLT();
  case SgprB64:
  case VgprB64:
  case UniInVgprB64:
    if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
        Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
      return Ty;
    return LLT();
  case SgprB96:
  case VgprB96:
  case UniInVgprB96:
    if (Ty == LLT::scalar(96) || Ty == LLT::fixed_vector(3, 32) ||
        Ty == LLT::fixed_vector(6, 16))
      return Ty;
    return LLT();
  case SgprB128:
  case VgprB128:
  case UniInVgprB128:
    if (Ty == LLT::scalar(128) || Ty == LLT::fixed_vector(4, 32) ||
        Ty == LLT::fixed_vector(2, 64) || isAnyPtr(Ty, 128))
      return Ty;
    return LLT();
  case SgprB256:
  case VgprB256:
  case UniInVgprB256:
    if (Ty == LLT::scalar(256) || Ty == LLT::fixed_vector(8, 32) ||
        Ty == LLT::fixed_vector(4, 64) || Ty == LLT::fixed_vector(16, 16))
      return Ty;
    return LLT();
  case SgprB512:
  case VgprB512:
  case UniInVgprB512:
    if (Ty == LLT::scalar(512) || Ty == LLT::fixed_vector(16, 32) ||
        Ty == LLT::fixed_vector(8, 64))
      return Ty;
    return LLT();
  default:
    return LLT();
  }
}

const RegisterBank *
RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
  switch (ID) {
  case Vcc:
    return VccRB;
  case Sgpr16:
  case Sgpr32:
  case Sgpr64:
  case Sgpr128:
  case SgprP1:
  case SgprP3:
  case SgprP4:
  case SgprP5:
  case SgprPtr32:
  case SgprPtr64:
  case SgprPtr128:
  case SgprV2S16:
  case SgprV2S32:
  case SgprV4S32:
  case SgprB32:
  case SgprB64:
  case SgprB96:
  case SgprB128:
  case SgprB256:
  case SgprB512:
  case UniInVcc:
  case UniInVgprS32:
  case UniInVgprV2S16:
  case UniInVgprV4S32:
  case UniInVgprB32:
  case UniInVgprB64:
  case UniInVgprB96:
  case UniInVgprB128:
  case UniInVgprB256:
  case UniInVgprB512:
  case Sgpr32Trunc:
  case Sgpr32AExt:
  case Sgpr32AExtBoolInReg:
  case Sgpr32SExt:
  case Sgpr32ZExt:
    return SgprRB;
  case Vgpr16:
  case Vgpr32:
  case Vgpr64:
  case Vgpr128:
  case VgprP0:
  case VgprP1:
  case VgprP3:
  case VgprP4:
  case VgprP5:
  case VgprPtr32:
  case VgprPtr64:
  case VgprPtr128:
  case VgprV2S16:
  case VgprV2S32:
  case VgprV4S32:
  case VgprB32:
  case VgprB64:
  case VgprB96:
  case VgprB128:
  case VgprB256:
  case VgprB512:
  case Vgpr32SExt:
  case Vgpr32ZExt:
    return VgprRB;
  default:
    return nullptr;
  }
}

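// Rewrite def operands according to the mapping IDs. Plain sgpr/vgpr IDs only
// assert the expected type and bank; UniInVcc, UniInVgpr* and Sgpr32Trunc
// redirect the def into a new temporary register and rebuild the original
// register after MI (copy from vcc, read-any-lane, or trunc respectively).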
void RegBankLegalizeHelper::applyMappingDst(
    MachineInstr &MI, unsigned &OpIdx,
    const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs) {
  // Defs start from operand 0
  for (; OpIdx < MethodIDs.size(); ++OpIdx) {
    if (MethodIDs[OpIdx] == None)
      continue;
    MachineOperand &Op = MI.getOperand(OpIdx);
    Register Reg = Op.getReg();
    LLT Ty = MRI.getType(Reg);
    [[maybe_unused]] const RegisterBank *RB = MRI.getRegBank(Reg);

    switch (MethodIDs[OpIdx]) {
    // vcc, sgpr and vgpr scalars, pointers and vectors
    case Vcc:
    case Sgpr16:
    case Sgpr32:
    case Sgpr64:
    case Sgpr128:
    case SgprP1:
    case SgprP3:
    case SgprP4:
    case SgprP5:
    case SgprV2S16:
    case SgprV2S32:
    case SgprV4S32:
    case Vgpr16:
    case Vgpr32:
    case Vgpr64:
    case Vgpr128:
    case VgprP0:
    case VgprP1:
    case VgprP3:
    case VgprP4:
    case VgprP5:
    case VgprV2S16:
    case VgprV2S32:
    case VgprV4S32: {
      assert(Ty == getTyFromID(MethodIDs[OpIdx]));
      assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
      break;
    }
    // sgpr and vgpr B-types
    case SgprB32:
    case SgprB64:
    case SgprB96:
    case SgprB128:
    case SgprB256:
    case SgprB512:
    case SgprPtr32:
    case SgprPtr64:
    case SgprPtr128:
    case VgprB32:
    case VgprB64:
    case VgprB96:
    case VgprB128:
    case VgprB256:
    case VgprB512:
    case VgprPtr32:
    case VgprPtr64:
    case VgprPtr128: {
      assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
      assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
      break;
    }
    // uniform in vcc/vgpr: scalars, vectors and B-types
    case UniInVcc: {
      assert(Ty == S1);
      assert(RB == SgprRB);
      Register NewDst = MRI.createVirtualRegister(VccRB_S1);
      Op.setReg(NewDst);
      auto CopyS32_Vcc =
          B.buildInstr(AMDGPU::G_AMDGPU_COPY_SCC_VCC, {SgprRB_S32}, {NewDst});
      B.buildTrunc(Reg, CopyS32_Vcc);
      break;
    }
    case UniInVgprS32:
    case UniInVgprV2S16:
    case UniInVgprV4S32: {
      assert(Ty == getTyFromID(MethodIDs[OpIdx]));
      assert(RB == SgprRB);
      Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
      Op.setReg(NewVgprDst);
      buildReadAnyLane(B, Reg, NewVgprDst, RBI);
      break;
    }
    case UniInVgprB32:
    case UniInVgprB64:
    case UniInVgprB96:
    case UniInVgprB128:
    case UniInVgprB256:
    case UniInVgprB512: {
      assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
      assert(RB == SgprRB);
      Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
      Op.setReg(NewVgprDst);
      AMDGPU::buildReadAnyLane(B, Reg, NewVgprDst, RBI);
      break;
    }
    // sgpr trunc
    case Sgpr32Trunc: {
      assert(Ty.getSizeInBits() < 32);
      assert(RB == SgprRB);
      Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
      Op.setReg(NewDst);
      B.buildTrunc(Reg, NewDst);
      break;
    }
    case InvalidMapping: {
      LLVM_DEBUG(dbgs() << "Instruction with Invalid mapping: "; MI.dump(););
      llvm_unreachable("missing fast rule for MI");
    }
    default:
      llvm_unreachable("ID not supported");
    }
  }
}

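// Rewrite use operands according to the mapping IDs: sgpr IDs assert the
// expected type and bank, vgpr IDs insert a copy to vgpr when needed, Vcc
// converts an sgpr bool via G_AMDGPU_COPY_VCC_SCC, and the extending IDs
// widen sub-32-bit uses to S32 before MI.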
void RegBankLegalizeHelper::applyMappingSrc(
    MachineInstr &MI, unsigned &OpIdx,
    const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
    SmallSet<Register, 4> &SgprWaterfallOperandRegs) {
  for (unsigned i = 0; i < MethodIDs.size(); ++OpIdx, ++i) {
    if (MethodIDs[i] == None || MethodIDs[i] == IntrId || MethodIDs[i] == Imm)
      continue;

    MachineOperand &Op = MI.getOperand(OpIdx);
    Register Reg = Op.getReg();
    LLT Ty = MRI.getType(Reg);
    const RegisterBank *RB = MRI.getRegBank(Reg);

    switch (MethodIDs[i]) {
    case Vcc: {
      assert(Ty == S1);
      assert(RB == VccRB || RB == SgprRB);
      if (RB == SgprRB) {
        auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
        auto CopyVcc_Scc =
            B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {VccRB_S1}, {Aext});
        Op.setReg(CopyVcc_Scc.getReg(0));
      }
      break;
    }
    // sgpr scalars, pointers and vectors
    case Sgpr16:
    case Sgpr32:
    case Sgpr64:
    case Sgpr128:
    case SgprP1:
    case SgprP3:
    case SgprP4:
    case SgprP5:
    case SgprV2S16:
    case SgprV2S32:
    case SgprV4S32: {
      assert(Ty == getTyFromID(MethodIDs[i]));
      assert(RB == getRegBankFromID(MethodIDs[i]));
      break;
    }
    // sgpr B-types
    case SgprB32:
    case SgprB64:
    case SgprB96:
    case SgprB128:
    case SgprB256:
    case SgprB512:
    case SgprPtr32:
    case SgprPtr64:
    case SgprPtr128: {
      assert(Ty == getBTyFromID(MethodIDs[i], Ty));
      assert(RB == getRegBankFromID(MethodIDs[i]));
      break;
    }
    // vgpr scalars, pointers and vectors
    case Vgpr16:
    case Vgpr32:
    case Vgpr64:
    case Vgpr128:
    case VgprP0:
    case VgprP1:
    case VgprP3:
    case VgprP4:
    case VgprP5:
    case VgprV2S16:
    case VgprV2S32:
    case VgprV4S32: {
      assert(Ty == getTyFromID(MethodIDs[i]));
      if (RB != VgprRB) {
        auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
        Op.setReg(CopyToVgpr.getReg(0));
      }
      break;
    }
    // vgpr B-types
    case VgprB32:
    case VgprB64:
    case VgprB96:
    case VgprB128:
    case VgprB256:
    case VgprB512:
    case VgprPtr32:
    case VgprPtr64:
    case VgprPtr128: {
      assert(Ty == getBTyFromID(MethodIDs[i], Ty));
      if (RB != VgprRB) {
        auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
        Op.setReg(CopyToVgpr.getReg(0));
      }
      break;
    }
    // sgpr and vgpr scalars with extend
    case Sgpr32AExt: {
      // Note: this ext allows S1, and it is meant to be combined away.
      assert(Ty.getSizeInBits() < 32);
      assert(RB == SgprRB);
      auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
      Op.setReg(Aext.getReg(0));
      break;
    }
    case Sgpr32AExtBoolInReg: {
      // Note: this ext allows S1, and it is meant to be combined away.
      assert(Ty.getSizeInBits() == 1);
      assert(RB == SgprRB);
      auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
      // Zext of an sgpr S1 is not legal; use an AND with 1 instead. This
      // instruction is most of the time meant to be combined away in
      // AMDGPURegBankCombiner.
      auto Cst1 = B.buildConstant(SgprRB_S32, 1);
      auto BoolInReg = B.buildAnd(SgprRB_S32, Aext, Cst1);
      Op.setReg(BoolInReg.getReg(0));
      break;
    }
    case Sgpr32SExt: {
      assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32);
      assert(RB == SgprRB);
      auto Sext = B.buildSExt(SgprRB_S32, Reg);
      Op.setReg(Sext.getReg(0));
      break;
    }
    case Sgpr32ZExt: {
      assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32);
      assert(RB == SgprRB);
      auto Zext = B.buildZExt({SgprRB, S32}, Reg);
      Op.setReg(Zext.getReg(0));
      break;
    }
    case Vgpr32SExt: {
      // Note: this ext allows S1, and it is meant to be combined away.
      assert(Ty.getSizeInBits() < 32);
      assert(RB == VgprRB);
      auto Sext = B.buildSExt({VgprRB, S32}, Reg);
      Op.setReg(Sext.getReg(0));
      break;
    }
    case Vgpr32ZExt: {
      // Note: this ext allows S1, and it is meant to be combined away.
      assert(Ty.getSizeInBits() < 32);
      assert(RB == VgprRB);
      auto Zext = B.buildZExt({VgprRB, S32}, Reg);
      Op.setReg(Zext.getReg(0));
      break;
    }
    default:
      llvm_unreachable("ID not supported");
    }
  }
}

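// Legalize G_PHI. A uniform S1 phi is rewritten as an S32 phi whose result is
// truncated back to S1, with each incoming value any-extended to S32 right
// after its defining instruction. Divergent S1 phis must have been lowered
// earlier; the remaining supported types need no change.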
void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);

  if (Ty == LLT::scalar(1) && MUI.isUniform(Dst)) {
    B.setInsertPt(*MI.getParent(), MI.getParent()->getFirstNonPHI());

    Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
    MI.getOperand(0).setReg(NewDst);
    B.buildTrunc(Dst, NewDst);

    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      Register UseReg = MI.getOperand(i).getReg();

      auto DefMI = MRI.getVRegDef(UseReg)->getIterator();
      MachineBasicBlock *DefMBB = DefMI->getParent();

      B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));

      auto NewUse = B.buildAnyExt(SgprRB_S32, UseReg);
      MI.getOperand(i).setReg(NewUse.getReg(0));
    }

    return;
  }

  // All divergent i1 phis should already be lowered and inst-selected into a
  // PHI with sgpr register class and S1 LLT.
  // Note: this includes divergent phis that don't require lowering.
  if (Ty == LLT::scalar(1) && MUI.isDivergent(Dst)) {
    LLVM_DEBUG(dbgs() << "Divergent S1 G_PHI: "; MI.dump(););
    llvm_unreachable("Make sure to run AMDGPUGlobalISelDivergenceLowering "
                     "before RegBankLegalize to lower lane mask(vcc) phis");
  }

  // We accept all types that can fit in some register class.
  // Uniform G_PHIs have all sgpr registers.
  // Divergent G_PHIs have vgpr dst but inputs can be sgpr or vgpr.
  if (Ty == LLT::scalar(32) || Ty == LLT::pointer(1, 64) ||
      Ty == LLT::pointer(4, 64)) {
    return;
  }

  LLVM_DEBUG(dbgs() << "G_PHI not handled: "; MI.dump(););
  llvm_unreachable("type not supported");
}

[[maybe_unused]] static bool verifyRegBankOnOperands(MachineInstr &MI,
                                                     const RegisterBank *RB,
                                                     MachineRegisterInfo &MRI,
                                                     unsigned StartOpIdx,
                                                     unsigned EndOpIdx) {
  for (unsigned i = StartOpIdx; i <= EndOpIdx; ++i) {
    if (MRI.getRegBankOrNull(MI.getOperand(i).getReg()) != RB)
      return false;
  }
  return true;
}

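// Trivial mapping: give every register operand the bank of the def. Sgpr
// instructions require all uses to already be sgpr; for vgpr instructions,
// non-vgpr uses get a copy to vgpr.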
void RegBankLegalizeHelper::applyMappingTrivial(MachineInstr &MI) {
  const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
  // Put RB on all registers
  unsigned NumDefs = MI.getNumDefs();
  unsigned NumOperands = MI.getNumOperands();

  assert(verifyRegBankOnOperands(MI, RB, MRI, 0, NumDefs - 1));
  if (RB == SgprRB)
    assert(verifyRegBankOnOperands(MI, RB, MRI, NumDefs, NumOperands - 1));

  if (RB == VgprRB) {
    B.setInstr(MI);
    for (unsigned i = NumDefs; i < NumOperands; ++i) {
      Register Reg = MI.getOperand(i).getReg();
      if (MRI.getRegBank(Reg) != RB) {
        auto Copy = B.buildCopy({VgprRB, MRI.getType(Reg)}, Reg);
        MI.getOperand(i).setReg(Copy.getReg(0));
      }
    }
  }
}