xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- AMDGPURegBankLegalizeHelper.cpp -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// Implements actual lowering algorithms for each ID that can be used in
10 /// Rule.OperandMapping. Similar to legalizer helper but with register banks.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPURegBankLegalizeHelper.h"
15 #include "AMDGPUGlobalISelUtils.h"
16 #include "AMDGPUInstrInfo.h"
17 #include "AMDGPURegBankLegalizeRules.h"
18 #include "AMDGPURegisterBankInfo.h"
19 #include "GCNSubtarget.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
22 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
25 #include "llvm/IR/IntrinsicsAMDGPU.h"
26 
27 #define DEBUG_TYPE "amdgpu-regbanklegalize"
28 
29 using namespace llvm;
30 using namespace AMDGPU;
31 
RegBankLegalizeHelper(MachineIRBuilder & B,const MachineUniformityInfo & MUI,const RegisterBankInfo & RBI,const RegBankLegalizeRules & RBLRules)32 RegBankLegalizeHelper::RegBankLegalizeHelper(
33     MachineIRBuilder &B, const MachineUniformityInfo &MUI,
34     const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
35     : ST(B.getMF().getSubtarget<GCNSubtarget>()), B(B), MRI(*B.getMRI()),
36       MUI(MUI), RBI(RBI), RBLRules(RBLRules),
37       SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
38       VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
39       VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
40 
findRuleAndApplyMapping(MachineInstr & MI)41 void RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
42   const SetOfRulesForOpcode &RuleSet = RBLRules.getRulesForOpc(MI);
43   const RegBankLLTMapping &Mapping = RuleSet.findMappingForMI(MI, MRI, MUI);
44 
45   SmallSet<Register, 4> WaterfallSgprs;
46   unsigned OpIdx = 0;
47   if (Mapping.DstOpMapping.size() > 0) {
48     B.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
49     applyMappingDst(MI, OpIdx, Mapping.DstOpMapping);
50   }
51   if (Mapping.SrcOpMapping.size() > 0) {
52     B.setInstr(MI);
53     applyMappingSrc(MI, OpIdx, Mapping.SrcOpMapping, WaterfallSgprs);
54   }
55 
56   lower(MI, Mapping, WaterfallSgprs);
57 }
58 
splitLoad(MachineInstr & MI,ArrayRef<LLT> LLTBreakdown,LLT MergeTy)59 void RegBankLegalizeHelper::splitLoad(MachineInstr &MI,
60                                       ArrayRef<LLT> LLTBreakdown, LLT MergeTy) {
61   MachineFunction &MF = B.getMF();
62   assert(MI.getNumMemOperands() == 1);
63   MachineMemOperand &BaseMMO = **MI.memoperands_begin();
64   Register Dst = MI.getOperand(0).getReg();
65   const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
66   Register Base = MI.getOperand(1).getReg();
67   LLT PtrTy = MRI.getType(Base);
68   const RegisterBank *PtrRB = MRI.getRegBankOrNull(Base);
69   LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
70   SmallVector<Register, 4> LoadPartRegs;
71 
72   unsigned ByteOffset = 0;
73   for (LLT PartTy : LLTBreakdown) {
74     Register BasePlusOffset;
75     if (ByteOffset == 0) {
76       BasePlusOffset = Base;
77     } else {
78       auto Offset = B.buildConstant({PtrRB, OffsetTy}, ByteOffset);
79       BasePlusOffset = B.buildPtrAdd({PtrRB, PtrTy}, Base, Offset).getReg(0);
80     }
81     auto *OffsetMMO = MF.getMachineMemOperand(&BaseMMO, ByteOffset, PartTy);
82     auto LoadPart = B.buildLoad({DstRB, PartTy}, BasePlusOffset, *OffsetMMO);
83     LoadPartRegs.push_back(LoadPart.getReg(0));
84     ByteOffset += PartTy.getSizeInBytes();
85   }
86 
87   if (!MergeTy.isValid()) {
88     // Loads are of same size, concat or merge them together.
89     B.buildMergeLikeInstr(Dst, LoadPartRegs);
90   } else {
91     // Loads are not all of same size, need to unmerge them to smaller pieces
92     // of MergeTy type, then merge pieces to Dst.
93     SmallVector<Register, 4> MergeTyParts;
94     for (Register Reg : LoadPartRegs) {
95       if (MRI.getType(Reg) == MergeTy) {
96         MergeTyParts.push_back(Reg);
97       } else {
98         auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, Reg);
99         for (unsigned i = 0; i < Unmerge->getNumOperands() - 1; ++i)
100           MergeTyParts.push_back(Unmerge.getReg(i));
101       }
102     }
103     B.buildMergeLikeInstr(Dst, MergeTyParts);
104   }
105   MI.eraseFromParent();
106 }
107 
widenLoad(MachineInstr & MI,LLT WideTy,LLT MergeTy)108 void RegBankLegalizeHelper::widenLoad(MachineInstr &MI, LLT WideTy,
109                                       LLT MergeTy) {
110   MachineFunction &MF = B.getMF();
111   assert(MI.getNumMemOperands() == 1);
112   MachineMemOperand &BaseMMO = **MI.memoperands_begin();
113   Register Dst = MI.getOperand(0).getReg();
114   const RegisterBank *DstRB = MRI.getRegBankOrNull(Dst);
115   Register Base = MI.getOperand(1).getReg();
116 
117   MachineMemOperand *WideMMO = MF.getMachineMemOperand(&BaseMMO, 0, WideTy);
118   auto WideLoad = B.buildLoad({DstRB, WideTy}, Base, *WideMMO);
119 
120   if (WideTy.isScalar()) {
121     B.buildTrunc(Dst, WideLoad);
122   } else {
123     SmallVector<Register, 4> MergeTyParts;
124     auto Unmerge = B.buildUnmerge({DstRB, MergeTy}, WideLoad);
125 
126     LLT DstTy = MRI.getType(Dst);
127     unsigned NumElts = DstTy.getSizeInBits() / MergeTy.getSizeInBits();
128     for (unsigned i = 0; i < NumElts; ++i) {
129       MergeTyParts.push_back(Unmerge.getReg(i));
130     }
131     B.buildMergeLikeInstr(Dst, MergeTyParts);
132   }
133   MI.eraseFromParent();
134 }
135 
lowerVccExtToSel(MachineInstr & MI)136 void RegBankLegalizeHelper::lowerVccExtToSel(MachineInstr &MI) {
137   Register Dst = MI.getOperand(0).getReg();
138   LLT Ty = MRI.getType(Dst);
139   Register Src = MI.getOperand(1).getReg();
140   unsigned Opc = MI.getOpcode();
141   int TrueExtCst = Opc == G_SEXT ? -1 : 1;
142   if (Ty == S32 || Ty == S16) {
143     auto True = B.buildConstant({VgprRB, Ty}, TrueExtCst);
144     auto False = B.buildConstant({VgprRB, Ty}, 0);
145     B.buildSelect(Dst, Src, True, False);
146   } else if (Ty == S64) {
147     auto True = B.buildConstant({VgprRB_S32}, TrueExtCst);
148     auto False = B.buildConstant({VgprRB_S32}, 0);
149     auto Lo = B.buildSelect({VgprRB_S32}, Src, True, False);
150     MachineInstrBuilder Hi;
151     switch (Opc) {
152     case G_SEXT:
153       Hi = Lo;
154       break;
155     case G_ZEXT:
156       Hi = False;
157       break;
158     case G_ANYEXT:
159       Hi = B.buildUndef({VgprRB_S32});
160       break;
161     default:
162       llvm_unreachable("Opcode not supported");
163     }
164 
165     B.buildMergeValues(Dst, {Lo.getReg(0), Hi.getReg(0)});
166   } else {
167     llvm_unreachable("Type not supported");
168   }
169 
170   MI.eraseFromParent();
171 }
172 
unpackZExt(Register Reg)173 std::pair<Register, Register> RegBankLegalizeHelper::unpackZExt(Register Reg) {
174   auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
175   auto Mask = B.buildConstant(SgprRB_S32, 0x0000ffff);
176   auto Lo = B.buildAnd(SgprRB_S32, PackedS32, Mask);
177   auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
178   return {Lo.getReg(0), Hi.getReg(0)};
179 }
180 
unpackSExt(Register Reg)181 std::pair<Register, Register> RegBankLegalizeHelper::unpackSExt(Register Reg) {
182   auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
183   auto Lo = B.buildSExtInReg(SgprRB_S32, PackedS32, 16);
184   auto Hi = B.buildAShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
185   return {Lo.getReg(0), Hi.getReg(0)};
186 }
187 
unpackAExt(Register Reg)188 std::pair<Register, Register> RegBankLegalizeHelper::unpackAExt(Register Reg) {
189   auto PackedS32 = B.buildBitcast(SgprRB_S32, Reg);
190   auto Lo = PackedS32;
191   auto Hi = B.buildLShr(SgprRB_S32, PackedS32, B.buildConstant(SgprRB_S32, 16));
192   return {Lo.getReg(0), Hi.getReg(0)};
193 }
194 
lowerUnpackBitShift(MachineInstr & MI)195 void RegBankLegalizeHelper::lowerUnpackBitShift(MachineInstr &MI) {
196   Register Lo, Hi;
197   switch (MI.getOpcode()) {
198   case AMDGPU::G_SHL: {
199     auto [Val0, Val1] = unpackAExt(MI.getOperand(1).getReg());
200     auto [Amt0, Amt1] = unpackAExt(MI.getOperand(2).getReg());
201     Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
202     Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
203     break;
204   }
205   case AMDGPU::G_LSHR: {
206     auto [Val0, Val1] = unpackZExt(MI.getOperand(1).getReg());
207     auto [Amt0, Amt1] = unpackZExt(MI.getOperand(2).getReg());
208     Lo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val0, Amt0}).getReg(0);
209     Hi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Val1, Amt1}).getReg(0);
210     break;
211   }
212   case AMDGPU::G_ASHR: {
213     auto [Val0, Val1] = unpackSExt(MI.getOperand(1).getReg());
214     auto [Amt0, Amt1] = unpackSExt(MI.getOperand(2).getReg());
215     Lo = B.buildAShr(SgprRB_S32, Val0, Amt0).getReg(0);
216     Hi = B.buildAShr(SgprRB_S32, Val1, Amt1).getReg(0);
217     break;
218   }
219   default:
220     llvm_unreachable("Unpack lowering not implemented");
221   }
222   B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), {Lo, Hi});
223   MI.eraseFromParent();
224 }
225 
isSignedBFE(MachineInstr & MI)226 static bool isSignedBFE(MachineInstr &MI) {
227   if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
228     return (GI->is(Intrinsic::amdgcn_sbfe));
229 
230   return MI.getOpcode() == AMDGPU::G_SBFX;
231 }
232 
lowerV_BFE(MachineInstr & MI)233 void RegBankLegalizeHelper::lowerV_BFE(MachineInstr &MI) {
234   Register Dst = MI.getOperand(0).getReg();
235   assert(MRI.getType(Dst) == LLT::scalar(64));
236   bool Signed = isSignedBFE(MI);
237   unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
238   // Extract bitfield from Src, LSBit is the least-significant bit for the
239   // extraction (field offset) and Width is size of bitfield.
240   Register Src = MI.getOperand(FirstOpnd).getReg();
241   Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
242   Register Width = MI.getOperand(FirstOpnd + 2).getReg();
243   // Comments are for signed bitfield extract, similar for unsigned. x is sign
244   // bit. s is sign, l is LSB and y are remaining bits of bitfield to extract.
245 
246   // Src >> LSBit Hi|Lo: x?????syyyyyyl??? -> xxxx?????syyyyyyl
247   unsigned SHROpc = Signed ? AMDGPU::G_ASHR : AMDGPU::G_LSHR;
248   auto SHRSrc = B.buildInstr(SHROpc, {{VgprRB, S64}}, {Src, LSBit});
249 
250   auto ConstWidth = getIConstantVRegValWithLookThrough(Width, MRI);
251 
252   // Expand to Src >> LSBit << (64 - Width) >> (64 - Width)
253   // << (64 - Width): Hi|Lo: xxxx?????syyyyyyl -> syyyyyyl000000000
254   // >> (64 - Width): Hi|Lo: syyyyyyl000000000 -> ssssssssssyyyyyyl
255   if (!ConstWidth) {
256     auto Amt = B.buildSub(VgprRB_S32, B.buildConstant(SgprRB_S32, 64), Width);
257     auto SignBit = B.buildShl({VgprRB, S64}, SHRSrc, Amt);
258     B.buildInstr(SHROpc, {Dst}, {SignBit, Amt});
259     MI.eraseFromParent();
260     return;
261   }
262 
263   uint64_t WidthImm = ConstWidth->Value.getZExtValue();
264   auto UnmergeSHRSrc = B.buildUnmerge(VgprRB_S32, SHRSrc);
265   Register SHRSrcLo = UnmergeSHRSrc.getReg(0);
266   Register SHRSrcHi = UnmergeSHRSrc.getReg(1);
267   auto Zero = B.buildConstant({VgprRB, S32}, 0);
268   unsigned BFXOpc = Signed ? AMDGPU::G_SBFX : AMDGPU::G_UBFX;
269 
270   if (WidthImm <= 32) {
271     // SHRSrc Hi|Lo: ????????|???syyyl -> ????????|ssssyyyl
272     auto Lo = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcLo, Zero, Width});
273     MachineInstrBuilder Hi;
274     if (Signed) {
275       // SHRSrc Hi|Lo: ????????|ssssyyyl -> ssssssss|ssssyyyl
276       Hi = B.buildAShr(VgprRB_S32, Lo, B.buildConstant(VgprRB_S32, 31));
277     } else {
278       // SHRSrc Hi|Lo: ????????|000syyyl -> 00000000|000syyyl
279       Hi = Zero;
280     }
281     B.buildMergeLikeInstr(Dst, {Lo, Hi});
282   } else {
283     auto Amt = B.buildConstant(VgprRB_S32, WidthImm - 32);
284     // SHRSrc Hi|Lo: ??????sy|yyyyyyyl -> sssssssy|yyyyyyyl
285     auto Hi = B.buildInstr(BFXOpc, {VgprRB_S32}, {SHRSrcHi, Zero, Amt});
286     B.buildMergeLikeInstr(Dst, {SHRSrcLo, Hi});
287   }
288 
289   MI.eraseFromParent();
290 }
291 
lowerS_BFE(MachineInstr & MI)292 void RegBankLegalizeHelper::lowerS_BFE(MachineInstr &MI) {
293   Register DstReg = MI.getOperand(0).getReg();
294   LLT Ty = MRI.getType(DstReg);
295   bool Signed = isSignedBFE(MI);
296   unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
297   Register Src = MI.getOperand(FirstOpnd).getReg();
298   Register LSBit = MI.getOperand(FirstOpnd + 1).getReg();
299   Register Width = MI.getOperand(FirstOpnd + 2).getReg();
300   // For uniform bit field extract there are 4 available instructions, but
301   // LSBit(field offset) and Width(size of bitfield) need to be packed in S32,
302   // field offset in low and size in high 16 bits.
303 
304   // Src1 Hi16|Lo16 = Size|FieldOffset
305   auto Mask = B.buildConstant(SgprRB_S32, maskTrailingOnes<unsigned>(6));
306   auto FieldOffset = B.buildAnd(SgprRB_S32, LSBit, Mask);
307   auto Size = B.buildShl(SgprRB_S32, Width, B.buildConstant(SgprRB_S32, 16));
308   auto Src1 = B.buildOr(SgprRB_S32, FieldOffset, Size);
309   unsigned Opc32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
310   unsigned Opc64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
311   unsigned Opc = Ty == S32 ? Opc32 : Opc64;
312 
313   // Select machine instruction, because of reg class constraining, insert
314   // copies from reg class to reg bank.
315   auto S_BFE = B.buildInstr(Opc, {{SgprRB, Ty}},
316                             {B.buildCopy(Ty, Src), B.buildCopy(S32, Src1)});
317   if (!constrainSelectedInstRegOperands(*S_BFE, *ST.getInstrInfo(),
318                                         *ST.getRegisterInfo(), RBI))
319     llvm_unreachable("failed to constrain BFE");
320 
321   B.buildCopy(DstReg, S_BFE->getOperand(0).getReg());
322   MI.eraseFromParent();
323 }
324 
lowerSplitTo32(MachineInstr & MI)325 void RegBankLegalizeHelper::lowerSplitTo32(MachineInstr &MI) {
326   Register Dst = MI.getOperand(0).getReg();
327   LLT DstTy = MRI.getType(Dst);
328   assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64);
329   LLT Ty = DstTy == V4S16 ? V2S16 : S32;
330   auto Op1 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(1).getReg());
331   auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
332   unsigned Opc = MI.getOpcode();
333   auto Flags = MI.getFlags();
334   auto Lo =
335       B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(0), Op2.getReg(0)}, Flags);
336   auto Hi =
337       B.buildInstr(Opc, {{VgprRB, Ty}}, {Op1.getReg(1), Op2.getReg(1)}, Flags);
338   B.buildMergeLikeInstr(Dst, {Lo, Hi});
339   MI.eraseFromParent();
340 }
341 
lowerSplitTo32Select(MachineInstr & MI)342 void RegBankLegalizeHelper::lowerSplitTo32Select(MachineInstr &MI) {
343   Register Dst = MI.getOperand(0).getReg();
344   LLT DstTy = MRI.getType(Dst);
345   assert(DstTy == V4S16 || DstTy == V2S32 || DstTy == S64 ||
346          (DstTy.isPointer() && DstTy.getSizeInBits() == 64));
347   LLT Ty = DstTy == V4S16 ? V2S16 : S32;
348   auto Op2 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(2).getReg());
349   auto Op3 = B.buildUnmerge({VgprRB, Ty}, MI.getOperand(3).getReg());
350   Register Cond = MI.getOperand(1).getReg();
351   auto Flags = MI.getFlags();
352   auto Lo =
353       B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(0), Op3.getReg(0), Flags);
354   auto Hi =
355       B.buildSelect({VgprRB, Ty}, Cond, Op2.getReg(1), Op3.getReg(1), Flags);
356 
357   B.buildMergeLikeInstr(Dst, {Lo, Hi});
358   MI.eraseFromParent();
359 }
360 
lowerSplitTo32SExtInReg(MachineInstr & MI)361 void RegBankLegalizeHelper::lowerSplitTo32SExtInReg(MachineInstr &MI) {
362   auto Op1 = B.buildUnmerge(VgprRB_S32, MI.getOperand(1).getReg());
363   int Amt = MI.getOperand(2).getImm();
364   Register Lo, Hi;
365   // Hi|Lo: s sign bit, ?/x bits changed/not changed by sign-extend
366   if (Amt <= 32) {
367     auto Freeze = B.buildFreeze(VgprRB_S32, Op1.getReg(0));
368     if (Amt == 32) {
369       // Hi|Lo: ????????|sxxxxxxx -> ssssssss|sxxxxxxx
370       Lo = Freeze.getReg(0);
371     } else {
372       // Hi|Lo: ????????|???sxxxx -> ssssssss|ssssxxxx
373       Lo = B.buildSExtInReg(VgprRB_S32, Freeze, Amt).getReg(0);
374     }
375 
376     auto SignExtCst = B.buildConstant(SgprRB_S32, 31);
377     Hi = B.buildAShr(VgprRB_S32, Lo, SignExtCst).getReg(0);
378   } else {
379     // Hi|Lo: ?????sxx|xxxxxxxx -> ssssssxx|xxxxxxxx
380     Lo = Op1.getReg(0);
381     Hi = B.buildSExtInReg(VgprRB_S32, Op1.getReg(1), Amt - 32).getReg(0);
382   }
383 
384   B.buildMergeLikeInstr(MI.getOperand(0).getReg(), {Lo, Hi});
385   MI.eraseFromParent();
386 }
387 
lower(MachineInstr & MI,const RegBankLLTMapping & Mapping,SmallSet<Register,4> & WaterfallSgprs)388 void RegBankLegalizeHelper::lower(MachineInstr &MI,
389                                   const RegBankLLTMapping &Mapping,
390                                   SmallSet<Register, 4> &WaterfallSgprs) {
391 
392   switch (Mapping.LoweringMethod) {
393   case DoNotLower:
394     return;
395   case VccExtToSel:
396     return lowerVccExtToSel(MI);
397   case UniExtToSel: {
398     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
399     auto True = B.buildConstant({SgprRB, Ty},
400                                 MI.getOpcode() == AMDGPU::G_SEXT ? -1 : 1);
401     auto False = B.buildConstant({SgprRB, Ty}, 0);
402     // Input to G_{Z|S}EXT is 'Legalizer legal' S1. Most common case is compare.
403     // We are making select here. S1 cond was already 'any-extended to S32' +
404     // 'AND with 1 to clean high bits' by Sgpr32AExtBoolInReg.
405     B.buildSelect(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), True,
406                   False);
407     MI.eraseFromParent();
408     return;
409   }
410   case UnpackBitShift:
411     return lowerUnpackBitShift(MI);
412   case Ext32To64: {
413     const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
414     MachineInstrBuilder Hi;
415     switch (MI.getOpcode()) {
416     case AMDGPU::G_ZEXT: {
417       Hi = B.buildConstant({RB, S32}, 0);
418       break;
419     }
420     case AMDGPU::G_SEXT: {
421       // Replicate sign bit from 32-bit extended part.
422       auto ShiftAmt = B.buildConstant({RB, S32}, 31);
423       Hi = B.buildAShr({RB, S32}, MI.getOperand(1).getReg(), ShiftAmt);
424       break;
425     }
426     case AMDGPU::G_ANYEXT: {
427       Hi = B.buildUndef({RB, S32});
428       break;
429     }
430     default:
431       llvm_unreachable("Unsuported Opcode in Ext32To64");
432     }
433 
434     B.buildMergeLikeInstr(MI.getOperand(0).getReg(),
435                           {MI.getOperand(1).getReg(), Hi});
436     MI.eraseFromParent();
437     return;
438   }
439   case UniCstExt: {
440     uint64_t ConstVal = MI.getOperand(1).getCImm()->getZExtValue();
441     B.buildConstant(MI.getOperand(0).getReg(), ConstVal);
442 
443     MI.eraseFromParent();
444     return;
445   }
446   case VgprToVccCopy: {
447     Register Src = MI.getOperand(1).getReg();
448     LLT Ty = MRI.getType(Src);
449     // Take lowest bit from each lane and put it in lane mask.
450     // Lowering via compare, but we need to clean high bits first as compare
451     // compares all bits in register.
452     Register BoolSrc = MRI.createVirtualRegister({VgprRB, Ty});
453     if (Ty == S64) {
454       auto Src64 = B.buildUnmerge(VgprRB_S32, Src);
455       auto One = B.buildConstant(VgprRB_S32, 1);
456       auto AndLo = B.buildAnd(VgprRB_S32, Src64.getReg(0), One);
457       auto Zero = B.buildConstant(VgprRB_S32, 0);
458       auto AndHi = B.buildAnd(VgprRB_S32, Src64.getReg(1), Zero);
459       B.buildMergeLikeInstr(BoolSrc, {AndLo, AndHi});
460     } else {
461       assert(Ty == S32 || Ty == S16);
462       auto One = B.buildConstant({VgprRB, Ty}, 1);
463       B.buildAnd(BoolSrc, Src, One);
464     }
465     auto Zero = B.buildConstant({VgprRB, Ty}, 0);
466     B.buildICmp(CmpInst::ICMP_NE, MI.getOperand(0).getReg(), BoolSrc, Zero);
467     MI.eraseFromParent();
468     return;
469   }
470   case V_BFE:
471     return lowerV_BFE(MI);
472   case S_BFE:
473     return lowerS_BFE(MI);
474   case SplitTo32:
475     return lowerSplitTo32(MI);
476   case SplitTo32Select:
477     return lowerSplitTo32Select(MI);
478   case SplitTo32SExtInReg:
479     return lowerSplitTo32SExtInReg(MI);
480   case SplitLoad: {
481     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
482     unsigned Size = DstTy.getSizeInBits();
483     // Even split to 128-bit loads
484     if (Size > 128) {
485       LLT B128;
486       if (DstTy.isVector()) {
487         LLT EltTy = DstTy.getElementType();
488         B128 = LLT::fixed_vector(128 / EltTy.getSizeInBits(), EltTy);
489       } else {
490         B128 = LLT::scalar(128);
491       }
492       if (Size / 128 == 2)
493         splitLoad(MI, {B128, B128});
494       else if (Size / 128 == 4)
495         splitLoad(MI, {B128, B128, B128, B128});
496       else {
497         LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
498         llvm_unreachable("SplitLoad type not supported for MI");
499       }
500     }
501     // 64 and 32 bit load
502     else if (DstTy == S96)
503       splitLoad(MI, {S64, S32}, S32);
504     else if (DstTy == V3S32)
505       splitLoad(MI, {V2S32, S32}, S32);
506     else if (DstTy == V6S16)
507       splitLoad(MI, {V4S16, V2S16}, V2S16);
508     else {
509       LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
510       llvm_unreachable("SplitLoad type not supported for MI");
511     }
512     break;
513   }
514   case WidenLoad: {
515     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
516     if (DstTy == S96)
517       widenLoad(MI, S128);
518     else if (DstTy == V3S32)
519       widenLoad(MI, V4S32, S32);
520     else if (DstTy == V6S16)
521       widenLoad(MI, V8S16, V2S16);
522     else {
523       LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
524       llvm_unreachable("WidenLoad type not supported for MI");
525     }
526     break;
527   }
528   }
529 
530   // TODO: executeInWaterfallLoop(... WaterfallSgprs)
531 }
532 
getTyFromID(RegBankLLTMappingApplyID ID)533 LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
534   switch (ID) {
535   case Vcc:
536   case UniInVcc:
537     return LLT::scalar(1);
538   case Sgpr16:
539   case Vgpr16:
540     return LLT::scalar(16);
541   case Sgpr32:
542   case Sgpr32Trunc:
543   case Sgpr32AExt:
544   case Sgpr32AExtBoolInReg:
545   case Sgpr32SExt:
546   case Sgpr32ZExt:
547   case UniInVgprS32:
548   case Vgpr32:
549   case Vgpr32SExt:
550   case Vgpr32ZExt:
551     return LLT::scalar(32);
552   case Sgpr64:
553   case Vgpr64:
554     return LLT::scalar(64);
555   case Sgpr128:
556   case Vgpr128:
557     return LLT::scalar(128);
558   case VgprP0:
559     return LLT::pointer(0, 64);
560   case SgprP1:
561   case VgprP1:
562     return LLT::pointer(1, 64);
563   case SgprP3:
564   case VgprP3:
565     return LLT::pointer(3, 32);
566   case SgprP4:
567   case VgprP4:
568     return LLT::pointer(4, 64);
569   case SgprP5:
570   case VgprP5:
571     return LLT::pointer(5, 32);
572   case SgprV2S16:
573   case VgprV2S16:
574   case UniInVgprV2S16:
575     return LLT::fixed_vector(2, 16);
576   case SgprV2S32:
577   case VgprV2S32:
578     return LLT::fixed_vector(2, 32);
579   case SgprV4S32:
580   case VgprV4S32:
581   case UniInVgprV4S32:
582     return LLT::fixed_vector(4, 32);
583   default:
584     return LLT();
585   }
586 }
587 
getBTyFromID(RegBankLLTMappingApplyID ID,LLT Ty)588 LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) {
589   switch (ID) {
590   case SgprB32:
591   case VgprB32:
592   case UniInVgprB32:
593     if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
594         isAnyPtr(Ty, 32))
595       return Ty;
596     return LLT();
597   case SgprPtr32:
598   case VgprPtr32:
599     return isAnyPtr(Ty, 32) ? Ty : LLT();
600   case SgprPtr64:
601   case VgprPtr64:
602     return isAnyPtr(Ty, 64) ? Ty : LLT();
603   case SgprPtr128:
604   case VgprPtr128:
605     return isAnyPtr(Ty, 128) ? Ty : LLT();
606   case SgprB64:
607   case VgprB64:
608   case UniInVgprB64:
609     if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
610         Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
611       return Ty;
612     return LLT();
613   case SgprB96:
614   case VgprB96:
615   case UniInVgprB96:
616     if (Ty == LLT::scalar(96) || Ty == LLT::fixed_vector(3, 32) ||
617         Ty == LLT::fixed_vector(6, 16))
618       return Ty;
619     return LLT();
620   case SgprB128:
621   case VgprB128:
622   case UniInVgprB128:
623     if (Ty == LLT::scalar(128) || Ty == LLT::fixed_vector(4, 32) ||
624         Ty == LLT::fixed_vector(2, 64) || isAnyPtr(Ty, 128))
625       return Ty;
626     return LLT();
627   case SgprB256:
628   case VgprB256:
629   case UniInVgprB256:
630     if (Ty == LLT::scalar(256) || Ty == LLT::fixed_vector(8, 32) ||
631         Ty == LLT::fixed_vector(4, 64) || Ty == LLT::fixed_vector(16, 16))
632       return Ty;
633     return LLT();
634   case SgprB512:
635   case VgprB512:
636   case UniInVgprB512:
637     if (Ty == LLT::scalar(512) || Ty == LLT::fixed_vector(16, 32) ||
638         Ty == LLT::fixed_vector(8, 64))
639       return Ty;
640     return LLT();
641   default:
642     return LLT();
643   }
644 }
645 
646 const RegisterBank *
getRegBankFromID(RegBankLLTMappingApplyID ID)647 RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
648   switch (ID) {
649   case Vcc:
650     return VccRB;
651   case Sgpr16:
652   case Sgpr32:
653   case Sgpr64:
654   case Sgpr128:
655   case SgprP1:
656   case SgprP3:
657   case SgprP4:
658   case SgprP5:
659   case SgprPtr32:
660   case SgprPtr64:
661   case SgprPtr128:
662   case SgprV2S16:
663   case SgprV2S32:
664   case SgprV4S32:
665   case SgprB32:
666   case SgprB64:
667   case SgprB96:
668   case SgprB128:
669   case SgprB256:
670   case SgprB512:
671   case UniInVcc:
672   case UniInVgprS32:
673   case UniInVgprV2S16:
674   case UniInVgprV4S32:
675   case UniInVgprB32:
676   case UniInVgprB64:
677   case UniInVgprB96:
678   case UniInVgprB128:
679   case UniInVgprB256:
680   case UniInVgprB512:
681   case Sgpr32Trunc:
682   case Sgpr32AExt:
683   case Sgpr32AExtBoolInReg:
684   case Sgpr32SExt:
685   case Sgpr32ZExt:
686     return SgprRB;
687   case Vgpr16:
688   case Vgpr32:
689   case Vgpr64:
690   case Vgpr128:
691   case VgprP0:
692   case VgprP1:
693   case VgprP3:
694   case VgprP4:
695   case VgprP5:
696   case VgprPtr32:
697   case VgprPtr64:
698   case VgprPtr128:
699   case VgprV2S16:
700   case VgprV2S32:
701   case VgprV4S32:
702   case VgprB32:
703   case VgprB64:
704   case VgprB96:
705   case VgprB128:
706   case VgprB256:
707   case VgprB512:
708   case Vgpr32SExt:
709   case Vgpr32ZExt:
710     return VgprRB;
711   default:
712     return nullptr;
713   }
714 }
715 
applyMappingDst(MachineInstr & MI,unsigned & OpIdx,const SmallVectorImpl<RegBankLLTMappingApplyID> & MethodIDs)716 void RegBankLegalizeHelper::applyMappingDst(
717     MachineInstr &MI, unsigned &OpIdx,
718     const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs) {
719   // Defs start from operand 0
720   for (; OpIdx < MethodIDs.size(); ++OpIdx) {
721     if (MethodIDs[OpIdx] == None)
722       continue;
723     MachineOperand &Op = MI.getOperand(OpIdx);
724     Register Reg = Op.getReg();
725     LLT Ty = MRI.getType(Reg);
726     [[maybe_unused]] const RegisterBank *RB = MRI.getRegBank(Reg);
727 
728     switch (MethodIDs[OpIdx]) {
729     // vcc, sgpr and vgpr scalars, pointers and vectors
730     case Vcc:
731     case Sgpr16:
732     case Sgpr32:
733     case Sgpr64:
734     case Sgpr128:
735     case SgprP1:
736     case SgprP3:
737     case SgprP4:
738     case SgprP5:
739     case SgprV2S16:
740     case SgprV2S32:
741     case SgprV4S32:
742     case Vgpr16:
743     case Vgpr32:
744     case Vgpr64:
745     case Vgpr128:
746     case VgprP0:
747     case VgprP1:
748     case VgprP3:
749     case VgprP4:
750     case VgprP5:
751     case VgprV2S16:
752     case VgprV2S32:
753     case VgprV4S32: {
754       assert(Ty == getTyFromID(MethodIDs[OpIdx]));
755       assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
756       break;
757     }
758     // sgpr and vgpr B-types
759     case SgprB32:
760     case SgprB64:
761     case SgprB96:
762     case SgprB128:
763     case SgprB256:
764     case SgprB512:
765     case SgprPtr32:
766     case SgprPtr64:
767     case SgprPtr128:
768     case VgprB32:
769     case VgprB64:
770     case VgprB96:
771     case VgprB128:
772     case VgprB256:
773     case VgprB512:
774     case VgprPtr32:
775     case VgprPtr64:
776     case VgprPtr128: {
777       assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
778       assert(RB == getRegBankFromID(MethodIDs[OpIdx]));
779       break;
780     }
781     // uniform in vcc/vgpr: scalars, vectors and B-types
782     case UniInVcc: {
783       assert(Ty == S1);
784       assert(RB == SgprRB);
785       Register NewDst = MRI.createVirtualRegister(VccRB_S1);
786       Op.setReg(NewDst);
787       auto CopyS32_Vcc =
788           B.buildInstr(AMDGPU::G_AMDGPU_COPY_SCC_VCC, {SgprRB_S32}, {NewDst});
789       B.buildTrunc(Reg, CopyS32_Vcc);
790       break;
791     }
792     case UniInVgprS32:
793     case UniInVgprV2S16:
794     case UniInVgprV4S32: {
795       assert(Ty == getTyFromID(MethodIDs[OpIdx]));
796       assert(RB == SgprRB);
797       Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
798       Op.setReg(NewVgprDst);
799       buildReadAnyLane(B, Reg, NewVgprDst, RBI);
800       break;
801     }
802     case UniInVgprB32:
803     case UniInVgprB64:
804     case UniInVgprB96:
805     case UniInVgprB128:
806     case UniInVgprB256:
807     case UniInVgprB512: {
808       assert(Ty == getBTyFromID(MethodIDs[OpIdx], Ty));
809       assert(RB == SgprRB);
810       Register NewVgprDst = MRI.createVirtualRegister({VgprRB, Ty});
811       Op.setReg(NewVgprDst);
812       AMDGPU::buildReadAnyLane(B, Reg, NewVgprDst, RBI);
813       break;
814     }
815     // sgpr trunc
816     case Sgpr32Trunc: {
817       assert(Ty.getSizeInBits() < 32);
818       assert(RB == SgprRB);
819       Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
820       Op.setReg(NewDst);
821       B.buildTrunc(Reg, NewDst);
822       break;
823     }
824     case InvalidMapping: {
825       LLVM_DEBUG(dbgs() << "Instruction with Invalid mapping: "; MI.dump(););
826       llvm_unreachable("missing fast rule for MI");
827     }
828     default:
829       llvm_unreachable("ID not supported");
830     }
831   }
832 }
833 
applyMappingSrc(MachineInstr & MI,unsigned & OpIdx,const SmallVectorImpl<RegBankLLTMappingApplyID> & MethodIDs,SmallSet<Register,4> & SgprWaterfallOperandRegs)834 void RegBankLegalizeHelper::applyMappingSrc(
835     MachineInstr &MI, unsigned &OpIdx,
836     const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
837     SmallSet<Register, 4> &SgprWaterfallOperandRegs) {
838   for (unsigned i = 0; i < MethodIDs.size(); ++OpIdx, ++i) {
839     if (MethodIDs[i] == None || MethodIDs[i] == IntrId || MethodIDs[i] == Imm)
840       continue;
841 
842     MachineOperand &Op = MI.getOperand(OpIdx);
843     Register Reg = Op.getReg();
844     LLT Ty = MRI.getType(Reg);
845     const RegisterBank *RB = MRI.getRegBank(Reg);
846 
847     switch (MethodIDs[i]) {
848     case Vcc: {
849       assert(Ty == S1);
850       assert(RB == VccRB || RB == SgprRB);
851       if (RB == SgprRB) {
852         auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
853         auto CopyVcc_Scc =
854             B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {VccRB_S1}, {Aext});
855         Op.setReg(CopyVcc_Scc.getReg(0));
856       }
857       break;
858     }
859     // sgpr scalars, pointers and vectors
860     case Sgpr16:
861     case Sgpr32:
862     case Sgpr64:
863     case Sgpr128:
864     case SgprP1:
865     case SgprP3:
866     case SgprP4:
867     case SgprP5:
868     case SgprV2S16:
869     case SgprV2S32:
870     case SgprV4S32: {
871       assert(Ty == getTyFromID(MethodIDs[i]));
872       assert(RB == getRegBankFromID(MethodIDs[i]));
873       break;
874     }
875     // sgpr B-types
876     case SgprB32:
877     case SgprB64:
878     case SgprB96:
879     case SgprB128:
880     case SgprB256:
881     case SgprB512:
882     case SgprPtr32:
883     case SgprPtr64:
884     case SgprPtr128: {
885       assert(Ty == getBTyFromID(MethodIDs[i], Ty));
886       assert(RB == getRegBankFromID(MethodIDs[i]));
887       break;
888     }
889     // vgpr scalars, pointers and vectors
890     case Vgpr16:
891     case Vgpr32:
892     case Vgpr64:
893     case Vgpr128:
894     case VgprP0:
895     case VgprP1:
896     case VgprP3:
897     case VgprP4:
898     case VgprP5:
899     case VgprV2S16:
900     case VgprV2S32:
901     case VgprV4S32: {
902       assert(Ty == getTyFromID(MethodIDs[i]));
903       if (RB != VgprRB) {
904         auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
905         Op.setReg(CopyToVgpr.getReg(0));
906       }
907       break;
908     }
909     // vgpr B-types
910     case VgprB32:
911     case VgprB64:
912     case VgprB96:
913     case VgprB128:
914     case VgprB256:
915     case VgprB512:
916     case VgprPtr32:
917     case VgprPtr64:
918     case VgprPtr128: {
919       assert(Ty == getBTyFromID(MethodIDs[i], Ty));
920       if (RB != VgprRB) {
921         auto CopyToVgpr = B.buildCopy({VgprRB, Ty}, Reg);
922         Op.setReg(CopyToVgpr.getReg(0));
923       }
924       break;
925     }
926     // sgpr and vgpr scalars with extend
927     case Sgpr32AExt: {
928       // Note: this ext allows S1, and it is meant to be combined away.
929       assert(Ty.getSizeInBits() < 32);
930       assert(RB == SgprRB);
931       auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
932       Op.setReg(Aext.getReg(0));
933       break;
934     }
935     case Sgpr32AExtBoolInReg: {
936       // Note: this ext allows S1, and it is meant to be combined away.
937       assert(Ty.getSizeInBits() == 1);
938       assert(RB == SgprRB);
939       auto Aext = B.buildAnyExt(SgprRB_S32, Reg);
940       // Zext SgprS1 is not legal, make AND with 1 instead. This instruction is
941       // most of times meant to be combined away in AMDGPURegBankCombiner.
942       auto Cst1 = B.buildConstant(SgprRB_S32, 1);
943       auto BoolInReg = B.buildAnd(SgprRB_S32, Aext, Cst1);
944       Op.setReg(BoolInReg.getReg(0));
945       break;
946     }
947     case Sgpr32SExt: {
948       assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32);
949       assert(RB == SgprRB);
950       auto Sext = B.buildSExt(SgprRB_S32, Reg);
951       Op.setReg(Sext.getReg(0));
952       break;
953     }
954     case Sgpr32ZExt: {
955       assert(1 < Ty.getSizeInBits() && Ty.getSizeInBits() < 32);
956       assert(RB == SgprRB);
957       auto Zext = B.buildZExt({SgprRB, S32}, Reg);
958       Op.setReg(Zext.getReg(0));
959       break;
960     }
961     case Vgpr32SExt: {
962       // Note this ext allows S1, and it is meant to be combined away.
963       assert(Ty.getSizeInBits() < 32);
964       assert(RB == VgprRB);
965       auto Sext = B.buildSExt({VgprRB, S32}, Reg);
966       Op.setReg(Sext.getReg(0));
967       break;
968     }
969     case Vgpr32ZExt: {
970       // Note this ext allows S1, and it is meant to be combined away.
971       assert(Ty.getSizeInBits() < 32);
972       assert(RB == VgprRB);
973       auto Zext = B.buildZExt({VgprRB, S32}, Reg);
974       Op.setReg(Zext.getReg(0));
975       break;
976     }
977     default:
978       llvm_unreachable("ID not supported");
979     }
980   }
981 }
982 
applyMappingPHI(MachineInstr & MI)983 void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) {
984   Register Dst = MI.getOperand(0).getReg();
985   LLT Ty = MRI.getType(Dst);
986 
987   if (Ty == LLT::scalar(1) && MUI.isUniform(Dst)) {
988     B.setInsertPt(*MI.getParent(), MI.getParent()->getFirstNonPHI());
989 
990     Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
991     MI.getOperand(0).setReg(NewDst);
992     B.buildTrunc(Dst, NewDst);
993 
994     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
995       Register UseReg = MI.getOperand(i).getReg();
996 
997       auto DefMI = MRI.getVRegDef(UseReg)->getIterator();
998       MachineBasicBlock *DefMBB = DefMI->getParent();
999 
1000       B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));
1001 
1002       auto NewUse = B.buildAnyExt(SgprRB_S32, UseReg);
1003       MI.getOperand(i).setReg(NewUse.getReg(0));
1004     }
1005 
1006     return;
1007   }
1008 
1009   // ALL divergent i1 phis should be already lowered and inst-selected into PHI
1010   // with sgpr reg class and S1 LLT.
1011   // Note: this includes divergent phis that don't require lowering.
1012   if (Ty == LLT::scalar(1) && MUI.isDivergent(Dst)) {
1013     LLVM_DEBUG(dbgs() << "Divergent S1 G_PHI: "; MI.dump(););
1014     llvm_unreachable("Make sure to run AMDGPUGlobalISelDivergenceLowering "
1015                      "before RegBankLegalize to lower lane mask(vcc) phis");
1016   }
1017 
1018   // We accept all types that can fit in some register class.
1019   // Uniform G_PHIs have all sgpr registers.
1020   // Divergent G_PHIs have vgpr dst but inputs can be sgpr or vgpr.
1021   if (Ty == LLT::scalar(32) || Ty == LLT::pointer(1, 64) ||
1022       Ty == LLT::pointer(4, 64)) {
1023     return;
1024   }
1025 
1026   LLVM_DEBUG(dbgs() << "G_PHI not handled: "; MI.dump(););
1027   llvm_unreachable("type not supported");
1028 }
1029 
verifyRegBankOnOperands(MachineInstr & MI,const RegisterBank * RB,MachineRegisterInfo & MRI,unsigned StartOpIdx,unsigned EndOpIdx)1030 [[maybe_unused]] static bool verifyRegBankOnOperands(MachineInstr &MI,
1031                                                      const RegisterBank *RB,
1032                                                      MachineRegisterInfo &MRI,
1033                                                      unsigned StartOpIdx,
1034                                                      unsigned EndOpIdx) {
1035   for (unsigned i = StartOpIdx; i <= EndOpIdx; ++i) {
1036     if (MRI.getRegBankOrNull(MI.getOperand(i).getReg()) != RB)
1037       return false;
1038   }
1039   return true;
1040 }
1041 
applyMappingTrivial(MachineInstr & MI)1042 void RegBankLegalizeHelper::applyMappingTrivial(MachineInstr &MI) {
1043   const RegisterBank *RB = MRI.getRegBank(MI.getOperand(0).getReg());
1044   // Put RB on all registers
1045   unsigned NumDefs = MI.getNumDefs();
1046   unsigned NumOperands = MI.getNumOperands();
1047 
1048   assert(verifyRegBankOnOperands(MI, RB, MRI, 0, NumDefs - 1));
1049   if (RB == SgprRB)
1050     assert(verifyRegBankOnOperands(MI, RB, MRI, NumDefs, NumOperands - 1));
1051 
1052   if (RB == VgprRB) {
1053     B.setInstr(MI);
1054     for (unsigned i = NumDefs; i < NumOperands; ++i) {
1055       Register Reg = MI.getOperand(i).getReg();
1056       if (MRI.getRegBank(Reg) != RB) {
1057         auto Copy = B.buildCopy({VgprRB, MRI.getType(Reg)}, Reg);
1058         MI.getOperand(i).setReg(Copy.getReg(0));
1059       }
1060     }
1061   }
1062 }
1063