//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // QQQQ + its subclasses.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen-like file is in sync with our expectations.
    // First, the indices.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

// Check the value mapping for three-operand instructions where all the
// operands map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID,    \
                                             AArch64::RBNameSrc##RegBankID,    \
                                             TypeSize::getFixed(Size));        \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 &&                                        \
           #RBNameDst #Size " Dst is incorrectly initialized");                \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 &&                                        \
           #RBNameSrc #Size " Src is incorrectly initialized");                \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
                                        " Dst is incorrectly initialized");    \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
                                        " Src is incorrectly initialized");    \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
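
// Illustrative note (not part of the upstream file): the checks above pin
// down the layout that getValueMapping() relies on. For example, a
// hypothetical query getValueMapping(PMI_FirstGPR, 32) yields a ValueMapping
// with a single PartialMapping covering bits [0, 32) on the GPR bank, which
// is exactly what CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR) asserts.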

unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           const TypeSize Size) const {
  // What do we do with different sizes? For now, copies are assumed to be of
  // the same size. Other hooks will be introduced for the different-size
  // cases:
  // * extract cost.
  // * build_sequence cost.

  // Copies between GPR and FPR (in either direction) involve FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}
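
// Illustrative note (not part of the upstream file): RegBankSelect's greedy
// mode compares mapping costs, so the asymmetric FMOV costs above bias it
// against cross-bank assignments. For a hypothetical
//
//   %f:fpr(s32) = COPY %g:gpr(s32)
//
// the mapping is charged 5 (GPR -> FPR), while the reverse copy costs 4.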

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::FPR128_0to7RegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
  case AArch64::ZPRRegClassID:
  case AArch64::ZPR_3bRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::tcGPRx16x17RegClassID:
  case AArch64::tcGPRx17RegClassID:
  case AArch64::tcGPRnotx16RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
  case AArch64::MatrixIndexGPR32_8_11RegClassID:
  case AArch64::MatrixIndexGPR32_12_15RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}
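
// Illustrative usage (hypothetical call, not part of this file): querying
//
//   getRegBankFromRegClass(AArch64::FPR32RegClass, LLT());
//
// hits the FPR group of cases above and returns the FPR bank, while any of
// the GPR32/GPR64 classes return the GPR bank.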

RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // A 32- or 64-bit G_OR can be mapped to either FPR or GPR for the same
    // cost.
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstGPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping(
            {getValueMapping(PMI_FirstFPR, Size),
             // Addresses are GPR 64-bit.
             getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
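
// Illustrative note (hypothetical MIR, not from this file): for
//
//   %res:_(s32) = G_OR %a:_(s32), %b:_(s32)
//
// the G_OR case above offers an all-GPR and an all-FPR mapping at equal
// cost; the greedy mode then picks whichever avoids cross-bank copies with
// the neighboring instructions.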

void AArch64RegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  MachineInstr &MI = OpdMapper.getMI();
  MachineRegisterInfo &MRI = OpdMapper.getMRI();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    // Extend smaller gpr operands to 32 bit.
    Builder.setInsertPt(*MI.getParent(), MI.getIterator());
    auto Ext = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(2).getReg());
    MRI.setRegBank(Ext.getReg(0), getRegBank(AArch64::GPRRegBankID));
    MI.getOperand(2).setReg(Ext.getReg(0));
    return applyDefaultMapping(OpdMapper);
  }
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}
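
// Illustrative before/after (hypothetical MIR, not from this file) for the
// G_INSERT_VECTOR_ELT rewrite above:
//
//   %v:fpr(<8 x s8>) = G_INSERT_VECTOR_ELT %src, %elt:gpr(s8), %idx
// becomes
//   %ext:gpr(s32) = G_ANYEXT %elt:gpr(s8)
//   %v:fpr(<8 x s8>) = G_INSERT_VECTOR_ELT %src, %ext:gpr(s32), %idx
//
// so the element operand has a 32-bit GPR type before selection.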

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  TypeSize Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands have a similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes differs between
  // the operands. If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same for each
  // type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}
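
// Illustrative note (hypothetical MIR, not from this file): for
//
//   %d:_(s64) = G_FADD %a:_(s64), %b:_(s64)
//
// the opcode is floating point, so IsFPR is true and all three operands get
// the same FPR64 value mapping from the helper above.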

/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
                          const MachineInstr &MI) {
  // TODO: Add more intrinsics.
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv:
  case Intrinsic::aarch64_neon_faddv:
  case Intrinsic::aarch64_neon_fmaxv:
  case Intrinsic::aarch64_neon_fminv:
  case Intrinsic::aarch64_neon_fmaxnmv:
  case Intrinsic::aarch64_neon_fminnmv:
    return true;
  case Intrinsic::aarch64_neon_saddlv: {
    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    return SrcTy.getElementType().getSizeInBits() >= 16 &&
           SrcTy.getElementCount().getFixedValue() >= 4;
  }
  }
}

bool AArch64RegisterBankInfo::isPHIWithFPContraints(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const TargetRegisterInfo &TRI, const unsigned Depth) const {
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
                [&](const MachineInstr &UseMI) {
                  if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
                    return true;
                  return isPHIWithFPContraints(UseMI, MRI, TRI, Depth + 1);
                });
}

bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned a FPR
  // based off of its inputs.
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}
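
// Illustrative note (hypothetical MIR, not from this file): for a phi like
//
//   %p:_(s32) = G_PHI %x(s32), %bb.1, %y(s32), %bb.2
//
// where %x is defined by a G_SITOFP, the recursion above finds a def that
// only produces FP values and concludes the phi has FP constraints.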

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return true;
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld4r:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading.
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
      if (StructEltTy->getNumElements() == 0)
        break;
      EltTy = StructEltTy->getTypeAtIndex(0U);
    }
    // Look at the first element of the array to determine its type.
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  return EltTy && EltTy->isFPOrFPVectorTy();
}
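
// Illustrative note (hypothetical IR, not from this file): for a global like
//
//   @g = global { [4 x float], i32 } zeroinitializer
//
// the struct/array walk above drills down to the leading float, so a load
// from @g is classified as a floating-point load.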

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
    // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RBs are null, both registers are generic.
      // We shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBank were nullptr");
      TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    TypeSize Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, Size),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();
  unsigned MappingID = DefaultMappingID;

  // Track the size and bank of each register.  We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();

    // As a top-level guess, vectors (scalable and fixed alike) go in FPRs;
    // scalars and pointers go in GPRs. For floating-point instructions,
    // scalars go in FPRs.
    if (Ty.isVector())
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else if (isPreISelGenericFloatingPointOpcode(Opc) ||
             Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // We want to select dup(load) into LD1R.
    if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    // s8 is an exception for G_DUP, which we always want on gpr.
    else if (ScalarTy.getSizeInBits() != 8 &&
             (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
              onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use a FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          TypeSize::getFixed(OpSize[0]));
    break;
  case TargetOpcode::G_LOAD: {
    // Loading in the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co. instructions,
    // but for the fast mode this number does not matter, and for the
    // greedy mode the cost of the cross-bank copy will offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Try to guess the type of the load from the MMO.
    if (isLoadFromFPType(MI)) {
      OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct or indirect use
                 // in a FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().

                 if (isPHIWithFPContraints(UseMI, MRI, TRI))
                   return true;

                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
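
  // Illustrative note (hypothetical MIR, not from this file) for the G_LOAD
  // heuristic above:
  //
  //   %v:_(s64) = G_LOAD %p(p0)
  //   %r:_(s64) = G_FADD %v, %w
  //
  // The use in G_FADD makes onlyUsesFP() succeed, so the load's destination
  // is mapped to FPR and no cross-bank copy is needed for the fadd.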
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_STORE:
    if (OpRegBankIdx[1] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(1).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[1] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_SEXTLOAD:
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
    // These should always be GPR.
    OpRegBankIdx[0] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INDEXED_LOAD: {
    if (isLoadFromFPType(MI))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
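
  // Illustrative note (hypothetical MIR, not from this file) for the G_SELECT
  // heuristic above:
  //
  //   %x:_(s32) = G_SITOFP %a         ; source def counts toward NumFP
  //   %z:_(s32) = G_SELECT %cond, %x, %y
  //   %r:_(s32) = G_FADD %z, %w       ; FP use counts toward NumFP
  //
  // NumFP reaches 2, so all value operands of the select are moved to FPR.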
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to a FPR register bank, then make sure that
    // we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else {
      // If the type is i8/i16, and the regbank will be GPR, then we change the
      // type to i32 in applyMappingImpl.
      LLT Ty = MRI.getType(MI.getOperand(2).getReg());
      if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16)
        MappingID = 1;
      OpRegBankIdx[2] = PMI_FirstGPR;
    }

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to a FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16 which
    // doesn't have an exact-size gpr register class. The exception is if the
    // build_vector has all constant operands, which may be better to leave as
    // gpr without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to a FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector; the scalar should be
    // on the FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned Idx = 0;
    if (onlyDefinesFP(MI, MRI, TRI))
      for (const auto &Op : MI.defs()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    else
      Idx += MI.getNumExplicitDefs();

    if (onlyUsesFP(MI, MRI, TRI))
      for (const auto &Op : MI.explicit_uses()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // Source is always floating point and destination is always integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }
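
  // Illustrative note (hypothetical operands, not from this file): at this
  // point OpRegBankIdx holds one bank choice per operand; e.g. for
  //
  //   %d:_(s64) = G_FPTOSI %s:_(s64)
  //
  // it is {PMI_FirstGPR, PMI_FirstFPR}. The loop below combines each entry
  // with OpSize to build the operands' ValueMappings.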

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping =
          getValueMapping(OpRegBankIdx[Idx], TypeSize::getFixed(OpSize[Idx]));
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(MappingID, Cost, getOperandsMapping(OpdsMapping),
                               NumOperands);
}