1 //===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the RegisterBankInfo class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64RegisterBankInfo.h"
15 #include "AArch64RegisterInfo.h"
16 #include "MCTargetDesc/AArch64MCTargetDesc.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
21 #include "llvm/CodeGen/GlobalISel/Utils.h"
22 #include "llvm/CodeGen/LowLevelTypeUtils.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineOperand.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegisterBank.h"
28 #include "llvm/CodeGen/RegisterBankInfo.h"
29 #include "llvm/CodeGen/TargetOpcodes.h"
30 #include "llvm/CodeGen/TargetRegisterInfo.h"
31 #include "llvm/CodeGen/TargetSubtargetInfo.h"
32 #include "llvm/IR/IntrinsicsAArch64.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/Threading.h"
35 #include <algorithm>
36 #include <cassert>
37
38 #define GET_TARGET_REGBANK_IMPL
39 #include "AArch64GenRegisterBank.inc"
40
41 // This file will be TableGen'ed at some point.
42 #include "AArch64GenRegisterBankInfo.def"
43
44 using namespace llvm;
45
46 AArch64RegisterBankInfo::AArch64RegisterBankInfo(
47 const TargetRegisterInfo &TRI) {
48 static llvm::once_flag InitializeRegisterBankFlag;
49
50 static auto InitializeRegisterBankOnce = [&]() {
51 // We have only one set of register banks, whatever the subtarget
52 // is. Therefore, the initialization of the RegBanks table should be
53 // done only once. Indeed the table of all register banks
54 // (AArch64::RegBanks) is unique in the compiler. At some point, it
55 // will get tablegen'ed and the whole constructor becomes empty.
56
57 const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
58 (void)RBGPR;
59 assert(&AArch64::GPRRegBank == &RBGPR &&
60 "The order in RegBanks is messed up");
61
62 const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
63 (void)RBFPR;
64 assert(&AArch64::FPRRegBank == &RBFPR &&
65 "The order in RegBanks is messed up");
66
67 const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
68 (void)RBCCR;
69 assert(&AArch64::CCRegBank == &RBCCR &&
70 "The order in RegBanks is messed up");
71
72 // The GPR register bank is fully defined by all the registers in
73 // GPR64all + its subclasses.
74 assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
75 "Subclass not added?");
76 assert(getMaximumSize(RBGPR.getID()) == 128 &&
77 "GPRs should hold up to 128-bit");
78
79 // The FPR register bank is fully defined by all the registers in
80 // QQQQ + its subclasses.
81 assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
82 "Subclass not added?");
83 assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
84 "Subclass not added?");
85 assert(getMaximumSize(RBFPR.getID()) == 512 &&
86 "FPRs should hold up to 512-bit via QQQQ sequence");
87
88 assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
89 "Class not added?");
90 assert(getMaximumSize(RBCCR.getID()) == 32 &&
91 "CCR should hold up to 32-bit");
92
93 // Check that the TableGen'ed-like file is in sync with our expectations.
94 // First, the Idx.
95 assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
96 {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
97 "PartialMappingIdx's are incorrectly ordered");
98 assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
99 {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
100 PMI_FPR256, PMI_FPR512}) &&
101 "PartialMappingIdx's are incorrectly ordered");
102 // Now, the content.
103 // Check partial mapping.
104 #define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \
105 do { \
106 assert( \
107 checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
108 #Idx " is incorrectly initialized"); \
109 } while (false)
110
111 CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
112 CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
113 CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
114 CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
115 CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
116 CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
117 CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
118 CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
119 CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);
120
121 // Check value mapping.
122 #define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \
123 do { \
124 assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size, \
125 PartialMappingIdx::PMI_First##RBName, Size, \
126 Offset) && \
127 #RBName #Size " " #Offset " is incorrectly initialized"); \
128 } while (false)
129
130 #define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)
131
132 CHECK_VALUEMAP(GPR, 32);
133 CHECK_VALUEMAP(GPR, 64);
134 CHECK_VALUEMAP(GPR, 128);
135 CHECK_VALUEMAP(FPR, 16);
136 CHECK_VALUEMAP(FPR, 32);
137 CHECK_VALUEMAP(FPR, 64);
138 CHECK_VALUEMAP(FPR, 128);
139 CHECK_VALUEMAP(FPR, 256);
140 CHECK_VALUEMAP(FPR, 512);
141
142 // Check the value mapping for 3-operand instructions where all the operands
143 // map to the same value mapping.
144 #define CHECK_VALUEMAP_3OPS(RBName, Size) \
145 do { \
146 CHECK_VALUEMAP_IMPL(RBName, Size, 0); \
147 CHECK_VALUEMAP_IMPL(RBName, Size, 1); \
148 CHECK_VALUEMAP_IMPL(RBName, Size, 2); \
149 } while (false)
150
151 CHECK_VALUEMAP_3OPS(GPR, 32);
152 CHECK_VALUEMAP_3OPS(GPR, 64);
153 CHECK_VALUEMAP_3OPS(GPR, 128);
154 CHECK_VALUEMAP_3OPS(FPR, 32);
155 CHECK_VALUEMAP_3OPS(FPR, 64);
156 CHECK_VALUEMAP_3OPS(FPR, 128);
157 CHECK_VALUEMAP_3OPS(FPR, 256);
158 CHECK_VALUEMAP_3OPS(FPR, 512);
159
160 #define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \
161 do { \
162 unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min; \
163 unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \
164 (void)PartialMapDstIdx; \
165 (void)PartialMapSrcIdx; \
166 const ValueMapping *Map = getCopyMapping(AArch64::RBNameDst##RegBankID, \
167 AArch64::RBNameSrc##RegBankID, \
168 TypeSize::getFixed(Size)); \
169 (void)Map; \
170 assert(Map[0].BreakDown == \
171 &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
172 Map[0].NumBreakDowns == 1 && \
173 #RBNameDst #Size " Dst is incorrectly initialized"); \
174 assert(Map[1].BreakDown == \
175 &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
176 Map[1].NumBreakDowns == 1 && \
177 #RBNameSrc #Size " Src is incorrectly initialized"); \
178 \
179 } while (false)
180
181 CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
182 CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
183 CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
184 CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
185 CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
186 CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
187 CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
188 CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);
189
190 #define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize) \
191 do { \
192 unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min; \
193 unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min; \
194 (void)PartialMapDstIdx; \
195 (void)PartialMapSrcIdx; \
196 const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize); \
197 (void)Map; \
198 assert(Map[0].BreakDown == \
199 &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
200 Map[0].NumBreakDowns == 1 && "FPR" #DstSize \
201 " Dst is incorrectly initialized"); \
202 assert(Map[1].BreakDown == \
203 &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
204 Map[1].NumBreakDowns == 1 && "FPR" #SrcSize \
205 " Src is incorrectly initialized"); \
206 \
207 } while (false)
208
209 CHECK_VALUEMAP_FPEXT(32, 16);
210 CHECK_VALUEMAP_FPEXT(64, 16);
211 CHECK_VALUEMAP_FPEXT(64, 32);
212 CHECK_VALUEMAP_FPEXT(128, 64);
213
214 assert(verify(TRI) && "Invalid register bank information");
215 };
216
217 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
218 }
219
220 unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
221 const RegisterBank &B,
222 const TypeSize Size) const {
223 // What do we do with different sizes?
224 // Copies are assumed to be the same size.
225 // We will introduce other hooks for different sizes:
226 // * extract cost.
227 // * build_sequence cost.
228
229 // A copy from GPR to FPR (and vice versa) involves an FMOV.
230 // FIXME: This should be deduced from the scheduling model.
231 if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
232 // FMOVXDr or FMOVWSr.
233 return 5;
234 if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
235 // FMOVDXr or FMOVSWr.
236 return 4;
237
238 return RegisterBankInfo::copyCost(A, B, Size);
239 }
240
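// Map a register class onto the bank that covers it: SIMD&FP and SVE classes
// go to FPR, general-purpose (W/X and their tuple) classes go to GPR, and the
// NZCV flags class goes to CC.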
241 const RegisterBank &
242 AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
243 LLT) const {
244 switch (RC.getID()) {
245 case AArch64::FPR8RegClassID:
246 case AArch64::FPR16RegClassID:
247 case AArch64::FPR16_loRegClassID:
248 case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
249 case AArch64::FPR32RegClassID:
250 case AArch64::FPR64RegClassID:
251 case AArch64::FPR128RegClassID:
252 case AArch64::FPR64_loRegClassID:
253 case AArch64::FPR128_loRegClassID:
254 case AArch64::FPR128_0to7RegClassID:
255 case AArch64::DDRegClassID:
256 case AArch64::DDDRegClassID:
257 case AArch64::DDDDRegClassID:
258 case AArch64::QQRegClassID:
259 case AArch64::QQQRegClassID:
260 case AArch64::QQQQRegClassID:
261 case AArch64::ZPRRegClassID:
262 case AArch64::ZPR_3bRegClassID:
263 return getRegBank(AArch64::FPRRegBankID);
264 case AArch64::GPR32commonRegClassID:
265 case AArch64::GPR32RegClassID:
266 case AArch64::GPR32spRegClassID:
267 case AArch64::GPR32sponlyRegClassID:
268 case AArch64::GPR32argRegClassID:
269 case AArch64::GPR32allRegClassID:
270 case AArch64::GPR64commonRegClassID:
271 case AArch64::GPR64RegClassID:
272 case AArch64::GPR64spRegClassID:
273 case AArch64::GPR64sponlyRegClassID:
274 case AArch64::GPR64argRegClassID:
275 case AArch64::GPR64allRegClassID:
276 case AArch64::GPR64noipRegClassID:
277 case AArch64::GPR64common_and_GPR64noipRegClassID:
278 case AArch64::GPR64noip_and_tcGPR64RegClassID:
279 case AArch64::tcGPR64RegClassID:
280 case AArch64::tcGPRx16x17RegClassID:
281 case AArch64::tcGPRx17RegClassID:
282 case AArch64::tcGPRnotx16RegClassID:
283 case AArch64::WSeqPairsClassRegClassID:
284 case AArch64::XSeqPairsClassRegClassID:
285 case AArch64::MatrixIndexGPR32_8_11RegClassID:
286 case AArch64::MatrixIndexGPR32_12_15RegClassID:
287 case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID:
288 case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
289 return getRegBank(AArch64::GPRRegBankID);
290 case AArch64::CCRRegClassID:
291 return getRegBank(AArch64::CCRegBankID);
292 default:
293 llvm_unreachable("Register class not supported");
294 }
295 }
296
297 RegisterBankInfo::InstructionMappings
298 AArch64RegisterBankInfo::getInstrAlternativeMappings(
299 const MachineInstr &MI) const {
300 const MachineFunction &MF = *MI.getParent()->getParent();
301 const TargetSubtargetInfo &STI = MF.getSubtarget();
302 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
303 const MachineRegisterInfo &MRI = MF.getRegInfo();
304
305 switch (MI.getOpcode()) {
306 case TargetOpcode::G_OR: {
307 // A 32 or 64-bit OR can be mapped on either FPR or
308 // GPR for the same cost.
309 TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
310 if (Size != 32 && Size != 64)
311 break;
312
313 // If the instruction has any implicit-defs or uses,
314 // do not mess with it.
315 if (MI.getNumOperands() != 3)
316 break;
317 InstructionMappings AltMappings;
318 const InstructionMapping &GPRMapping = getInstructionMapping(
319 /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
320 /*NumOperands*/ 3);
321 const InstructionMapping &FPRMapping = getInstructionMapping(
322 /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
323 /*NumOperands*/ 3);
324
325 AltMappings.push_back(&GPRMapping);
326 AltMappings.push_back(&FPRMapping);
327 return AltMappings;
328 }
329 case TargetOpcode::G_BITCAST: {
330 TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
331 if (Size != 32 && Size != 64)
332 break;
333
334 // If the instruction has any implicit-defs or uses,
335 // do not mess with it.
336 if (MI.getNumOperands() != 2)
337 break;
338
339 InstructionMappings AltMappings;
340 const InstructionMapping &GPRMapping = getInstructionMapping(
341 /*ID*/ 1, /*Cost*/ 1,
342 getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
343 /*NumOperands*/ 2);
344 const InstructionMapping &FPRMapping = getInstructionMapping(
345 /*ID*/ 2, /*Cost*/ 1,
346 getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
347 /*NumOperands*/ 2);
348 const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
349 /*ID*/ 3,
350 /*Cost*/
351 copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
352 TypeSize::getFixed(Size)),
353 getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
354 /*NumOperands*/ 2);
355 const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
356 /*ID*/ 3,
357 /*Cost*/
358 copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
359 TypeSize::getFixed(Size)),
360 getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
361 /*NumOperands*/ 2);
362
363 AltMappings.push_back(&GPRMapping);
364 AltMappings.push_back(&FPRMapping);
365 AltMappings.push_back(&GPRToFPRMapping);
366 AltMappings.push_back(&FPRToGPRMapping);
367 return AltMappings;
368 }
369 case TargetOpcode::G_LOAD: {
370 TypeSize Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
371 if (Size != 64)
372 break;
373
374 // If the instruction has any implicit-defs or uses,
375 // do not mess with it.
376 if (MI.getNumOperands() != 2)
377 break;
378
379 InstructionMappings AltMappings;
380 const InstructionMapping &GPRMapping = getInstructionMapping(
381 /*ID*/ 1, /*Cost*/ 1,
382 getOperandsMapping(
383 {getValueMapping(PMI_FirstGPR, Size),
384 // Addresses are GPR 64-bit.
385 getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
386 /*NumOperands*/ 2);
387 const InstructionMapping &FPRMapping = getInstructionMapping(
388 /*ID*/ 2, /*Cost*/ 1,
389 getOperandsMapping(
390 {getValueMapping(PMI_FirstFPR, Size),
391 // Addresses are GPR 64-bit.
392 getValueMapping(PMI_FirstGPR, TypeSize::getFixed(64))}),
393 /*NumOperands*/ 2);
394
395 AltMappings.push_back(&GPRMapping);
396 AltMappings.push_back(&FPRMapping);
397 return AltMappings;
398 }
399 default:
400 break;
401 }
402 return RegisterBankInfo::getInstrAlternativeMappings(MI);
403 }
404
405 void AArch64RegisterBankInfo::applyMappingImpl(
406 MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
407 MachineInstr &MI = OpdMapper.getMI();
408 MachineRegisterInfo &MRI = OpdMapper.getMRI();
409
410 switch (MI.getOpcode()) {
411 case TargetOpcode::G_OR:
412 case TargetOpcode::G_BITCAST:
413 case TargetOpcode::G_LOAD:
414 // These IDs must match getInstrAlternativeMappings.
415 assert((OpdMapper.getInstrMapping().getID() >= 1 &&
416 OpdMapper.getInstrMapping().getID() <= 4) &&
417 "Don't know how to handle that ID");
418 return applyDefaultMapping(OpdMapper);
419 case TargetOpcode::G_INSERT_VECTOR_ELT: {
420 // Extend smaller GPR operands to 32 bits.
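// For example (a sketch of the intended rewrite for an s16 element): given
//   %vec:fpr(<8 x s16>) = G_INSERT_VECTOR_ELT %src, %elt:gpr(s16), %idx
// we emit %ext:gpr(s32) = G_ANYEXT %elt and use %ext as the inserted element,
// so the element can live in a 32-bit W register.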
421 Builder.setInsertPt(*MI.getParent(), MI.getIterator());
422 auto Ext = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(2).getReg());
423 MRI.setRegBank(Ext.getReg(0), getRegBank(AArch64::GPRRegBankID));
424 MI.getOperand(2).setReg(Ext.getReg(0));
425 return applyDefaultMapping(OpdMapper);
426 }
427 default:
428 llvm_unreachable("Don't know how to handle that operation");
429 }
430 }
431
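// Build a mapping in which every operand (up to three) shares the same bank
// and size: FPR when the result type is a vector or the opcode is a generic
// floating-point operation, GPR otherwise.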
432 const RegisterBankInfo::InstructionMapping &
433 AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
434 const MachineInstr &MI) const {
435 const unsigned Opc = MI.getOpcode();
436 const MachineFunction &MF = *MI.getParent()->getParent();
437 const MachineRegisterInfo &MRI = MF.getRegInfo();
438
439 unsigned NumOperands = MI.getNumOperands();
440 assert(NumOperands <= 3 &&
441 "This code is for instructions with 3 or less operands");
442
443 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
444 TypeSize Size = Ty.getSizeInBits();
445 bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
446
447 PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;
448
449 #ifndef NDEBUG
450 // Make sure all the operands are using similar size and type.
451 // Should probably be checked by the machine verifier.
452 // This code won't catch cases where the number of lanes is
453 // different between the operands.
454 // If we want to go to that level of details, it is probably
455 // best to check that the types are the same, period.
456 // Currently, we just check that the register banks are the same
457 // for each type.
458 for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
459 LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
460 assert(
461 AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
462 RBIdx, OpTy.getSizeInBits()) ==
463 AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
464 "Operand has incompatible size");
465 bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
466 (void)OpIsFPR;
467 assert(IsFPR == OpIsFPR && "Operand has incompatible type");
468 }
469 #endif // End NDEBUG.
470
471 return getInstructionMapping(DefaultMappingID, 1,
472 getValueMapping(RBIdx, Size), NumOperands);
473 }
474
475 /// \returns true if a given intrinsic only uses and defines FPRs.
476 static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
477 const MachineInstr &MI) {
478 // TODO: Add more intrinsics.
479 switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
480 default:
481 return false;
482 case Intrinsic::aarch64_neon_uaddlv:
483 case Intrinsic::aarch64_neon_uaddv:
484 case Intrinsic::aarch64_neon_saddv:
485 case Intrinsic::aarch64_neon_umaxv:
486 case Intrinsic::aarch64_neon_smaxv:
487 case Intrinsic::aarch64_neon_uminv:
488 case Intrinsic::aarch64_neon_sminv:
489 case Intrinsic::aarch64_neon_faddv:
490 case Intrinsic::aarch64_neon_fmaxv:
491 case Intrinsic::aarch64_neon_fminv:
492 case Intrinsic::aarch64_neon_fmaxnmv:
493 case Intrinsic::aarch64_neon_fminnmv:
494 return true;
495 case Intrinsic::aarch64_neon_saddlv: {
496 const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
497 return SrcTy.getElementType().getSizeInBits() >= 16 &&
498 SrcTy.getElementCount().getFixedValue() >= 4;
499 }
500 }
501 }
502
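// Returns true if \p MI is a G_PHI whose result has at least one use that is
// known to only consume floating-point values, looking through chains of PHIs
// up to MaxFPRSearchDepth. For example, a PHI whose only use is a G_FCMP is
// treated as FP-constrained here.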
503 bool AArch64RegisterBankInfo::isPHIWithFPContraints(
504 const MachineInstr &MI, const MachineRegisterInfo &MRI,
505 const TargetRegisterInfo &TRI, const unsigned Depth) const {
506 if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
507 return false;
508
509 return any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
510 [&](const MachineInstr &UseMI) {
511 if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
512 return true;
513 return isPHIWithFPContraints(UseMI, MRI, TRI, Depth + 1);
514 });
515 }
516
517 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
518 const MachineRegisterInfo &MRI,
519 const TargetRegisterInfo &TRI,
520 unsigned Depth) const {
521 unsigned Op = MI.getOpcode();
522 if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
523 return true;
524
525 // Do we have an explicit floating point instruction?
526 if (isPreISelGenericFloatingPointOpcode(Op))
527 return true;
528
529 // No. Check if we have a copy-like instruction. If we do, then we could
530 // still be fed by floating point instructions.
531 if (Op != TargetOpcode::COPY && !MI.isPHI() &&
532 !isPreISelGenericOptimizationHint(Op))
533 return false;
534
535 // Check if we already know the register bank.
536 auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
537 if (RB == &AArch64::FPRRegBank)
538 return true;
539 if (RB == &AArch64::GPRRegBank)
540 return false;
541
542 // We don't know anything.
543 //
544 // If we have a phi, we may be able to infer that it will be assigned an FPR
545 // based on its inputs.
546 if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
547 return false;
548
549 return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
550 return Op.isReg() &&
551 onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
552 });
553 }
554
555 bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
556 const MachineRegisterInfo &MRI,
557 const TargetRegisterInfo &TRI,
558 unsigned Depth) const {
559 switch (MI.getOpcode()) {
560 case TargetOpcode::G_FPTOSI:
561 case TargetOpcode::G_FPTOUI:
562 case TargetOpcode::G_FCMP:
563 case TargetOpcode::G_LROUND:
564 case TargetOpcode::G_LLROUND:
565 return true;
566 default:
567 break;
568 }
569 return hasFPConstraints(MI, MRI, TRI, Depth);
570 }
571
572 bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
573 const MachineRegisterInfo &MRI,
574 const TargetRegisterInfo &TRI,
575 unsigned Depth) const {
576 switch (MI.getOpcode()) {
577 case AArch64::G_DUP:
578 case TargetOpcode::G_SITOFP:
579 case TargetOpcode::G_UITOFP:
580 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
581 case TargetOpcode::G_INSERT_VECTOR_ELT:
582 case TargetOpcode::G_BUILD_VECTOR:
583 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
584 return true;
585 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
586 switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
587 case Intrinsic::aarch64_neon_ld1x2:
588 case Intrinsic::aarch64_neon_ld1x3:
589 case Intrinsic::aarch64_neon_ld1x4:
590 case Intrinsic::aarch64_neon_ld2:
591 case Intrinsic::aarch64_neon_ld2lane:
592 case Intrinsic::aarch64_neon_ld2r:
593 case Intrinsic::aarch64_neon_ld3:
594 case Intrinsic::aarch64_neon_ld3lane:
595 case Intrinsic::aarch64_neon_ld3r:
596 case Intrinsic::aarch64_neon_ld4:
597 case Intrinsic::aarch64_neon_ld4lane:
598 case Intrinsic::aarch64_neon_ld4r:
599 return true;
600 default:
601 break;
602 }
603 break;
604 default:
605 break;
606 }
607 return hasFPConstraints(MI, MRI, TRI, Depth);
608 }
609
610 bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
611 // Use GMemOperation because we also want to match indexed loads.
612 auto *MemOp = cast<GMemOperation>(&MI);
613 const Value *LdVal = MemOp->getMMO().getValue();
614 if (!LdVal)
615 return false;
616
617 Type *EltTy = nullptr;
618 if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
619 EltTy = GV->getValueType();
620 // Look at the first element of the struct to determine the type we are
621 // loading
622 while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
623 if (StructEltTy->getNumElements() == 0)
624 break;
625 EltTy = StructEltTy->getTypeAtIndex(0U);
626 }
627 // Look at the first element of the array to determine its type
628 if (isa<ArrayType>(EltTy))
629 EltTy = EltTy->getArrayElementType();
630 } else {
631 // FIXME: grubbing around uses is pretty ugly, but with no more
632 // `getPointerElementType` there's not much else we can do.
633 for (const auto *LdUser : LdVal->users()) {
634 if (isa<LoadInst>(LdUser)) {
635 EltTy = LdUser->getType();
636 break;
637 }
638 if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
639 EltTy = LdUser->getOperand(0)->getType();
640 break;
641 }
642 }
643 }
644 return EltTy && EltTy->isFPOrFPVectorTy();
645 }
646
647 const RegisterBankInfo::InstructionMapping &
648 AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
649 const unsigned Opc = MI.getOpcode();
650
651 // Try the default logic for non-generic instructions that are either copies
652 // or already have some operands assigned to banks.
653 if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
654 Opc == TargetOpcode::G_PHI) {
655 const RegisterBankInfo::InstructionMapping &Mapping =
656 getInstrMappingImpl(MI);
657 if (Mapping.isValid())
658 return Mapping;
659 }
660
661 const MachineFunction &MF = *MI.getParent()->getParent();
662 const MachineRegisterInfo &MRI = MF.getRegInfo();
663 const TargetSubtargetInfo &STI = MF.getSubtarget();
664 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
665
666 switch (Opc) {
667 // G_{F|S|U}REM are not listed because they are not legal.
668 // Arithmetic ops.
669 case TargetOpcode::G_ADD:
670 case TargetOpcode::G_SUB:
671 case TargetOpcode::G_PTR_ADD:
672 case TargetOpcode::G_MUL:
673 case TargetOpcode::G_SDIV:
674 case TargetOpcode::G_UDIV:
675 // Bitwise ops.
676 case TargetOpcode::G_AND:
677 case TargetOpcode::G_OR:
678 case TargetOpcode::G_XOR:
679 // Floating point ops.
680 case TargetOpcode::G_FADD:
681 case TargetOpcode::G_FSUB:
682 case TargetOpcode::G_FMUL:
683 case TargetOpcode::G_FDIV:
684 case TargetOpcode::G_FMAXIMUM:
685 case TargetOpcode::G_FMINIMUM:
686 return getSameKindOfOperandsMapping(MI);
687 case TargetOpcode::G_FPEXT: {
688 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
689 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
690 return getInstructionMapping(
691 DefaultMappingID, /*Cost*/ 1,
692 getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
693 /*NumOperands*/ 2);
694 }
695 // Shifts.
696 case TargetOpcode::G_SHL:
697 case TargetOpcode::G_LSHR:
698 case TargetOpcode::G_ASHR: {
699 LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
700 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
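// A 32-bit shift by a 64-bit amount does not fit the "same kind of operands"
// mapping below, so use the dedicated Shift64Imm value mapping for it.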
701 if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
702 return getInstructionMapping(DefaultMappingID, 1,
703 &ValMappings[Shift64Imm], 3);
704 return getSameKindOfOperandsMapping(MI);
705 }
706 case TargetOpcode::COPY: {
707 Register DstReg = MI.getOperand(0).getReg();
708 Register SrcReg = MI.getOperand(1).getReg();
709 // Check if one of the registers is not a generic register.
710 if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
711 (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
712 const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
713 const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
714 if (!DstRB)
715 DstRB = SrcRB;
716 else if (!SrcRB)
717 SrcRB = DstRB;
718 // If both RBs are null, that means both registers are generic.
719 // We shouldn't be here.
720 assert(DstRB && SrcRB && "Both RegBank were nullptr");
721 TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
722 return getInstructionMapping(
723 DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
724 getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
725 // We only care about the mapping of the destination.
726 /*NumOperands*/ 1);
727 }
728 // Both registers are generic, use G_BITCAST.
729 [[fallthrough]];
730 }
731 case TargetOpcode::G_BITCAST: {
732 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
733 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
734 TypeSize Size = DstTy.getSizeInBits();
735 bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
736 bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
737 const RegisterBank &DstRB =
738 DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
739 const RegisterBank &SrcRB =
740 SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
741 return getInstructionMapping(
742 DefaultMappingID, copyCost(DstRB, SrcRB, Size),
743 getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
744 // We only care about the mapping of the destination for COPY.
745 /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
746 }
747 default:
748 break;
749 }
750
751 unsigned NumOperands = MI.getNumOperands();
752 unsigned MappingID = DefaultMappingID;
753
754 // Track the size and bank of each register. We don't do partial mappings.
755 SmallVector<unsigned, 4> OpSize(NumOperands);
756 SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
757 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
758 auto &MO = MI.getOperand(Idx);
759 if (!MO.isReg() || !MO.getReg())
760 continue;
761
762 LLT Ty = MRI.getType(MO.getReg());
763 if (!Ty.isValid())
764 continue;
765 OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();
766
767 // As a top-level guess, vectors (both scalable and fixed-length) go in
768 // FPRs; scalars and pointers go in GPRs.
769 // For floating-point instructions and scalars wider than 64 bits, use FPRs.
770 if (Ty.isVector())
771 OpRegBankIdx[Idx] = PMI_FirstFPR;
772 else if (isPreISelGenericFloatingPointOpcode(Opc) ||
773 Ty.getSizeInBits() > 64)
774 OpRegBankIdx[Idx] = PMI_FirstFPR;
775 else
776 OpRegBankIdx[Idx] = PMI_FirstGPR;
777 }
778
779 unsigned Cost = 1;
780 // Some of the floating-point instructions have mixed GPR and FPR operands:
781 // fine-tune the computed mapping.
782 switch (Opc) {
783 case AArch64::G_DUP: {
784 Register ScalarReg = MI.getOperand(1).getReg();
785 LLT ScalarTy = MRI.getType(ScalarReg);
786 auto ScalarDef = MRI.getVRegDef(ScalarReg);
787 // We want to select dup(load) into LD1R.
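// For example (sketch):
//   %ld:fpr(s32) = G_LOAD %ptr
//   %v:fpr(<4 x s32>) = G_DUP %ld
// Keeping both G_DUP operands on FPR lets the dup(load) pair be selected as
// LD1R.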
788 if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
789 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
790 // s8 is an exception for G_DUP: an s8 scalar element always stays on GPR.
791 else if (ScalarTy.getSizeInBits() != 8 &&
792 (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
793 onlyDefinesFP(*ScalarDef, MRI, TRI)))
794 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
795 else
796 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
797 break;
798 }
799 case TargetOpcode::G_TRUNC: {
800 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
801 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
802 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
803 break;
804 }
805 case TargetOpcode::G_SITOFP:
806 case TargetOpcode::G_UITOFP: {
807 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
808 break;
809 // Integer to FP conversions don't necessarily happen between GPR -> FPR
810 // regbanks. They can also be done within an FPR register.
811 Register SrcReg = MI.getOperand(1).getReg();
812 if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
813 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
814 else
815 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
816 break;
817 }
818 case TargetOpcode::G_FPTOSI:
819 case TargetOpcode::G_FPTOUI:
820 case TargetOpcode::G_INTRINSIC_LRINT:
821 case TargetOpcode::G_INTRINSIC_LLRINT:
822 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
823 break;
824 OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
825 break;
826 case TargetOpcode::G_FCMP: {
827 // If the result is a vector, it must use an FPR.
828 AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
829 MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
830 : PMI_FirstGPR;
831 OpRegBankIdx = {Idx0,
832 /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
833 break;
834 }
835 case TargetOpcode::G_BITCAST:
836 // This is going to be a cross-register-bank copy, which is expensive.
837 if (OpRegBankIdx[0] != OpRegBankIdx[1])
838 Cost = copyCost(
839 *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
840 *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
841 TypeSize::getFixed(OpSize[0]));
842 break;
843 case TargetOpcode::G_LOAD: {
844 // Loading into the vector unit is slightly more expensive.
845 // This is actually only true for the LD1R and co. instructions,
846 // but for the fast mode this number does not matter and
847 // for the greedy mode the cost of the cross-bank copy will
848 // offset this number.
849 // FIXME: Should be derived from the scheduling model.
850 if (OpRegBankIdx[0] != PMI_FirstGPR) {
851 Cost = 2;
852 break;
853 }
854
855 if (cast<GLoad>(MI).isAtomic()) {
856 // Atomics always use GPR destinations. Don't refine any further.
857 OpRegBankIdx[0] = PMI_FirstGPR;
858 break;
859 }
860
861 // Try to guess the type of the load from the MMO.
862 if (isLoadFromFPType(MI)) {
863 OpRegBankIdx[0] = PMI_FirstFPR;
864 break;
865 }
866
867 // Check if that load feeds fp instructions.
868 // In that case, we want the default mapping to be on FPR
869 // instead of blindly mapping every scalar to GPR.
870 if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
871 [&](const MachineInstr &UseMI) {
872 // If we have at least one direct or indirect use
873 // in a FP instruction,
874 // assume this was a floating point load in the IR. If it was
875 // not, we would have had a bitcast before reaching that
876 // instruction.
877 //
878 // Int->FP conversion operations are also captured in
879 // onlyDefinesFP().
880
881 if (isPHIWithFPContraints(UseMI, MRI, TRI))
882 return true;
883
884 return onlyUsesFP(UseMI, MRI, TRI) ||
885 onlyDefinesFP(UseMI, MRI, TRI);
886 }))
887 OpRegBankIdx[0] = PMI_FirstFPR;
888 break;
889 }
890 case TargetOpcode::G_STORE:
891 // Check if that store is fed by fp instructions.
892 if (OpRegBankIdx[0] == PMI_FirstGPR) {
893 Register VReg = MI.getOperand(0).getReg();
894 if (!VReg)
895 break;
896 MachineInstr *DefMI = MRI.getVRegDef(VReg);
897 if (onlyDefinesFP(*DefMI, MRI, TRI))
898 OpRegBankIdx[0] = PMI_FirstFPR;
899 break;
900 }
901 break;
902 case TargetOpcode::G_INDEXED_STORE:
903 if (OpRegBankIdx[1] == PMI_FirstGPR) {
904 Register VReg = MI.getOperand(1).getReg();
905 if (!VReg)
906 break;
907 MachineInstr *DefMI = MRI.getVRegDef(VReg);
908 if (onlyDefinesFP(*DefMI, MRI, TRI))
909 OpRegBankIdx[1] = PMI_FirstFPR;
910 break;
911 }
912 break;
913 case TargetOpcode::G_INDEXED_SEXTLOAD:
914 case TargetOpcode::G_INDEXED_ZEXTLOAD:
915 // These should always be GPR.
916 OpRegBankIdx[0] = PMI_FirstGPR;
917 break;
918 case TargetOpcode::G_INDEXED_LOAD: {
919 if (isLoadFromFPType(MI))
920 OpRegBankIdx[0] = PMI_FirstFPR;
921 break;
922 }
923 case TargetOpcode::G_SELECT: {
924 // If the destination is FPR, preserve that.
925 if (OpRegBankIdx[0] != PMI_FirstGPR)
926 break;
927
928 // If we're taking in vectors, we have no choice but to put everything on
929 // FPRs, except for the condition. The condition must always be on a GPR.
930 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
931 if (SrcTy.isVector()) {
932 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
933 break;
934 }
935
936 // Try to minimize the number of copies. If we have more floating point
937 // constrained values than not, then we'll put everything on FPR. Otherwise,
938 // everything has to be on GPR.
939 unsigned NumFP = 0;
940
941 // Check if the uses of the result always produce floating point values.
942 //
943 // For example:
944 //
945 // %z = G_SELECT %cond %x %y
946 // fpr = G_FOO %z ...
947 if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
948 [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
949 ++NumFP;
950
951 // Check if the defs of the source values always produce floating point
952 // values.
953 //
954 // For example:
955 //
956 // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
957 // %z = G_SELECT %cond %x %y
958 //
959 // Also check whether or not the sources have already been decided to be
960 // FPR. Keep track of this.
961 //
962 // This doesn't check the condition, since it's just whatever is in NZCV.
963 // This isn't passed explicitly in a register to fcsel/csel.
964 for (unsigned Idx = 2; Idx < 4; ++Idx) {
965 Register VReg = MI.getOperand(Idx).getReg();
966 MachineInstr *DefMI = MRI.getVRegDef(VReg);
967 if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
968 onlyDefinesFP(*DefMI, MRI, TRI))
969 ++NumFP;
970 }
971
972 // If we have more FP constraints than not, then move everything over to
973 // FPR.
974 if (NumFP >= 2)
975 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
976
977 break;
978 }
979 case TargetOpcode::G_UNMERGE_VALUES: {
980 // If the first operand belongs to the FPR register bank, then make sure that
981 // we preserve that.
982 if (OpRegBankIdx[0] != PMI_FirstGPR)
983 break;
984
985 LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
986 // UNMERGE into scalars from a vector should always use FPR.
987 // Likewise if any of the uses are FP instructions.
988 if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
989 any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
990 [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
991 // Set the register bank of every operand to FPR.
992 for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
993 Idx < NumOperands; ++Idx)
994 OpRegBankIdx[Idx] = PMI_FirstFPR;
995 }
996 break;
997 }
998 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
999 // Destination and source need to be FPRs.
1000 OpRegBankIdx[0] = PMI_FirstFPR;
1001 OpRegBankIdx[1] = PMI_FirstFPR;
1002
1003 // Index needs to be a GPR.
1004 OpRegBankIdx[2] = PMI_FirstGPR;
1005 break;
1006 case TargetOpcode::G_INSERT_VECTOR_ELT:
1007 OpRegBankIdx[0] = PMI_FirstFPR;
1008 OpRegBankIdx[1] = PMI_FirstFPR;
1009
1010 // The element may be either a GPR or FPR. Preserve that behaviour.
1011 if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
1012 OpRegBankIdx[2] = PMI_FirstFPR;
1013 else {
1014 // If the type is s8/s16 and the regbank will be GPR, then we change the
1015 // type to s32 in applyMappingImpl.
1016 LLT Ty = MRI.getType(MI.getOperand(2).getReg());
1017 if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16)
1018 MappingID = 1;
1019 OpRegBankIdx[2] = PMI_FirstGPR;
1020 }
1021
1022 // Index needs to be a GPR.
1023 OpRegBankIdx[3] = PMI_FirstGPR;
1024 break;
1025 case TargetOpcode::G_EXTRACT: {
1026 // For s128 sources we have to use fpr unless we know otherwise.
1027 auto Src = MI.getOperand(1).getReg();
1028 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
1029 if (SrcTy.getSizeInBits() != 128)
1030 break;
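// s128 values held in an XSeqPairs register pair (e.g. the result of a
// 128-bit atomic compare-and-swap) stay on GPR; otherwise assume FPR.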
1031 auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
1032 ? PMI_FirstGPR
1033 : PMI_FirstFPR;
1034 OpRegBankIdx[0] = Idx;
1035 OpRegBankIdx[1] = Idx;
1036 break;
1037 }
1038 case TargetOpcode::G_BUILD_VECTOR: {
1039 // If the first source operand belongs to the FPR register bank, then make
1040 // sure that we preserve that.
1041 if (OpRegBankIdx[1] != PMI_FirstGPR)
1042 break;
1043 Register VReg = MI.getOperand(1).getReg();
1044 if (!VReg)
1045 break;
1046
1047 // Get the instruction that defined the source operand reg, and check if
1048 // it's a floating point operation, or if it's a type like s16 which
1049 // doesn't have an exact-size GPR register class. The exception is if the
1050 // build_vector has all constant operands, which may be better to leave as
1051 // gpr without copies, so it can be matched in imported patterns.
1052 MachineInstr *DefMI = MRI.getVRegDef(VReg);
1053 unsigned DefOpc = DefMI->getOpcode();
1054 const LLT SrcTy = MRI.getType(VReg);
1055 if (all_of(MI.operands(), [&](const MachineOperand &Op) {
1056 return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
1057 TargetOpcode::G_CONSTANT;
1058 }))
1059 break;
1060 if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
1061 SrcTy.getSizeInBits() < 32 ||
1062 getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
1063 // Have a floating point op.
1064 // Make sure every operand gets mapped to a FPR register class.
1065 unsigned NumOperands = MI.getNumOperands();
1066 for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
1067 OpRegBankIdx[Idx] = PMI_FirstFPR;
1068 }
1069 break;
1070 }
1071 case TargetOpcode::G_VECREDUCE_FADD:
1072 case TargetOpcode::G_VECREDUCE_FMUL:
1073 case TargetOpcode::G_VECREDUCE_FMAX:
1074 case TargetOpcode::G_VECREDUCE_FMIN:
1075 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
1076 case TargetOpcode::G_VECREDUCE_FMINIMUM:
1077 case TargetOpcode::G_VECREDUCE_ADD:
1078 case TargetOpcode::G_VECREDUCE_MUL:
1079 case TargetOpcode::G_VECREDUCE_AND:
1080 case TargetOpcode::G_VECREDUCE_OR:
1081 case TargetOpcode::G_VECREDUCE_XOR:
1082 case TargetOpcode::G_VECREDUCE_SMAX:
1083 case TargetOpcode::G_VECREDUCE_SMIN:
1084 case TargetOpcode::G_VECREDUCE_UMAX:
1085 case TargetOpcode::G_VECREDUCE_UMIN:
1086 // Reductions produce a scalar value from a vector; the scalar should be on
1087 // the FPR bank.
1088 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
1089 break;
1090 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
1091 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
1092 // These reductions also take a scalar accumulator input.
1093 // Assign them FPR for now.
1094 OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
1095 break;
1096 case TargetOpcode::G_INTRINSIC:
1097 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
1098 // Check if we know that the intrinsic has any constraints on its register
1099 // banks. If it does, then update the mapping accordingly.
1100 unsigned Idx = 0;
1101 if (onlyDefinesFP(MI, MRI, TRI))
1102 for (const auto &Op : MI.defs()) {
1103 if (Op.isReg())
1104 OpRegBankIdx[Idx] = PMI_FirstFPR;
1105 ++Idx;
1106 }
1107 else
1108 Idx += MI.getNumExplicitDefs();
1109
1110 if (onlyUsesFP(MI, MRI, TRI))
1111 for (const auto &Op : MI.explicit_uses()) {
1112 if (Op.isReg())
1113 OpRegBankIdx[Idx] = PMI_FirstFPR;
1114 ++Idx;
1115 }
1116 break;
1117 }
1118 case TargetOpcode::G_LROUND:
1119 case TargetOpcode::G_LLROUND: {
1120 // Source is always floating point and destination is always integer.
1121 OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
1122 break;
1123 }
1124 }
1125
1126 // Finally construct the computed mapping.
1127 SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
1128 for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
1129 if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
1130 LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
1131 if (!Ty.isValid())
1132 continue;
1133 auto Mapping =
1134 getValueMapping(OpRegBankIdx[Idx], TypeSize::getFixed(OpSize[Idx]));
1135 if (!Mapping->isValid())
1136 return getInvalidInstructionMapping();
1137
1138 OpdsMapping[Idx] = Mapping;
1139 }
1140 }
1141
1142 return getInstructionMapping(MappingID, Cost, getOperandsMapping(OpdsMapping),
1143 NumOperands);
1144 }
1145