//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed, the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor will become empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // FPR64 + its subclasses and the D/Q register tuple classes.
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
    // Now, the content.
    // Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

    // Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);

    // Check the value mapping for 3-operand instructions where all the
    // operands map to the same value mapping.
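    // (For those, the breakdowns for operands 0, 1 and 2 live at consecutive
    // offsets from the same base index and all point at the same partial
    // mapping, which is what the Offset argument below spot-checks.)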
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(                                  \
        AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);   \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 &&                                        \
           #RBNameDst #Size " Dst is incorrectly initialized");                \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 &&                                        \
           #RBNameSrc #Size " Src is incorrectly initialized");                \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 &&                                        \
           "FPR" #DstSize " Dst is incorrectly initialized");                  \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 &&                                        \
           "FPR" #SrcSize " Src is incorrectly initialized");                  \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}

unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           TypeSize Size) const {
  // What do we do with different sizes? Copies are assumed to be of the
  // same size.
  // Other hooks will be introduced for different sizes:
  // * extract cost.
  // * build_sequence cost.

  // A copy from (resp. to) GPR to (resp. from) FPR involves an FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::FPR128_0to7RegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::rtcGPR64RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
  case AArch64::MatrixIndexGPR32_8_11RegClassID:
  case AArch64::MatrixIndexGPR32_12_15RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}

RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // A 32 or 64-bit G_OR can be mapped to either FPR or GPR
    // for the same cost.
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_BITCAST: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
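    // (A plain G_BITCAST has exactly two operands, the result and the
    // source, so anything else carries implicit operands.)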
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 4,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AArch64RegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  switch (OpdMapper.getMI().getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
/// having only floating-point operands.
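/// Opcodes listed here are the starting point for the FPR-vs-GPR heuristics
/// below: their operands are assumed to live on FPR.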
static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return true;
  }
  return false;
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands have a similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
                          const MachineInstr &MI) {
  // TODO: Add more intrinsics.
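  // Even the "integer" reductions below (e.g. uaddlv, smaxv) read their
  // vector source from a SIMD&FP register and write their scalar result to
  // one, which is why they count as FP here.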
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv:
  case Intrinsic::aarch64_neon_faddv:
  case Intrinsic::aarch64_neon_fmaxv:
  case Intrinsic::aarch64_neon_fminv:
  case Intrinsic::aarch64_neon_fmaxnmv:
  case Intrinsic::aarch64_neon_fminnmv:
    return true;
  case Intrinsic::aarch64_neon_saddlv: {
    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    return SrcTy.getElementType().getSizeInBits() >= 16 &&
           SrcTy.getElementCount().getFixedValue() >= 4;
  }
  }
}

bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned an FPR
  // based off of its inputs.
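  // For example, a G_PHI whose incoming values are all defined by G_FADD
  // (which only defines FP values) is itself treated as FP-constrained.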
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return true;
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld4r:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading.
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy)) {
      if (StructEltTy->getNumElements() == 0)
        break;
      EltTy = StructEltTy->getTypeAtIndex(0U);
    }
    // Look at the first element of the array to determine its type.
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  return EltTy && EltTy->isFPOrFPVectorTy();
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
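  // (Roughly, getInstrMappingImpl derives a mapping from the banks or
  // register classes already attached to the operands, and returns an
  // invalid mapping when some operand is still unconstrained.)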
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
  // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RBs are null, that means both registers are generic.
      // We shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBank were nullptr");
      unsigned Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, TypeSize::getFixed(Size)),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
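    // I.e. scalars of 64 bits or fewer are assumed to live on GPR; vectors
    // and wider scalars on FPR. A <2 x s32> <-> s64 bitcast, for example, is
    // therefore modelled as a GPR <-> FPR crossing with the matching cost.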
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, TypeSize::getFixed(Size)),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // We want to select dup(load) into LD1R.
    if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    // s8 is an exception for G_DUP, which we always want on GPR.
    else if (ScalarTy.getSizeInBits() != 8 &&
             (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
              onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use an FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register-bank copy, and that is expensive.
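    // Reflect that in the mapping's cost so that the greedy mode can prefer
    // one of the cheaper same-bank alternatives when one is available.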
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          TypeSize::getFixed(OpSize[0]));
    break;
  case TargetOpcode::G_LOAD: {
    // Loading in the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross-bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Try to guess the type of the load from the MMO.
    if (isLoadFromFPType(MI)) {
      OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct use in a FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_STORE:
    if (OpRegBankIdx[1] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(1).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[1] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_SEXTLOAD:
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
    // These should always be GPR.
    OpRegBankIdx[0] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INDEXED_LOAD: {
    if (isLoadFromFPType(MI))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
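    // Here the use of %z by G_FOO, an FP instruction, counts as one vote
    // (NumFP) towards putting the whole select on FPR.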
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to a FPR register bank, then make sure that
    // we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) ==
        &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else
      OpRegBankIdx[2] = PMI_FirstGPR;

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to a FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation. Or, if it's a type like s16, which
    // doesn't have an exactly-sized GPR register class. The exception is if
    // the build_vector has all constant operands, which may be better left
    // on GPR without copies, so it can be matched in imported patterns.
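    // E.g. %v:_(<4 x s16>) = G_BUILD_VECTOR %a:_(s16), ... is pushed to FPR
    // below because its s16 sources have no exactly-sized GPR class.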
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to a FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector; the scalar should be
    // on the FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned Idx = 0;
    if (onlyDefinesFP(MI, MRI, TRI))
      for (const auto &Op : MI.defs()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    else
      Idx += MI.getNumExplicitDefs();

    if (onlyUsesFP(MI, MRI, TRI))
      for (const auto &Op : MI.explicit_uses()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // Source is always floating point and destination is always integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}