//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the RegisterBankInfo class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
#include <algorithm>
#include <cassert>

#define GET_TARGET_REGBANK_IMPL
#include "AArch64GenRegisterBank.inc"

// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"

using namespace llvm;

AArch64RegisterBankInfo::AArch64RegisterBankInfo(
    const TargetRegisterInfo &TRI) {
  static llvm::once_flag InitializeRegisterBankFlag;

  static auto InitializeRegisterBankOnce = [&]() {
    // We have only one set of register banks, whatever the subtarget
    // is. Therefore, the initialization of the RegBanks table should be
    // done only once. Indeed the table of all register banks
    // (AArch64::RegBanks) is unique in the compiler. At some point, it
    // will get tablegen'ed and the whole constructor becomes empty.

    const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
    (void)RBGPR;
    assert(&AArch64::GPRRegBank == &RBGPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
    (void)RBFPR;
    assert(&AArch64::FPRRegBank == &RBFPR &&
           "The order in RegBanks is messed up");

    const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID);
    (void)RBCCR;
    assert(&AArch64::CCRegBank == &RBCCR &&
           "The order in RegBanks is messed up");

    // The GPR register bank is fully defined by all the registers in
    // GPR64all + its subclasses.
    assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBGPR.getID()) == 128 &&
           "GPRs should hold up to 128-bit");

    // The FPR register bank is fully defined by all the registers in
    // the FPR classes and their D/Q register tuples (up to QQQQ).
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
           "Subclass not added?");
    assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
           "Subclass not added?");
    assert(getMaximumSize(RBFPR.getID()) == 512 &&
           "FPRs should hold up to 512-bit via QQQQ sequence");

    assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
           "Class not added?");
    assert(getMaximumSize(RBCCR.getID()) == 32 &&
           "CCR should hold up to 32-bit");

    // Check that the TableGen'ed-like file is in sync with our expectations.
    // First, the Idx.
    assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
                                  {PMI_GPR32, PMI_GPR64, PMI_GPR128}) &&
           "PartialMappingIdx's are incorrectly ordered");
    assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR,
                                  {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128,
                                   PMI_FPR256, PMI_FPR512}) &&
           "PartialMappingIdx's are incorrectly ordered");
    // Now, the content.
    // Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB)                      \
  do {                                                                         \
    assert(                                                                    \
        checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
        #Idx " is incorrectly initialized");                                   \
  } while (false)

    CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
    CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR);
    CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
    CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);

    // Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset)                              \
  do {                                                                         \
    assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size,            \
                             PartialMappingIdx::PMI_First##RBName, Size,       \
                             Offset) &&                                        \
           #RBName #Size " " #Offset " is incorrectly initialized");           \
  } while (false)

#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)

    CHECK_VALUEMAP(GPR, 32);
    CHECK_VALUEMAP(GPR, 64);
    CHECK_VALUEMAP(GPR, 128);
    CHECK_VALUEMAP(FPR, 16);
    CHECK_VALUEMAP(FPR, 32);
    CHECK_VALUEMAP(FPR, 64);
    CHECK_VALUEMAP(FPR, 128);
    CHECK_VALUEMAP(FPR, 256);
    CHECK_VALUEMAP(FPR, 512);
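
    // For illustration: per the CHECK_VALUEMAP assertions above, a value
    // mapping such as getValueMapping(PMI_FirstGPR, 32) is a single
    // breakdown, i.e. one partial mapping covering bits [0, 32) of the value
    // on the GPR bank.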

    // Check the value mapping for 3-operand instructions where all the
    // operands map to the same value mapping.
#define CHECK_VALUEMAP_3OPS(RBName, Size)                                      \
  do {                                                                         \
    CHECK_VALUEMAP_IMPL(RBName, Size, 0);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 1);                                      \
    CHECK_VALUEMAP_IMPL(RBName, Size, 2);                                      \
  } while (false)

    CHECK_VALUEMAP_3OPS(GPR, 32);
    CHECK_VALUEMAP_3OPS(GPR, 64);
    CHECK_VALUEMAP_3OPS(GPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 32);
    CHECK_VALUEMAP_3OPS(FPR, 64);
    CHECK_VALUEMAP_3OPS(FPR, 128);
    CHECK_VALUEMAP_3OPS(FPR, 256);
    CHECK_VALUEMAP_3OPS(FPR, 512);

#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size)                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min;               \
    unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min;               \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getCopyMapping(                                  \
        AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size);   \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && #RBNameDst #Size                       \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && #RBNameSrc #Size                       \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
    CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);

#define CHECK_VALUEMAP_FPEXT(DstSize, SrcSize)                                 \
  do {                                                                         \
    unsigned PartialMapDstIdx = PMI_FPR##DstSize - PMI_Min;                    \
    unsigned PartialMapSrcIdx = PMI_FPR##SrcSize - PMI_Min;                    \
    (void)PartialMapDstIdx;                                                    \
    (void)PartialMapSrcIdx;                                                    \
    const ValueMapping *Map = getFPExtMapping(DstSize, SrcSize);               \
    (void)Map;                                                                 \
    assert(Map[0].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] &&  \
           Map[0].NumBreakDowns == 1 && "FPR" #DstSize                         \
           " Dst is incorrectly initialized");                                 \
    assert(Map[1].BreakDown ==                                                 \
               &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] &&  \
           Map[1].NumBreakDowns == 1 && "FPR" #SrcSize                         \
           " Src is incorrectly initialized");                                 \
                                                                               \
  } while (false)

    CHECK_VALUEMAP_FPEXT(32, 16);
    CHECK_VALUEMAP_FPEXT(64, 16);
    CHECK_VALUEMAP_FPEXT(64, 32);
    CHECK_VALUEMAP_FPEXT(128, 64);

    assert(verify(TRI) && "Invalid register bank information");
  };

  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
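
/// Estimate the cost of copying a value from register bank \p A to bank \p B.
/// Cross-bank GPR<->FPR copies go through FMOV instructions and are priced
/// above same-bank copies; everything else falls back to the generic cost.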
unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
                                           const RegisterBank &B,
                                           TypeSize Size) const {
  // What do we do with different sizes? Copies are the same size.
  // We will introduce other hooks for the different-size cases:
  // * extract cost.
  // * build_sequence cost.

  // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
  // FIXME: This should be deduced from the scheduling model.
  if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
    // FMOVXDr or FMOVWSr.
    return 5;
  if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
    // FMOVDXr or FMOVSWr.
    return 4;

  return RegisterBankInfo::copyCost(A, B, Size);
}

const RegisterBank &
AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
                                                LLT) const {
  switch (RC.getID()) {
  case AArch64::FPR8RegClassID:
  case AArch64::FPR16RegClassID:
  case AArch64::FPR16_loRegClassID:
  case AArch64::FPR32_with_hsub_in_FPR16_loRegClassID:
  case AArch64::FPR32RegClassID:
  case AArch64::FPR64RegClassID:
  case AArch64::FPR128RegClassID:
  case AArch64::FPR64_loRegClassID:
  case AArch64::FPR128_loRegClassID:
  case AArch64::FPR128_0to7RegClassID:
  case AArch64::DDRegClassID:
  case AArch64::DDDRegClassID:
  case AArch64::DDDDRegClassID:
  case AArch64::QQRegClassID:
  case AArch64::QQQRegClassID:
  case AArch64::QQQQRegClassID:
    return getRegBank(AArch64::FPRRegBankID);
  case AArch64::GPR32commonRegClassID:
  case AArch64::GPR32RegClassID:
  case AArch64::GPR32spRegClassID:
  case AArch64::GPR32sponlyRegClassID:
  case AArch64::GPR32argRegClassID:
  case AArch64::GPR32allRegClassID:
  case AArch64::GPR64commonRegClassID:
  case AArch64::GPR64RegClassID:
  case AArch64::GPR64spRegClassID:
  case AArch64::GPR64sponlyRegClassID:
  case AArch64::GPR64argRegClassID:
  case AArch64::GPR64allRegClassID:
  case AArch64::GPR64noipRegClassID:
  case AArch64::GPR64common_and_GPR64noipRegClassID:
  case AArch64::GPR64noip_and_tcGPR64RegClassID:
  case AArch64::tcGPR64RegClassID:
  case AArch64::rtcGPR64RegClassID:
  case AArch64::WSeqPairsClassRegClassID:
  case AArch64::XSeqPairsClassRegClassID:
  case AArch64::MatrixIndexGPR32_8_11RegClassID:
  case AArch64::MatrixIndexGPR32_12_15RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_8_11RegClassID:
  case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
    return getRegBank(AArch64::GPRRegBankID);
  case AArch64::CCRRegClassID:
    return getRegBank(AArch64::CCRegBankID);
  default:
    llvm_unreachable("Register class not supported");
  }
}

RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
    const MachineInstr &MI) const {
  const MachineFunction &MF = *MI.getParent()->getParent();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_OR: {
    // 32- and 64-bit OR can be mapped to either FPR or GPR for the same cost.
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 3)
      break;
    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
        /*NumOperands*/ 3);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
        /*NumOperands*/ 3);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
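  // For example (hypothetical MIR): mapping
  //   %z:_(s32) = G_OR %x:_(s32), %y:_(s32)
  // onto FPR can avoid two cross-bank copies when %x and %y are defined by
  // floating-point instructions.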
  case TargetOpcode::G_BITCAST: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 32 && Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &GPRToFPRMapping = getInstructionMapping(
        /*ID*/ 3,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRToGPRMapping = getInstructionMapping(
        /*ID*/ 4,
        /*Cost*/
        copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank,
                 TypeSize::getFixed(Size)),
        getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    AltMappings.push_back(&GPRToFPRMapping);
    AltMappings.push_back(&FPRToGPRMapping);
    return AltMappings;
  }
  case TargetOpcode::G_LOAD: {
    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
    if (Size != 64)
      break;

    // If the instruction has any implicit-defs or uses,
    // do not mess with it.
    if (MI.getNumOperands() != 2)
      break;

    InstructionMappings AltMappings;
    const InstructionMapping &GPRMapping = getInstructionMapping(
        /*ID*/ 1, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);
    const InstructionMapping &FPRMapping = getInstructionMapping(
        /*ID*/ 2, /*Cost*/ 1,
        getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
                            // Addresses are GPR 64-bit.
                            getValueMapping(PMI_FirstGPR, 64)}),
        /*NumOperands*/ 2);

    AltMappings.push_back(&GPRMapping);
    AltMappings.push_back(&FPRMapping);
    return AltMappings;
  }
  default:
    break;
  }
  return RegisterBankInfo::getInstrAlternativeMappings(MI);
}

void AArch64RegisterBankInfo::applyMappingImpl(
    MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
  switch (OpdMapper.getMI().getOpcode()) {
  case TargetOpcode::G_OR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_LOAD:
    // These IDs must match getInstrAlternativeMappings.
    assert((OpdMapper.getInstrMapping().getID() >= 1 &&
            OpdMapper.getInstrMapping().getID() <= 4) &&
           "Don't know how to handle that ID");
    return applyDefaultMapping(OpdMapper);
  default:
    llvm_unreachable("Don't know how to handle that operation");
  }
}

/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
/// having only floating-point operands.
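/// E.g. G_FADD and G_FPTRUNC qualify; G_FPTOSI does not, since it produces an
/// integer result.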
static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
  switch (Opc) {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return true;
  }
  return false;
}

const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
    const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned NumOperands = MI.getNumOperands();
  assert(NumOperands <= 3 &&
         "This code is for instructions with 3 or fewer operands");

  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = Ty.getSizeInBits();
  bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);

  PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;

#ifndef NDEBUG
  // Make sure all the operands have a similar size and type.
  // Should probably be checked by the machine verifier.
  // This code won't catch cases where the number of lanes is
  // different between the operands.
  // If we want to go to that level of detail, it is probably
  // best to check that the types are the same, period.
  // Currently, we just check that the register banks are the same
  // for each type.
  for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
    LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
    assert(
        AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
            RBIdx, OpTy.getSizeInBits()) ==
            AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
        "Operand has incompatible size");
    bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
    (void)OpIsFPR;
    assert(IsFPR == OpIsFPR && "Operand has incompatible type");
  }
#endif // End NDEBUG.
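
  // E.g. (illustration): for %d:_(<2 x s32>) = G_FADD %a, %b the type is a
  // vector, so RBIdx is PMI_FirstFPR and all three operands get the same
  // 64-bit FPR value mapping.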

  return getInstructionMapping(DefaultMappingID, 1,
                               getValueMapping(RBIdx, Size), NumOperands);
}

/// \returns true if a given intrinsic only uses and defines FPRs.
static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
                          const MachineInstr &MI) {
  // TODO: Add more intrinsics.
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
  case Intrinsic::aarch64_neon_uaddv:
  case Intrinsic::aarch64_neon_saddv:
  case Intrinsic::aarch64_neon_umaxv:
  case Intrinsic::aarch64_neon_smaxv:
  case Intrinsic::aarch64_neon_uminv:
  case Intrinsic::aarch64_neon_sminv:
  case Intrinsic::aarch64_neon_faddv:
  case Intrinsic::aarch64_neon_fmaxv:
  case Intrinsic::aarch64_neon_fminv:
  case Intrinsic::aarch64_neon_fmaxnmv:
  case Intrinsic::aarch64_neon_fminnmv:
    return true;
  case Intrinsic::aarch64_neon_saddlv: {
    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    return SrcTy.getElementType().getSizeInBits() >= 16 &&
           SrcTy.getElementCount().getFixedValue() >= 4;
  }
  }
}

bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI,
                                               const TargetRegisterInfo &TRI,
                                               unsigned Depth) const {
  unsigned Op = MI.getOpcode();
  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
    return true;

  // Do we have an explicit floating point instruction?
  if (isPreISelGenericFloatingPointOpcode(Op))
    return true;

  // No. Check if we have a copy-like instruction. If we do, then we could
  // still be fed by floating point instructions.
  if (Op != TargetOpcode::COPY && !MI.isPHI() &&
      !isPreISelGenericOptimizationHint(Op))
    return false;

  // Check if we already know the register bank.
  auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
  if (RB == &AArch64::FPRRegBank)
    return true;
  if (RB == &AArch64::GPRRegBank)
    return false;

  // We don't know anything.
  //
  // If we have a phi, we may be able to infer that it will be assigned an FPR
  // based on its inputs.
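  // E.g. (hypothetical MIR):
  //   %phi:_(s32) = G_PHI %fadd(s32), %bb.1, %fmul(s32), %bb.2
  // counts as FP-constrained as soon as at least one input is defined by an
  // instruction that only defines FP values.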
  if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
    return false;

  return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
    return Op.isReg() &&
           onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
  });
}

bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
                                         const MachineRegisterInfo &MRI,
                                         const TargetRegisterInfo &TRI,
                                         unsigned Depth) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
    return true;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
                                            const MachineRegisterInfo &MRI,
                                            const TargetRegisterInfo &TRI,
                                            unsigned Depth) const {
  switch (MI.getOpcode()) {
  case AArch64::G_DUP:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return true;
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::aarch64_neon_ld1x2:
    case Intrinsic::aarch64_neon_ld1x3:
    case Intrinsic::aarch64_neon_ld1x4:
    case Intrinsic::aarch64_neon_ld2:
    case Intrinsic::aarch64_neon_ld2lane:
    case Intrinsic::aarch64_neon_ld2r:
    case Intrinsic::aarch64_neon_ld3:
    case Intrinsic::aarch64_neon_ld3lane:
    case Intrinsic::aarch64_neon_ld3r:
    case Intrinsic::aarch64_neon_ld4:
    case Intrinsic::aarch64_neon_ld4lane:
    case Intrinsic::aarch64_neon_ld4r:
      return true;
    default:
      break;
    }
    break;
  default:
    break;
  }
  return hasFPConstraints(MI, MRI, TRI, Depth);
}

bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
  // GMemOperation because we also want to match indexed loads.
  auto *MemOp = cast<GMemOperation>(&MI);
  const Value *LdVal = MemOp->getMMO().getValue();
  if (!LdVal)
    return false;

  Type *EltTy = nullptr;
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
    EltTy = GV->getValueType();
    // Look at the first element of the struct to determine the type we are
    // loading.
    while (StructType *StructEltTy = dyn_cast<StructType>(EltTy))
      EltTy = StructEltTy->getTypeAtIndex(0U);
    // Look at the first element of the array to determine its type.
    if (isa<ArrayType>(EltTy))
      EltTy = EltTy->getArrayElementType();
  } else {
    // FIXME: grubbing around uses is pretty ugly, but with no more
    // `getPointerElementType` there's not much else we can do.
    for (const auto *LdUser : LdVal->users()) {
      if (isa<LoadInst>(LdUser)) {
        EltTy = LdUser->getType();
        break;
      }
      if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
        EltTy = LdUser->getOperand(0)->getType();
        break;
      }
    }
  }
  return EltTy && EltTy->isFPOrFPVectorTy();
}
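
/// Compute the mapping for \p MI: start from the coarse guess that vectors
/// and FP operations go on FPR while scalars and pointers go on GPR, then
/// fine-tune the result per opcode below.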
const RegisterBankInfo::InstructionMapping &
AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Try the default logic for non-generic instructions that are either copies
  // or already have some operands assigned to banks.
  if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) ||
      Opc == TargetOpcode::G_PHI) {
    const RegisterBankInfo::InstructionMapping &Mapping =
        getInstrMappingImpl(MI);
    if (Mapping.isValid())
      return Mapping;
  }

  const MachineFunction &MF = *MI.getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();

  switch (Opc) {
    // G_{F|S|U}REM are not listed because they are not legal.
    // Arithmetic ops.
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
    // Bitwise ops.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    // Floating point ops.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
    return getSameKindOfOperandsMapping(MI);
  case TargetOpcode::G_FPEXT: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    return getInstructionMapping(
        DefaultMappingID, /*Cost*/ 1,
        getFPExtMapping(DstTy.getSizeInBits(), SrcTy.getSizeInBits()),
        /*NumOperands*/ 2);
  }
  // Shifts.
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    LLT ShiftAmtTy = MRI.getType(MI.getOperand(2).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
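    // E.g. (illustration): %d:_(s32) = G_ASHR %x:_(s32), %amt:_(s64) takes
    // the dedicated Shift64Imm mapping below so the 64-bit shift amount stays
    // on GPR next to the 32-bit operands.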
    if (ShiftAmtTy.getSizeInBits() == 64 && SrcTy.getSizeInBits() == 32)
      return getInstructionMapping(DefaultMappingID, 1,
                                   &ValMappings[Shift64Imm], 3);
    return getSameKindOfOperandsMapping(MI);
  }
  case TargetOpcode::COPY: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Check if one of the registers is not a generic register.
    if ((DstReg.isPhysical() || !MRI.getType(DstReg).isValid()) ||
        (SrcReg.isPhysical() || !MRI.getType(SrcReg).isValid())) {
      const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI);
      const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI);
      if (!DstRB)
        DstRB = SrcRB;
      else if (!SrcRB)
        SrcRB = DstRB;
      // If both RBs are null, that means both registers are generic.
      // We shouldn't be here.
      assert(DstRB && SrcRB && "Both RegBank were nullptr");
      unsigned Size = getSizeInBits(DstReg, MRI, TRI);
      return getInstructionMapping(
          DefaultMappingID, copyCost(*DstRB, *SrcRB, TypeSize::getFixed(Size)),
          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
          // We only care about the mapping of the destination.
          /*NumOperands*/ 1);
    }
    // Both registers are generic, use G_BITCAST.
    [[fallthrough]];
  }
  case TargetOpcode::G_BITCAST: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    unsigned Size = DstTy.getSizeInBits();
    bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64;
    bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64;
    const RegisterBank &DstRB =
        DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    const RegisterBank &SrcRB =
        SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
    return getInstructionMapping(
        DefaultMappingID, copyCost(DstRB, SrcRB, TypeSize::getFixed(Size)),
        getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
        // We only care about the mapping of the destination for COPY.
        /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1);
  }
  default:
    break;
  }

  unsigned NumOperands = MI.getNumOperands();

  // Track the size and bank of each register. We don't do partial mappings.
  SmallVector<unsigned, 4> OpSize(NumOperands);
  SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    auto &MO = MI.getOperand(Idx);
    if (!MO.isReg() || !MO.getReg())
      continue;

    LLT Ty = MRI.getType(MO.getReg());
    if (!Ty.isValid())
      continue;
    OpSize[Idx] = Ty.getSizeInBits();

    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
    // For floating-point instructions, scalars go in FPRs.
    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
        Ty.getSizeInBits() > 64)
      OpRegBankIdx[Idx] = PMI_FirstFPR;
    else
      OpRegBankIdx[Idx] = PMI_FirstGPR;
  }

  unsigned Cost = 1;
  // Some of the floating-point instructions have mixed GPR and FPR operands:
  // fine-tune the computed mapping.
  switch (Opc) {
  case AArch64::G_DUP: {
    Register ScalarReg = MI.getOperand(1).getReg();
    LLT ScalarTy = MRI.getType(ScalarReg);
    auto ScalarDef = MRI.getVRegDef(ScalarReg);
    // We want to select dup(load) into LD1R.
    if (ScalarDef->getOpcode() == TargetOpcode::G_LOAD)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    // s8 is an exception for G_DUP, which we always want on GPR.
    else if (ScalarTy.getSizeInBits() != 8 &&
             (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
              onlyDefinesFP(*ScalarDef, MRI, TRI)))
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_TRUNC: {
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    // Integer to FP conversions don't necessarily happen between GPR -> FPR
    // regbanks. They can also be done within an FPR register.
    Register SrcReg = MI.getOperand(1).getReg();
    if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    else
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    if (MRI.getType(MI.getOperand(0).getReg()).isVector())
      break;
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_FCMP: {
    // If the result is a vector, it must use an FPR.
    AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
        MRI.getType(MI.getOperand(0).getReg()).isVector() ? PMI_FirstFPR
                                                          : PMI_FirstGPR;
    OpRegBankIdx = {Idx0,
                    /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
    break;
  }
  case TargetOpcode::G_BITCAST:
    // This is going to be a cross register bank copy and this is expensive.
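    // E.g. a hypothetical %f:fpr(s32) = G_BITCAST %g:gpr(s32) would lower to
    // an FMOV; pricing it via copyCost lets the greedy mode weigh the
    // penalty.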
    if (OpRegBankIdx[0] != OpRegBankIdx[1])
      Cost = copyCost(
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
          *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
          TypeSize::getFixed(OpSize[0]));
    break;
  case TargetOpcode::G_LOAD: {
    // Loading into the vector unit is slightly more expensive.
    // This is actually only true for the LD1R and co. instructions,
    // but anyway for the fast mode this number does not matter and
    // for the greedy mode the cost of the cross bank copy will
    // offset this number.
    // FIXME: Should be derived from the scheduling model.
    if (OpRegBankIdx[0] != PMI_FirstGPR) {
      Cost = 2;
      break;
    }

    if (cast<GLoad>(MI).isAtomic()) {
      // Atomics always use GPR destinations. Don't refine any further.
      OpRegBankIdx[0] = PMI_FirstGPR;
      break;
    }

    // Try to guess the type of the load from the MMO.
    if (isLoadFromFPType(MI)) {
      OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }

    // Check if that load feeds fp instructions.
    // In that case, we want the default mapping to be on FPR
    // instead of blindly mapping every scalar to GPR.
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](const MachineInstr &UseMI) {
                 // If we have at least one direct use in a FP instruction,
                 // assume this was a floating point load in the IR. If it was
                 // not, we would have had a bitcast before reaching that
                 // instruction.
                 //
                 // Int->FP conversion operations are also captured in
                 // onlyDefinesFP().
                 return onlyUsesFP(UseMI, MRI, TRI) ||
                        onlyDefinesFP(UseMI, MRI, TRI);
               }))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_STORE:
    // Check if that store is fed by fp instructions.
    if (OpRegBankIdx[0] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(0).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[0] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_STORE:
    if (OpRegBankIdx[1] == PMI_FirstGPR) {
      Register VReg = MI.getOperand(1).getReg();
      if (!VReg)
        break;
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (onlyDefinesFP(*DefMI, MRI, TRI))
        OpRegBankIdx[1] = PMI_FirstFPR;
      break;
    }
    break;
  case TargetOpcode::G_INDEXED_SEXTLOAD:
  case TargetOpcode::G_INDEXED_ZEXTLOAD:
    // These should always be GPR.
    OpRegBankIdx[0] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INDEXED_LOAD: {
    if (isLoadFromFPType(MI))
      OpRegBankIdx[0] = PMI_FirstFPR;
    break;
  }
  case TargetOpcode::G_SELECT: {
    // If the destination is FPR, preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    // If we're taking in vectors, we have no choice but to put everything on
    // FPRs, except for the condition. The condition must always be on a GPR.
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
    if (SrcTy.isVector()) {
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};
      break;
    }

    // Try to minimize the number of copies. If we have more floating point
    // constrained values than not, then we'll put everything on FPR.
    // Otherwise, everything has to be on GPR.
    unsigned NumFP = 0;

    // Check if the uses of the result always produce floating point values.
    //
    // For example:
    //
    // %z = G_SELECT %cond %x %y
    // fpr = G_FOO %z ...
    if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); }))
      ++NumFP;

    // Check if the defs of the source values always produce floating point
    // values.
    //
    // For example:
    //
    // %x = G_SOMETHING_ALWAYS_FLOAT %a ...
    // %z = G_SELECT %cond %x %y
    //
    // Also check whether or not the sources have already been decided to be
    // FPR. Keep track of this.
    //
    // This doesn't check the condition, since it's just whatever is in NZCV.
    // This isn't passed explicitly in a register to fcsel/csel.
    for (unsigned Idx = 2; Idx < 4; ++Idx) {
      Register VReg = MI.getOperand(Idx).getReg();
      MachineInstr *DefMI = MRI.getVRegDef(VReg);
      if (getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank ||
          onlyDefinesFP(*DefMI, MRI, TRI))
        ++NumFP;
    }

    // If we have more FP constraints than not, then move everything over to
    // FPR.
    if (NumFP >= 2)
      OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR, PMI_FirstFPR, PMI_FirstFPR};

    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    // If the first operand belongs to an FPR register bank, then make sure
    // that we preserve that.
    if (OpRegBankIdx[0] != PMI_FirstGPR)
      break;

    LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands() - 1).getReg());
    // UNMERGE into scalars from a vector should always use FPR.
    // Likewise if any of the uses are FP instructions.
    if (SrcTy.isVector() || SrcTy == LLT::scalar(128) ||
        any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
               [&](MachineInstr &MI) { return onlyUsesFP(MI, MRI, TRI); })) {
      // Set the register bank of every operand to FPR.
      for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
           Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    // Destination and source need to be FPRs.
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // Index needs to be a GPR.
    OpRegBankIdx[2] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    OpRegBankIdx[0] = PMI_FirstFPR;
    OpRegBankIdx[1] = PMI_FirstFPR;

    // The element may be either a GPR or FPR. Preserve that behaviour.
    if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
      OpRegBankIdx[2] = PMI_FirstFPR;
    else
      OpRegBankIdx[2] = PMI_FirstGPR;

    // Index needs to be a GPR.
    OpRegBankIdx[3] = PMI_FirstGPR;
    break;
  case TargetOpcode::G_EXTRACT: {
    // For s128 sources we have to use fpr unless we know otherwise.
    auto Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
    if (SrcTy.getSizeInBits() != 128)
      break;
    auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass
                   ? PMI_FirstGPR
                   : PMI_FirstFPR;
    OpRegBankIdx[0] = Idx;
    OpRegBankIdx[1] = Idx;
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // If the first source operand belongs to an FPR register bank, then make
    // sure that we preserve that.
    if (OpRegBankIdx[1] != PMI_FirstGPR)
      break;
    Register VReg = MI.getOperand(1).getReg();
    if (!VReg)
      break;

    // Get the instruction that defined the source operand reg, and check if
    // it's a floating point operation, or if the source is a type like s16
    // that doesn't have an exact-size GPR register class. The exception is if
    // the build_vector has all constant operands, which may be better left on
    // GPR without copies, so it can be matched in imported patterns.
    MachineInstr *DefMI = MRI.getVRegDef(VReg);
    unsigned DefOpc = DefMI->getOpcode();
    const LLT SrcTy = MRI.getType(VReg);
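    // E.g. (illustration): a G_BUILD_VECTOR whose sources are all G_CONSTANTs
    // keeps its default GPR mapping (the early break below), while FP-defined
    // or sub-32-bit scalar sources move every operand to FPR.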
    if (all_of(MI.operands(), [&](const MachineOperand &Op) {
          return Op.isDef() || MRI.getVRegDef(Op.getReg())->getOpcode() ==
                                   TargetOpcode::G_CONSTANT;
        }))
      break;
    if (isPreISelGenericFloatingPointOpcode(DefOpc) ||
        SrcTy.getSizeInBits() < 32 ||
        getRegBank(VReg, MRI, TRI) == &AArch64::FPRRegBank) {
      // Have a floating point op.
      // Make sure every operand gets mapped to an FPR register class.
      unsigned NumOperands = MI.getNumOperands();
      for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
        OpRegBankIdx[Idx] = PMI_FirstFPR;
    }
    break;
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // Reductions produce a scalar value from a vector; the scalar should be
    // on the FPR bank.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    // These reductions also take a scalar accumulator input.
    // Assign them FPR for now.
    OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
    break;
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
    // Check if we know that the intrinsic has any constraints on its register
    // banks. If it does, then update the mapping accordingly.
    unsigned Idx = 0;
    if (onlyDefinesFP(MI, MRI, TRI))
      for (const auto &Op : MI.defs()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    else
      Idx += MI.getNumExplicitDefs();

    if (onlyUsesFP(MI, MRI, TRI))
      for (const auto &Op : MI.explicit_uses()) {
        if (Op.isReg())
          OpRegBankIdx[Idx] = PMI_FirstFPR;
        ++Idx;
      }
    break;
  }
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // Source is always floating point and destination is always integer.
    OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
    break;
  }
  }

  // Finally construct the computed mapping.
  SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
  for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
    if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) {
      LLT Ty = MRI.getType(MI.getOperand(Idx).getReg());
      if (!Ty.isValid())
        continue;
      auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
      if (!Mapping->isValid())
        return getInvalidInstructionMapping();

      OpdsMapping[Idx] = Mapping;
    }
  }

  return getInstructionMapping(DefaultMappingID, Cost,
                               getOperandsMapping(OpdsMapping), NumOperands);
}