1 //===- AArch64RegisterInfo.cpp - AArch64 Register Information -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the AArch64 implementation of the TargetRegisterInfo 10 // class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64RegisterInfo.h" 15 #include "AArch64FrameLowering.h" 16 #include "AArch64InstrInfo.h" 17 #include "AArch64MachineFunctionInfo.h" 18 #include "AArch64Subtarget.h" 19 #include "MCTargetDesc/AArch64AddressingModes.h" 20 #include "MCTargetDesc/AArch64InstPrinter.h" 21 #include "Utils/AArch64SMEAttributes.h" 22 #include "llvm/ADT/BitVector.h" 23 #include "llvm/BinaryFormat/Dwarf.h" 24 #include "llvm/CodeGen/LiveRegMatrix.h" 25 #include "llvm/CodeGen/MachineFrameInfo.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/RegisterScavenging.h" 29 #include "llvm/CodeGen/TargetFrameLowering.h" 30 #include "llvm/IR/DebugInfoMetadata.h" 31 #include "llvm/IR/DiagnosticInfo.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/Target/TargetOptions.h" 34 #include "llvm/TargetParser/Triple.h" 35 36 using namespace llvm; 37 38 #define GET_CC_REGISTER_LISTS 39 #include "AArch64GenCallingConv.inc" 40 #define GET_REGINFO_TARGET_DESC 41 #include "AArch64GenRegisterInfo.inc" 42 43 AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT, unsigned HwMode) 44 : AArch64GenRegisterInfo(AArch64::LR, 0, 0, 0, HwMode), TT(TT) { 45 AArch64_MC::initLLVMToCVRegMapping(this); 46 } 47 48 /// Return whether the register needs a CFI entry. Not all unwinders may know 49 /// about SVE registers, so we assume the lowest common denominator, i.e. the 50 /// callee-saves required by the base ABI. For the SVE registers z8-z15 only the 51 /// lower 64-bits (d8-d15) need to be saved. The lower 64-bits subreg is 52 /// returned in \p RegToUseForCFI. 53 bool AArch64RegisterInfo::regNeedsCFI(MCRegister Reg, 54 MCRegister &RegToUseForCFI) const { 55 if (AArch64::PPRRegClass.contains(Reg)) 56 return false; 57 58 if (AArch64::ZPRRegClass.contains(Reg)) { 59 RegToUseForCFI = getSubReg(Reg, AArch64::dsub); 60 for (int I = 0; CSR_AArch64_AAPCS_SaveList[I]; ++I) { 61 if (CSR_AArch64_AAPCS_SaveList[I] == RegToUseForCFI) 62 return true; 63 } 64 return false; 65 } 66 67 RegToUseForCFI = Reg; 68 return true; 69 } 70 71 const MCPhysReg * 72 AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { 73 assert(MF && "Invalid MachineFunction pointer."); 74 75 if (MF->getFunction().getCallingConv() == CallingConv::GHC) 76 // GHC set of callee saved regs is empty as all those regs are 77 // used for passing STG regs around 78 return CSR_AArch64_NoRegs_SaveList; 79 if (MF->getFunction().getCallingConv() == CallingConv::PreserveNone) 80 return CSR_AArch64_NoneRegs_SaveList; 81 if (MF->getFunction().getCallingConv() == CallingConv::AnyReg) 82 return CSR_AArch64_AllRegs_SaveList; 83 84 if (MF->getFunction().getCallingConv() == CallingConv::ARM64EC_Thunk_X64) 85 return CSR_Win_AArch64_Arm64EC_Thunk_SaveList; 86 87 // Darwin has its own CSR_AArch64_AAPCS_SaveList, which means most CSR save 88 // lists depending on that will need to have their Darwin variant as well. 
89 if (MF->getSubtarget<AArch64Subtarget>().isTargetDarwin()) 90 return getDarwinCalleeSavedRegs(MF); 91 92 if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check) 93 return CSR_Win_AArch64_CFGuard_Check_SaveList; 94 if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows()) { 95 if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering() 96 ->supportSwiftError() && 97 MF->getFunction().getAttributes().hasAttrSomewhere( 98 Attribute::SwiftError)) 99 return CSR_Win_AArch64_AAPCS_SwiftError_SaveList; 100 if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail) 101 return CSR_Win_AArch64_AAPCS_SwiftTail_SaveList; 102 if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall) 103 return CSR_Win_AArch64_AAVPCS_SaveList; 104 if (MF->getFunction().getCallingConv() == 105 CallingConv::AArch64_SVE_VectorCall) 106 return CSR_Win_AArch64_SVE_AAPCS_SaveList; 107 if (MF->getInfo<AArch64FunctionInfo>()->isSVECC()) 108 return CSR_Win_AArch64_SVE_AAPCS_SaveList; 109 return CSR_Win_AArch64_AAPCS_SaveList; 110 } 111 if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall) 112 return CSR_AArch64_AAVPCS_SaveList; 113 if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall) 114 return CSR_AArch64_SVE_AAPCS_SaveList; 115 if (MF->getFunction().getCallingConv() == 116 CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0) 117 report_fatal_error( 118 "Calling convention " 119 "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is only " 120 "supported to improve calls to SME ACLE save/restore/disable-za " 121 "functions, and is not intended to be used beyond that scope."); 122 if (MF->getFunction().getCallingConv() == 123 CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1) 124 report_fatal_error( 125 "Calling convention " 126 "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 is " 127 "only supported to improve calls to SME ACLE __arm_get_current_vg " 128 "function, and is not intended to be used beyond that scope."); 129 if (MF->getFunction().getCallingConv() == 130 CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2) 131 report_fatal_error( 132 "Calling convention " 133 "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is " 134 "only supported to improve calls to SME ACLE __arm_sme_state " 135 "and is not intended to be used beyond that scope."); 136 if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering() 137 ->supportSwiftError() && 138 MF->getFunction().getAttributes().hasAttrSomewhere( 139 Attribute::SwiftError)) 140 return CSR_AArch64_AAPCS_SwiftError_SaveList; 141 if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail) 142 return CSR_AArch64_AAPCS_SwiftTail_SaveList; 143 if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost) 144 return CSR_AArch64_RT_MostRegs_SaveList; 145 if (MF->getFunction().getCallingConv() == CallingConv::PreserveAll) 146 return CSR_AArch64_RT_AllRegs_SaveList; 147 if (MF->getFunction().getCallingConv() == CallingConv::Win64) 148 // This is for OSes other than Windows; Windows is a separate case further 149 // above. 
150 return CSR_AArch64_AAPCS_X18_SaveList; 151 if (MF->getInfo<AArch64FunctionInfo>()->isSVECC()) 152 return CSR_AArch64_SVE_AAPCS_SaveList; 153 return CSR_AArch64_AAPCS_SaveList; 154 } 155 156 const MCPhysReg * 157 AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const { 158 assert(MF && "Invalid MachineFunction pointer."); 159 assert(MF->getSubtarget<AArch64Subtarget>().isTargetDarwin() && 160 "Invalid subtarget for getDarwinCalleeSavedRegs"); 161 162 if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check) 163 report_fatal_error( 164 "Calling convention CFGuard_Check is unsupported on Darwin."); 165 if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall) 166 return CSR_Darwin_AArch64_AAVPCS_SaveList; 167 if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall) 168 report_fatal_error( 169 "Calling convention SVE_VectorCall is unsupported on Darwin."); 170 if (MF->getFunction().getCallingConv() == 171 CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0) 172 report_fatal_error( 173 "Calling convention " 174 "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0 is " 175 "only supported to improve calls to SME ACLE save/restore/disable-za " 176 "functions, and is not intended to be used beyond that scope."); 177 if (MF->getFunction().getCallingConv() == 178 CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1) 179 report_fatal_error( 180 "Calling convention " 181 "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1 is " 182 "only supported to improve calls to SME ACLE __arm_get_current_vg " 183 "function, and is not intended to be used beyond that scope."); 184 if (MF->getFunction().getCallingConv() == 185 CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2) 186 report_fatal_error( 187 "Calling convention " 188 "AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 is " 189 "only supported to improve calls to SME ACLE __arm_sme_state " 190 "and is not intended to be used beyond that scope."); 191 if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS) 192 return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() 193 ? 
CSR_Darwin_AArch64_CXX_TLS_PE_SaveList 194 : CSR_Darwin_AArch64_CXX_TLS_SaveList; 195 if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering() 196 ->supportSwiftError() && 197 MF->getFunction().getAttributes().hasAttrSomewhere( 198 Attribute::SwiftError)) 199 return CSR_Darwin_AArch64_AAPCS_SwiftError_SaveList; 200 if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail) 201 return CSR_Darwin_AArch64_AAPCS_SwiftTail_SaveList; 202 if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost) 203 return CSR_Darwin_AArch64_RT_MostRegs_SaveList; 204 if (MF->getFunction().getCallingConv() == CallingConv::PreserveAll) 205 return CSR_Darwin_AArch64_RT_AllRegs_SaveList; 206 if (MF->getFunction().getCallingConv() == CallingConv::Win64) 207 return CSR_Darwin_AArch64_AAPCS_Win64_SaveList; 208 if (MF->getInfo<AArch64FunctionInfo>()->isSVECC()) 209 return CSR_Darwin_AArch64_SVE_AAPCS_SaveList; 210 return CSR_Darwin_AArch64_AAPCS_SaveList; 211 } 212 213 const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy( 214 const MachineFunction *MF) const { 215 assert(MF && "Invalid MachineFunction pointer."); 216 if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && 217 MF->getInfo<AArch64FunctionInfo>()->isSplitCSR()) 218 return CSR_Darwin_AArch64_CXX_TLS_ViaCopy_SaveList; 219 return nullptr; 220 } 221 222 void AArch64RegisterInfo::UpdateCustomCalleeSavedRegs( 223 MachineFunction &MF) const { 224 const MCPhysReg *CSRs = getCalleeSavedRegs(&MF); 225 SmallVector<MCPhysReg, 32> UpdatedCSRs; 226 for (const MCPhysReg *I = CSRs; *I; ++I) 227 UpdatedCSRs.push_back(*I); 228 229 for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) { 230 if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) { 231 UpdatedCSRs.push_back(AArch64::GPR64commonRegClass.getRegister(i)); 232 } 233 } 234 // Register lists are zero-terminated. 235 UpdatedCSRs.push_back(0); 236 MF.getRegInfo().setCalleeSavedRegs(UpdatedCSRs); 237 } 238 239 const TargetRegisterClass * 240 AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, 241 unsigned Idx) const { 242 // edge case for GPR/FPR register classes 243 if (RC == &AArch64::GPR32allRegClass && Idx == AArch64::hsub) 244 return &AArch64::FPR32RegClass; 245 else if (RC == &AArch64::GPR64allRegClass && Idx == AArch64::hsub) 246 return &AArch64::FPR64RegClass; 247 248 // Forward to TableGen's default version. 
249 return AArch64GenRegisterInfo::getSubClassWithSubReg(RC, Idx); 250 } 251 252 const uint32_t * 253 AArch64RegisterInfo::getDarwinCallPreservedMask(const MachineFunction &MF, 254 CallingConv::ID CC) const { 255 assert(MF.getSubtarget<AArch64Subtarget>().isTargetDarwin() && 256 "Invalid subtarget for getDarwinCallPreservedMask"); 257 258 if (CC == CallingConv::CXX_FAST_TLS) 259 return CSR_Darwin_AArch64_CXX_TLS_RegMask; 260 if (CC == CallingConv::AArch64_VectorCall) 261 return CSR_Darwin_AArch64_AAVPCS_RegMask; 262 if (CC == CallingConv::AArch64_SVE_VectorCall) 263 return CSR_Darwin_AArch64_SVE_AAPCS_RegMask; 264 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0) 265 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask; 266 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1) 267 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1_RegMask; 268 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2) 269 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2_RegMask; 270 if (CC == CallingConv::CFGuard_Check) 271 report_fatal_error( 272 "Calling convention CFGuard_Check is unsupported on Darwin."); 273 if (MF.getSubtarget<AArch64Subtarget>() 274 .getTargetLowering() 275 ->supportSwiftError() && 276 MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 277 return CSR_Darwin_AArch64_AAPCS_SwiftError_RegMask; 278 if (CC == CallingConv::SwiftTail) 279 return CSR_Darwin_AArch64_AAPCS_SwiftTail_RegMask; 280 if (CC == CallingConv::PreserveMost) 281 return CSR_Darwin_AArch64_RT_MostRegs_RegMask; 282 if (CC == CallingConv::PreserveAll) 283 return CSR_Darwin_AArch64_RT_AllRegs_RegMask; 284 return CSR_Darwin_AArch64_AAPCS_RegMask; 285 } 286 287 const uint32_t * 288 AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, 289 CallingConv::ID CC) const { 290 bool SCS = MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack); 291 if (CC == CallingConv::GHC) 292 // This is academic because all GHC calls are (supposed to be) tail calls 293 return SCS ? CSR_AArch64_NoRegs_SCS_RegMask : CSR_AArch64_NoRegs_RegMask; 294 if (CC == CallingConv::PreserveNone) 295 return SCS ? CSR_AArch64_NoneRegs_SCS_RegMask 296 : CSR_AArch64_NoneRegs_RegMask; 297 if (CC == CallingConv::AnyReg) 298 return SCS ? CSR_AArch64_AllRegs_SCS_RegMask : CSR_AArch64_AllRegs_RegMask; 299 300 // All the following calling conventions are handled differently on Darwin. 301 if (MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) { 302 if (SCS) 303 report_fatal_error("ShadowCallStack attribute not supported on Darwin."); 304 return getDarwinCallPreservedMask(MF, CC); 305 } 306 307 if (CC == CallingConv::AArch64_VectorCall) 308 return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask; 309 if (CC == CallingConv::AArch64_SVE_VectorCall) 310 return SCS ? 
CSR_AArch64_SVE_AAPCS_SCS_RegMask 311 : CSR_AArch64_SVE_AAPCS_RegMask; 312 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0) 313 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask; 314 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1) 315 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1_RegMask; 316 if (CC == CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2) 317 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2_RegMask; 318 if (CC == CallingConv::CFGuard_Check) 319 return CSR_Win_AArch64_CFGuard_Check_RegMask; 320 if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering() 321 ->supportSwiftError() && 322 MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 323 return SCS ? CSR_AArch64_AAPCS_SwiftError_SCS_RegMask 324 : CSR_AArch64_AAPCS_SwiftError_RegMask; 325 if (CC == CallingConv::SwiftTail) { 326 if (SCS) 327 report_fatal_error("ShadowCallStack attribute not supported with swifttail"); 328 return CSR_AArch64_AAPCS_SwiftTail_RegMask; 329 } 330 if (CC == CallingConv::PreserveMost) 331 return SCS ? CSR_AArch64_RT_MostRegs_SCS_RegMask 332 : CSR_AArch64_RT_MostRegs_RegMask; 333 if (CC == CallingConv::PreserveAll) 334 return SCS ? CSR_AArch64_RT_AllRegs_SCS_RegMask 335 : CSR_AArch64_RT_AllRegs_RegMask; 336 337 return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask; 338 } 339 340 const uint32_t *AArch64RegisterInfo::getCustomEHPadPreservedMask( 341 const MachineFunction &MF) const { 342 if (MF.getSubtarget<AArch64Subtarget>().isTargetLinux()) 343 return CSR_AArch64_AAPCS_RegMask; 344 345 return nullptr; 346 } 347 348 const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { 349 if (TT.isOSDarwin()) 350 return CSR_Darwin_AArch64_TLS_RegMask; 351 352 assert(TT.isOSBinFormatELF() && "Invalid target"); 353 return CSR_AArch64_TLS_ELF_RegMask; 354 } 355 356 void AArch64RegisterInfo::UpdateCustomCallPreservedMask(MachineFunction &MF, 357 const uint32_t **Mask) const { 358 uint32_t *UpdatedMask = MF.allocateRegMask(); 359 unsigned RegMaskSize = MachineOperand::getRegMaskSize(getNumRegs()); 360 memcpy(UpdatedMask, *Mask, sizeof(UpdatedMask[0]) * RegMaskSize); 361 362 for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) { 363 if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) { 364 for (MCPhysReg SubReg : 365 subregs_inclusive(AArch64::GPR64commonRegClass.getRegister(i))) { 366 // See TargetRegisterInfo::getCallPreservedMask for how to interpret the 367 // register mask. 
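        // Worked example (illustrative numbers only): a register whose number
        // in the generated register enum happened to be 70 would set bit 6 of
        // mask word 2, since 70 / 32 == 2 and 70 % 32 == 6.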
368 UpdatedMask[SubReg / 32] |= 1u << (SubReg % 32); 369 } 370 } 371 } 372 *Mask = UpdatedMask; 373 } 374 375 const uint32_t *AArch64RegisterInfo::getSMStartStopCallPreservedMask() const { 376 return CSR_AArch64_SMStartStop_RegMask; 377 } 378 379 const uint32_t * 380 AArch64RegisterInfo::SMEABISupportRoutinesCallPreservedMaskFromX0() const { 381 return CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0_RegMask; 382 } 383 384 const uint32_t *AArch64RegisterInfo::getNoPreservedMask() const { 385 return CSR_AArch64_NoRegs_RegMask; 386 } 387 388 const uint32_t * 389 AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, 390 CallingConv::ID CC) const { 391 // This should return a register mask that is the same as that returned by 392 // getCallPreservedMask but that additionally preserves the register used for 393 // the first i64 argument (which must also be the register used to return a 394 // single i64 return value) 395 // 396 // In case that the calling convention does not use the same register for 397 // both, the function should return NULL (does not currently apply) 398 assert(CC != CallingConv::GHC && "should not be GHC calling convention."); 399 if (MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) 400 return CSR_Darwin_AArch64_AAPCS_ThisReturn_RegMask; 401 return CSR_AArch64_AAPCS_ThisReturn_RegMask; 402 } 403 404 const uint32_t *AArch64RegisterInfo::getWindowsStackProbePreservedMask() const { 405 return CSR_AArch64_StackProbe_Windows_RegMask; 406 } 407 408 std::optional<std::string> 409 AArch64RegisterInfo::explainReservedReg(const MachineFunction &MF, 410 MCRegister PhysReg) const { 411 if (hasBasePointer(MF) && MCRegisterInfo::regsOverlap(PhysReg, AArch64::X19)) 412 return std::string("X19 is used as the frame base pointer register."); 413 414 if (MF.getSubtarget<AArch64Subtarget>().isWindowsArm64EC()) { 415 bool warn = false; 416 if (MCRegisterInfo::regsOverlap(PhysReg, AArch64::X13) || 417 MCRegisterInfo::regsOverlap(PhysReg, AArch64::X14) || 418 MCRegisterInfo::regsOverlap(PhysReg, AArch64::X23) || 419 MCRegisterInfo::regsOverlap(PhysReg, AArch64::X24) || 420 MCRegisterInfo::regsOverlap(PhysReg, AArch64::X28)) 421 warn = true; 422 423 for (unsigned i = AArch64::B16; i <= AArch64::B31; ++i) 424 if (MCRegisterInfo::regsOverlap(PhysReg, i)) 425 warn = true; 426 427 if (warn) 428 return std::string(AArch64InstPrinter::getRegisterName(PhysReg)) + 429 " is clobbered by asynchronous signals when using Arm64EC."; 430 } 431 432 return {}; 433 } 434 435 BitVector 436 AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const { 437 const AArch64FrameLowering *TFI = getFrameLowering(MF); 438 439 // FIXME: avoid re-calculating this every time. 440 BitVector Reserved(getNumRegs()); 441 markSuperRegs(Reserved, AArch64::WSP); 442 markSuperRegs(Reserved, AArch64::WZR); 443 444 if (TFI->isFPReserved(MF)) 445 markSuperRegs(Reserved, AArch64::W29); 446 447 if (MF.getSubtarget<AArch64Subtarget>().isWindowsArm64EC()) { 448 // x13, x14, x23, x24, x28, and v16-v31 are clobbered by asynchronous 449 // signals, so we can't ever use them. 
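    // (Marking the 32-bit W registers below also reserves the overlapping
    // 64-bit X registers, since markSuperRegs marks all super-registers of
    // the given register.)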
450 markSuperRegs(Reserved, AArch64::W13); 451 markSuperRegs(Reserved, AArch64::W14); 452 markSuperRegs(Reserved, AArch64::W23); 453 markSuperRegs(Reserved, AArch64::W24); 454 markSuperRegs(Reserved, AArch64::W28); 455 for (unsigned i = AArch64::B16; i <= AArch64::B31; ++i) 456 markSuperRegs(Reserved, i); 457 } 458 459 for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) { 460 if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i)) 461 markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i)); 462 } 463 464 if (hasBasePointer(MF)) 465 markSuperRegs(Reserved, AArch64::W19); 466 467 // SLH uses register W16/X16 as the taint register. 468 if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening)) 469 markSuperRegs(Reserved, AArch64::W16); 470 471 // FFR is modelled as global state that cannot be allocated. 472 if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) 473 Reserved.set(AArch64::FFR); 474 475 // SME tiles are not allocatable. 476 if (MF.getSubtarget<AArch64Subtarget>().hasSME()) { 477 for (MCPhysReg SubReg : subregs_inclusive(AArch64::ZA)) 478 Reserved.set(SubReg); 479 } 480 481 // VG cannot be allocated 482 Reserved.set(AArch64::VG); 483 484 if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) { 485 for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true); 486 SubReg.isValid(); ++SubReg) 487 Reserved.set(*SubReg); 488 } 489 490 markSuperRegs(Reserved, AArch64::FPCR); 491 markSuperRegs(Reserved, AArch64::FPMR); 492 markSuperRegs(Reserved, AArch64::FPSR); 493 494 if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) { 495 markSuperRegs(Reserved, AArch64::X27); 496 markSuperRegs(Reserved, AArch64::X28); 497 markSuperRegs(Reserved, AArch64::W27); 498 markSuperRegs(Reserved, AArch64::W28); 499 } 500 501 assert(checkAllSuperRegsMarked(Reserved)); 502 503 // Add _HI registers after checkAllSuperRegsMarked as this check otherwise 504 // becomes considerably more expensive. 505 Reserved.set(AArch64::WSP_HI); 506 Reserved.set(AArch64::WZR_HI); 507 static_assert(AArch64::W30_HI - AArch64::W0_HI == 30, 508 "Unexpected order of registers"); 509 Reserved.set(AArch64::W0_HI, AArch64::W30_HI); 510 static_assert(AArch64::B31_HI - AArch64::B0_HI == 31, 511 "Unexpected order of registers"); 512 Reserved.set(AArch64::B0_HI, AArch64::B31_HI); 513 static_assert(AArch64::H31_HI - AArch64::H0_HI == 31, 514 "Unexpected order of registers"); 515 Reserved.set(AArch64::H0_HI, AArch64::H31_HI); 516 static_assert(AArch64::S31_HI - AArch64::S0_HI == 31, 517 "Unexpected order of registers"); 518 Reserved.set(AArch64::S0_HI, AArch64::S31_HI); 519 static_assert(AArch64::D31_HI - AArch64::D0_HI == 31, 520 "Unexpected order of registers"); 521 Reserved.set(AArch64::D0_HI, AArch64::D31_HI); 522 static_assert(AArch64::Q31_HI - AArch64::Q0_HI == 31, 523 "Unexpected order of registers"); 524 Reserved.set(AArch64::Q0_HI, AArch64::Q31_HI); 525 526 return Reserved; 527 } 528 529 BitVector 530 AArch64RegisterInfo::getUserReservedRegs(const MachineFunction &MF) const { 531 BitVector Reserved(getNumRegs()); 532 for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) { 533 // ReserveXRegister is set for registers manually reserved 534 // through +reserve-x#i. 
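    // (For example, building with -mattr=+reserve-x18 causes X18 to be
    // reserved here.)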
    if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(i))
      markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
  }
  return Reserved;
}

BitVector
AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
    if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReservedForRA(i))
      markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
  }

  if (MF.getSubtarget<AArch64Subtarget>().isLRReservedForRA()) {
    // In order to prevent the register allocator from using LR, we need to
    // mark it as reserved. However, we don't want to keep it reserved
    // throughout the pipeline since it prevents other infrastructure from
    // reasoning about its liveness. We use the NoVRegs property instead of
    // IsSSA because IsSSA is removed before VirtRegRewriter runs.
    if (!MF.getProperties().hasNoVRegs())
      markSuperRegs(Reserved, AArch64::LR);
  }

  assert(checkAllSuperRegsMarked(Reserved));

  // Handle strictlyReservedRegs separately to avoid re-evaluating the assert,
  // which becomes considerably expensive when considering the _HI registers.
  Reserved |= getStrictlyReservedRegs(MF);

  return Reserved;
}

bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
                                        MCRegister Reg) const {
  return getReservedRegs(MF)[Reg];
}

bool AArch64RegisterInfo::isUserReservedReg(const MachineFunction &MF,
                                            MCRegister Reg) const {
  return getUserReservedRegs(MF)[Reg];
}

bool AArch64RegisterInfo::isStrictlyReservedReg(const MachineFunction &MF,
                                                MCRegister Reg) const {
  return getStrictlyReservedRegs(MF)[Reg];
}

bool AArch64RegisterInfo::isAnyArgRegReserved(const MachineFunction &MF) const {
  return llvm::any_of(*AArch64::GPR64argRegClass.MC, [this, &MF](MCPhysReg r) {
    return isStrictlyReservedReg(MF, r);
  });
}

void AArch64RegisterInfo::emitReservedArgRegCallError(
    const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  F.getContext().diagnose(DiagnosticInfoUnsupported{
      F, ("AArch64 doesn't support function calls if any of the argument "
          "registers is reserved.")});
}

bool AArch64RegisterInfo::isAsmClobberable(const MachineFunction &MF,
                                           MCRegister PhysReg) const {
  // SLH uses register X16 as the taint register, but it will fall back to a
  // different method if the user clobbers it. So X16 is not reserved for
  // inline asm but is for normal codegen.
  if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening) &&
      MCRegisterInfo::regsOverlap(PhysReg, AArch64::X16))
    return true;

  // ZA/ZT0 registers are reserved but may be permitted in the clobber list.
  if (PhysReg == AArch64::ZA || PhysReg == AArch64::ZT0)
    return true;

  return !isReservedReg(MF, PhysReg);
}

const TargetRegisterClass *
AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                        unsigned Kind) const {
  return &AArch64::GPR64spRegClass;
}

const TargetRegisterClass *
AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &AArch64::CCRRegClass)
    return &AArch64::GPR64RegClass; // Only MSR & MRS copy NZCV.
  return RC;
}

unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }

bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // In the presence of variable sized objects or funclets, if the fixed stack
  // size is large enough that referencing from the FP won't result in things
  // being in range relatively often, we can use a base pointer to allow access
  // from the other direction like the SP normally works.
  //
  // Furthermore, if variable sized objects are present and the stack needs to
  // be dynamically re-aligned, the base pointer is the only reliable way to
  // reference the locals.
  if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
    if (hasStackRealignment(MF))
      return true;

    auto &ST = MF.getSubtarget<AArch64Subtarget>();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    if (ST.hasSVE() || ST.isStreaming()) {
      // Frames that have both variable sized objects and scalable SVE objects
      // should always use a base pointer.
      if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
        return true;
    }

    // Frames with hazard padding can have a large offset between the frame
    // pointer and GPR locals, which includes the emergency spill slot. If the
    // emergency spill slot is not within range of the load/store instructions
    // (which have a signed 9-bit range), we will fail to compile if it is
    // used. Since hasBasePointer() is called before we know if we have hazard
    // padding or an emergency spill slot, we need to enable the base pointer
    // conservatively.
    if (ST.getStreamingHazardSize() &&
        !AFI->getSMEFnAttrs().hasNonStreamingInterfaceAndBody()) {
      return true;
    }

    // Conservatively estimate whether the negative offset from the frame
    // pointer will be sufficient to reach. If a function has a smallish
    // frame, it's less likely to have lots of spills and callee saved
    // space, so it's all more likely to be within range of the frame pointer.
    // If it's wrong, we'll materialize the constant and still get to the
    // object; it's just suboptimal. Negative offsets use the unscaled
    // load/store instructions, which have a 9-bit signed immediate.
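    // (A 9-bit signed unscaled immediate only reaches offsets down to -256
    // bytes below the frame pointer, hence the 256-byte threshold used below.)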
    return MFI.getLocalFrameSize() >= 256;
  }

  return false;
}

bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
                                             MCRegister Reg) const {
  CallingConv::ID CC = MF.getFunction().getCallingConv();
  const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
  bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
                                         MF.getFunction().isVarArg());

  auto HasReg = [](ArrayRef<MCRegister> RegList, MCRegister Reg) {
    return llvm::is_contained(RegList, Reg);
  };

  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention.");
  case CallingConv::GHC:
    return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
  case CallingConv::PreserveNone:
    if (!MF.getFunction().isVarArg())
      return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg);
    [[fallthrough]];
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::PreserveMost:
  case CallingConv::PreserveAll:
  case CallingConv::CXX_FAST_TLS:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
  case CallingConv::Tail:
    if (STI.isTargetWindows()) {
      if (IsVarArg)
        return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
      switch (CC) {
      default:
        return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
      case CallingConv::Swift:
      case CallingConv::SwiftTail:
        return HasReg(CC_AArch64_Win64PCS_Swift_ArgRegs, Reg) ||
               HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
      }
    }
    if (!STI.isTargetDarwin()) {
      switch (CC) {
      default:
        return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
      case CallingConv::Swift:
      case CallingConv::SwiftTail:
        return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg) ||
               HasReg(CC_AArch64_AAPCS_Swift_ArgRegs, Reg);
      }
    }
    if (!IsVarArg) {
      switch (CC) {
      default:
        return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg);
      case CallingConv::Swift:
      case CallingConv::SwiftTail:
        return HasReg(CC_AArch64_DarwinPCS_ArgRegs, Reg) ||
               HasReg(CC_AArch64_DarwinPCS_Swift_ArgRegs, Reg);
      }
    }
    if (STI.isTargetILP32())
      return HasReg(CC_AArch64_DarwinPCS_ILP32_VarArg_ArgRegs, Reg);
    return HasReg(CC_AArch64_DarwinPCS_VarArg_ArgRegs, Reg);
  case CallingConv::Win64:
    if (IsVarArg)
      return HasReg(CC_AArch64_Win64_VarArg_ArgRegs, Reg);
    return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
  case CallingConv::CFGuard_Check:
    return HasReg(CC_AArch64_Win64_CFGuard_Check_ArgRegs, Reg);
  case CallingConv::AArch64_VectorCall:
  case CallingConv::AArch64_SVE_VectorCall:
  case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0:
  case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1:
  case CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2:
    if (STI.isTargetWindows())
      return HasReg(CC_AArch64_Win64PCS_ArgRegs, Reg);
    return HasReg(CC_AArch64_AAPCS_ArgRegs, Reg);
  }
}

Register
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const AArch64FrameLowering *TFI = getFrameLowering(MF);
  return TFI->hasFP(MF) ?
AArch64::FP : AArch64::SP; 760 } 761 762 bool AArch64RegisterInfo::requiresRegisterScavenging( 763 const MachineFunction &MF) const { 764 return true; 765 } 766 767 bool AArch64RegisterInfo::requiresVirtualBaseRegisters( 768 const MachineFunction &MF) const { 769 return true; 770 } 771 772 bool 773 AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { 774 // This function indicates whether the emergency spillslot should be placed 775 // close to the beginning of the stackframe (closer to FP) or the end 776 // (closer to SP). 777 // 778 // The beginning works most reliably if we have a frame pointer. 779 // In the presence of any non-constant space between FP and locals, 780 // (e.g. in case of stack realignment or a scalable SVE area), it is 781 // better to use SP or BP. 782 const AArch64FrameLowering &TFI = *getFrameLowering(MF); 783 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 784 assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() || 785 AFI->hasCalculatedStackSizeSVE()) && 786 "Expected SVE area to be calculated by this point"); 787 return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() && 788 !AFI->hasStackHazardSlotIndex(); 789 } 790 791 bool AArch64RegisterInfo::requiresFrameIndexScavenging( 792 const MachineFunction &MF) const { 793 return true; 794 } 795 796 bool 797 AArch64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { 798 const MachineFrameInfo &MFI = MF.getFrameInfo(); 799 if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI.adjustsStack()) 800 return true; 801 return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken(); 802 } 803 804 /// needsFrameBaseReg - Returns true if the instruction's frame index 805 /// reference would be better served by a base register other than FP 806 /// or SP. Used by LocalStackFrameAllocation to determine which frame index 807 /// references it should create new base registers for. 808 bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, 809 int64_t Offset) const { 810 for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) 811 assert(i < MI->getNumOperands() && 812 "Instr doesn't have FrameIndex operand!"); 813 814 // It's the load/store FI references that cause issues, as it can be difficult 815 // to materialize the offset if it won't fit in the literal field. Estimate 816 // based on the size of the local frame and some conservative assumptions 817 // about the rest of the stack frame (note, this is pre-regalloc, so 818 // we don't know everything for certain yet) whether this offset is likely 819 // to be out of range of the immediate. Return true if so. 820 821 // We only generate virtual base registers for loads and stores, so 822 // return false for everything else. 823 if (!MI->mayLoad() && !MI->mayStore()) 824 return false; 825 826 // Without a virtual base register, if the function has variable sized 827 // objects, all fixed-size local references will be via the frame pointer, 828 // Approximate the offset and see if it's legal for the instruction. 829 // Note that the incoming offset is based on the SP value at function entry, 830 // so it'll be negative. 831 MachineFunction &MF = *MI->getParent()->getParent(); 832 const AArch64FrameLowering *TFI = getFrameLowering(MF); 833 MachineFrameInfo &MFI = MF.getFrameInfo(); 834 835 // Estimate an offset from the frame pointer. 836 // Conservatively assume all GPR callee-saved registers get pushed. 837 // FP, LR, X19-X28, D8-D15. 64-bits each. 
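  // (That is roughly 20 registers, budgeted conservatively at 16 bytes apiece
  // in the estimate below.)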
  int64_t FPOffset = Offset - 16 * 20;
  // Estimate an offset from the stack pointer.
  // The incoming offset is relative to the SP at the start of the function,
  // but when we access the local it'll be relative to the SP after local
  // allocation, so adjust our SP-relative offset by that allocation size.
  Offset += MFI.getLocalFrameSize();
  // Assume that we'll have at least some spill slots allocated.
  // FIXME: This is a total SWAG number. We should run some statistics
  // and pick a real one.
  Offset += 128; // 128 bytes of spill slots

  // If there is a frame pointer, try using it.
  // The FP is only available if there is no dynamic realignment. We
  // don't know for sure yet whether we'll need that, so we guess based
  // on whether there are any local variables that would trigger it.
  if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset))
    return false;

  // If we can reference via the stack pointer or base pointer, try that.
  // FIXME: This (and the code that resolves the references) can be improved
  //        to only disallow SP relative references in the live range of
  //        the VLA(s). In practice, it's unclear how much difference that
  //        would make, but it may be worth doing.
  if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
    return false;

  // If even offset 0 is illegal, we don't want a virtual base register.
  if (!isFrameOffsetLegal(MI, AArch64::SP, 0))
    return false;

  // The offset likely isn't legal; we want to allocate a virtual base
  // register.
  return true;
}

bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                             Register BaseReg,
                                             int64_t Offset) const {
  assert(MI && "Unable to get the legal offset for nil instruction.");
  StackOffset SaveOffset = StackOffset::getFixed(Offset);
  return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal;
}

/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
/// at the beginning of the basic block.
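/// The pointer is produced by an ADDXri of the frame index plus the given
/// offset, written to a fresh GPR64sp virtual register.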
Register
AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                  int FrameIdx,
                                                  int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"
  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();
  const MachineFunction &MF = *MBB->getParent();
  const AArch64InstrInfo *TII =
      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  Register BaseReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);

  BuildMI(*MBB, Ins, DL, MCID, BaseReg)
      .addFrameIndex(FrameIdx)
      .addImm(Offset)
      .addImm(Shifter);

  return BaseReg;
}

void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                                            int64_t Offset) const {
  // AArch64 doesn't need the general 64-bit offsets.
  StackOffset Off = StackOffset::getFixed(Offset);

  unsigned i = 0;
  while (!MI.getOperand(i).isFI()) {
    ++i;
    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
  }

  const MachineFunction *MF = MI.getParent()->getParent();
  const AArch64InstrInfo *TII =
      MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
  bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII);
  assert(Done && "Unable to resolve frame index!");
  (void)Done;
}

// Create a scratch register for the frame index elimination in an instruction.
// This function has special handling of stack tagging loop pseudos, in which
// case it can also change the instruction opcode.
static Register
createScratchRegisterForInstruction(MachineInstr &MI, unsigned FIOperandNum,
                                    const AArch64InstrInfo *TII) {
  // ST*Gloop have a reserved scratch register in operand 1. Use it, and also
  // replace the instruction with the writeback variant because it will now
  // satisfy the operand constraints for it.
  Register ScratchReg;
  if (MI.getOpcode() == AArch64::STGloop ||
      MI.getOpcode() == AArch64::STZGloop) {
    assert(FIOperandNum == 3 &&
           "Wrong frame index operand for STGloop/STZGloop");
    unsigned Op = MI.getOpcode() == AArch64::STGloop ? AArch64::STGloop_wback
                                                     : AArch64::STZGloop_wback;
    ScratchReg = MI.getOperand(1).getReg();
    MI.getOperand(3).ChangeToRegister(ScratchReg, false, false, true);
    MI.setDesc(TII->get(Op));
    MI.tieOperands(1, 3);
  } else {
    ScratchReg =
        MI.getMF()->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
    MI.getOperand(FIOperandNum)
        .ChangeToRegister(ScratchReg, false, false, true);
  }
  return ScratchReg;
}

void AArch64RegisterInfo::getOffsetOpcodes(
    const StackOffset &Offset, SmallVectorImpl<uint64_t> &Ops) const {
  // The smallest scalable element supported by scaled SVE addressing modes is
  // the predicate, which is 2 scalable bytes in size. So the scalable byte
  // offset must always be a multiple of 2.
  assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");

  // Add fixed-sized offset using existing DIExpression interface.
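  // Purely illustrative example: a StackOffset of 8 fixed bytes plus 32
  // scalable bytes (i.e. 8 + 16 * VG bytes) would come out roughly as:
  //   DW_OP_plus_uconst 8, DW_OP_constu 16, DW_OP_bregx VG 0, DW_OP_mul,
  //   DW_OP_plus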
  DIExpression::appendOffset(Ops, Offset.getFixed());

  unsigned VG = getDwarfRegNum(AArch64::VG, true);
  int64_t VGSized = Offset.getScalable() / 2;
  if (VGSized > 0) {
    Ops.push_back(dwarf::DW_OP_constu);
    Ops.push_back(VGSized);
    Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
    Ops.push_back(dwarf::DW_OP_mul);
    Ops.push_back(dwarf::DW_OP_plus);
  } else if (VGSized < 0) {
    Ops.push_back(dwarf::DW_OP_constu);
    Ops.push_back(-VGSized);
    Ops.append({dwarf::DW_OP_bregx, VG, 0ULL});
    Ops.push_back(dwarf::DW_OP_mul);
    Ops.push_back(dwarf::DW_OP_minus);
  }
}

bool AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                              int SPAdj, unsigned FIOperandNum,
                                              RegScavenger *RS) const {
  assert(SPAdj == 0 && "Unexpected");

  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64InstrInfo *TII =
      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
  const AArch64FrameLowering *TFI = getFrameLowering(MF);
  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
  bool Tagged =
      MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED;
  Register FrameReg;

  // Special handling of dbg_value, stackmap, patchpoint, and statepoint
  // instructions.
  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
      MI.getOpcode() == TargetOpcode::PATCHPOINT ||
      MI.getOpcode() == TargetOpcode::STATEPOINT) {
    StackOffset Offset =
        TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
                                        /*PreferFP=*/true,
                                        /*ForSimm=*/false);
    Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
    MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
    return false;
  }

  if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
    MachineOperand &FI = MI.getOperand(FIOperandNum);
    StackOffset Offset = TFI->getNonLocalFrameIndexReference(MF, FrameIndex);
    assert(!Offset.getScalable() &&
           "Frame offsets with a scalable component are not supported");
    FI.ChangeToImmediate(Offset.getFixed());
    return false;
  }

  StackOffset Offset;
  if (MI.getOpcode() == AArch64::TAGPstack) {
    // TAGPstack must use the virtual frame register in its 3rd operand.
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    FrameReg = MI.getOperand(3).getReg();
    Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) +
                                   AFI->getTaggedBasePointerOffset());
  } else if (Tagged) {
    StackOffset SPOffset = StackOffset::getFixed(
        MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize());
    if (MFI.hasVarSizedObjects() ||
        isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) !=
            (AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) {
      // Can't update to SP + offset in place. Precalculate the tagged pointer
      // in a scratch register.
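      // (Sketch of what follows: materialize FrameReg + Offset into a scratch
      // register, then use LDG to update that register's tag with the
      // allocation tag stored for the address.)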
1037 Offset = TFI->resolveFrameIndexReference( 1038 MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true); 1039 Register ScratchReg = 1040 MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); 1041 emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, 1042 TII); 1043 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg) 1044 .addReg(ScratchReg) 1045 .addReg(ScratchReg) 1046 .addImm(0); 1047 MI.getOperand(FIOperandNum) 1048 .ChangeToRegister(ScratchReg, false, false, true); 1049 return false; 1050 } 1051 FrameReg = AArch64::SP; 1052 Offset = StackOffset::getFixed(MFI.getObjectOffset(FrameIndex) + 1053 (int64_t)MFI.getStackSize()); 1054 } else { 1055 Offset = TFI->resolveFrameIndexReference( 1056 MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true); 1057 } 1058 1059 // Modify MI as necessary to handle as much of 'Offset' as possible 1060 if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) 1061 return true; 1062 1063 assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) && 1064 "Emergency spill slot is out of reach"); 1065 1066 // If we get here, the immediate doesn't fit into the instruction. We folded 1067 // as much as possible above. Handle the rest, providing a register that is 1068 // SP+LargeImm. 1069 Register ScratchReg = 1070 createScratchRegisterForInstruction(MI, FIOperandNum, TII); 1071 emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); 1072 return false; 1073 } 1074 1075 unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, 1076 MachineFunction &MF) const { 1077 const AArch64FrameLowering *TFI = getFrameLowering(MF); 1078 1079 switch (RC->getID()) { 1080 default: 1081 return 0; 1082 case AArch64::GPR32RegClassID: 1083 case AArch64::GPR32spRegClassID: 1084 case AArch64::GPR32allRegClassID: 1085 case AArch64::GPR64spRegClassID: 1086 case AArch64::GPR64allRegClassID: 1087 case AArch64::GPR64RegClassID: 1088 case AArch64::GPR32commonRegClassID: 1089 case AArch64::GPR64commonRegClassID: 1090 return 32 - 1 // XZR/SP 1091 - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP 1092 - MF.getSubtarget<AArch64Subtarget>().getNumXRegisterReserved() 1093 - hasBasePointer(MF); // X19 1094 case AArch64::FPR8RegClassID: 1095 case AArch64::FPR16RegClassID: 1096 case AArch64::FPR32RegClassID: 1097 case AArch64::FPR64RegClassID: 1098 case AArch64::FPR128RegClassID: 1099 return 32; 1100 1101 case AArch64::MatrixIndexGPR32_8_11RegClassID: 1102 case AArch64::MatrixIndexGPR32_12_15RegClassID: 1103 return 4; 1104 1105 case AArch64::DDRegClassID: 1106 case AArch64::DDDRegClassID: 1107 case AArch64::DDDDRegClassID: 1108 case AArch64::QQRegClassID: 1109 case AArch64::QQQRegClassID: 1110 case AArch64::QQQQRegClassID: 1111 return 32; 1112 1113 case AArch64::FPR128_loRegClassID: 1114 case AArch64::FPR64_loRegClassID: 1115 case AArch64::FPR16_loRegClassID: 1116 return 16; 1117 case AArch64::FPR128_0to7RegClassID: 1118 return 8; 1119 } 1120 } 1121 1122 // FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register allocation 1123 // where a consecutive multi-vector tuple is constructed from the same indices 1124 // of multiple strided loads. This may still result in unnecessary copies 1125 // between the loads and the tuple. Here we try to return a hint to assign the 1126 // contiguous ZPRMulReg starting at the same register as the first operand of 1127 // the pseudo, which should be a subregister of the first strided load. 
1128 // 1129 // For example, if the first strided load has been assigned $z16_z20_z24_z28 1130 // and the operands of the pseudo are each accessing subregister zsub2, we 1131 // should look through through Order to find a contiguous register which 1132 // begins with $z24 (i.e. $z24_z25_z26_z27). 1133 // 1134 bool AArch64RegisterInfo::getRegAllocationHints( 1135 Register VirtReg, ArrayRef<MCPhysReg> Order, 1136 SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, 1137 const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { 1138 1139 auto &ST = MF.getSubtarget<AArch64Subtarget>(); 1140 if (!ST.hasSME() || !ST.isStreaming()) 1141 return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, 1142 VRM); 1143 1144 // The SVE calling convention preserves registers Z8-Z23. As a result, there 1145 // are no ZPR2Strided or ZPR4Strided registers that do not overlap with the 1146 // callee-saved registers and so by default these will be pushed to the back 1147 // of the allocation order for the ZPRStridedOrContiguous classes. 1148 // If any of the instructions which define VirtReg are used by the 1149 // FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy 1150 // instructions over reducing the number of clobbered callee-save registers, 1151 // so we add the strided registers as a hint. 1152 const MachineRegisterInfo &MRI = MF.getRegInfo(); 1153 unsigned RegID = MRI.getRegClass(VirtReg)->getID(); 1154 if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID || 1155 RegID == AArch64::ZPR4StridedOrContiguousRegClassID) { 1156 1157 // Look through uses of the register for FORM_TRANSPOSED_REG_TUPLE. 1158 for (const MachineInstr &Use : MRI.use_nodbg_instructions(VirtReg)) { 1159 if (Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO && 1160 Use.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) 1161 continue; 1162 1163 unsigned UseOps = Use.getNumOperands() - 1; 1164 const TargetRegisterClass *StridedRC; 1165 switch (RegID) { 1166 case AArch64::ZPR2StridedOrContiguousRegClassID: 1167 StridedRC = &AArch64::ZPR2StridedRegClass; 1168 break; 1169 case AArch64::ZPR4StridedOrContiguousRegClassID: 1170 StridedRC = &AArch64::ZPR4StridedRegClass; 1171 break; 1172 default: 1173 llvm_unreachable("Unexpected RegID"); 1174 } 1175 1176 SmallVector<MCPhysReg, 4> StridedOrder; 1177 for (MCPhysReg Reg : Order) 1178 if (StridedRC->contains(Reg)) 1179 StridedOrder.push_back(Reg); 1180 1181 int OpIdx = Use.findRegisterUseOperandIdx(VirtReg, this); 1182 assert(OpIdx != -1 && "Expected operand index from register use."); 1183 1184 unsigned TupleID = MRI.getRegClass(Use.getOperand(0).getReg())->getID(); 1185 bool IsMulZPR = TupleID == AArch64::ZPR2Mul2RegClassID || 1186 TupleID == AArch64::ZPR4Mul4RegClassID; 1187 1188 const MachineOperand *AssignedRegOp = llvm::find_if( 1189 make_range(Use.operands_begin() + 1, Use.operands_end()), 1190 [&VRM](const MachineOperand &Op) { 1191 return VRM->hasPhys(Op.getReg()); 1192 }); 1193 1194 // Example: 1195 // 1196 // When trying to find a suitable register allocation for VirtReg %v2 in: 1197 // 1198 // %v0:zpr2stridedorcontiguous = ld1 p0/z, [...] 1199 // %v1:zpr2stridedorcontiguous = ld1 p0/z, [...] 1200 // %v2:zpr2stridedorcontiguous = ld1 p0/z, [...] 1201 // %v3:zpr2stridedorcontiguous = ld1 p0/z, [...] 1202 // %v4:zpr4mul4 = FORM_TRANSPOSED_X4 %v0:0, %v1:0, %v2:0, %v3:0 1203 // 1204 // One such suitable allocation would be: 1205 // 1206 // { z0, z8 } = ld1 p0/z, [...] 1207 // { z1, z9 } = ld1 p0/z, [...] 
1208 // { z2, z10 } = ld1 p0/z, [...] 1209 // { z3, z11 } = ld1 p0/z, [...] 1210 // { z0, z1, z2, z3 } = 1211 // FORM_TRANSPOSED_X4 {z0, z8}:0, {z1, z9}:0, {z2, z10}:0, {z3, z11}:0 1212 // 1213 // Below we distinguish two cases when trying to find a register: 1214 // * None of the registers used by FORM_TRANSPOSED_X4 have been assigned 1215 // yet. In this case the code muse ensure that there are at least UseOps 1216 // free consecutive registers. If IsMulZPR is true, then the first of 1217 // registers must also be a multiple of UseOps, e.g. { z0, z1, z2, z3 } 1218 // is valid but { z1, z2, z3, z5 } is not. 1219 // * One or more of the registers used by FORM_TRANSPOSED_X4 is already 1220 // assigned a physical register, which means only checking that a 1221 // consecutive range of free tuple registers exists which includes 1222 // the assigned register. 1223 // e.g. in the example above, if { z0, z8 } is already allocated for 1224 // %v0, we just need to ensure that { z1, z9 }, { z2, z10 } and 1225 // { z3, z11 } are also free. If so, we add { z2, z10 }. 1226 1227 if (AssignedRegOp == Use.operands_end()) { 1228 // There are no registers already assigned to any of the pseudo 1229 // operands. Look for a valid starting register for the group. 1230 for (unsigned I = 0; I < StridedOrder.size(); ++I) { 1231 MCPhysReg Reg = StridedOrder[I]; 1232 1233 // If the FORM_TRANSPOSE nodes use the ZPRMul classes, the starting 1234 // register of the first load should be a multiple of 2 or 4. 1235 unsigned SubRegIdx = Use.getOperand(OpIdx).getSubReg(); 1236 if (IsMulZPR && (getSubReg(Reg, SubRegIdx) - AArch64::Z0) % UseOps != 1237 ((unsigned)OpIdx - 1)) 1238 continue; 1239 1240 // In the example above, if VirtReg is the third operand of the 1241 // tuple (%v2) and Reg == Z2_Z10, then we need to make sure that 1242 // Z0_Z8, Z1_Z9 and Z3_Z11 are also available. 1243 auto IsFreeConsecutiveReg = [&](unsigned UseOp) { 1244 unsigned R = Reg - (OpIdx - 1) + UseOp; 1245 return StridedRC->contains(R) && 1246 (UseOp == 0 || 1247 ((getSubReg(R, AArch64::zsub0) - AArch64::Z0) == 1248 (getSubReg(R - 1, AArch64::zsub0) - AArch64::Z0) + 1)) && 1249 !Matrix->isPhysRegUsed(R); 1250 }; 1251 if (all_of(iota_range<unsigned>(0U, UseOps, /*Inclusive=*/false), 1252 IsFreeConsecutiveReg)) 1253 Hints.push_back(Reg); 1254 } 1255 } else { 1256 // At least one operand already has a physical register assigned. 1257 // Find the starting sub-register of this and use it to work out the 1258 // correct strided register to suggest based on the current op index. 
1259 MCPhysReg TargetStartReg = 1260 getSubReg(VRM->getPhys(AssignedRegOp->getReg()), AArch64::zsub0) + 1261 (OpIdx - AssignedRegOp->getOperandNo()); 1262 1263 for (unsigned I = 0; I < StridedOrder.size(); ++I) 1264 if (getSubReg(StridedOrder[I], AArch64::zsub0) == TargetStartReg) 1265 Hints.push_back(StridedOrder[I]); 1266 } 1267 1268 if (!Hints.empty()) 1269 return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, 1270 MF, VRM); 1271 } 1272 } 1273 1274 for (MachineInstr &MI : MRI.def_instructions(VirtReg)) { 1275 if (MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO && 1276 MI.getOpcode() != AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO) 1277 return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, 1278 MF, VRM); 1279 1280 unsigned FirstOpSubReg = MI.getOperand(1).getSubReg(); 1281 switch (FirstOpSubReg) { 1282 case AArch64::zsub0: 1283 case AArch64::zsub1: 1284 case AArch64::zsub2: 1285 case AArch64::zsub3: 1286 break; 1287 default: 1288 continue; 1289 } 1290 1291 // Look up the physical register mapped to the first operand of the pseudo. 1292 Register FirstOpVirtReg = MI.getOperand(1).getReg(); 1293 if (!VRM->hasPhys(FirstOpVirtReg)) 1294 continue; 1295 1296 MCRegister TupleStartReg = 1297 getSubReg(VRM->getPhys(FirstOpVirtReg), FirstOpSubReg); 1298 for (unsigned I = 0; I < Order.size(); ++I) 1299 if (MCRegister R = getSubReg(Order[I], AArch64::zsub0)) 1300 if (R == TupleStartReg) 1301 Hints.push_back(Order[I]); 1302 } 1303 1304 return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, 1305 VRM); 1306 } 1307 1308 unsigned AArch64RegisterInfo::getLocalAddressRegister( 1309 const MachineFunction &MF) const { 1310 const auto &MFI = MF.getFrameInfo(); 1311 if (!MF.hasEHFunclets() && !MFI.hasVarSizedObjects()) 1312 return AArch64::SP; 1313 else if (hasStackRealignment(MF)) 1314 return getBaseRegister(); 1315 return getFrameRegister(MF); 1316 } 1317 1318 /// SrcRC and DstRC will be morphed into NewRC if this returns true 1319 bool AArch64RegisterInfo::shouldCoalesce( 1320 MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, 1321 const TargetRegisterClass *DstRC, unsigned DstSubReg, 1322 const TargetRegisterClass *NewRC, LiveIntervals &LIS) const { 1323 MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); 1324 1325 if (MI->isCopy() && 1326 ((DstRC->getID() == AArch64::GPR64RegClassID) || 1327 (DstRC->getID() == AArch64::GPR64commonRegClassID)) && 1328 MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg()) 1329 // Do not coalesce in the case of a 32-bit subregister copy 1330 // which implements a 32 to 64 bit zero extension 1331 // which relies on the upper 32 bits being zeroed. 1332 return false; 1333 1334 auto IsCoalescerBarrier = [](const MachineInstr &MI) { 1335 switch (MI.getOpcode()) { 1336 case AArch64::COALESCER_BARRIER_FPR16: 1337 case AArch64::COALESCER_BARRIER_FPR32: 1338 case AArch64::COALESCER_BARRIER_FPR64: 1339 case AArch64::COALESCER_BARRIER_FPR128: 1340 return true; 1341 default: 1342 return false; 1343 } 1344 }; 1345 1346 // For calls that temporarily have to toggle streaming mode as part of the 1347 // call-sequence, we need to be more careful when coalescing copy instructions 1348 // so that we don't end up coalescing the NEON/FP result or argument register 1349 // with a whole Z-register, such that after coalescing the register allocator 1350 // will try to spill/reload the entire Z register. 
1351 // 1352 // We do this by checking if the node has any defs/uses that are 1353 // COALESCER_BARRIER pseudos. These are 'nops' in practice, but they exist to 1354 // instruct the coalescer to avoid coalescing the copy. 1355 if (MI->isCopy() && SubReg != DstSubReg && 1356 (AArch64::ZPRRegClass.hasSubClassEq(DstRC) || 1357 AArch64::ZPRRegClass.hasSubClassEq(SrcRC))) { 1358 unsigned SrcReg = MI->getOperand(1).getReg(); 1359 if (any_of(MRI.def_instructions(SrcReg), IsCoalescerBarrier)) 1360 return false; 1361 unsigned DstReg = MI->getOperand(0).getReg(); 1362 if (any_of(MRI.use_nodbg_instructions(DstReg), IsCoalescerBarrier)) 1363 return false; 1364 } 1365 1366 return true; 1367 } 1368 1369 bool AArch64RegisterInfo::shouldAnalyzePhysregInMachineLoopInfo( 1370 MCRegister R) const { 1371 return R == AArch64::VG; 1372 } 1373 1374 bool AArch64RegisterInfo::isIgnoredCVReg(MCRegister LLVMReg) const { 1375 return (LLVMReg >= AArch64::Z0 && LLVMReg <= AArch64::Z31) || 1376 (LLVMReg >= AArch64::P0 && LLVMReg <= AArch64::P15); 1377 } 1378