//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/SipHash.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool> MachOUseNonLazyBind(
    "aarch64-macho-enable-nonlazybind",
    cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
    cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));

static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);

// Reserve a list of X# registers, so they are unavailable to the register
// allocator but can still be used to satisfy ABI requirements, such as
// passing arguments to a function call.
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by register allocator. "
                  "Should only be used for testing register allocator."),
                  cl::CommaSeparated, cl::Hidden);

static cl::opt<AArch64PAuth::AuthCheckMethod>
    AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
                               cl::Hidden,
                               cl::desc("Override the variant of check applied "
                                        "to authenticated LR during tail call"),
                               cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));

static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
    "aarch64-min-jump-table-entries", cl::init(10), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on AArch64"));

static cl::opt<unsigned> AArch64StreamingHazardSize(
    "aarch64-streaming-hazard-size",
    cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
    cl::init(0), cl::Hidden);
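
// When nonzero, the hazard size is a hint to frame lowering to keep
// GPR-accessed and FPR/SVE-accessed stack objects that many bytes apart,
// e.g. (an illustrative invocation, not a tuned default):
//   llc -mattr=+sme -aarch64-streaming-hazard-size=1024 ...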

static cl::alias AArch64StreamingStackHazardSize(
    "aarch64-stack-hazard-size",
    cl::desc("alias for -aarch64-streaming-hazard-size"),
    cl::aliasopt(AArch64StreamingHazardSize));

static cl::opt<bool> EnableZPRPredicateSpills(
    "aarch64-enable-zpr-predicate-spills", cl::init(false), cl::Hidden,
    cl::desc(
        "Enables spilling/reloading SVE predicates as data vectors (ZPRs)"));

// Subreg liveness tracking is disabled by default for now until all issues
// are ironed out. This option allows the feature to be used in tests.
static cl::opt<bool>
    EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
                                 cl::init(false), cl::Hidden,
                                 cl::desc("Enable subreg liveness tracking"));

static cl::opt<bool>
    UseScalarIncVL("sve-use-scalar-inc-vl", cl::init(false), cl::Hidden,
                   cl::desc("Prefer add+cnt over addvl/inc/dec"));

unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
  if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
    return OverrideVectorInsertExtractBaseCost;
  return VectorInsertExtractBaseCost;
}

AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
    StringRef FS, StringRef CPUString, StringRef TuneCPUString,
    bool HasMinSize) {
  // Determine default and user-specified characteristics.

  if (CPUString.empty())
    CPUString = "generic";

  if (TuneCPUString.empty())
    TuneCPUString = CPUString;

  ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
  initializeProperties(HasMinSize);

  return *this;
}
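
// For example, CPUString = "generic" with TuneCPUString = "neoverse-v2"
// parses the generic feature set above but picks up the Neoverse V2 tuning
// block in initializeProperties() below; an empty tune CPU falls back to
// CPUString.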

void AArch64Subtarget::initializeProperties(bool HasMinSize) {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Generic:
    // With TuneCPU=generic we avoid ldapur instructions, to line up with the
    // CPUs that use the AvoidLDAPUR feature. We don't want this to be on
    // forever, so it is only enabled between armv8.4 and armv8.7/armv9.2.
    if (hasV8_4aOps() && !hasV8_8aOps())
      AvoidLDAPUR = true;
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
  case CortexR82:
  case CortexR82AE:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionAlignment = Align(8);
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78AE:
  case CortexA78C:
  case CortexX1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA320:
  case CortexA510:
  case CortexA520:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexA720:
  case CortexA725:
  case CortexX2:
  case CortexX3:
  case CortexX4:
  case CortexX925:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case MONAKA:
    VScaleForTuning = 2;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
  case AppleA17:
  case AppleM4:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    switch (ARMProcFamily) {
    case AppleA14:
    case AppleA15:
    case AppleA16:
    case AppleA17:
    case AppleM4:
      MaxInterleaveFactor = 4;
      break;
    default:
      break;
    }
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionAlignment = Align(32);
    PrefLoopAlignment = Align(16);
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionAlignment = Align(8);
    break;
  case NeoverseN1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseV2:
  case NeoverseV3:
    CacheLineSize = 64;
    EpilogueVectorizationMinVF = 8;
    MaxInterleaveFactor = 4;
    ScatterOverhead = 13;
    [[fallthrough]];
  case NeoverseN2:
  case NeoverseN3:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    DefaultSVETFOpts = TailFoldingOpts::Simple;
    break;
  case Neoverse512TVB:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    MaxInterleaveFactor = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
  case Ampere1B:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(64);
    PrefLoopAlignment = Align(64);
    MaxInterleaveFactor = 4;
    break;
  case Oryon:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    break;
  case Olympus:
    EpilogueVectorizationMinVF = 8;
    MaxInterleaveFactor = 4;
    ScatterOverhead = 13;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  }

  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
    MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool IsStreaming, bool IsStreamingCompatible,
                                   bool HasMinSize)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian), IsStreaming(IsStreaming),
      IsStreamingCompatible(IsStreamingCompatible),
      StreamingHazardSize(
          AArch64StreamingHazardSize.getNumOccurrences() > 0
              ? std::optional<unsigned>(AArch64StreamingHazardSize)
              : std::nullopt),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
      TLInfo(TM, *this) {
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames(llvm::from_range, ReservedRegsForRA);
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);

  EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
}
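
// Compute the set of tablegen-defined hardware modes that are active for this
// subtarget; the generated subtarget code uses this bitmask to select
// mode-dependent properties such as register-class spill size and alignment.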
unsigned AArch64Subtarget::getHwModeSet() const {
  AArch64HwModeBits Modes = AArch64HwModeBits::DefaultMode;

  // Use a special hardware mode in streaming[-compatible] functions with
  // aarch64-enable-zpr-predicate-spills. This changes the spill size (and
  // alignment) for the predicate register class.
  if (EnableZPRPredicateSpills.getValue() &&
      (isStreaming() || isStreamingCompatible())) {
    Modes |= AArch64HwModeBits::SMEWithZPRPredicateSpills;
  }

  return to_underlying(Modes);
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  // All globals dynamically protected by MTE must have their address tags
  // synthesized. This is done by having the loader stash the tag in the GOT
  // entry. Force all tagged globals (even ones with internal linkage) through
  // the GOT.
  if (GV->isTagged())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(GV)) {
    if (GV->hasDLLImportStorageClass()) {
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a PC-relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}
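
// For reference, an MO_GOT-classified access typically lowers to an indirect
// load through the GOT:
//   adrp x8, :got:sym
//   ldr  x8, [x8, :got_lo12:sym]
// whereas MO_NO_FLAG permits a direct adrp/add pair in the small code model:
//   adrp x8, sym
//   add  x8, x8, :lo12:sym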

unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
      F->hasFnAttribute(Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
      if (GV->hasDLLImportStorageClass()) {
        // On Arm64EC, if we're calling a symbol from the import table
        // directly, use MO_ARM64EC_CALLMANGLE.
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT |
               AArch64II::MO_ARM64EC_CALLMANGLE;
      }
      if (GV->hasExternalLinkage()) {
        // If we're calling a symbol directly, use the mangled form in the
        // call instruction.
        return AArch64II::MO_ARM64EC_CALLMANGLE;
      }
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // LNT runs (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling, e.g. on 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help nearly no benchmark on out-of-order architectures, while on the
  // other hand it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

void AArch64Subtarget::adjustSchedDependency(
    SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (!SchedModel || Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
      !Def->isInstr() || !Use->isInstr() ||
      (Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE &&
       Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE))
    return;

  // If the Def is a BUNDLE, find the last instruction in the bundle that defs
  // the register.
  const MachineInstr *DefMI = Def->getInstr();
  if (DefMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = DefMI->getOperand(DefOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*DefMI)) {
      if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) {
        DefMI = Op.getParent();
        DefOpIdx = Op.getOperandNo();
      }
    }
  }

  // If the Use is a BUNDLE, find the first instruction that uses the Reg.
  const MachineInstr *UseMI = Use->getInstr();
  if (UseMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = UseMI->getOperand(UseOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*UseMI)) {
      if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) {
        UseMI = Op.getParent();
        UseOpIdx = Op.getOperandNo();
        break;
      }
    }
  }

  Dep.setLatency(
      SchedModel->computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx));
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isDriverKit())
    return true;
  if (TargetTriple.isiOS()) {
    return TargetTriple.getiOSVersion() >= VersionTuple(8);
  }

  return false;
}
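
// With TBI (top-byte ignore), hardware ignores bits [63:56] of a pointer on
// data accesses, so e.g. 0xAB00'0000'DEAD'BEEF and 0x0000'0000'DEAD'BEEF
// address the same memory. When this hook returns true, codegen may keep
// pointer tag bits in place instead of masking them off.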

std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}

bool AArch64Subtarget::useAA() const { return UseAA; }

bool AArch64Subtarget::useScalarIncVL() const {
  // If SVE2 or SME is present (we are not SVE-1 only) and UseScalarIncVL
  // is not otherwise set, enable it by default.
  if (UseScalarIncVL.getNumOccurrences())
    return UseScalarIncVL;
  return hasSVE2() || hasSME();
}

// If return address signing is enabled, tail calls are emitted as follows:
//
// ```
//   <authenticate LR>
//   <check LR>
//   TCRETURN ; the callee may sign and spill the LR in its prologue
// ```
//
// LR may require explicit checking because if FEAT_FPAC is not implemented
// and LR was tampered with, then `<authenticate LR>` will not generate an
// exception on its own. Later, if the callee spills the signed LR value and
// neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
// the higher bits of LR, thus hiding the authentication failure.
AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
    const MachineFunction &MF) const {
  // TODO: Check subtarget for the scheme. The present variant is a default
  // for the pauthtest ABI.
  if (MF.getFunction().hasFnAttribute("ptrauth-returns") &&
      MF.getFunction().hasFnAttribute("ptrauth-auth-traps"))
    return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
  if (AuthenticatedLRCheckMethod.getNumOccurrences())
    return AuthenticatedLRCheckMethod;

  // For now, use None by default because checks may introduce an unexpected
  // performance regression or an incompatibility with execute-only mappings.
  return AArch64PAuth::AuthCheckMethod::None;
}

std::optional<uint16_t>
AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
    const Function &ParentFn) const {
  if (!ParentFn.hasFnAttribute("ptrauth-indirect-gotos"))
    return std::nullopt;
  // We currently have one simple mechanism for all targets.
  // This isn't ABI, so we can always do better in the future.
  return getPointerAuthStableSipHash(
      (Twine(ParentFn.getName()) + " blockaddress").str());
}

bool AArch64Subtarget::isX16X17Safer() const {
  // The Darwin kernel implements special protections for x16 and x17, so we
  // should prefer to use those registers on that platform.
  return isTargetDarwin();
}

bool AArch64Subtarget::enableMachinePipeliner() const {
  return getSchedModel().hasInstrSchedModel();
}