//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/SipHash.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that the top byte "
                         "of an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool> MachOUseNonLazyBind(
    "aarch64-macho-enable-nonlazybind",
    cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
    cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));

static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);

// Reserve a list of X# registers, making them unavailable to the register
// allocator while still usable for ABI requirements such as passing arguments
// to a function call.
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by the register allocator. "
                  "Should only be used for testing the register allocator."),
                  cl::CommaSeparated, cl::Hidden);
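// For example, passing -reserve-regs-for-regalloc=X20,X21 to llc keeps X20
// and X21 out of the allocatable set for that compilation.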

static cl::opt<AArch64PAuth::AuthCheckMethod>
    AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
                               cl::Hidden,
                               cl::desc("Override the variant of check applied "
                                        "to authenticated LR during tail call"),
                               cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));

static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
    "aarch64-min-jump-table-entries", cl::init(10), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on AArch64"));

static cl::opt<unsigned> AArch64StreamingHazardSize(
    "aarch64-streaming-hazard-size",
    cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
    cl::init(0), cl::Hidden);

static cl::alias AArch64StreamingStackHazardSize(
    "aarch64-stack-hazard-size",
    cl::desc("alias for -aarch64-streaming-hazard-size"),
    cl::aliasopt(AArch64StreamingHazardSize));

static cl::opt<bool> EnableZPRPredicateSpills(
    "aarch64-enable-zpr-predicate-spills", cl::init(false), cl::Hidden,
    cl::desc(
        "Enables spilling/reloading SVE predicates as data vectors (ZPRs)"));

// Subreg liveness tracking is disabled by default for now until all issues
// are ironed out. This option allows the feature to be used in tests.
static cl::opt<bool>
    EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
                                 cl::init(false), cl::Hidden,
                                 cl::desc("Enable subreg liveness tracking"));

static cl::opt<bool>
    UseScalarIncVL("sve-use-scalar-inc-vl", cl::init(false), cl::Hidden,
                   cl::desc("Prefer add+cnt over addvl/inc/dec"));

unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
  if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
    return OverrideVectorInsertExtractBaseCost;
  return VectorInsertExtractBaseCost;
}

AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
    StringRef FS, StringRef CPUString, StringRef TuneCPUString,
    bool HasMinSize) {
  // Determine default and user-specified characteristics
  if (CPUString.empty())
    CPUString = "generic";

  if (TuneCPUString.empty())
    TuneCPUString = CPUString;

  ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
  initializeProperties(HasMinSize);

  return *this;
}

void AArch64Subtarget::initializeProperties(bool HasMinSize) {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Generic:
    // Using TuneCPU=generic we avoid ldapur instructions to line up with the
    // CPUs that use the AvoidLDAPUR feature. We don't want this to be on
    // forever, so it is enabled between armv8.4 and armv8.7/armv9.2.
    if (hasV8_4aOps() && !hasV8_8aOps())
      AvoidLDAPUR = true;
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
  case CortexR82:
  case CortexR82AE:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionAlignment = Align(8);
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78AE:
  case CortexA78C:
  case CortexX1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA320:
  case CortexA510:
  case CortexA520:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexA720:
  case CortexA725:
  case CortexX2:
  case CortexX3:
  case CortexX4:
  case CortexX925:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case MONAKA:
    VScaleForTuning = 2;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
  case AppleA17:
  case AppleM4:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
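    // A14 (the core family the M1 also reports) and newer Apple cores
    // additionally get a larger vector interleave factor.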
    switch (ARMProcFamily) {
    case AppleA14:
    case AppleA15:
    case AppleA16:
    case AppleA17:
    case AppleM4:
      MaxInterleaveFactor = 4;
      break;
    default:
      break;
    }
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionAlignment = Align(32);
    PrefLoopAlignment = Align(16);
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionAlignment = Align(8);
    break;
  case NeoverseN1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseV2:
  case NeoverseV3:
    CacheLineSize = 64;
    EpilogueVectorizationMinVF = 8;
    MaxInterleaveFactor = 4;
    ScatterOverhead = 13;
    LLVM_FALLTHROUGH;
  case NeoverseN2:
  case NeoverseN3:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    DefaultSVETFOpts = TailFoldingOpts::Simple;
    break;
  case Neoverse512TVB:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    MaxInterleaveFactor = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
  case Ampere1B:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(64);
    PrefLoopAlignment = Align(64);
    MaxInterleaveFactor = 4;
    break;
  case Oryon:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    break;
  case Olympus:
    EpilogueVectorizationMinVF = 8;
    MaxInterleaveFactor = 4;
    ScatterOverhead = 13;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  }

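  // Apply the -aarch64-min-jump-table-entries value (default 10) unless the
  // function is minsize and the flag was not explicitly given, in which case
  // the existing default is kept.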
  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
    MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool IsStreaming, bool IsStreamingCompatible,
                                   bool HasMinSize)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian), IsStreaming(IsStreaming),
      IsStreamingCompatible(IsStreamingCompatible),
      StreamingHazardSize(
          AArch64StreamingHazardSize.getNumOccurrences() > 0
              ? std::optional<unsigned>(AArch64StreamingHazardSize)
              : std::nullopt),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
      TLInfo(TM, *this) {
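  // X18 is reserved for platform use on several OSes (e.g. Darwin, Windows,
  // Android), so take it out of the allocatable set by default there.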
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames(llvm::from_range, ReservedRegsForRA);
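  // X0-X28 can be matched by their architectural names; X29 and X30 print as
  // FP and LR, so they are special-cased below.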
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);

  EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
}

unsigned AArch64Subtarget::getHwModeSet() const {
  AArch64HwModeBits Modes = AArch64HwModeBits::DefaultMode;

  // Use a special hardware mode in streaming[-compatible] functions with
  // aarch64-enable-zpr-predicate-spills. This changes the spill size (and
  // alignment) for the predicate register class.
  if (EnableZPRPredicateSpills.getValue() &&
      (isStreaming() || isStreamingCompatible())) {
    Modes |= AArch64HwModeBits::SMEWithZPRPredicateSpills;
  }

  return to_underlying(Modes);
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  // All globals dynamically protected by MTE must have their address tags
  // synthesized. This is done by having the loader stash the tag in the GOT
  // entry. Force all tagged globals (even ones with internal linkage) through
  // the GOT.
  if (GV->isTagged())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(GV)) {
    if (GV->hasDLLImportStorageClass()) {
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}

unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
      F->hasFnAttribute(Attribute::NonLazyBind) && !TM.shouldAssumeDSOLocal(GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
      if (GV->hasDLLImportStorageClass()) {
        // On Arm64EC, if we're calling a symbol from the import table
        // directly, use MO_ARM64EC_CALLMANGLE.
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT |
               AArch64II::MO_ARM64EC_CALLMANGLE;
      }
      if (GV->hasExternalLinkage()) {
        // If we're calling a symbol directly, use the mangled form in the
        // call instruction.
        return AArch64II::MO_ARM64EC_CALLMANGLE;
      }
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // An LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling, e.g. on 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help nearly no benchmark on out-of-order architectures, while on the
  // other hand it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

void AArch64Subtarget::adjustSchedDependency(
    SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep,
    const TargetSchedModel *SchedModel) const {
  if (!SchedModel || Dep.getKind() != SDep::Kind::Data || !Dep.getReg() ||
      !Def->isInstr() || !Use->isInstr() ||
      (Def->getInstr()->getOpcode() != TargetOpcode::BUNDLE &&
       Use->getInstr()->getOpcode() != TargetOpcode::BUNDLE))
    return;

  // If the Def is a BUNDLE, find the last instruction in the bundle that defs
  // the register.
  const MachineInstr *DefMI = Def->getInstr();
  if (DefMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = DefMI->getOperand(DefOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*DefMI)) {
      if (Op.isReg() && Op.isDef() && Op.getReg() == Reg) {
        DefMI = Op.getParent();
        DefOpIdx = Op.getOperandNo();
      }
    }
  }

  // If the Use is a BUNDLE, find the first instruction that uses the Reg.
  const MachineInstr *UseMI = Use->getInstr();
  if (UseMI->getOpcode() == TargetOpcode::BUNDLE) {
    Register Reg = UseMI->getOperand(UseOpIdx).getReg();
    for (const auto &Op : const_mi_bundle_ops(*UseMI)) {
      if (Op.isReg() && Op.isUse() && Op.getReg() == Reg) {
        UseMI = Op.getParent();
        UseOpIdx = Op.getOperandNo();
        break;
      }
    }
  }

  Dep.setLatency(
      SchedModel->computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx));
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isDriverKit())
    return true;
  if (TargetTriple.isiOS()) {
    return TargetTriple.getiOSVersion() >= VersionTuple(8);
  }

  return false;
}

std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the call-frame setup/destroy pseudo
  // instructions, so specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}

bool AArch64Subtarget::useAA() const { return UseAA; }

bool AArch64Subtarget::useScalarIncVL() const {
  // If SVE2 or SME is present (we are not SVE-1 only) and UseScalarIncVL
  // is not otherwise set, enable it by default.
  if (UseScalarIncVL.getNumOccurrences())
    return UseScalarIncVL;
  return hasSVE2() || hasSME();
}

// If return address signing is enabled, tail calls are emitted as follows:
//
// ```
// <authenticate LR>
// <check LR>
// TCRETURN ; the callee may sign and spill the LR in its prologue
// ```
//
// LR may require explicit checking because, if FEAT_FPAC is not implemented
// and LR was tampered with, then `<authenticate LR>` will not generate an
// exception on its own. Later, if the callee spills the signed LR value and
// neither FEAT_PAuth2 nor FEAT_EPAC is implemented, the valid PAC replaces
// the higher bits of LR, thus hiding the authentication failure.
AArch64PAuth::AuthCheckMethod AArch64Subtarget::getAuthenticatedLRCheckMethod(
    const MachineFunction &MF) const {
  // TODO: Check subtarget for the scheme. The present variant is the default
  // for the pauthtest ABI.
  if (MF.getFunction().hasFnAttribute("ptrauth-returns") &&
      MF.getFunction().hasFnAttribute("ptrauth-auth-traps"))
    return AArch64PAuth::AuthCheckMethod::HighBitsNoTBI;
  if (AuthenticatedLRCheckMethod.getNumOccurrences())
    return AuthenticatedLRCheckMethod;

  // For now, use None by default because checks may introduce an unexpected
  // performance regression or incompatibility with execute-only mappings.
  return AArch64PAuth::AuthCheckMethod::None;
}

std::optional<uint16_t>
AArch64Subtarget::getPtrAuthBlockAddressDiscriminatorIfEnabled(
    const Function &ParentFn) const {
  if (!ParentFn.hasFnAttribute("ptrauth-indirect-gotos"))
    return std::nullopt;
  // We currently have one simple mechanism for all targets.
  // This isn't ABI, so we can always do better in the future.
  return getPointerAuthStableSipHash(
      (Twine(ParentFn.getName()) + " blockaddress").str());
}

bool AArch64Subtarget::isX16X17Safer() const {
  // The Darwin kernel implements special protections for x16 and x17, so we
  // should prefer to use those registers on that platform.
  return isTargetDarwin();
}

bool AArch64Subtarget::enableMachinePipeliner() const {
  return getSchedModel().hasInstrSchedModel();
}