xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
10b57cec5SDimitry Andric //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the AArch64 specific subclass of TargetSubtarget.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "AArch64Subtarget.h"
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AArch64.h"
160b57cec5SDimitry Andric #include "AArch64InstrInfo.h"
170b57cec5SDimitry Andric #include "AArch64PBQPRegAlloc.h"
180b57cec5SDimitry Andric #include "AArch64TargetMachine.h"
195ffd83dbSDimitry Andric #include "GISel/AArch64CallLowering.h"
205ffd83dbSDimitry Andric #include "GISel/AArch64LegalizerInfo.h"
215ffd83dbSDimitry Andric #include "GISel/AArch64RegisterBankInfo.h"
220b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
2481ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h"
260b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h"
2704eeddc0SDimitry Andric #include "llvm/Support/AArch64TargetParser.h"
280b57cec5SDimitry Andric #include "llvm/Support/TargetParser.h"
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric using namespace llvm;
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-subtarget"
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR
350b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC
360b57cec5SDimitry Andric #include "AArch64GenSubtargetInfo.inc"
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric static cl::opt<bool>
390b57cec5SDimitry Andric EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
400b57cec5SDimitry Andric                      "converter pass"), cl::init(true), cl::Hidden);
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric // If OS supports TBI, use this flag to enable it.
430b57cec5SDimitry Andric static cl::opt<bool>
440b57cec5SDimitry Andric UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
450b57cec5SDimitry Andric                          "an address is ignored"), cl::init(false), cl::Hidden);
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric static cl::opt<bool>
480b57cec5SDimitry Andric     UseNonLazyBind("aarch64-enable-nonlazybind",
490b57cec5SDimitry Andric                    cl::desc("Call nonlazybind functions via direct GOT load"),
500b57cec5SDimitry Andric                    cl::init(false), cl::Hidden);
510b57cec5SDimitry Andric 
52fe6060f1SDimitry Andric static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
53fe6060f1SDimitry Andric                            cl::desc("Enable the use of AA during codegen."));
545ffd83dbSDimitry Andric 
5581ad6265SDimitry Andric static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
5681ad6265SDimitry Andric     "aarch64-insert-extract-base-cost",
5781ad6265SDimitry Andric     cl::desc("Base cost of vector insert/extract element"), cl::Hidden);
5881ad6265SDimitry Andric 
59*bdd1243dSDimitry Andric // Reserve a list of X# registers, so they are unavailable for register
60*bdd1243dSDimitry Andric // allocator, but can still be used as ABI requests, such as passing arguments
61*bdd1243dSDimitry Andric // to function call.
62*bdd1243dSDimitry Andric static cl::list<std::string>
63*bdd1243dSDimitry Andric ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
64*bdd1243dSDimitry Andric                   "registers, so they can't be used by register allocator. "
65*bdd1243dSDimitry Andric                   "Should only be used for testing register allocator."),
66*bdd1243dSDimitry Andric                   cl::CommaSeparated, cl::Hidden);
67*bdd1243dSDimitry Andric 
68*bdd1243dSDimitry Andric static cl::opt<bool>
69*bdd1243dSDimitry Andric     ForceStreamingCompatibleSVE("force-streaming-compatible-sve",
70*bdd1243dSDimitry Andric                                 cl::init(false), cl::Hidden);
71*bdd1243dSDimitry Andric 
7281ad6265SDimitry Andric unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
7381ad6265SDimitry Andric   if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
7481ad6265SDimitry Andric     return OverrideVectorInsertExtractBaseCost;
7581ad6265SDimitry Andric   return VectorInsertExtractBaseCost;
7681ad6265SDimitry Andric }
7781ad6265SDimitry Andric 
78349cc55cSDimitry Andric AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
79349cc55cSDimitry Andric     StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
800b57cec5SDimitry Andric   // Determine default and user-specified characteristics
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric   if (CPUString.empty())
830b57cec5SDimitry Andric     CPUString = "generic";
840b57cec5SDimitry Andric 
85349cc55cSDimitry Andric   if (TuneCPUString.empty())
86349cc55cSDimitry Andric     TuneCPUString = CPUString;
87349cc55cSDimitry Andric 
88349cc55cSDimitry Andric   ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
890b57cec5SDimitry Andric   initializeProperties();
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric   return *this;
920b57cec5SDimitry Andric }
930b57cec5SDimitry Andric 
940b57cec5SDimitry Andric void AArch64Subtarget::initializeProperties() {
950b57cec5SDimitry Andric   // Initialize CPU specific properties. We should add a tablegen feature for
960b57cec5SDimitry Andric   // this in the future so we can specify it together with the subtarget
970b57cec5SDimitry Andric   // features.
980b57cec5SDimitry Andric   switch (ARMProcFamily) {
990b57cec5SDimitry Andric   case Others:
1000b57cec5SDimitry Andric     break;
1015ffd83dbSDimitry Andric   case Carmel:
1025ffd83dbSDimitry Andric     CacheLineSize = 64;
1035ffd83dbSDimitry Andric     break;
1040b57cec5SDimitry Andric   case CortexA35:
1050b57cec5SDimitry Andric   case CortexA53:
1060b57cec5SDimitry Andric   case CortexA55:
107fe6060f1SDimitry Andric     PrefFunctionLogAlignment = 4;
10881ad6265SDimitry Andric     PrefLoopLogAlignment = 4;
10981ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
1100b57cec5SDimitry Andric     break;
1110b57cec5SDimitry Andric   case CortexA57:
1120b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
1138bcb0991SDimitry Andric     PrefFunctionLogAlignment = 4;
11481ad6265SDimitry Andric     PrefLoopLogAlignment = 4;
11581ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
1168bcb0991SDimitry Andric     break;
1178bcb0991SDimitry Andric   case CortexA65:
1188bcb0991SDimitry Andric     PrefFunctionLogAlignment = 3;
1190b57cec5SDimitry Andric     break;
1200b57cec5SDimitry Andric   case CortexA72:
1210b57cec5SDimitry Andric   case CortexA73:
1220b57cec5SDimitry Andric   case CortexA75:
12381ad6265SDimitry Andric     PrefFunctionLogAlignment = 4;
12481ad6265SDimitry Andric     PrefLoopLogAlignment = 4;
12581ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
12681ad6265SDimitry Andric     break;
1270b57cec5SDimitry Andric   case CortexA76:
1285ffd83dbSDimitry Andric   case CortexA77:
1295ffd83dbSDimitry Andric   case CortexA78:
130e8d8bef9SDimitry Andric   case CortexA78C:
131e8d8bef9SDimitry Andric   case CortexR82:
1325ffd83dbSDimitry Andric   case CortexX1:
1331fd87a68SDimitry Andric   case CortexX1C:
1348bcb0991SDimitry Andric     PrefFunctionLogAlignment = 4;
13581ad6265SDimitry Andric     PrefLoopLogAlignment = 5;
13681ad6265SDimitry Andric     MaxBytesForLoopAlignment = 16;
1370b57cec5SDimitry Andric     break;
138349cc55cSDimitry Andric   case CortexA510:
13981ad6265SDimitry Andric     PrefFunctionLogAlignment = 4;
14081ad6265SDimitry Andric     VScaleForTuning = 1;
14181ad6265SDimitry Andric     PrefLoopLogAlignment = 4;
14281ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
14381ad6265SDimitry Andric     break;
144349cc55cSDimitry Andric   case CortexA710:
145*bdd1243dSDimitry Andric   case CortexA715:
146349cc55cSDimitry Andric   case CortexX2:
147*bdd1243dSDimitry Andric   case CortexX3:
148349cc55cSDimitry Andric     PrefFunctionLogAlignment = 4;
149349cc55cSDimitry Andric     VScaleForTuning = 1;
15081ad6265SDimitry Andric     PrefLoopLogAlignment = 5;
15181ad6265SDimitry Andric     MaxBytesForLoopAlignment = 16;
152349cc55cSDimitry Andric     break;
1535ffd83dbSDimitry Andric   case A64FX:
1545ffd83dbSDimitry Andric     CacheLineSize = 256;
155e8d8bef9SDimitry Andric     PrefFunctionLogAlignment = 3;
156e8d8bef9SDimitry Andric     PrefLoopLogAlignment = 2;
157e8d8bef9SDimitry Andric     MaxInterleaveFactor = 4;
158e8d8bef9SDimitry Andric     PrefetchDistance = 128;
159e8d8bef9SDimitry Andric     MinPrefetchStride = 1024;
160e8d8bef9SDimitry Andric     MaxPrefetchIterationsAhead = 4;
161349cc55cSDimitry Andric     VScaleForTuning = 4;
1625ffd83dbSDimitry Andric     break;
163480093f4SDimitry Andric   case AppleA7:
164480093f4SDimitry Andric   case AppleA10:
165480093f4SDimitry Andric   case AppleA11:
166480093f4SDimitry Andric   case AppleA12:
167480093f4SDimitry Andric   case AppleA13:
168e8d8bef9SDimitry Andric   case AppleA14:
169*bdd1243dSDimitry Andric   case AppleA15:
170*bdd1243dSDimitry Andric   case AppleA16:
1710b57cec5SDimitry Andric     CacheLineSize = 64;
1720b57cec5SDimitry Andric     PrefetchDistance = 280;
1730b57cec5SDimitry Andric     MinPrefetchStride = 2048;
1740b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 3;
175*bdd1243dSDimitry Andric     switch (ARMProcFamily) {
176*bdd1243dSDimitry Andric     case AppleA14:
177*bdd1243dSDimitry Andric     case AppleA15:
178*bdd1243dSDimitry Andric     case AppleA16:
179*bdd1243dSDimitry Andric       MaxInterleaveFactor = 4;
180*bdd1243dSDimitry Andric       break;
181*bdd1243dSDimitry Andric     default:
182*bdd1243dSDimitry Andric       break;
183*bdd1243dSDimitry Andric     }
1840b57cec5SDimitry Andric     break;
1850b57cec5SDimitry Andric   case ExynosM3:
1860b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
1870b57cec5SDimitry Andric     MaxJumpTableSize = 20;
1888bcb0991SDimitry Andric     PrefFunctionLogAlignment = 5;
1898bcb0991SDimitry Andric     PrefLoopLogAlignment = 4;
1900b57cec5SDimitry Andric     break;
1910b57cec5SDimitry Andric   case Falkor:
1920b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
1930b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
1940b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
1950b57cec5SDimitry Andric     CacheLineSize = 128;
1960b57cec5SDimitry Andric     PrefetchDistance = 820;
1970b57cec5SDimitry Andric     MinPrefetchStride = 2048;
1980b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 8;
1990b57cec5SDimitry Andric     break;
2000b57cec5SDimitry Andric   case Kryo:
2010b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2020b57cec5SDimitry Andric     VectorInsertExtractBaseCost = 2;
2030b57cec5SDimitry Andric     CacheLineSize = 128;
2040b57cec5SDimitry Andric     PrefetchDistance = 740;
2050b57cec5SDimitry Andric     MinPrefetchStride = 1024;
2060b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 11;
2070b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2080b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2090b57cec5SDimitry Andric     break;
2108bcb0991SDimitry Andric   case NeoverseE1:
2118bcb0991SDimitry Andric     PrefFunctionLogAlignment = 3;
2128bcb0991SDimitry Andric     break;
2138bcb0991SDimitry Andric   case NeoverseN1:
214349cc55cSDimitry Andric     PrefFunctionLogAlignment = 4;
21504eeddc0SDimitry Andric     PrefLoopLogAlignment = 5;
21604eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
217349cc55cSDimitry Andric     break;
218e8d8bef9SDimitry Andric   case NeoverseN2:
219*bdd1243dSDimitry Andric   case NeoverseV2:
220349cc55cSDimitry Andric     PrefFunctionLogAlignment = 4;
22104eeddc0SDimitry Andric     PrefLoopLogAlignment = 5;
22204eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
223349cc55cSDimitry Andric     VScaleForTuning = 1;
224349cc55cSDimitry Andric     break;
225e8d8bef9SDimitry Andric   case NeoverseV1:
2268bcb0991SDimitry Andric     PrefFunctionLogAlignment = 4;
22704eeddc0SDimitry Andric     PrefLoopLogAlignment = 5;
22804eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
229349cc55cSDimitry Andric     VScaleForTuning = 2;
230349cc55cSDimitry Andric     break;
231349cc55cSDimitry Andric   case Neoverse512TVB:
232349cc55cSDimitry Andric     PrefFunctionLogAlignment = 4;
233349cc55cSDimitry Andric     VScaleForTuning = 1;
234349cc55cSDimitry Andric     MaxInterleaveFactor = 4;
2358bcb0991SDimitry Andric     break;
2360b57cec5SDimitry Andric   case Saphira:
2370b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2380b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2390b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2400b57cec5SDimitry Andric     break;
2410b57cec5SDimitry Andric   case ThunderX2T99:
2420b57cec5SDimitry Andric     CacheLineSize = 64;
2438bcb0991SDimitry Andric     PrefFunctionLogAlignment = 3;
2448bcb0991SDimitry Andric     PrefLoopLogAlignment = 2;
2450b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2460b57cec5SDimitry Andric     PrefetchDistance = 128;
2470b57cec5SDimitry Andric     MinPrefetchStride = 1024;
2480b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 4;
2490b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2500b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2510b57cec5SDimitry Andric     break;
2520b57cec5SDimitry Andric   case ThunderX:
2530b57cec5SDimitry Andric   case ThunderXT88:
2540b57cec5SDimitry Andric   case ThunderXT81:
2550b57cec5SDimitry Andric   case ThunderXT83:
2560b57cec5SDimitry Andric     CacheLineSize = 128;
2578bcb0991SDimitry Andric     PrefFunctionLogAlignment = 3;
2588bcb0991SDimitry Andric     PrefLoopLogAlignment = 2;
2590b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2600b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2610b57cec5SDimitry Andric     break;
2620b57cec5SDimitry Andric   case TSV110:
2630b57cec5SDimitry Andric     CacheLineSize = 64;
2648bcb0991SDimitry Andric     PrefFunctionLogAlignment = 4;
2658bcb0991SDimitry Andric     PrefLoopLogAlignment = 2;
2660b57cec5SDimitry Andric     break;
267e837bb5cSDimitry Andric   case ThunderX3T110:
268e837bb5cSDimitry Andric     CacheLineSize = 64;
269e837bb5cSDimitry Andric     PrefFunctionLogAlignment = 4;
270e837bb5cSDimitry Andric     PrefLoopLogAlignment = 2;
271e837bb5cSDimitry Andric     MaxInterleaveFactor = 4;
272e837bb5cSDimitry Andric     PrefetchDistance = 128;
273e837bb5cSDimitry Andric     MinPrefetchStride = 1024;
274e837bb5cSDimitry Andric     MaxPrefetchIterationsAhead = 4;
275e837bb5cSDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
276e837bb5cSDimitry Andric     MinVectorRegisterBitWidth = 128;
277e837bb5cSDimitry Andric     break;
2782a66634dSDimitry Andric   case Ampere1:
279*bdd1243dSDimitry Andric   case Ampere1A:
2802a66634dSDimitry Andric     CacheLineSize = 64;
2812a66634dSDimitry Andric     PrefFunctionLogAlignment = 6;
2822a66634dSDimitry Andric     PrefLoopLogAlignment = 6;
2832a66634dSDimitry Andric     MaxInterleaveFactor = 4;
2842a66634dSDimitry Andric     break;
2850b57cec5SDimitry Andric   }
2860b57cec5SDimitry Andric }
2870b57cec5SDimitry Andric 
288*bdd1243dSDimitry Andric AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
289*bdd1243dSDimitry Andric                                    StringRef TuneCPU, StringRef FS,
290fe6060f1SDimitry Andric                                    const TargetMachine &TM, bool LittleEndian,
291fe6060f1SDimitry Andric                                    unsigned MinSVEVectorSizeInBitsOverride,
292*bdd1243dSDimitry Andric                                    unsigned MaxSVEVectorSizeInBitsOverride,
293*bdd1243dSDimitry Andric                                    bool StreamingSVEModeDisabled)
294349cc55cSDimitry Andric     : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
2950b57cec5SDimitry Andric       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
296*bdd1243dSDimitry Andric       ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
2970b57cec5SDimitry Andric       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
2980b57cec5SDimitry Andric       IsLittle(LittleEndian),
299*bdd1243dSDimitry Andric       StreamingSVEModeDisabled(StreamingSVEModeDisabled),
300fe6060f1SDimitry Andric       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
301fe6060f1SDimitry Andric       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
30204eeddc0SDimitry Andric       InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
303349cc55cSDimitry Andric       TLInfo(TM, *this) {
3040b57cec5SDimitry Andric   if (AArch64::isX18ReservedByDefault(TT))
3050b57cec5SDimitry Andric     ReserveXRegister.set(18);
3060b57cec5SDimitry Andric 
3070b57cec5SDimitry Andric   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
3085ffd83dbSDimitry Andric   InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
3090b57cec5SDimitry Andric   Legalizer.reset(new AArch64LegalizerInfo(*this));
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
3120b57cec5SDimitry Andric 
3130b57cec5SDimitry Andric   // FIXME: At this point, we can't rely on Subtarget having RBI.
3140b57cec5SDimitry Andric   // It's awkward to mix passing RBI and the Subtarget; should we pass
3150b57cec5SDimitry Andric   // TII/TRI as well?
3160b57cec5SDimitry Andric   InstSelector.reset(createAArch64InstructionSelector(
3170b57cec5SDimitry Andric       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
3180b57cec5SDimitry Andric 
3190b57cec5SDimitry Andric   RegBankInfo.reset(RBI);
320*bdd1243dSDimitry Andric 
321*bdd1243dSDimitry Andric   auto TRI = getRegisterInfo();
322*bdd1243dSDimitry Andric   StringSet<> ReservedRegNames;
323*bdd1243dSDimitry Andric   ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
324*bdd1243dSDimitry Andric   for (unsigned i = 0; i < 29; ++i) {
325*bdd1243dSDimitry Andric     if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
326*bdd1243dSDimitry Andric       ReserveXRegisterForRA.set(i);
327*bdd1243dSDimitry Andric   }
328*bdd1243dSDimitry Andric   // X30 is named LR, so we can't use TRI->getName to check X30.
329*bdd1243dSDimitry Andric   if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
330*bdd1243dSDimitry Andric     ReserveXRegisterForRA.set(30);
331*bdd1243dSDimitry Andric   // X29 is named FP, so we can't use TRI->getName to check X29.
332*bdd1243dSDimitry Andric   if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
333*bdd1243dSDimitry Andric     ReserveXRegisterForRA.set(29);
3340b57cec5SDimitry Andric }
3350b57cec5SDimitry Andric 
3360b57cec5SDimitry Andric const CallLowering *AArch64Subtarget::getCallLowering() const {
3370b57cec5SDimitry Andric   return CallLoweringInfo.get();
3380b57cec5SDimitry Andric }
3390b57cec5SDimitry Andric 
3405ffd83dbSDimitry Andric const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
3415ffd83dbSDimitry Andric   return InlineAsmLoweringInfo.get();
3425ffd83dbSDimitry Andric }
3435ffd83dbSDimitry Andric 
3448bcb0991SDimitry Andric InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
3450b57cec5SDimitry Andric   return InstSelector.get();
3460b57cec5SDimitry Andric }
3470b57cec5SDimitry Andric 
3480b57cec5SDimitry Andric const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
3490b57cec5SDimitry Andric   return Legalizer.get();
3500b57cec5SDimitry Andric }
3510b57cec5SDimitry Andric 
3520b57cec5SDimitry Andric const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
3530b57cec5SDimitry Andric   return RegBankInfo.get();
3540b57cec5SDimitry Andric }
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric /// Find the target operand flags that describe how a global value should be
3570b57cec5SDimitry Andric /// referenced for the current subtarget.
3588bcb0991SDimitry Andric unsigned
3590b57cec5SDimitry Andric AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
3600b57cec5SDimitry Andric                                           const TargetMachine &TM) const {
3610b57cec5SDimitry Andric   // MachO large model always goes via a GOT, simply to get a single 8-byte
3620b57cec5SDimitry Andric   // absolute relocation on all global addresses.
3630b57cec5SDimitry Andric   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
3640b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3650b57cec5SDimitry Andric 
3660b57cec5SDimitry Andric   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
367*bdd1243dSDimitry Andric     if (GV->hasDLLImportStorageClass()) {
368*bdd1243dSDimitry Andric       if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy())
369*bdd1243dSDimitry Andric         return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORTAUX;
3700b57cec5SDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
371*bdd1243dSDimitry Andric     }
3720b57cec5SDimitry Andric     if (getTargetTriple().isOSWindows())
3730b57cec5SDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
3740b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3750b57cec5SDimitry Andric   }
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric   // The small code model's direct accesses use ADRP, which cannot
3780b57cec5SDimitry Andric   // necessarily produce the value 0 (if the code is above 4GB).
3790b57cec5SDimitry Andric   // Same for the tiny code model, where we have a pc relative LDR.
3800b57cec5SDimitry Andric   if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
3810b57cec5SDimitry Andric       GV->hasExternalWeakLinkage())
3820b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3830b57cec5SDimitry Andric 
3848bcb0991SDimitry Andric   // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
3858bcb0991SDimitry Andric   // that their nominal addresses are tagged and outside of the code model. In
3868bcb0991SDimitry Andric   // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
3878bcb0991SDimitry Andric   // tag if necessary based on MO_TAGGED.
3888bcb0991SDimitry Andric   if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
3898bcb0991SDimitry Andric     return AArch64II::MO_NC | AArch64II::MO_TAGGED;
3908bcb0991SDimitry Andric 
3910b57cec5SDimitry Andric   return AArch64II::MO_NO_FLAG;
3920b57cec5SDimitry Andric }
3930b57cec5SDimitry Andric 
3948bcb0991SDimitry Andric unsigned AArch64Subtarget::classifyGlobalFunctionReference(
3950b57cec5SDimitry Andric     const GlobalValue *GV, const TargetMachine &TM) const {
3960b57cec5SDimitry Andric   // MachO large model always goes via a GOT, because we don't have the
3970b57cec5SDimitry Andric   // relocations available to do anything else..
3980b57cec5SDimitry Andric   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
3990b57cec5SDimitry Andric       !GV->hasInternalLinkage())
4000b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4010b57cec5SDimitry Andric 
4020b57cec5SDimitry Andric   // NonLazyBind goes via GOT unless we know it's available locally.
4030b57cec5SDimitry Andric   auto *F = dyn_cast<Function>(GV);
4040b57cec5SDimitry Andric   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
4050b57cec5SDimitry Andric       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
4060b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4070b57cec5SDimitry Andric 
408*bdd1243dSDimitry Andric   if (getTargetTriple().isOSWindows()) {
409*bdd1243dSDimitry Andric     if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy() &&
410*bdd1243dSDimitry Andric         GV->hasDLLImportStorageClass()) {
411*bdd1243dSDimitry Andric       // On Arm64EC, if we're calling a function directly, use MO_DLLIMPORT,
412*bdd1243dSDimitry Andric       // not MO_DLLIMPORTAUX.
413*bdd1243dSDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
414*bdd1243dSDimitry Andric     }
415*bdd1243dSDimitry Andric 
416480093f4SDimitry Andric     // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
417480093f4SDimitry Andric     return ClassifyGlobalReference(GV, TM);
418*bdd1243dSDimitry Andric   }
419480093f4SDimitry Andric 
4200b57cec5SDimitry Andric   return AArch64II::MO_NO_FLAG;
4210b57cec5SDimitry Andric }
4220b57cec5SDimitry Andric 
4230b57cec5SDimitry Andric void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
4240b57cec5SDimitry Andric                                            unsigned NumRegionInstrs) const {
4250b57cec5SDimitry Andric   // LNT run (at least on Cyclone) showed reasonably significant gains for
4260b57cec5SDimitry Andric   // bi-directional scheduling. 253.perlbmk.
4270b57cec5SDimitry Andric   Policy.OnlyTopDown = false;
4280b57cec5SDimitry Andric   Policy.OnlyBottomUp = false;
4290b57cec5SDimitry Andric   // Enabling or Disabling the latency heuristic is a close call: It seems to
4300b57cec5SDimitry Andric   // help nearly no benchmark on out-of-order architectures, on the other hand
4310b57cec5SDimitry Andric   // it regresses register pressure on a few benchmarking.
4320b57cec5SDimitry Andric   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
4330b57cec5SDimitry Andric }
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric bool AArch64Subtarget::enableEarlyIfConversion() const {
4360b57cec5SDimitry Andric   return EnableEarlyIfConvert;
4370b57cec5SDimitry Andric }
4380b57cec5SDimitry Andric 
4390b57cec5SDimitry Andric bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
4400b57cec5SDimitry Andric   if (!UseAddressTopByteIgnored)
4410b57cec5SDimitry Andric     return false;
4420b57cec5SDimitry Andric 
44381ad6265SDimitry Andric   if (TargetTriple.isDriverKit())
44481ad6265SDimitry Andric     return true;
4450b57cec5SDimitry Andric   if (TargetTriple.isiOS()) {
4460eae32dcSDimitry Andric     return TargetTriple.getiOSVersion() >= VersionTuple(8);
4470b57cec5SDimitry Andric   }
4480b57cec5SDimitry Andric 
4490b57cec5SDimitry Andric   return false;
4500b57cec5SDimitry Andric }
4510b57cec5SDimitry Andric 
4520b57cec5SDimitry Andric std::unique_ptr<PBQPRAConstraint>
4530b57cec5SDimitry Andric AArch64Subtarget::getCustomPBQPConstraints() const {
4548bcb0991SDimitry Andric   return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
4550b57cec5SDimitry Andric }
4560b57cec5SDimitry Andric 
4570b57cec5SDimitry Andric void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
4580b57cec5SDimitry Andric   // We usually compute max call frame size after ISel. Do the computation now
4590b57cec5SDimitry Andric   // if the .mir file didn't specify it. Note that this will probably give you
4600b57cec5SDimitry Andric   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
4610b57cec5SDimitry Andric   // instructions, specify explicitly if you need it to be correct.
4620b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
4630b57cec5SDimitry Andric   if (!MFI.isMaxCallFrameSizeComputed())
4640b57cec5SDimitry Andric     MFI.computeMaxCallFrameSize(MF);
4650b57cec5SDimitry Andric }
4665ffd83dbSDimitry Andric 
467fe6060f1SDimitry Andric bool AArch64Subtarget::useAA() const { return UseAA; }
468*bdd1243dSDimitry Andric 
469*bdd1243dSDimitry Andric bool AArch64Subtarget::forceStreamingCompatibleSVE() const {
470*bdd1243dSDimitry Andric   if (ForceStreamingCompatibleSVE) {
471*bdd1243dSDimitry Andric     assert(hasSVEorSME() && "Expected SVE to be available");
472*bdd1243dSDimitry Andric     return hasSVEorSME();
473*bdd1243dSDimitry Andric   }
474*bdd1243dSDimitry Andric   return false;
475*bdd1243dSDimitry Andric }
476