xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
10b57cec5SDimitry Andric //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the AArch64 specific subclass of TargetSubtarget.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "AArch64Subtarget.h"
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AArch64.h"
160b57cec5SDimitry Andric #include "AArch64InstrInfo.h"
170b57cec5SDimitry Andric #include "AArch64PBQPRegAlloc.h"
180b57cec5SDimitry Andric #include "AArch64TargetMachine.h"
195ffd83dbSDimitry Andric #include "GISel/AArch64CallLowering.h"
205ffd83dbSDimitry Andric #include "GISel/AArch64LegalizerInfo.h"
215ffd83dbSDimitry Andric #include "GISel/AArch64RegisterBankInfo.h"
220b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
2481ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h"
260b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h"
27*06c3fb27SDimitry Andric #include "llvm/TargetParser/AArch64TargetParser.h"
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric using namespace llvm;
300b57cec5SDimitry Andric 
310b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-subtarget"
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR
340b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC
350b57cec5SDimitry Andric #include "AArch64GenSubtargetInfo.inc"
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric static cl::opt<bool>
380b57cec5SDimitry Andric EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
390b57cec5SDimitry Andric                      "converter pass"), cl::init(true), cl::Hidden);
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric // If OS supports TBI, use this flag to enable it.
420b57cec5SDimitry Andric static cl::opt<bool>
430b57cec5SDimitry Andric UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
440b57cec5SDimitry Andric                          "an address is ignored"), cl::init(false), cl::Hidden);
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric static cl::opt<bool>
470b57cec5SDimitry Andric     UseNonLazyBind("aarch64-enable-nonlazybind",
480b57cec5SDimitry Andric                    cl::desc("Call nonlazybind functions via direct GOT load"),
490b57cec5SDimitry Andric                    cl::init(false), cl::Hidden);
500b57cec5SDimitry Andric 
51fe6060f1SDimitry Andric static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
52fe6060f1SDimitry Andric                            cl::desc("Enable the use of AA during codegen."));
535ffd83dbSDimitry Andric 
5481ad6265SDimitry Andric static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
5581ad6265SDimitry Andric     "aarch64-insert-extract-base-cost",
5681ad6265SDimitry Andric     cl::desc("Base cost of vector insert/extract element"), cl::Hidden);
5781ad6265SDimitry Andric 
58bdd1243dSDimitry Andric // Reserve a list of X# registers, so they are unavailable for register
59bdd1243dSDimitry Andric // allocator, but can still be used as ABI requests, such as passing arguments
60bdd1243dSDimitry Andric // to function call.
61bdd1243dSDimitry Andric static cl::list<std::string>
62bdd1243dSDimitry Andric ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
63bdd1243dSDimitry Andric                   "registers, so they can't be used by register allocator. "
64bdd1243dSDimitry Andric                   "Should only be used for testing register allocator."),
65bdd1243dSDimitry Andric                   cl::CommaSeparated, cl::Hidden);
66bdd1243dSDimitry Andric 
67*06c3fb27SDimitry Andric static cl::opt<bool> ForceStreamingCompatibleSVE(
68*06c3fb27SDimitry Andric     "force-streaming-compatible-sve",
69*06c3fb27SDimitry Andric     cl::desc(
70*06c3fb27SDimitry Andric         "Force the use of streaming-compatible SVE code for all functions"),
71*06c3fb27SDimitry Andric     cl::Hidden);
72bdd1243dSDimitry Andric 
7381ad6265SDimitry Andric unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
7481ad6265SDimitry Andric   if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
7581ad6265SDimitry Andric     return OverrideVectorInsertExtractBaseCost;
7681ad6265SDimitry Andric   return VectorInsertExtractBaseCost;
7781ad6265SDimitry Andric }
7881ad6265SDimitry Andric 
79349cc55cSDimitry Andric AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
80349cc55cSDimitry Andric     StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
810b57cec5SDimitry Andric   // Determine default and user-specified characteristics
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric   if (CPUString.empty())
840b57cec5SDimitry Andric     CPUString = "generic";
850b57cec5SDimitry Andric 
86349cc55cSDimitry Andric   if (TuneCPUString.empty())
87349cc55cSDimitry Andric     TuneCPUString = CPUString;
88349cc55cSDimitry Andric 
89349cc55cSDimitry Andric   ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
900b57cec5SDimitry Andric   initializeProperties();
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric   return *this;
930b57cec5SDimitry Andric }
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric void AArch64Subtarget::initializeProperties() {
960b57cec5SDimitry Andric   // Initialize CPU specific properties. We should add a tablegen feature for
970b57cec5SDimitry Andric   // this in the future so we can specify it together with the subtarget
980b57cec5SDimitry Andric   // features.
990b57cec5SDimitry Andric   switch (ARMProcFamily) {
1000b57cec5SDimitry Andric   case Others:
1010b57cec5SDimitry Andric     break;
1025ffd83dbSDimitry Andric   case Carmel:
1035ffd83dbSDimitry Andric     CacheLineSize = 64;
1045ffd83dbSDimitry Andric     break;
1050b57cec5SDimitry Andric   case CortexA35:
1060b57cec5SDimitry Andric   case CortexA53:
1070b57cec5SDimitry Andric   case CortexA55:
108*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
109*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
11081ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
1110b57cec5SDimitry Andric     break;
1120b57cec5SDimitry Andric   case CortexA57:
1130b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
114*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
115*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
11681ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
1178bcb0991SDimitry Andric     break;
1188bcb0991SDimitry Andric   case CortexA65:
119*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
1200b57cec5SDimitry Andric     break;
1210b57cec5SDimitry Andric   case CortexA72:
1220b57cec5SDimitry Andric   case CortexA73:
1230b57cec5SDimitry Andric   case CortexA75:
124*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
125*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
12681ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
12781ad6265SDimitry Andric     break;
1280b57cec5SDimitry Andric   case CortexA76:
1295ffd83dbSDimitry Andric   case CortexA77:
1305ffd83dbSDimitry Andric   case CortexA78:
131e8d8bef9SDimitry Andric   case CortexA78C:
132e8d8bef9SDimitry Andric   case CortexR82:
1335ffd83dbSDimitry Andric   case CortexX1:
1341fd87a68SDimitry Andric   case CortexX1C:
135*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
136*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
13781ad6265SDimitry Andric     MaxBytesForLoopAlignment = 16;
1380b57cec5SDimitry Andric     break;
139349cc55cSDimitry Andric   case CortexA510:
140*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
14181ad6265SDimitry Andric     VScaleForTuning = 1;
142*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
14381ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
14481ad6265SDimitry Andric     break;
145349cc55cSDimitry Andric   case CortexA710:
146bdd1243dSDimitry Andric   case CortexA715:
147349cc55cSDimitry Andric   case CortexX2:
148bdd1243dSDimitry Andric   case CortexX3:
149*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
150349cc55cSDimitry Andric     VScaleForTuning = 1;
151*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
15281ad6265SDimitry Andric     MaxBytesForLoopAlignment = 16;
153349cc55cSDimitry Andric     break;
1545ffd83dbSDimitry Andric   case A64FX:
1555ffd83dbSDimitry Andric     CacheLineSize = 256;
156*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
157*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
158e8d8bef9SDimitry Andric     MaxInterleaveFactor = 4;
159e8d8bef9SDimitry Andric     PrefetchDistance = 128;
160e8d8bef9SDimitry Andric     MinPrefetchStride = 1024;
161e8d8bef9SDimitry Andric     MaxPrefetchIterationsAhead = 4;
162349cc55cSDimitry Andric     VScaleForTuning = 4;
1635ffd83dbSDimitry Andric     break;
164480093f4SDimitry Andric   case AppleA7:
165480093f4SDimitry Andric   case AppleA10:
166480093f4SDimitry Andric   case AppleA11:
167480093f4SDimitry Andric   case AppleA12:
168480093f4SDimitry Andric   case AppleA13:
169e8d8bef9SDimitry Andric   case AppleA14:
170bdd1243dSDimitry Andric   case AppleA15:
171bdd1243dSDimitry Andric   case AppleA16:
1720b57cec5SDimitry Andric     CacheLineSize = 64;
1730b57cec5SDimitry Andric     PrefetchDistance = 280;
1740b57cec5SDimitry Andric     MinPrefetchStride = 2048;
1750b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 3;
176bdd1243dSDimitry Andric     switch (ARMProcFamily) {
177bdd1243dSDimitry Andric     case AppleA14:
178bdd1243dSDimitry Andric     case AppleA15:
179bdd1243dSDimitry Andric     case AppleA16:
180bdd1243dSDimitry Andric       MaxInterleaveFactor = 4;
181bdd1243dSDimitry Andric       break;
182bdd1243dSDimitry Andric     default:
183bdd1243dSDimitry Andric       break;
184bdd1243dSDimitry Andric     }
1850b57cec5SDimitry Andric     break;
1860b57cec5SDimitry Andric   case ExynosM3:
1870b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
1880b57cec5SDimitry Andric     MaxJumpTableSize = 20;
189*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(32);
190*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
1910b57cec5SDimitry Andric     break;
1920b57cec5SDimitry Andric   case Falkor:
1930b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
1940b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
1950b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
1960b57cec5SDimitry Andric     CacheLineSize = 128;
1970b57cec5SDimitry Andric     PrefetchDistance = 820;
1980b57cec5SDimitry Andric     MinPrefetchStride = 2048;
1990b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 8;
2000b57cec5SDimitry Andric     break;
2010b57cec5SDimitry Andric   case Kryo:
2020b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2030b57cec5SDimitry Andric     VectorInsertExtractBaseCost = 2;
2040b57cec5SDimitry Andric     CacheLineSize = 128;
2050b57cec5SDimitry Andric     PrefetchDistance = 740;
2060b57cec5SDimitry Andric     MinPrefetchStride = 1024;
2070b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 11;
2080b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2090b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2100b57cec5SDimitry Andric     break;
2118bcb0991SDimitry Andric   case NeoverseE1:
212*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
2138bcb0991SDimitry Andric     break;
2148bcb0991SDimitry Andric   case NeoverseN1:
215*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
216*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
21704eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
218349cc55cSDimitry Andric     break;
219e8d8bef9SDimitry Andric   case NeoverseN2:
220bdd1243dSDimitry Andric   case NeoverseV2:
221*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
222*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
22304eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
224349cc55cSDimitry Andric     VScaleForTuning = 1;
225349cc55cSDimitry Andric     break;
226e8d8bef9SDimitry Andric   case NeoverseV1:
227*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
228*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
22904eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
230349cc55cSDimitry Andric     VScaleForTuning = 2;
231*06c3fb27SDimitry Andric     DefaultSVETFOpts = TailFoldingOpts::Simple;
232349cc55cSDimitry Andric     break;
233349cc55cSDimitry Andric   case Neoverse512TVB:
234*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
235349cc55cSDimitry Andric     VScaleForTuning = 1;
236349cc55cSDimitry Andric     MaxInterleaveFactor = 4;
2378bcb0991SDimitry Andric     break;
2380b57cec5SDimitry Andric   case Saphira:
2390b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2400b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2410b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2420b57cec5SDimitry Andric     break;
2430b57cec5SDimitry Andric   case ThunderX2T99:
2440b57cec5SDimitry Andric     CacheLineSize = 64;
245*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
246*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
2470b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2480b57cec5SDimitry Andric     PrefetchDistance = 128;
2490b57cec5SDimitry Andric     MinPrefetchStride = 1024;
2500b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 4;
2510b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2520b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2530b57cec5SDimitry Andric     break;
2540b57cec5SDimitry Andric   case ThunderX:
2550b57cec5SDimitry Andric   case ThunderXT88:
2560b57cec5SDimitry Andric   case ThunderXT81:
2570b57cec5SDimitry Andric   case ThunderXT83:
2580b57cec5SDimitry Andric     CacheLineSize = 128;
259*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
260*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
2610b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2620b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2630b57cec5SDimitry Andric     break;
2640b57cec5SDimitry Andric   case TSV110:
2650b57cec5SDimitry Andric     CacheLineSize = 64;
266*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
267*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
2680b57cec5SDimitry Andric     break;
269e837bb5cSDimitry Andric   case ThunderX3T110:
270e837bb5cSDimitry Andric     CacheLineSize = 64;
271*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
272*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
273e837bb5cSDimitry Andric     MaxInterleaveFactor = 4;
274e837bb5cSDimitry Andric     PrefetchDistance = 128;
275e837bb5cSDimitry Andric     MinPrefetchStride = 1024;
276e837bb5cSDimitry Andric     MaxPrefetchIterationsAhead = 4;
277e837bb5cSDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
278e837bb5cSDimitry Andric     MinVectorRegisterBitWidth = 128;
279e837bb5cSDimitry Andric     break;
2802a66634dSDimitry Andric   case Ampere1:
281bdd1243dSDimitry Andric   case Ampere1A:
2822a66634dSDimitry Andric     CacheLineSize = 64;
283*06c3fb27SDimitry Andric     PrefFunctionAlignment = Align(64);
284*06c3fb27SDimitry Andric     PrefLoopAlignment = Align(64);
2852a66634dSDimitry Andric     MaxInterleaveFactor = 4;
2862a66634dSDimitry Andric     break;
2870b57cec5SDimitry Andric   }
2880b57cec5SDimitry Andric }
2890b57cec5SDimitry Andric 
290bdd1243dSDimitry Andric AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
291bdd1243dSDimitry Andric                                    StringRef TuneCPU, StringRef FS,
292fe6060f1SDimitry Andric                                    const TargetMachine &TM, bool LittleEndian,
293fe6060f1SDimitry Andric                                    unsigned MinSVEVectorSizeInBitsOverride,
294bdd1243dSDimitry Andric                                    unsigned MaxSVEVectorSizeInBitsOverride,
295*06c3fb27SDimitry Andric                                    bool StreamingSVEMode,
296*06c3fb27SDimitry Andric                                    bool StreamingCompatibleSVEMode)
297349cc55cSDimitry Andric     : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
2980b57cec5SDimitry Andric       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
299bdd1243dSDimitry Andric       ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
3000b57cec5SDimitry Andric       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
3010b57cec5SDimitry Andric       IsLittle(LittleEndian),
302*06c3fb27SDimitry Andric       StreamingSVEMode(StreamingSVEMode),
303*06c3fb27SDimitry Andric       StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
304fe6060f1SDimitry Andric       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
305fe6060f1SDimitry Andric       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
30604eeddc0SDimitry Andric       InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
307349cc55cSDimitry Andric       TLInfo(TM, *this) {
3080b57cec5SDimitry Andric   if (AArch64::isX18ReservedByDefault(TT))
3090b57cec5SDimitry Andric     ReserveXRegister.set(18);
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
3125ffd83dbSDimitry Andric   InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
3130b57cec5SDimitry Andric   Legalizer.reset(new AArch64LegalizerInfo(*this));
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
3160b57cec5SDimitry Andric 
3170b57cec5SDimitry Andric   // FIXME: At this point, we can't rely on Subtarget having RBI.
3180b57cec5SDimitry Andric   // It's awkward to mix passing RBI and the Subtarget; should we pass
3190b57cec5SDimitry Andric   // TII/TRI as well?
3200b57cec5SDimitry Andric   InstSelector.reset(createAArch64InstructionSelector(
3210b57cec5SDimitry Andric       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
3220b57cec5SDimitry Andric 
3230b57cec5SDimitry Andric   RegBankInfo.reset(RBI);
324bdd1243dSDimitry Andric 
325bdd1243dSDimitry Andric   auto TRI = getRegisterInfo();
326bdd1243dSDimitry Andric   StringSet<> ReservedRegNames;
327bdd1243dSDimitry Andric   ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
328bdd1243dSDimitry Andric   for (unsigned i = 0; i < 29; ++i) {
329bdd1243dSDimitry Andric     if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
330bdd1243dSDimitry Andric       ReserveXRegisterForRA.set(i);
331bdd1243dSDimitry Andric   }
332bdd1243dSDimitry Andric   // X30 is named LR, so we can't use TRI->getName to check X30.
333bdd1243dSDimitry Andric   if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
334bdd1243dSDimitry Andric     ReserveXRegisterForRA.set(30);
335bdd1243dSDimitry Andric   // X29 is named FP, so we can't use TRI->getName to check X29.
336bdd1243dSDimitry Andric   if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
337bdd1243dSDimitry Andric     ReserveXRegisterForRA.set(29);
3380b57cec5SDimitry Andric }
3390b57cec5SDimitry Andric 
3400b57cec5SDimitry Andric const CallLowering *AArch64Subtarget::getCallLowering() const {
3410b57cec5SDimitry Andric   return CallLoweringInfo.get();
3420b57cec5SDimitry Andric }
3430b57cec5SDimitry Andric 
3445ffd83dbSDimitry Andric const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
3455ffd83dbSDimitry Andric   return InlineAsmLoweringInfo.get();
3465ffd83dbSDimitry Andric }
3475ffd83dbSDimitry Andric 
3488bcb0991SDimitry Andric InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
3490b57cec5SDimitry Andric   return InstSelector.get();
3500b57cec5SDimitry Andric }
3510b57cec5SDimitry Andric 
3520b57cec5SDimitry Andric const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
3530b57cec5SDimitry Andric   return Legalizer.get();
3540b57cec5SDimitry Andric }
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
3570b57cec5SDimitry Andric   return RegBankInfo.get();
3580b57cec5SDimitry Andric }
3590b57cec5SDimitry Andric 
3600b57cec5SDimitry Andric /// Find the target operand flags that describe how a global value should be
3610b57cec5SDimitry Andric /// referenced for the current subtarget.
3628bcb0991SDimitry Andric unsigned
3630b57cec5SDimitry Andric AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
3640b57cec5SDimitry Andric                                           const TargetMachine &TM) const {
3650b57cec5SDimitry Andric   // MachO large model always goes via a GOT, simply to get a single 8-byte
3660b57cec5SDimitry Andric   // absolute relocation on all global addresses.
3670b57cec5SDimitry Andric   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
3680b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3690b57cec5SDimitry Andric 
370*06c3fb27SDimitry Andric   // All globals dynamically protected by MTE must have their address tags
371*06c3fb27SDimitry Andric   // synthesized. This is done by having the loader stash the tag in the GOT
372*06c3fb27SDimitry Andric   // entry. Force all tagged globals (even ones with internal linkage) through
373*06c3fb27SDimitry Andric   // the GOT.
374*06c3fb27SDimitry Andric   if (GV->isTagged())
375*06c3fb27SDimitry Andric     return AArch64II::MO_GOT;
376*06c3fb27SDimitry Andric 
3770b57cec5SDimitry Andric   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
378bdd1243dSDimitry Andric     if (GV->hasDLLImportStorageClass()) {
379bdd1243dSDimitry Andric       if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy())
380bdd1243dSDimitry Andric         return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORTAUX;
3810b57cec5SDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
382bdd1243dSDimitry Andric     }
3830b57cec5SDimitry Andric     if (getTargetTriple().isOSWindows())
3840b57cec5SDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
3850b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3860b57cec5SDimitry Andric   }
3870b57cec5SDimitry Andric 
3880b57cec5SDimitry Andric   // The small code model's direct accesses use ADRP, which cannot
3890b57cec5SDimitry Andric   // necessarily produce the value 0 (if the code is above 4GB).
3900b57cec5SDimitry Andric   // Same for the tiny code model, where we have a pc relative LDR.
3910b57cec5SDimitry Andric   if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
3920b57cec5SDimitry Andric       GV->hasExternalWeakLinkage())
3930b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3940b57cec5SDimitry Andric 
3958bcb0991SDimitry Andric   // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
3968bcb0991SDimitry Andric   // that their nominal addresses are tagged and outside of the code model. In
3978bcb0991SDimitry Andric   // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
3988bcb0991SDimitry Andric   // tag if necessary based on MO_TAGGED.
3998bcb0991SDimitry Andric   if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
4008bcb0991SDimitry Andric     return AArch64II::MO_NC | AArch64II::MO_TAGGED;
4018bcb0991SDimitry Andric 
4020b57cec5SDimitry Andric   return AArch64II::MO_NO_FLAG;
4030b57cec5SDimitry Andric }
4040b57cec5SDimitry Andric 
4058bcb0991SDimitry Andric unsigned AArch64Subtarget::classifyGlobalFunctionReference(
4060b57cec5SDimitry Andric     const GlobalValue *GV, const TargetMachine &TM) const {
4070b57cec5SDimitry Andric   // MachO large model always goes via a GOT, because we don't have the
4080b57cec5SDimitry Andric   // relocations available to do anything else..
4090b57cec5SDimitry Andric   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
4100b57cec5SDimitry Andric       !GV->hasInternalLinkage())
4110b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4120b57cec5SDimitry Andric 
4130b57cec5SDimitry Andric   // NonLazyBind goes via GOT unless we know it's available locally.
4140b57cec5SDimitry Andric   auto *F = dyn_cast<Function>(GV);
4150b57cec5SDimitry Andric   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
4160b57cec5SDimitry Andric       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
4170b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4180b57cec5SDimitry Andric 
419bdd1243dSDimitry Andric   if (getTargetTriple().isOSWindows()) {
420bdd1243dSDimitry Andric     if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy() &&
421bdd1243dSDimitry Andric         GV->hasDLLImportStorageClass()) {
422bdd1243dSDimitry Andric       // On Arm64EC, if we're calling a function directly, use MO_DLLIMPORT,
423bdd1243dSDimitry Andric       // not MO_DLLIMPORTAUX.
424bdd1243dSDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
425bdd1243dSDimitry Andric     }
426bdd1243dSDimitry Andric 
427480093f4SDimitry Andric     // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
428480093f4SDimitry Andric     return ClassifyGlobalReference(GV, TM);
429bdd1243dSDimitry Andric   }
430480093f4SDimitry Andric 
4310b57cec5SDimitry Andric   return AArch64II::MO_NO_FLAG;
4320b57cec5SDimitry Andric }
4330b57cec5SDimitry Andric 
4340b57cec5SDimitry Andric void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
4350b57cec5SDimitry Andric                                            unsigned NumRegionInstrs) const {
4360b57cec5SDimitry Andric   // LNT run (at least on Cyclone) showed reasonably significant gains for
4370b57cec5SDimitry Andric   // bi-directional scheduling. 253.perlbmk.
4380b57cec5SDimitry Andric   Policy.OnlyTopDown = false;
4390b57cec5SDimitry Andric   Policy.OnlyBottomUp = false;
4400b57cec5SDimitry Andric   // Enabling or Disabling the latency heuristic is a close call: It seems to
4410b57cec5SDimitry Andric   // help nearly no benchmark on out-of-order architectures, on the other hand
4420b57cec5SDimitry Andric   // it regresses register pressure on a few benchmarking.
4430b57cec5SDimitry Andric   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
4440b57cec5SDimitry Andric }
4450b57cec5SDimitry Andric 
4460b57cec5SDimitry Andric bool AArch64Subtarget::enableEarlyIfConversion() const {
4470b57cec5SDimitry Andric   return EnableEarlyIfConvert;
4480b57cec5SDimitry Andric }
4490b57cec5SDimitry Andric 
4500b57cec5SDimitry Andric bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
4510b57cec5SDimitry Andric   if (!UseAddressTopByteIgnored)
4520b57cec5SDimitry Andric     return false;
4530b57cec5SDimitry Andric 
45481ad6265SDimitry Andric   if (TargetTriple.isDriverKit())
45581ad6265SDimitry Andric     return true;
4560b57cec5SDimitry Andric   if (TargetTriple.isiOS()) {
4570eae32dcSDimitry Andric     return TargetTriple.getiOSVersion() >= VersionTuple(8);
4580b57cec5SDimitry Andric   }
4590b57cec5SDimitry Andric 
4600b57cec5SDimitry Andric   return false;
4610b57cec5SDimitry Andric }
4620b57cec5SDimitry Andric 
4630b57cec5SDimitry Andric std::unique_ptr<PBQPRAConstraint>
4640b57cec5SDimitry Andric AArch64Subtarget::getCustomPBQPConstraints() const {
4658bcb0991SDimitry Andric   return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
4660b57cec5SDimitry Andric }
4670b57cec5SDimitry Andric 
4680b57cec5SDimitry Andric void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
4690b57cec5SDimitry Andric   // We usually compute max call frame size after ISel. Do the computation now
4700b57cec5SDimitry Andric   // if the .mir file didn't specify it. Note that this will probably give you
4710b57cec5SDimitry Andric   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
4720b57cec5SDimitry Andric   // instructions, specify explicitly if you need it to be correct.
4730b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
4740b57cec5SDimitry Andric   if (!MFI.isMaxCallFrameSizeComputed())
4750b57cec5SDimitry Andric     MFI.computeMaxCallFrameSize(MF);
4760b57cec5SDimitry Andric }
4775ffd83dbSDimitry Andric 
478fe6060f1SDimitry Andric bool AArch64Subtarget::useAA() const { return UseAA; }
479bdd1243dSDimitry Andric 
480*06c3fb27SDimitry Andric bool AArch64Subtarget::isNeonAvailable() const {
481*06c3fb27SDimitry Andric   if (!hasNEON())
482bdd1243dSDimitry Andric     return false;
483*06c3fb27SDimitry Andric 
484*06c3fb27SDimitry Andric   // The 'force-streaming-comaptible-sve' flag overrides the streaming
485*06c3fb27SDimitry Andric   // function attributes.
486*06c3fb27SDimitry Andric   if (ForceStreamingCompatibleSVE.getNumOccurrences() > 0)
487*06c3fb27SDimitry Andric     return !ForceStreamingCompatibleSVE;
488*06c3fb27SDimitry Andric 
489*06c3fb27SDimitry Andric   return !isStreaming() && !isStreamingCompatible();
490bdd1243dSDimitry Andric }
491