xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool>
    UseNonLazyBind("aarch64-enable-nonlazybind",
                   cl::desc("Call nonlazybind functions via direct GOT load"),
                   cl::init(false), cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));

static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);

// Reserve a list of X# registers, so they are unavailable to the register
// allocator but can still be used as required by the ABI, such as for passing
// arguments to a function call.
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by register allocator. "
                  "Should only be used for testing register allocator."),
                  cl::CommaSeparated, cl::Hidden);

static cl::opt<bool> ForceStreamingCompatibleSVE(
    "force-streaming-compatible-sve",
    cl::desc(
        "Force the use of streaming-compatible SVE code for all functions"),
    cl::Hidden);

static cl::opt<AArch64PAuth::AuthCheckMethod>
    AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
                               cl::Hidden,
                               cl::desc("Override the variant of check applied "
                                        "to authenticated LR during tail call"),
                               cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));

static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
    "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on AArch64"));

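// Return the base cost of a vector insert/extract element: the value of
// -aarch64-insert-extract-base-cost if it was given, otherwise the CPU-tuned
// default.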
unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
  if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
    return OverrideVectorInsertExtractBaseCost;
  return VectorInsertExtractBaseCost;
}

AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
    StringRef FS, StringRef CPUString, StringRef TuneCPUString,
    bool HasMinSize) {
  // Determine default and user-specified characteristics

  if (CPUString.empty())
    CPUString = "generic";

  if (TuneCPUString.empty())
    TuneCPUString = CPUString;

  ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
  initializeProperties(HasMinSize);

  return *this;
}

void AArch64Subtarget::initializeProperties(bool HasMinSize) {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Others:
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionAlignment = Align(8);
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78C:
  case CortexR82:
  case CortexX1:
  case CortexX1C:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA510:
  case CortexA520:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexA720:
  case CortexX2:
  case CortexX3:
  case CortexX4:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
  case AppleA17:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    switch (ARMProcFamily) {
    case AppleA14:
    case AppleA15:
    case AppleA16:
    case AppleA17:
      MaxInterleaveFactor = 4;
      break;
    default:
      break;
    }
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionAlignment = Align(32);
    PrefLoopAlignment = Align(16);
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionAlignment = Align(8);
    break;
  case NeoverseN1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseN2:
  case NeoverseV2:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    DefaultSVETFOpts = TailFoldingOpts::Simple;
    break;
  case Neoverse512TVB:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    MaxInterleaveFactor = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(64);
    PrefLoopAlignment = Align(64);
    MaxInterleaveFactor = 4;
    break;
  }

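  // Keep the target's default minimum jump-table size for minsize functions,
  // unless -aarch64-min-jump-table-entries was set explicitly.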
  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
    MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool StreamingSVEMode,
                                   bool StreamingCompatibleSVEMode,
                                   bool HasMinSize)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian), StreamingSVEMode(StreamingSVEMode),
      StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
      TLInfo(TM, *this) {
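  // X18 is the platform register on some targets (for example, Darwin and
  // Windows); reserve it where the ABI requires it.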
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

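  // Set up GlobalISel support: call lowering, inline-asm lowering,
  // legalization, register bank information and instruction selection.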
  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

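  // Mark any X registers named via -reserve-regs-for-regalloc as unavailable
  // to the register allocator.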
  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames;
  ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);

  AddressCheckPSV.reset(new AddressCheckPseudoSourceValue(TM));
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  // All globals dynamically protected by MTE must have their address tags
  // synthesized. This is done by having the loader stash the tag in the GOT
  // entry. Force all tagged globals (even ones with internal linkage) through
  // the GOT.
  if (GV->isTagged())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
    if (GV->hasDLLImportStorageClass()) {
      if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy())
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORTAUX;
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}

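/// Find the target operand flags that describe how a global function should be
/// referenced from a call site for the current subtarget.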
unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
      !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy() &&
        GV->hasDLLImportStorageClass()) {
      // On Arm64EC, if we're calling a function directly, use MO_DLLIMPORT,
      // not MO_DLLIMPORTAUX.
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // An LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling, e.g. on 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help almost no benchmark on out-of-order architectures, while on the other
  // hand it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

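// Only claim support for top-byte ignore (TBI) when -aarch64-use-tbi is given
// and the target OS is one where TBI is known to be usable: DriverKit, or iOS
// 8 and later.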
bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isDriverKit())
    return true;
  if (TargetTriple.isiOS()) {
    return TargetTriple.getiOSVersion() >= VersionTuple(8);
  }

  return false;
}

std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute the max call frame size after ISel. Do the computation
  // now if the .mir file didn't specify it. Note that this will probably give
  // you bogus values after PEI has eliminated the callframe setup/destroy
  // pseudo instructions, so specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}

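// Alias analysis is used during codegen by default; -aarch64-use-aa=false
// turns it off.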
bool AArch64Subtarget::useAA() const { return UseAA; }

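// A function is treated as streaming-compatible if it is marked as such, or if
// -force-streaming-compatible-sve is given.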
bool AArch64Subtarget::isStreamingCompatible() const {
  return StreamingCompatibleSVEMode || ForceStreamingCompatibleSVE;
}

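// NEON is only considered available when the target has NEON and the function
// is neither streaming nor streaming-compatible, unless FEAT_SME_FA64
// (SMEFA64) is implemented.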
bool AArch64Subtarget::isNeonAvailable() const {
  return hasNEON() &&
         (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
}

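// Likewise, full SVE is only available outside streaming(-compatible) mode
// unless FEAT_SME_FA64 is implemented.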
bool AArch64Subtarget::isSVEAvailable() const {
  return hasSVE() &&
         (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
}

// If return address signing is enabled, tail calls are emitted as follows:
//
// ```
//   <authenticate LR>
//   <check LR>
//   TCRETURN          ; the callee may sign and spill the LR in its prologue
// ```
//
// LR may require explicit checking because if FEAT_FPAC is not implemented
// and LR was tampered with, then `<authenticate LR>` will not generate an
// exception on its own. Later, if the callee spills the signed LR value and
// neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
// the higher bits of LR thus hiding the authentication failure.
AArch64PAuth::AuthCheckMethod
AArch64Subtarget::getAuthenticatedLRCheckMethod() const {
  if (AuthenticatedLRCheckMethod.getNumOccurrences())
    return AuthenticatedLRCheckMethod;

  // For now, use None by default because checks may introduce an unexpected
  // performance regression or incompatibility with execute-only mappings.
  return AArch64PAuth::AuthCheckMethod::None;
}