xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp (revision 4c2d3b022a1d543dbbff75a0c53e8d3d7242216d)
//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64-specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "AArch64Subtarget.h"
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AArch64.h"
160b57cec5SDimitry Andric #include "AArch64InstrInfo.h"
170b57cec5SDimitry Andric #include "AArch64PBQPRegAlloc.h"
180b57cec5SDimitry Andric #include "AArch64TargetMachine.h"
195ffd83dbSDimitry Andric #include "GISel/AArch64CallLowering.h"
205ffd83dbSDimitry Andric #include "GISel/AArch64LegalizerInfo.h"
215ffd83dbSDimitry Andric #include "GISel/AArch64RegisterBankInfo.h"
220b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
2481ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h"
260b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h"
2706c3fb27SDimitry Andric #include "llvm/TargetParser/AArch64TargetParser.h"
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric using namespace llvm;
300b57cec5SDimitry Andric 
310b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-subtarget"
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR
340b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC
350b57cec5SDimitry Andric #include "AArch64GenSubtargetInfo.inc"
360b57cec5SDimitry Andric 
370b57cec5SDimitry Andric static cl::opt<bool>
380b57cec5SDimitry Andric EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
390b57cec5SDimitry Andric                      "converter pass"), cl::init(true), cl::Hidden);
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric // If OS supports TBI, use this flag to enable it.
420b57cec5SDimitry Andric static cl::opt<bool>
430b57cec5SDimitry Andric UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
440b57cec5SDimitry Andric                          "an address is ignored"), cl::init(false), cl::Hidden);
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric static cl::opt<bool>
470b57cec5SDimitry Andric     UseNonLazyBind("aarch64-enable-nonlazybind",
480b57cec5SDimitry Andric                    cl::desc("Call nonlazybind functions via direct GOT load"),
490b57cec5SDimitry Andric                    cl::init(false), cl::Hidden);
500b57cec5SDimitry Andric 
51fe6060f1SDimitry Andric static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
52fe6060f1SDimitry Andric                            cl::desc("Enable the use of AA during codegen."));
535ffd83dbSDimitry Andric 
5481ad6265SDimitry Andric static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
5581ad6265SDimitry Andric     "aarch64-insert-extract-base-cost",
5681ad6265SDimitry Andric     cl::desc("Base cost of vector insert/extract element"), cl::Hidden);
5781ad6265SDimitry Andric 
58bdd1243dSDimitry Andric // Reserve a list of X# registers, so they are unavailable for register
59bdd1243dSDimitry Andric // allocator, but can still be used as ABI requests, such as passing arguments
60bdd1243dSDimitry Andric // to function call.
61bdd1243dSDimitry Andric static cl::list<std::string>
62bdd1243dSDimitry Andric ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
63bdd1243dSDimitry Andric                   "registers, so they can't be used by register allocator. "
64bdd1243dSDimitry Andric                   "Should only be used for testing register allocator."),
65bdd1243dSDimitry Andric                   cl::CommaSeparated, cl::Hidden);
66bdd1243dSDimitry Andric 
6706c3fb27SDimitry Andric static cl::opt<bool> ForceStreamingCompatibleSVE(
6806c3fb27SDimitry Andric     "force-streaming-compatible-sve",
6906c3fb27SDimitry Andric     cl::desc(
7006c3fb27SDimitry Andric         "Force the use of streaming-compatible SVE code for all functions"),
7106c3fb27SDimitry Andric     cl::Hidden);
72bdd1243dSDimitry Andric 
735f757f3fSDimitry Andric static cl::opt<AArch64PAuth::AuthCheckMethod>
745f757f3fSDimitry Andric     AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
755f757f3fSDimitry Andric                                cl::Hidden,
765f757f3fSDimitry Andric                                cl::desc("Override the variant of check applied "
775f757f3fSDimitry Andric                                         "to authenticated LR during tail call"),
785f757f3fSDimitry Andric                                cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));
795f757f3fSDimitry Andric 
805f757f3fSDimitry Andric static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
815f757f3fSDimitry Andric     "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
825f757f3fSDimitry Andric     cl::desc("Set minimum number of entries to use a jump table on AArch64"));
835f757f3fSDimitry Andric 
8481ad6265SDimitry Andric unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
8581ad6265SDimitry Andric   if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
8681ad6265SDimitry Andric     return OverrideVectorInsertExtractBaseCost;
8781ad6265SDimitry Andric   return VectorInsertExtractBaseCost;
8881ad6265SDimitry Andric }
8981ad6265SDimitry Andric 
90349cc55cSDimitry Andric AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
915f757f3fSDimitry Andric     StringRef FS, StringRef CPUString, StringRef TuneCPUString,
925f757f3fSDimitry Andric     bool HasMinSize) {
930b57cec5SDimitry Andric   // Determine default and user-specified characteristics
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric   if (CPUString.empty())
960b57cec5SDimitry Andric     CPUString = "generic";
970b57cec5SDimitry Andric 
98349cc55cSDimitry Andric   if (TuneCPUString.empty())
99349cc55cSDimitry Andric     TuneCPUString = CPUString;
100349cc55cSDimitry Andric 
101349cc55cSDimitry Andric   ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
1025f757f3fSDimitry Andric   initializeProperties(HasMinSize);
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric   return *this;
1050b57cec5SDimitry Andric }
1060b57cec5SDimitry Andric 
1075f757f3fSDimitry Andric void AArch64Subtarget::initializeProperties(bool HasMinSize) {
1080b57cec5SDimitry Andric   // Initialize CPU specific properties. We should add a tablegen feature for
1090b57cec5SDimitry Andric   // this in the future so we can specify it together with the subtarget
1100b57cec5SDimitry Andric   // features.
1110b57cec5SDimitry Andric   switch (ARMProcFamily) {
1120b57cec5SDimitry Andric   case Others:
1130b57cec5SDimitry Andric     break;
1145ffd83dbSDimitry Andric   case Carmel:
1155ffd83dbSDimitry Andric     CacheLineSize = 64;
1165ffd83dbSDimitry Andric     break;
1170b57cec5SDimitry Andric   case CortexA35:
1180b57cec5SDimitry Andric   case CortexA53:
1190b57cec5SDimitry Andric   case CortexA55:
12006c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
12106c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
12281ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
1230b57cec5SDimitry Andric     break;
1240b57cec5SDimitry Andric   case CortexA57:
1250b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
12606c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
12706c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
12881ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
1298bcb0991SDimitry Andric     break;
1308bcb0991SDimitry Andric   case CortexA65:
13106c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
1320b57cec5SDimitry Andric     break;
1330b57cec5SDimitry Andric   case CortexA72:
1340b57cec5SDimitry Andric   case CortexA73:
1350b57cec5SDimitry Andric   case CortexA75:
13606c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
13706c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
13881ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
13981ad6265SDimitry Andric     break;
1400b57cec5SDimitry Andric   case CortexA76:
1415ffd83dbSDimitry Andric   case CortexA77:
1425ffd83dbSDimitry Andric   case CortexA78:
143e8d8bef9SDimitry Andric   case CortexA78C:
144e8d8bef9SDimitry Andric   case CortexR82:
1455ffd83dbSDimitry Andric   case CortexX1:
1461fd87a68SDimitry Andric   case CortexX1C:
14706c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
14806c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
14981ad6265SDimitry Andric     MaxBytesForLoopAlignment = 16;
1500b57cec5SDimitry Andric     break;
151349cc55cSDimitry Andric   case CortexA510:
1525f757f3fSDimitry Andric   case CortexA520:
15306c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
15481ad6265SDimitry Andric     VScaleForTuning = 1;
15506c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
15681ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
15781ad6265SDimitry Andric     break;
158349cc55cSDimitry Andric   case CortexA710:
159bdd1243dSDimitry Andric   case CortexA715:
1605f757f3fSDimitry Andric   case CortexA720:
161349cc55cSDimitry Andric   case CortexX2:
162bdd1243dSDimitry Andric   case CortexX3:
1635f757f3fSDimitry Andric   case CortexX4:
16406c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
165349cc55cSDimitry Andric     VScaleForTuning = 1;
16606c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
16781ad6265SDimitry Andric     MaxBytesForLoopAlignment = 16;
168349cc55cSDimitry Andric     break;
1695ffd83dbSDimitry Andric   case A64FX:
1705ffd83dbSDimitry Andric     CacheLineSize = 256;
17106c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
17206c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
173e8d8bef9SDimitry Andric     MaxInterleaveFactor = 4;
174e8d8bef9SDimitry Andric     PrefetchDistance = 128;
175e8d8bef9SDimitry Andric     MinPrefetchStride = 1024;
176e8d8bef9SDimitry Andric     MaxPrefetchIterationsAhead = 4;
177349cc55cSDimitry Andric     VScaleForTuning = 4;
1785ffd83dbSDimitry Andric     break;
179480093f4SDimitry Andric   case AppleA7:
180480093f4SDimitry Andric   case AppleA10:
181480093f4SDimitry Andric   case AppleA11:
182480093f4SDimitry Andric   case AppleA12:
183480093f4SDimitry Andric   case AppleA13:
184e8d8bef9SDimitry Andric   case AppleA14:
185bdd1243dSDimitry Andric   case AppleA15:
186bdd1243dSDimitry Andric   case AppleA16:
1875f757f3fSDimitry Andric   case AppleA17:
1880b57cec5SDimitry Andric     CacheLineSize = 64;
1890b57cec5SDimitry Andric     PrefetchDistance = 280;
1900b57cec5SDimitry Andric     MinPrefetchStride = 2048;
1910b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 3;
192bdd1243dSDimitry Andric     switch (ARMProcFamily) {
193bdd1243dSDimitry Andric     case AppleA14:
194bdd1243dSDimitry Andric     case AppleA15:
195bdd1243dSDimitry Andric     case AppleA16:
1965f757f3fSDimitry Andric     case AppleA17:
197bdd1243dSDimitry Andric       MaxInterleaveFactor = 4;
198bdd1243dSDimitry Andric       break;
199bdd1243dSDimitry Andric     default:
200bdd1243dSDimitry Andric       break;
201bdd1243dSDimitry Andric     }
2020b57cec5SDimitry Andric     break;
2030b57cec5SDimitry Andric   case ExynosM3:
2040b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2050b57cec5SDimitry Andric     MaxJumpTableSize = 20;
20606c3fb27SDimitry Andric     PrefFunctionAlignment = Align(32);
20706c3fb27SDimitry Andric     PrefLoopAlignment = Align(16);
2080b57cec5SDimitry Andric     break;
2090b57cec5SDimitry Andric   case Falkor:
2100b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2110b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2120b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2130b57cec5SDimitry Andric     CacheLineSize = 128;
2140b57cec5SDimitry Andric     PrefetchDistance = 820;
2150b57cec5SDimitry Andric     MinPrefetchStride = 2048;
2160b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 8;
2170b57cec5SDimitry Andric     break;
2180b57cec5SDimitry Andric   case Kryo:
2190b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2200b57cec5SDimitry Andric     VectorInsertExtractBaseCost = 2;
2210b57cec5SDimitry Andric     CacheLineSize = 128;
2220b57cec5SDimitry Andric     PrefetchDistance = 740;
2230b57cec5SDimitry Andric     MinPrefetchStride = 1024;
2240b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 11;
2250b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2260b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2270b57cec5SDimitry Andric     break;
2288bcb0991SDimitry Andric   case NeoverseE1:
22906c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
2308bcb0991SDimitry Andric     break;
2318bcb0991SDimitry Andric   case NeoverseN1:
23206c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
23306c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
23404eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
235349cc55cSDimitry Andric     break;
236e8d8bef9SDimitry Andric   case NeoverseN2:
237bdd1243dSDimitry Andric   case NeoverseV2:
23806c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
23906c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
24004eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
241349cc55cSDimitry Andric     VScaleForTuning = 1;
242349cc55cSDimitry Andric     break;
243e8d8bef9SDimitry Andric   case NeoverseV1:
24406c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
24506c3fb27SDimitry Andric     PrefLoopAlignment = Align(32);
24604eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
247349cc55cSDimitry Andric     VScaleForTuning = 2;
24806c3fb27SDimitry Andric     DefaultSVETFOpts = TailFoldingOpts::Simple;
249349cc55cSDimitry Andric     break;
250349cc55cSDimitry Andric   case Neoverse512TVB:
25106c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
252349cc55cSDimitry Andric     VScaleForTuning = 1;
253349cc55cSDimitry Andric     MaxInterleaveFactor = 4;
2548bcb0991SDimitry Andric     break;
2550b57cec5SDimitry Andric   case Saphira:
2560b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2570b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2580b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2590b57cec5SDimitry Andric     break;
2600b57cec5SDimitry Andric   case ThunderX2T99:
2610b57cec5SDimitry Andric     CacheLineSize = 64;
26206c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
26306c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
2640b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2650b57cec5SDimitry Andric     PrefetchDistance = 128;
2660b57cec5SDimitry Andric     MinPrefetchStride = 1024;
2670b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 4;
2680b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2690b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2700b57cec5SDimitry Andric     break;
2710b57cec5SDimitry Andric   case ThunderX:
2720b57cec5SDimitry Andric   case ThunderXT88:
2730b57cec5SDimitry Andric   case ThunderXT81:
2740b57cec5SDimitry Andric   case ThunderXT83:
2750b57cec5SDimitry Andric     CacheLineSize = 128;
27606c3fb27SDimitry Andric     PrefFunctionAlignment = Align(8);
27706c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
2780b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2790b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2800b57cec5SDimitry Andric     break;
2810b57cec5SDimitry Andric   case TSV110:
2820b57cec5SDimitry Andric     CacheLineSize = 64;
28306c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
28406c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
2850b57cec5SDimitry Andric     break;
286e837bb5cSDimitry Andric   case ThunderX3T110:
287e837bb5cSDimitry Andric     CacheLineSize = 64;
28806c3fb27SDimitry Andric     PrefFunctionAlignment = Align(16);
28906c3fb27SDimitry Andric     PrefLoopAlignment = Align(4);
290e837bb5cSDimitry Andric     MaxInterleaveFactor = 4;
291e837bb5cSDimitry Andric     PrefetchDistance = 128;
292e837bb5cSDimitry Andric     MinPrefetchStride = 1024;
293e837bb5cSDimitry Andric     MaxPrefetchIterationsAhead = 4;
294e837bb5cSDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
295e837bb5cSDimitry Andric     MinVectorRegisterBitWidth = 128;
296e837bb5cSDimitry Andric     break;
2972a66634dSDimitry Andric   case Ampere1:
298bdd1243dSDimitry Andric   case Ampere1A:
299*4c2d3b02SDimitry Andric   case Ampere1B:
3002a66634dSDimitry Andric     CacheLineSize = 64;
30106c3fb27SDimitry Andric     PrefFunctionAlignment = Align(64);
30206c3fb27SDimitry Andric     PrefLoopAlignment = Align(64);
3032a66634dSDimitry Andric     MaxInterleaveFactor = 4;
3042a66634dSDimitry Andric     break;
3050b57cec5SDimitry Andric   }
3065f757f3fSDimitry Andric 
3075f757f3fSDimitry Andric   if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
3085f757f3fSDimitry Andric     MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
3090b57cec5SDimitry Andric }
3100b57cec5SDimitry Andric 
311bdd1243dSDimitry Andric AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
312bdd1243dSDimitry Andric                                    StringRef TuneCPU, StringRef FS,
313fe6060f1SDimitry Andric                                    const TargetMachine &TM, bool LittleEndian,
314fe6060f1SDimitry Andric                                    unsigned MinSVEVectorSizeInBitsOverride,
315bdd1243dSDimitry Andric                                    unsigned MaxSVEVectorSizeInBitsOverride,
31606c3fb27SDimitry Andric                                    bool StreamingSVEMode,
3175f757f3fSDimitry Andric                                    bool StreamingCompatibleSVEMode,
3185f757f3fSDimitry Andric                                    bool HasMinSize)
319349cc55cSDimitry Andric     : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
3200b57cec5SDimitry Andric       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
321bdd1243dSDimitry Andric       ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
3220b57cec5SDimitry Andric       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
3235f757f3fSDimitry Andric       IsLittle(LittleEndian), StreamingSVEMode(StreamingSVEMode),
32406c3fb27SDimitry Andric       StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
325fe6060f1SDimitry Andric       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
326fe6060f1SDimitry Andric       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
3275f757f3fSDimitry Andric       InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
328349cc55cSDimitry Andric       TLInfo(TM, *this) {
3290b57cec5SDimitry Andric   if (AArch64::isX18ReservedByDefault(TT))
3300b57cec5SDimitry Andric     ReserveXRegister.set(18);
3310b57cec5SDimitry Andric 
3320b57cec5SDimitry Andric   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
3335ffd83dbSDimitry Andric   InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
3340b57cec5SDimitry Andric   Legalizer.reset(new AArch64LegalizerInfo(*this));
3350b57cec5SDimitry Andric 
3360b57cec5SDimitry Andric   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
3370b57cec5SDimitry Andric 
3380b57cec5SDimitry Andric   // FIXME: At this point, we can't rely on Subtarget having RBI.
3390b57cec5SDimitry Andric   // It's awkward to mix passing RBI and the Subtarget; should we pass
3400b57cec5SDimitry Andric   // TII/TRI as well?
3410b57cec5SDimitry Andric   InstSelector.reset(createAArch64InstructionSelector(
3420b57cec5SDimitry Andric       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
3430b57cec5SDimitry Andric 
3440b57cec5SDimitry Andric   RegBankInfo.reset(RBI);
345bdd1243dSDimitry Andric 
346bdd1243dSDimitry Andric   auto TRI = getRegisterInfo();
347bdd1243dSDimitry Andric   StringSet<> ReservedRegNames;
348bdd1243dSDimitry Andric   ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
349bdd1243dSDimitry Andric   for (unsigned i = 0; i < 29; ++i) {
350bdd1243dSDimitry Andric     if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
351bdd1243dSDimitry Andric       ReserveXRegisterForRA.set(i);
352bdd1243dSDimitry Andric   }
353bdd1243dSDimitry Andric   // X30 is named LR, so we can't use TRI->getName to check X30.
354bdd1243dSDimitry Andric   if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
355bdd1243dSDimitry Andric     ReserveXRegisterForRA.set(30);
356bdd1243dSDimitry Andric   // X29 is named FP, so we can't use TRI->getName to check X29.
357bdd1243dSDimitry Andric   if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
358bdd1243dSDimitry Andric     ReserveXRegisterForRA.set(29);
3595f757f3fSDimitry Andric 
3605f757f3fSDimitry Andric   AddressCheckPSV.reset(new AddressCheckPseudoSourceValue(TM));
3610b57cec5SDimitry Andric }
3620b57cec5SDimitry Andric 
3630b57cec5SDimitry Andric const CallLowering *AArch64Subtarget::getCallLowering() const {
3640b57cec5SDimitry Andric   return CallLoweringInfo.get();
3650b57cec5SDimitry Andric }
3660b57cec5SDimitry Andric 
3675ffd83dbSDimitry Andric const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
3685ffd83dbSDimitry Andric   return InlineAsmLoweringInfo.get();
3695ffd83dbSDimitry Andric }
3705ffd83dbSDimitry Andric 
3718bcb0991SDimitry Andric InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
3720b57cec5SDimitry Andric   return InstSelector.get();
3730b57cec5SDimitry Andric }
3740b57cec5SDimitry Andric 
3750b57cec5SDimitry Andric const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
3760b57cec5SDimitry Andric   return Legalizer.get();
3770b57cec5SDimitry Andric }
3780b57cec5SDimitry Andric 
3790b57cec5SDimitry Andric const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
3800b57cec5SDimitry Andric   return RegBankInfo.get();
3810b57cec5SDimitry Andric }
3820b57cec5SDimitry Andric 
3830b57cec5SDimitry Andric /// Find the target operand flags that describe how a global value should be
3840b57cec5SDimitry Andric /// referenced for the current subtarget.
3858bcb0991SDimitry Andric unsigned
3860b57cec5SDimitry Andric AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
3870b57cec5SDimitry Andric                                           const TargetMachine &TM) const {
3880b57cec5SDimitry Andric   // MachO large model always goes via a GOT, simply to get a single 8-byte
3890b57cec5SDimitry Andric   // absolute relocation on all global addresses.
3900b57cec5SDimitry Andric   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
3910b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3920b57cec5SDimitry Andric 
39306c3fb27SDimitry Andric   // All globals dynamically protected by MTE must have their address tags
39406c3fb27SDimitry Andric   // synthesized. This is done by having the loader stash the tag in the GOT
39506c3fb27SDimitry Andric   // entry. Force all tagged globals (even ones with internal linkage) through
39606c3fb27SDimitry Andric   // the GOT.
39706c3fb27SDimitry Andric   if (GV->isTagged())
39806c3fb27SDimitry Andric     return AArch64II::MO_GOT;
39906c3fb27SDimitry Andric 
4000b57cec5SDimitry Andric   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
401bdd1243dSDimitry Andric     if (GV->hasDLLImportStorageClass()) {
4020b57cec5SDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
403bdd1243dSDimitry Andric     }
4040b57cec5SDimitry Andric     if (getTargetTriple().isOSWindows())
4050b57cec5SDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
4060b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4070b57cec5SDimitry Andric   }
4080b57cec5SDimitry Andric 
4090b57cec5SDimitry Andric   // The small code model's direct accesses use ADRP, which cannot
4100b57cec5SDimitry Andric   // necessarily produce the value 0 (if the code is above 4GB).
4110b57cec5SDimitry Andric   // Same for the tiny code model, where we have a pc relative LDR.
4120b57cec5SDimitry Andric   if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
4130b57cec5SDimitry Andric       GV->hasExternalWeakLinkage())
4140b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4150b57cec5SDimitry Andric 
4168bcb0991SDimitry Andric   // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
4178bcb0991SDimitry Andric   // that their nominal addresses are tagged and outside of the code model. In
4188bcb0991SDimitry Andric   // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
4198bcb0991SDimitry Andric   // tag if necessary based on MO_TAGGED.
4208bcb0991SDimitry Andric   if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
4218bcb0991SDimitry Andric     return AArch64II::MO_NC | AArch64II::MO_TAGGED;
4228bcb0991SDimitry Andric 
4230b57cec5SDimitry Andric   return AArch64II::MO_NO_FLAG;
4240b57cec5SDimitry Andric }
4250b57cec5SDimitry Andric 
4268bcb0991SDimitry Andric unsigned AArch64Subtarget::classifyGlobalFunctionReference(
4270b57cec5SDimitry Andric     const GlobalValue *GV, const TargetMachine &TM) const {
4280b57cec5SDimitry Andric   // MachO large model always goes via a GOT, because we don't have the
4290b57cec5SDimitry Andric   // relocations available to do anything else..
4300b57cec5SDimitry Andric   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
4310b57cec5SDimitry Andric       !GV->hasInternalLinkage())
4320b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4330b57cec5SDimitry Andric 
4340b57cec5SDimitry Andric   // NonLazyBind goes via GOT unless we know it's available locally.
4350b57cec5SDimitry Andric   auto *F = dyn_cast<Function>(GV);
4360b57cec5SDimitry Andric   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
4370b57cec5SDimitry Andric       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
4380b57cec5SDimitry Andric     return AArch64II::MO_GOT;
4390b57cec5SDimitry Andric 
440bdd1243dSDimitry Andric   if (getTargetTriple().isOSWindows()) {
4417a6dacacSDimitry Andric     if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
4427a6dacacSDimitry Andric       if (GV->hasDLLImportStorageClass()) {
4437a6dacacSDimitry Andric         // On Arm64EC, if we're calling a symbol from the import table
4447a6dacacSDimitry Andric         // directly, use MO_ARM64EC_CALLMANGLE.
4457a6dacacSDimitry Andric         return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT |
4467a6dacacSDimitry Andric                AArch64II::MO_ARM64EC_CALLMANGLE;
4477a6dacacSDimitry Andric       }
4487a6dacacSDimitry Andric       if (GV->hasExternalLinkage()) {
4497a6dacacSDimitry Andric         // If we're calling a symbol directly, use the mangled form in the
4507a6dacacSDimitry Andric         // call instruction.
4517a6dacacSDimitry Andric         return AArch64II::MO_ARM64EC_CALLMANGLE;
4527a6dacacSDimitry Andric       }
453bdd1243dSDimitry Andric     }
454bdd1243dSDimitry Andric 
455480093f4SDimitry Andric     // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
456480093f4SDimitry Andric     return ClassifyGlobalReference(GV, TM);
457bdd1243dSDimitry Andric   }
458480093f4SDimitry Andric 
4590b57cec5SDimitry Andric   return AArch64II::MO_NO_FLAG;
4600b57cec5SDimitry Andric }
4610b57cec5SDimitry Andric 
4620b57cec5SDimitry Andric void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
4630b57cec5SDimitry Andric                                            unsigned NumRegionInstrs) const {
4640b57cec5SDimitry Andric   // LNT run (at least on Cyclone) showed reasonably significant gains for
4650b57cec5SDimitry Andric   // bi-directional scheduling. 253.perlbmk.
4660b57cec5SDimitry Andric   Policy.OnlyTopDown = false;
4670b57cec5SDimitry Andric   Policy.OnlyBottomUp = false;
4680b57cec5SDimitry Andric   // Enabling or Disabling the latency heuristic is a close call: It seems to
4690b57cec5SDimitry Andric   // help nearly no benchmark on out-of-order architectures, on the other hand
4700b57cec5SDimitry Andric   // it regresses register pressure on a few benchmarking.
4710b57cec5SDimitry Andric   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
4720b57cec5SDimitry Andric }
4730b57cec5SDimitry Andric 
4740b57cec5SDimitry Andric bool AArch64Subtarget::enableEarlyIfConversion() const {
4750b57cec5SDimitry Andric   return EnableEarlyIfConvert;
4760b57cec5SDimitry Andric }
4770b57cec5SDimitry Andric 
4780b57cec5SDimitry Andric bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
4790b57cec5SDimitry Andric   if (!UseAddressTopByteIgnored)
4800b57cec5SDimitry Andric     return false;
4810b57cec5SDimitry Andric 
48281ad6265SDimitry Andric   if (TargetTriple.isDriverKit())
48381ad6265SDimitry Andric     return true;
4840b57cec5SDimitry Andric   if (TargetTriple.isiOS()) {
4850eae32dcSDimitry Andric     return TargetTriple.getiOSVersion() >= VersionTuple(8);
4860b57cec5SDimitry Andric   }
4870b57cec5SDimitry Andric 
4880b57cec5SDimitry Andric   return false;
4890b57cec5SDimitry Andric }
4900b57cec5SDimitry Andric 
4910b57cec5SDimitry Andric std::unique_ptr<PBQPRAConstraint>
4920b57cec5SDimitry Andric AArch64Subtarget::getCustomPBQPConstraints() const {
4938bcb0991SDimitry Andric   return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
4940b57cec5SDimitry Andric }
4950b57cec5SDimitry Andric 
4960b57cec5SDimitry Andric void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
4970b57cec5SDimitry Andric   // We usually compute max call frame size after ISel. Do the computation now
4980b57cec5SDimitry Andric   // if the .mir file didn't specify it. Note that this will probably give you
4990b57cec5SDimitry Andric   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
5000b57cec5SDimitry Andric   // instructions, specify explicitly if you need it to be correct.
5010b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
5020b57cec5SDimitry Andric   if (!MFI.isMaxCallFrameSizeComputed())
5030b57cec5SDimitry Andric     MFI.computeMaxCallFrameSize(MF);
5040b57cec5SDimitry Andric }
5055ffd83dbSDimitry Andric 
506fe6060f1SDimitry Andric bool AArch64Subtarget::useAA() const { return UseAA; }
507bdd1243dSDimitry Andric 
5085f757f3fSDimitry Andric bool AArch64Subtarget::isStreamingCompatible() const {
5095f757f3fSDimitry Andric   return StreamingCompatibleSVEMode || ForceStreamingCompatibleSVE;
5105f757f3fSDimitry Andric }
5115f757f3fSDimitry Andric 
51206c3fb27SDimitry Andric bool AArch64Subtarget::isNeonAvailable() const {
5135f757f3fSDimitry Andric   return hasNEON() &&
5145f757f3fSDimitry Andric          (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
5155f757f3fSDimitry Andric }
51606c3fb27SDimitry Andric 
5175f757f3fSDimitry Andric bool AArch64Subtarget::isSVEAvailable() const {
5185f757f3fSDimitry Andric   return hasSVE() &&
5195f757f3fSDimitry Andric          (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
5205f757f3fSDimitry Andric }
52106c3fb27SDimitry Andric 
5225f757f3fSDimitry Andric // If return address signing is enabled, tail calls are emitted as follows:
5235f757f3fSDimitry Andric //
5245f757f3fSDimitry Andric // ```
5255f757f3fSDimitry Andric //   <authenticate LR>
5265f757f3fSDimitry Andric //   <check LR>
5275f757f3fSDimitry Andric //   TCRETURN          ; the callee may sign and spill the LR in its prologue
5285f757f3fSDimitry Andric // ```
5295f757f3fSDimitry Andric //
5305f757f3fSDimitry Andric // LR may require explicit checking because if FEAT_FPAC is not implemented
5315f757f3fSDimitry Andric // and LR was tampered with, then `<authenticate LR>` will not generate an
5325f757f3fSDimitry Andric // exception on its own. Later, if the callee spills the signed LR value and
5335f757f3fSDimitry Andric // neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
5345f757f3fSDimitry Andric // the higher bits of LR thus hiding the authentication failure.
5355f757f3fSDimitry Andric AArch64PAuth::AuthCheckMethod
5365f757f3fSDimitry Andric AArch64Subtarget::getAuthenticatedLRCheckMethod() const {
5375f757f3fSDimitry Andric   if (AuthenticatedLRCheckMethod.getNumOccurrences())
5385f757f3fSDimitry Andric     return AuthenticatedLRCheckMethod;
5395f757f3fSDimitry Andric 
5405f757f3fSDimitry Andric   // At now, use None by default because checks may introduce an unexpected
5415f757f3fSDimitry Andric   // performance regression or incompatibility with execute-only mappings.
5425f757f3fSDimitry Andric   return AArch64PAuth::AuthCheckMethod::None;
543bdd1243dSDimitry Andric }
544