// xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "AArch64Subtarget.h"
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AArch64.h"
160b57cec5SDimitry Andric #include "AArch64InstrInfo.h"
170b57cec5SDimitry Andric #include "AArch64PBQPRegAlloc.h"
180b57cec5SDimitry Andric #include "AArch64TargetMachine.h"
195ffd83dbSDimitry Andric #include "GISel/AArch64CallLowering.h"
205ffd83dbSDimitry Andric #include "GISel/AArch64LegalizerInfo.h"
215ffd83dbSDimitry Andric #include "GISel/AArch64RegisterBankInfo.h"
220b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
24*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h"
260b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h"
2704eeddc0SDimitry Andric #include "llvm/Support/AArch64TargetParser.h"
280b57cec5SDimitry Andric #include "llvm/Support/TargetParser.h"
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric using namespace llvm;
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-subtarget"
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR
350b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC
360b57cec5SDimitry Andric #include "AArch64GenSubtargetInfo.inc"
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric static cl::opt<bool>
390b57cec5SDimitry Andric EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
400b57cec5SDimitry Andric                      "converter pass"), cl::init(true), cl::Hidden);
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric // If OS supports TBI, use this flag to enable it.
430b57cec5SDimitry Andric static cl::opt<bool>
440b57cec5SDimitry Andric UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
450b57cec5SDimitry Andric                          "an address is ignored"), cl::init(false), cl::Hidden);
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric static cl::opt<bool>
480b57cec5SDimitry Andric     UseNonLazyBind("aarch64-enable-nonlazybind",
490b57cec5SDimitry Andric                    cl::desc("Call nonlazybind functions via direct GOT load"),
500b57cec5SDimitry Andric                    cl::init(false), cl::Hidden);
510b57cec5SDimitry Andric 
52fe6060f1SDimitry Andric static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
53fe6060f1SDimitry Andric                            cl::desc("Enable the use of AA during codegen."));
545ffd83dbSDimitry Andric 
55*81ad6265SDimitry Andric static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
56*81ad6265SDimitry Andric     "aarch64-insert-extract-base-cost",
57*81ad6265SDimitry Andric     cl::desc("Base cost of vector insert/extract element"), cl::Hidden);
58*81ad6265SDimitry Andric 
59*81ad6265SDimitry Andric unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
60*81ad6265SDimitry Andric   if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
61*81ad6265SDimitry Andric     return OverrideVectorInsertExtractBaseCost;
62*81ad6265SDimitry Andric   return VectorInsertExtractBaseCost;
63*81ad6265SDimitry Andric }
64*81ad6265SDimitry Andric 
65349cc55cSDimitry Andric AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
66349cc55cSDimitry Andric     StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
670b57cec5SDimitry Andric   // Determine default and user-specified characteristics
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric   if (CPUString.empty())
700b57cec5SDimitry Andric     CPUString = "generic";
710b57cec5SDimitry Andric 
72349cc55cSDimitry Andric   if (TuneCPUString.empty())
73349cc55cSDimitry Andric     TuneCPUString = CPUString;
74349cc55cSDimitry Andric 
75349cc55cSDimitry Andric   ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
760b57cec5SDimitry Andric   initializeProperties();
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   return *this;
790b57cec5SDimitry Andric }
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric void AArch64Subtarget::initializeProperties() {
820b57cec5SDimitry Andric   // Initialize CPU specific properties. We should add a tablegen feature for
830b57cec5SDimitry Andric   // this in the future so we can specify it together with the subtarget
840b57cec5SDimitry Andric   // features.
850b57cec5SDimitry Andric   switch (ARMProcFamily) {
860b57cec5SDimitry Andric   case Others:
870b57cec5SDimitry Andric     break;
885ffd83dbSDimitry Andric   case Carmel:
895ffd83dbSDimitry Andric     CacheLineSize = 64;
905ffd83dbSDimitry Andric     break;
910b57cec5SDimitry Andric   case CortexA35:
920b57cec5SDimitry Andric   case CortexA53:
930b57cec5SDimitry Andric   case CortexA55:
94fe6060f1SDimitry Andric     PrefFunctionLogAlignment = 4;
95*81ad6265SDimitry Andric     PrefLoopLogAlignment = 4;
96*81ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
970b57cec5SDimitry Andric     break;
980b57cec5SDimitry Andric   case CortexA57:
990b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
1008bcb0991SDimitry Andric     PrefFunctionLogAlignment = 4;
101*81ad6265SDimitry Andric     PrefLoopLogAlignment = 4;
102*81ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
1038bcb0991SDimitry Andric     break;
1048bcb0991SDimitry Andric   case CortexA65:
1058bcb0991SDimitry Andric     PrefFunctionLogAlignment = 3;
1060b57cec5SDimitry Andric     break;
1070b57cec5SDimitry Andric   case CortexA72:
1080b57cec5SDimitry Andric   case CortexA73:
1090b57cec5SDimitry Andric   case CortexA75:
110*81ad6265SDimitry Andric     PrefFunctionLogAlignment = 4;
111*81ad6265SDimitry Andric     PrefLoopLogAlignment = 4;
112*81ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
113*81ad6265SDimitry Andric     break;
1140b57cec5SDimitry Andric   case CortexA76:
1155ffd83dbSDimitry Andric   case CortexA77:
1165ffd83dbSDimitry Andric   case CortexA78:
117e8d8bef9SDimitry Andric   case CortexA78C:
118e8d8bef9SDimitry Andric   case CortexR82:
1195ffd83dbSDimitry Andric   case CortexX1:
1201fd87a68SDimitry Andric   case CortexX1C:
1218bcb0991SDimitry Andric     PrefFunctionLogAlignment = 4;
122*81ad6265SDimitry Andric     PrefLoopLogAlignment = 5;
123*81ad6265SDimitry Andric     MaxBytesForLoopAlignment = 16;
1240b57cec5SDimitry Andric     break;
125349cc55cSDimitry Andric   case CortexA510:
126*81ad6265SDimitry Andric     PrefFunctionLogAlignment = 4;
127*81ad6265SDimitry Andric     VScaleForTuning = 1;
128*81ad6265SDimitry Andric     PrefLoopLogAlignment = 4;
129*81ad6265SDimitry Andric     MaxBytesForLoopAlignment = 8;
130*81ad6265SDimitry Andric     break;
131349cc55cSDimitry Andric   case CortexA710:
132349cc55cSDimitry Andric   case CortexX2:
133349cc55cSDimitry Andric     PrefFunctionLogAlignment = 4;
134349cc55cSDimitry Andric     VScaleForTuning = 1;
135*81ad6265SDimitry Andric     PrefLoopLogAlignment = 5;
136*81ad6265SDimitry Andric     MaxBytesForLoopAlignment = 16;
137349cc55cSDimitry Andric     break;
1385ffd83dbSDimitry Andric   case A64FX:
1395ffd83dbSDimitry Andric     CacheLineSize = 256;
140e8d8bef9SDimitry Andric     PrefFunctionLogAlignment = 3;
141e8d8bef9SDimitry Andric     PrefLoopLogAlignment = 2;
142e8d8bef9SDimitry Andric     MaxInterleaveFactor = 4;
143e8d8bef9SDimitry Andric     PrefetchDistance = 128;
144e8d8bef9SDimitry Andric     MinPrefetchStride = 1024;
145e8d8bef9SDimitry Andric     MaxPrefetchIterationsAhead = 4;
146349cc55cSDimitry Andric     VScaleForTuning = 4;
1475ffd83dbSDimitry Andric     break;
148480093f4SDimitry Andric   case AppleA7:
149480093f4SDimitry Andric   case AppleA10:
150480093f4SDimitry Andric   case AppleA11:
151480093f4SDimitry Andric   case AppleA12:
152480093f4SDimitry Andric   case AppleA13:
153e8d8bef9SDimitry Andric   case AppleA14:
1540b57cec5SDimitry Andric     CacheLineSize = 64;
1550b57cec5SDimitry Andric     PrefetchDistance = 280;
1560b57cec5SDimitry Andric     MinPrefetchStride = 2048;
1570b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 3;
1580b57cec5SDimitry Andric     break;
1590b57cec5SDimitry Andric   case ExynosM3:
1600b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
1610b57cec5SDimitry Andric     MaxJumpTableSize = 20;
1628bcb0991SDimitry Andric     PrefFunctionLogAlignment = 5;
1638bcb0991SDimitry Andric     PrefLoopLogAlignment = 4;
1640b57cec5SDimitry Andric     break;
1650b57cec5SDimitry Andric   case Falkor:
1660b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
1670b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
1680b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
1690b57cec5SDimitry Andric     CacheLineSize = 128;
1700b57cec5SDimitry Andric     PrefetchDistance = 820;
1710b57cec5SDimitry Andric     MinPrefetchStride = 2048;
1720b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 8;
1730b57cec5SDimitry Andric     break;
1740b57cec5SDimitry Andric   case Kryo:
1750b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
1760b57cec5SDimitry Andric     VectorInsertExtractBaseCost = 2;
1770b57cec5SDimitry Andric     CacheLineSize = 128;
1780b57cec5SDimitry Andric     PrefetchDistance = 740;
1790b57cec5SDimitry Andric     MinPrefetchStride = 1024;
1800b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 11;
1810b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
1820b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
1830b57cec5SDimitry Andric     break;
1848bcb0991SDimitry Andric   case NeoverseE1:
1858bcb0991SDimitry Andric     PrefFunctionLogAlignment = 3;
1868bcb0991SDimitry Andric     break;
1878bcb0991SDimitry Andric   case NeoverseN1:
188349cc55cSDimitry Andric     PrefFunctionLogAlignment = 4;
18904eeddc0SDimitry Andric     PrefLoopLogAlignment = 5;
19004eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
191349cc55cSDimitry Andric     break;
192e8d8bef9SDimitry Andric   case NeoverseN2:
193349cc55cSDimitry Andric     PrefFunctionLogAlignment = 4;
19404eeddc0SDimitry Andric     PrefLoopLogAlignment = 5;
19504eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
196349cc55cSDimitry Andric     VScaleForTuning = 1;
197349cc55cSDimitry Andric     break;
198e8d8bef9SDimitry Andric   case NeoverseV1:
1998bcb0991SDimitry Andric     PrefFunctionLogAlignment = 4;
20004eeddc0SDimitry Andric     PrefLoopLogAlignment = 5;
20104eeddc0SDimitry Andric     MaxBytesForLoopAlignment = 16;
202349cc55cSDimitry Andric     VScaleForTuning = 2;
203349cc55cSDimitry Andric     break;
204349cc55cSDimitry Andric   case Neoverse512TVB:
205349cc55cSDimitry Andric     PrefFunctionLogAlignment = 4;
206349cc55cSDimitry Andric     VScaleForTuning = 1;
207349cc55cSDimitry Andric     MaxInterleaveFactor = 4;
2088bcb0991SDimitry Andric     break;
2090b57cec5SDimitry Andric   case Saphira:
2100b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2110b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2120b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2130b57cec5SDimitry Andric     break;
2140b57cec5SDimitry Andric   case ThunderX2T99:
2150b57cec5SDimitry Andric     CacheLineSize = 64;
2168bcb0991SDimitry Andric     PrefFunctionLogAlignment = 3;
2178bcb0991SDimitry Andric     PrefLoopLogAlignment = 2;
2180b57cec5SDimitry Andric     MaxInterleaveFactor = 4;
2190b57cec5SDimitry Andric     PrefetchDistance = 128;
2200b57cec5SDimitry Andric     MinPrefetchStride = 1024;
2210b57cec5SDimitry Andric     MaxPrefetchIterationsAhead = 4;
2220b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2230b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2240b57cec5SDimitry Andric     break;
2250b57cec5SDimitry Andric   case ThunderX:
2260b57cec5SDimitry Andric   case ThunderXT88:
2270b57cec5SDimitry Andric   case ThunderXT81:
2280b57cec5SDimitry Andric   case ThunderXT83:
2290b57cec5SDimitry Andric     CacheLineSize = 128;
2308bcb0991SDimitry Andric     PrefFunctionLogAlignment = 3;
2318bcb0991SDimitry Andric     PrefLoopLogAlignment = 2;
2320b57cec5SDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
2330b57cec5SDimitry Andric     MinVectorRegisterBitWidth = 128;
2340b57cec5SDimitry Andric     break;
2350b57cec5SDimitry Andric   case TSV110:
2360b57cec5SDimitry Andric     CacheLineSize = 64;
2378bcb0991SDimitry Andric     PrefFunctionLogAlignment = 4;
2388bcb0991SDimitry Andric     PrefLoopLogAlignment = 2;
2390b57cec5SDimitry Andric     break;
240e837bb5cSDimitry Andric   case ThunderX3T110:
241e837bb5cSDimitry Andric     CacheLineSize = 64;
242e837bb5cSDimitry Andric     PrefFunctionLogAlignment = 4;
243e837bb5cSDimitry Andric     PrefLoopLogAlignment = 2;
244e837bb5cSDimitry Andric     MaxInterleaveFactor = 4;
245e837bb5cSDimitry Andric     PrefetchDistance = 128;
246e837bb5cSDimitry Andric     MinPrefetchStride = 1024;
247e837bb5cSDimitry Andric     MaxPrefetchIterationsAhead = 4;
248e837bb5cSDimitry Andric     // FIXME: remove this to enable 64-bit SLP if performance looks good.
249e837bb5cSDimitry Andric     MinVectorRegisterBitWidth = 128;
250e837bb5cSDimitry Andric     break;
2512a66634dSDimitry Andric   case Ampere1:
2522a66634dSDimitry Andric     CacheLineSize = 64;
2532a66634dSDimitry Andric     PrefFunctionLogAlignment = 6;
2542a66634dSDimitry Andric     PrefLoopLogAlignment = 6;
2552a66634dSDimitry Andric     MaxInterleaveFactor = 4;
2562a66634dSDimitry Andric     break;
2570b57cec5SDimitry Andric   }
2580b57cec5SDimitry Andric }
2590b57cec5SDimitry Andric 
2600b57cec5SDimitry Andric AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
261349cc55cSDimitry Andric                                    const std::string &TuneCPU,
2620b57cec5SDimitry Andric                                    const std::string &FS,
263fe6060f1SDimitry Andric                                    const TargetMachine &TM, bool LittleEndian,
264fe6060f1SDimitry Andric                                    unsigned MinSVEVectorSizeInBitsOverride,
265fe6060f1SDimitry Andric                                    unsigned MaxSVEVectorSizeInBitsOverride)
266349cc55cSDimitry Andric     : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
2670b57cec5SDimitry Andric       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
2680b57cec5SDimitry Andric       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
2690b57cec5SDimitry Andric       IsLittle(LittleEndian),
270fe6060f1SDimitry Andric       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
271fe6060f1SDimitry Andric       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
27204eeddc0SDimitry Andric       InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
273349cc55cSDimitry Andric       TLInfo(TM, *this) {
2740b57cec5SDimitry Andric   if (AArch64::isX18ReservedByDefault(TT))
2750b57cec5SDimitry Andric     ReserveXRegister.set(18);
2760b57cec5SDimitry Andric 
2770b57cec5SDimitry Andric   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
2785ffd83dbSDimitry Andric   InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
2790b57cec5SDimitry Andric   Legalizer.reset(new AArch64LegalizerInfo(*this));
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric   // FIXME: At this point, we can't rely on Subtarget having RBI.
2840b57cec5SDimitry Andric   // It's awkward to mix passing RBI and the Subtarget; should we pass
2850b57cec5SDimitry Andric   // TII/TRI as well?
2860b57cec5SDimitry Andric   InstSelector.reset(createAArch64InstructionSelector(
2870b57cec5SDimitry Andric       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
2880b57cec5SDimitry Andric 
2890b57cec5SDimitry Andric   RegBankInfo.reset(RBI);
2900b57cec5SDimitry Andric }
2910b57cec5SDimitry Andric 
2920b57cec5SDimitry Andric const CallLowering *AArch64Subtarget::getCallLowering() const {
2930b57cec5SDimitry Andric   return CallLoweringInfo.get();
2940b57cec5SDimitry Andric }
2950b57cec5SDimitry Andric 
2965ffd83dbSDimitry Andric const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
2975ffd83dbSDimitry Andric   return InlineAsmLoweringInfo.get();
2985ffd83dbSDimitry Andric }
2995ffd83dbSDimitry Andric 
3008bcb0991SDimitry Andric InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
3010b57cec5SDimitry Andric   return InstSelector.get();
3020b57cec5SDimitry Andric }
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
3050b57cec5SDimitry Andric   return Legalizer.get();
3060b57cec5SDimitry Andric }
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
3090b57cec5SDimitry Andric   return RegBankInfo.get();
3100b57cec5SDimitry Andric }
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric /// Find the target operand flags that describe how a global value should be
3130b57cec5SDimitry Andric /// referenced for the current subtarget.
3148bcb0991SDimitry Andric unsigned
3150b57cec5SDimitry Andric AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
3160b57cec5SDimitry Andric                                           const TargetMachine &TM) const {
3170b57cec5SDimitry Andric   // MachO large model always goes via a GOT, simply to get a single 8-byte
3180b57cec5SDimitry Andric   // absolute relocation on all global addresses.
3190b57cec5SDimitry Andric   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
3200b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3210b57cec5SDimitry Andric 
3220b57cec5SDimitry Andric   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
3230b57cec5SDimitry Andric     if (GV->hasDLLImportStorageClass())
3240b57cec5SDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
3250b57cec5SDimitry Andric     if (getTargetTriple().isOSWindows())
3260b57cec5SDimitry Andric       return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
3270b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3280b57cec5SDimitry Andric   }
3290b57cec5SDimitry Andric 
3300b57cec5SDimitry Andric   // The small code model's direct accesses use ADRP, which cannot
3310b57cec5SDimitry Andric   // necessarily produce the value 0 (if the code is above 4GB).
3320b57cec5SDimitry Andric   // Same for the tiny code model, where we have a pc relative LDR.
3330b57cec5SDimitry Andric   if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
3340b57cec5SDimitry Andric       GV->hasExternalWeakLinkage())
3350b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3360b57cec5SDimitry Andric 
3378bcb0991SDimitry Andric   // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
3388bcb0991SDimitry Andric   // that their nominal addresses are tagged and outside of the code model. In
3398bcb0991SDimitry Andric   // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
3408bcb0991SDimitry Andric   // tag if necessary based on MO_TAGGED.
3418bcb0991SDimitry Andric   if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
3428bcb0991SDimitry Andric     return AArch64II::MO_NC | AArch64II::MO_TAGGED;
3438bcb0991SDimitry Andric 
3440b57cec5SDimitry Andric   return AArch64II::MO_NO_FLAG;
3450b57cec5SDimitry Andric }
3460b57cec5SDimitry Andric 
3478bcb0991SDimitry Andric unsigned AArch64Subtarget::classifyGlobalFunctionReference(
3480b57cec5SDimitry Andric     const GlobalValue *GV, const TargetMachine &TM) const {
3490b57cec5SDimitry Andric   // MachO large model always goes via a GOT, because we don't have the
3500b57cec5SDimitry Andric   // relocations available to do anything else..
3510b57cec5SDimitry Andric   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
3520b57cec5SDimitry Andric       !GV->hasInternalLinkage())
3530b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3540b57cec5SDimitry Andric 
3550b57cec5SDimitry Andric   // NonLazyBind goes via GOT unless we know it's available locally.
3560b57cec5SDimitry Andric   auto *F = dyn_cast<Function>(GV);
3570b57cec5SDimitry Andric   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
3580b57cec5SDimitry Andric       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
3590b57cec5SDimitry Andric     return AArch64II::MO_GOT;
3600b57cec5SDimitry Andric 
361480093f4SDimitry Andric   // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
362480093f4SDimitry Andric   if (getTargetTriple().isOSWindows())
363480093f4SDimitry Andric     return ClassifyGlobalReference(GV, TM);
364480093f4SDimitry Andric 
3650b57cec5SDimitry Andric   return AArch64II::MO_NO_FLAG;
3660b57cec5SDimitry Andric }
3670b57cec5SDimitry Andric 
3680b57cec5SDimitry Andric void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
3690b57cec5SDimitry Andric                                            unsigned NumRegionInstrs) const {
3700b57cec5SDimitry Andric   // LNT run (at least on Cyclone) showed reasonably significant gains for
3710b57cec5SDimitry Andric   // bi-directional scheduling. 253.perlbmk.
3720b57cec5SDimitry Andric   Policy.OnlyTopDown = false;
3730b57cec5SDimitry Andric   Policy.OnlyBottomUp = false;
3740b57cec5SDimitry Andric   // Enabling or Disabling the latency heuristic is a close call: It seems to
3750b57cec5SDimitry Andric   // help nearly no benchmark on out-of-order architectures, on the other hand
3760b57cec5SDimitry Andric   // it regresses register pressure on a few benchmarking.
3770b57cec5SDimitry Andric   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
3780b57cec5SDimitry Andric }
3790b57cec5SDimitry Andric 
3800b57cec5SDimitry Andric bool AArch64Subtarget::enableEarlyIfConversion() const {
3810b57cec5SDimitry Andric   return EnableEarlyIfConvert;
3820b57cec5SDimitry Andric }
3830b57cec5SDimitry Andric 
3840b57cec5SDimitry Andric bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
3850b57cec5SDimitry Andric   if (!UseAddressTopByteIgnored)
3860b57cec5SDimitry Andric     return false;
3870b57cec5SDimitry Andric 
388*81ad6265SDimitry Andric   if (TargetTriple.isDriverKit())
389*81ad6265SDimitry Andric     return true;
3900b57cec5SDimitry Andric   if (TargetTriple.isiOS()) {
3910eae32dcSDimitry Andric     return TargetTriple.getiOSVersion() >= VersionTuple(8);
3920b57cec5SDimitry Andric   }
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric   return false;
3950b57cec5SDimitry Andric }
3960b57cec5SDimitry Andric 
3970b57cec5SDimitry Andric std::unique_ptr<PBQPRAConstraint>
3980b57cec5SDimitry Andric AArch64Subtarget::getCustomPBQPConstraints() const {
3998bcb0991SDimitry Andric   return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
4000b57cec5SDimitry Andric }
4010b57cec5SDimitry Andric 
4020b57cec5SDimitry Andric void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
4030b57cec5SDimitry Andric   // We usually compute max call frame size after ISel. Do the computation now
4040b57cec5SDimitry Andric   // if the .mir file didn't specify it. Note that this will probably give you
4050b57cec5SDimitry Andric   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
4060b57cec5SDimitry Andric   // instructions, specify explicitly if you need it to be correct.
4070b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
4080b57cec5SDimitry Andric   if (!MFI.isMaxCallFrameSizeComputed())
4090b57cec5SDimitry Andric     MFI.computeMaxCallFrameSize(MF);
4100b57cec5SDimitry Andric }
4115ffd83dbSDimitry Andric 
412fe6060f1SDimitry Andric bool AArch64Subtarget::useAA() const { return UseAA; }
413