//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64-specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/TargetParser/AArch64TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If the OS supports TBI, use this flag to enable it.
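// (TBI, "top byte ignore", is an AArch64 feature whereby the top eight bits
// of a 64-bit virtual address are ignored by address translation, so software
// may use them to carry metadata such as pointer tags.)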
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool>
    UseNonLazyBind("aarch64-enable-nonlazybind",
                   cl::desc("Call nonlazybind functions via direct GOT load"),
                   cl::init(false), cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));

static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);

// Reserve a list of X# registers, so they are unavailable to the register
// allocator but can still be used where the ABI requires them, such as for
// passing arguments to a function call.
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by the register allocator. "
                  "Should only be used for testing the register allocator."),
                  cl::CommaSeparated, cl::Hidden);

static cl::opt<bool> ForceStreamingCompatibleSVE(
    "force-streaming-compatible-sve",
    cl::desc(
        "Force the use of streaming-compatible SVE code for all functions"),
    cl::Hidden);

static cl::opt<AArch64PAuth::AuthCheckMethod>
    AuthenticatedLRCheckMethod("aarch64-authenticated-lr-check-method",
                               cl::Hidden,
                               cl::desc("Override the variant of check applied "
                                        "to authenticated LR during tail call"),
                               cl::values(AUTH_CHECK_METHOD_CL_VALUES_LR));

static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
    "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on AArch64"));

unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
  if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
    return OverrideVectorInsertExtractBaseCost;
  return VectorInsertExtractBaseCost;
}

AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
    StringRef FS, StringRef CPUString, StringRef TuneCPUString,
    bool HasMinSize) {
  // Determine default and user-specified characteristics

  if (CPUString.empty())
    CPUString = "generic";
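  // "generic" selects the baseline architecture model with no CPU-specific
  // tuning; any features explicitly requested in FS still apply.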

  if (TuneCPUString.empty())
    TuneCPUString = CPUString;

  ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
  initializeProperties(HasMinSize);

  return *this;
}

void AArch64Subtarget::initializeProperties(bool HasMinSize) {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Others:
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionAlignment = Align(8);
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78C:
  case CortexR82:
  case CortexX1:
  case CortexX1C:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA510:
  case CortexA520:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(16);
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexA720:
  case CortexX2:
  case CortexX3:
  case CortexX4:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
  case AppleA17:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    switch (ARMProcFamily) {
    case AppleA14:
    case AppleA15:
    case AppleA16:
    case AppleA17:
      MaxInterleaveFactor = 4;
      break;
    default:
      break;
    }
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionAlignment = Align(32);
    PrefLoopAlignment = Align(16);
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionAlignment = Align(8);
    break;
  case NeoverseN1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseN2:
  case NeoverseV2:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(32);
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    DefaultSVETFOpts = TailFoldingOpts::Simple;
    break;
  case Neoverse512TVB:
    PrefFunctionAlignment = Align(16);
    VScaleForTuning = 1;
    MaxInterleaveFactor = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionAlignment = Align(8);
    PrefLoopAlignment = Align(4);
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(16);
    PrefLoopAlignment = Align(4);
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
  case Ampere1B:
    CacheLineSize = 64;
    PrefFunctionAlignment = Align(64);
    PrefLoopAlignment = Align(64);
    MaxInterleaveFactor = 4;
    break;
  }

  if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
    MinimumJumpTableEntries = AArch64MinimumJumpTableEntries;
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool StreamingSVEMode,
                                   bool StreamingCompatibleSVEMode,
                                   bool HasMinSize)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian), StreamingSVEMode(StreamingSVEMode),
      StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
      TLInfo(TM, *this) {
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
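  // Note: the raw RBI pointer is handed to the instruction selector below
  // before its ownership is transferred to RegBankInfo.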

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames;
  ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);

  AddressCheckPSV.reset(new AddressCheckPseudoSourceValue(TM));
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  // All globals dynamically protected by MTE must have their address tags
  // synthesized. This is done by having the loader stash the tag in the GOT
  // entry. Force all tagged globals (even ones with internal linkage) through
  // the GOT.
  if (GV->isTagged())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
    if (GV->hasDLLImportStorageClass()) {
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a PC-relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}

unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
      !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) {
      if (GV->hasDLLImportStorageClass()) {
        // On Arm64EC, if we're calling a symbol from the import table
        // directly, use MO_ARM64EC_CALLMANGLE.
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT |
               AArch64II::MO_ARM64EC_CALLMANGLE;
      }
      if (GV->hasExternalLinkage()) {
        // If we're calling a symbol directly, use the mangled form in the
        // call instruction.
        return AArch64II::MO_ARM64EC_CALLMANGLE;
      }
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // An LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling on 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help nearly no benchmark on out-of-order architectures; on the other hand
  // it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isDriverKit())
    return true;
  if (TargetTriple.isiOS()) {
    return TargetTriple.getiOSVersion() >= VersionTuple(8);
  }

  return false;
}

std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions; specify it explicitly if you need it to be correct.
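  // (Serialized MIR normally records maxCallFrameSize in its frameInfo block,
  // so this mostly matters for hand-written .mir tests that omit it.)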
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}

bool AArch64Subtarget::useAA() const { return UseAA; }

bool AArch64Subtarget::isStreamingCompatible() const {
  return StreamingCompatibleSVEMode || ForceStreamingCompatibleSVE;
}

bool AArch64Subtarget::isNeonAvailable() const {
  return hasNEON() &&
         (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
}

bool AArch64Subtarget::isSVEAvailable() const {
  return hasSVE() &&
         (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
}

// If return address signing is enabled, tail calls are emitted as follows:
//
// ```
//   <authenticate LR>
//   <check LR>
//   TCRETURN          ; the callee may sign and spill the LR in its prologue
// ```
//
// LR may require explicit checking because if FEAT_FPAC is not implemented
// and LR was tampered with, then `<authenticate LR>` will not generate an
// exception on its own. Later, if the callee spills the signed LR value and
// neither FEAT_PAuth2 nor FEAT_EPAC are implemented, the valid PAC replaces
// the higher bits of LR, thus hiding the authentication failure.
AArch64PAuth::AuthCheckMethod
AArch64Subtarget::getAuthenticatedLRCheckMethod() const {
  if (AuthenticatedLRCheckMethod.getNumOccurrences())
    return AuthenticatedLRCheckMethod;

  // For now, use None by default because checks may introduce an unexpected
  // performance regression or incompatibility with execute-only mappings.
  return AArch64PAuth::AuthCheckMethod::None;
}