10b57cec5SDimitry Andric //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements the AArch64 specific subclass of TargetSubtarget. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "AArch64Subtarget.h" 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "AArch64.h" 160b57cec5SDimitry Andric #include "AArch64InstrInfo.h" 170b57cec5SDimitry Andric #include "AArch64PBQPRegAlloc.h" 180b57cec5SDimitry Andric #include "AArch64TargetMachine.h" 195ffd83dbSDimitry Andric #include "GISel/AArch64CallLowering.h" 205ffd83dbSDimitry Andric #include "GISel/AArch64LegalizerInfo.h" 215ffd83dbSDimitry Andric #include "GISel/AArch64RegisterBankInfo.h" 220b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h" 230b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" 2481ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h" 260b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 2704eeddc0SDimitry Andric #include "llvm/Support/AArch64TargetParser.h" 280b57cec5SDimitry Andric #include "llvm/Support/TargetParser.h" 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric using namespace llvm; 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-subtarget" 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR 350b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC 360b57cec5SDimitry Andric #include "AArch64GenSubtargetInfo.inc" 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric static cl::opt<bool> 390b57cec5SDimitry Andric EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " 400b57cec5SDimitry Andric "converter pass"), cl::init(true), cl::Hidden); 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric // If OS supports TBI, use this flag to enable it. 430b57cec5SDimitry Andric static cl::opt<bool> 440b57cec5SDimitry Andric UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " 450b57cec5SDimitry Andric "an address is ignored"), cl::init(false), cl::Hidden); 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric static cl::opt<bool> 480b57cec5SDimitry Andric UseNonLazyBind("aarch64-enable-nonlazybind", 490b57cec5SDimitry Andric cl::desc("Call nonlazybind functions via direct GOT load"), 500b57cec5SDimitry Andric cl::init(false), cl::Hidden); 510b57cec5SDimitry Andric 52fe6060f1SDimitry Andric static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true), 53fe6060f1SDimitry Andric cl::desc("Enable the use of AA during codegen.")); 545ffd83dbSDimitry Andric 5581ad6265SDimitry Andric static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost( 5681ad6265SDimitry Andric "aarch64-insert-extract-base-cost", 5781ad6265SDimitry Andric cl::desc("Base cost of vector insert/extract element"), cl::Hidden); 5881ad6265SDimitry Andric 59*bdd1243dSDimitry Andric // Reserve a list of X# registers, so they are unavailable for register 60*bdd1243dSDimitry Andric // allocator, but can still be used as ABI requests, such as passing arguments 61*bdd1243dSDimitry Andric // to function call. 62*bdd1243dSDimitry Andric static cl::list<std::string> 63*bdd1243dSDimitry Andric ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical " 64*bdd1243dSDimitry Andric "registers, so they can't be used by register allocator. " 65*bdd1243dSDimitry Andric "Should only be used for testing register allocator."), 66*bdd1243dSDimitry Andric cl::CommaSeparated, cl::Hidden); 67*bdd1243dSDimitry Andric 68*bdd1243dSDimitry Andric static cl::opt<bool> 69*bdd1243dSDimitry Andric ForceStreamingCompatibleSVE("force-streaming-compatible-sve", 70*bdd1243dSDimitry Andric cl::init(false), cl::Hidden); 71*bdd1243dSDimitry Andric 7281ad6265SDimitry Andric unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const { 7381ad6265SDimitry Andric if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0) 7481ad6265SDimitry Andric return OverrideVectorInsertExtractBaseCost; 7581ad6265SDimitry Andric return VectorInsertExtractBaseCost; 7681ad6265SDimitry Andric } 7781ad6265SDimitry Andric 78349cc55cSDimitry Andric AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies( 79349cc55cSDimitry Andric StringRef FS, StringRef CPUString, StringRef TuneCPUString) { 800b57cec5SDimitry Andric // Determine default and user-specified characteristics 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric if (CPUString.empty()) 830b57cec5SDimitry Andric CPUString = "generic"; 840b57cec5SDimitry Andric 85349cc55cSDimitry Andric if (TuneCPUString.empty()) 86349cc55cSDimitry Andric TuneCPUString = CPUString; 87349cc55cSDimitry Andric 88349cc55cSDimitry Andric ParseSubtargetFeatures(CPUString, TuneCPUString, FS); 890b57cec5SDimitry Andric initializeProperties(); 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric return *this; 920b57cec5SDimitry Andric } 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric void AArch64Subtarget::initializeProperties() { 950b57cec5SDimitry Andric // Initialize CPU specific properties. We should add a tablegen feature for 960b57cec5SDimitry Andric // this in the future so we can specify it together with the subtarget 970b57cec5SDimitry Andric // features. 980b57cec5SDimitry Andric switch (ARMProcFamily) { 990b57cec5SDimitry Andric case Others: 1000b57cec5SDimitry Andric break; 1015ffd83dbSDimitry Andric case Carmel: 1025ffd83dbSDimitry Andric CacheLineSize = 64; 1035ffd83dbSDimitry Andric break; 1040b57cec5SDimitry Andric case CortexA35: 1050b57cec5SDimitry Andric case CortexA53: 1060b57cec5SDimitry Andric case CortexA55: 107fe6060f1SDimitry Andric PrefFunctionLogAlignment = 4; 10881ad6265SDimitry Andric PrefLoopLogAlignment = 4; 10981ad6265SDimitry Andric MaxBytesForLoopAlignment = 8; 1100b57cec5SDimitry Andric break; 1110b57cec5SDimitry Andric case CortexA57: 1120b57cec5SDimitry Andric MaxInterleaveFactor = 4; 1138bcb0991SDimitry Andric PrefFunctionLogAlignment = 4; 11481ad6265SDimitry Andric PrefLoopLogAlignment = 4; 11581ad6265SDimitry Andric MaxBytesForLoopAlignment = 8; 1168bcb0991SDimitry Andric break; 1178bcb0991SDimitry Andric case CortexA65: 1188bcb0991SDimitry Andric PrefFunctionLogAlignment = 3; 1190b57cec5SDimitry Andric break; 1200b57cec5SDimitry Andric case CortexA72: 1210b57cec5SDimitry Andric case CortexA73: 1220b57cec5SDimitry Andric case CortexA75: 12381ad6265SDimitry Andric PrefFunctionLogAlignment = 4; 12481ad6265SDimitry Andric PrefLoopLogAlignment = 4; 12581ad6265SDimitry Andric MaxBytesForLoopAlignment = 8; 12681ad6265SDimitry Andric break; 1270b57cec5SDimitry Andric case CortexA76: 1285ffd83dbSDimitry Andric case CortexA77: 1295ffd83dbSDimitry Andric case CortexA78: 130e8d8bef9SDimitry Andric case CortexA78C: 131e8d8bef9SDimitry Andric case CortexR82: 1325ffd83dbSDimitry Andric case CortexX1: 1331fd87a68SDimitry Andric case CortexX1C: 1348bcb0991SDimitry Andric PrefFunctionLogAlignment = 4; 13581ad6265SDimitry Andric PrefLoopLogAlignment = 5; 13681ad6265SDimitry Andric MaxBytesForLoopAlignment = 16; 1370b57cec5SDimitry Andric break; 138349cc55cSDimitry Andric case CortexA510: 13981ad6265SDimitry Andric PrefFunctionLogAlignment = 4; 14081ad6265SDimitry Andric VScaleForTuning = 1; 14181ad6265SDimitry Andric PrefLoopLogAlignment = 4; 14281ad6265SDimitry Andric MaxBytesForLoopAlignment = 8; 14381ad6265SDimitry Andric break; 144349cc55cSDimitry Andric case CortexA710: 145*bdd1243dSDimitry Andric case CortexA715: 146349cc55cSDimitry Andric case CortexX2: 147*bdd1243dSDimitry Andric case CortexX3: 148349cc55cSDimitry Andric PrefFunctionLogAlignment = 4; 149349cc55cSDimitry Andric VScaleForTuning = 1; 15081ad6265SDimitry Andric PrefLoopLogAlignment = 5; 15181ad6265SDimitry Andric MaxBytesForLoopAlignment = 16; 152349cc55cSDimitry Andric break; 1535ffd83dbSDimitry Andric case A64FX: 1545ffd83dbSDimitry Andric CacheLineSize = 256; 155e8d8bef9SDimitry Andric PrefFunctionLogAlignment = 3; 156e8d8bef9SDimitry Andric PrefLoopLogAlignment = 2; 157e8d8bef9SDimitry Andric MaxInterleaveFactor = 4; 158e8d8bef9SDimitry Andric PrefetchDistance = 128; 159e8d8bef9SDimitry Andric MinPrefetchStride = 1024; 160e8d8bef9SDimitry Andric MaxPrefetchIterationsAhead = 4; 161349cc55cSDimitry Andric VScaleForTuning = 4; 1625ffd83dbSDimitry Andric break; 163480093f4SDimitry Andric case AppleA7: 164480093f4SDimitry Andric case AppleA10: 165480093f4SDimitry Andric case AppleA11: 166480093f4SDimitry Andric case AppleA12: 167480093f4SDimitry Andric case AppleA13: 168e8d8bef9SDimitry Andric case AppleA14: 169*bdd1243dSDimitry Andric case AppleA15: 170*bdd1243dSDimitry Andric case AppleA16: 1710b57cec5SDimitry Andric CacheLineSize = 64; 1720b57cec5SDimitry Andric PrefetchDistance = 280; 1730b57cec5SDimitry Andric MinPrefetchStride = 2048; 1740b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 3; 175*bdd1243dSDimitry Andric switch (ARMProcFamily) { 176*bdd1243dSDimitry Andric case AppleA14: 177*bdd1243dSDimitry Andric case AppleA15: 178*bdd1243dSDimitry Andric case AppleA16: 179*bdd1243dSDimitry Andric MaxInterleaveFactor = 4; 180*bdd1243dSDimitry Andric break; 181*bdd1243dSDimitry Andric default: 182*bdd1243dSDimitry Andric break; 183*bdd1243dSDimitry Andric } 1840b57cec5SDimitry Andric break; 1850b57cec5SDimitry Andric case ExynosM3: 1860b57cec5SDimitry Andric MaxInterleaveFactor = 4; 1870b57cec5SDimitry Andric MaxJumpTableSize = 20; 1888bcb0991SDimitry Andric PrefFunctionLogAlignment = 5; 1898bcb0991SDimitry Andric PrefLoopLogAlignment = 4; 1900b57cec5SDimitry Andric break; 1910b57cec5SDimitry Andric case Falkor: 1920b57cec5SDimitry Andric MaxInterleaveFactor = 4; 1930b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 1940b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 1950b57cec5SDimitry Andric CacheLineSize = 128; 1960b57cec5SDimitry Andric PrefetchDistance = 820; 1970b57cec5SDimitry Andric MinPrefetchStride = 2048; 1980b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 8; 1990b57cec5SDimitry Andric break; 2000b57cec5SDimitry Andric case Kryo: 2010b57cec5SDimitry Andric MaxInterleaveFactor = 4; 2020b57cec5SDimitry Andric VectorInsertExtractBaseCost = 2; 2030b57cec5SDimitry Andric CacheLineSize = 128; 2040b57cec5SDimitry Andric PrefetchDistance = 740; 2050b57cec5SDimitry Andric MinPrefetchStride = 1024; 2060b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 11; 2070b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 2080b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 2090b57cec5SDimitry Andric break; 2108bcb0991SDimitry Andric case NeoverseE1: 2118bcb0991SDimitry Andric PrefFunctionLogAlignment = 3; 2128bcb0991SDimitry Andric break; 2138bcb0991SDimitry Andric case NeoverseN1: 214349cc55cSDimitry Andric PrefFunctionLogAlignment = 4; 21504eeddc0SDimitry Andric PrefLoopLogAlignment = 5; 21604eeddc0SDimitry Andric MaxBytesForLoopAlignment = 16; 217349cc55cSDimitry Andric break; 218e8d8bef9SDimitry Andric case NeoverseN2: 219*bdd1243dSDimitry Andric case NeoverseV2: 220349cc55cSDimitry Andric PrefFunctionLogAlignment = 4; 22104eeddc0SDimitry Andric PrefLoopLogAlignment = 5; 22204eeddc0SDimitry Andric MaxBytesForLoopAlignment = 16; 223349cc55cSDimitry Andric VScaleForTuning = 1; 224349cc55cSDimitry Andric break; 225e8d8bef9SDimitry Andric case NeoverseV1: 2268bcb0991SDimitry Andric PrefFunctionLogAlignment = 4; 22704eeddc0SDimitry Andric PrefLoopLogAlignment = 5; 22804eeddc0SDimitry Andric MaxBytesForLoopAlignment = 16; 229349cc55cSDimitry Andric VScaleForTuning = 2; 230349cc55cSDimitry Andric break; 231349cc55cSDimitry Andric case Neoverse512TVB: 232349cc55cSDimitry Andric PrefFunctionLogAlignment = 4; 233349cc55cSDimitry Andric VScaleForTuning = 1; 234349cc55cSDimitry Andric MaxInterleaveFactor = 4; 2358bcb0991SDimitry Andric break; 2360b57cec5SDimitry Andric case Saphira: 2370b57cec5SDimitry Andric MaxInterleaveFactor = 4; 2380b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 2390b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 2400b57cec5SDimitry Andric break; 2410b57cec5SDimitry Andric case ThunderX2T99: 2420b57cec5SDimitry Andric CacheLineSize = 64; 2438bcb0991SDimitry Andric PrefFunctionLogAlignment = 3; 2448bcb0991SDimitry Andric PrefLoopLogAlignment = 2; 2450b57cec5SDimitry Andric MaxInterleaveFactor = 4; 2460b57cec5SDimitry Andric PrefetchDistance = 128; 2470b57cec5SDimitry Andric MinPrefetchStride = 1024; 2480b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 4; 2490b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 2500b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 2510b57cec5SDimitry Andric break; 2520b57cec5SDimitry Andric case ThunderX: 2530b57cec5SDimitry Andric case ThunderXT88: 2540b57cec5SDimitry Andric case ThunderXT81: 2550b57cec5SDimitry Andric case ThunderXT83: 2560b57cec5SDimitry Andric CacheLineSize = 128; 2578bcb0991SDimitry Andric PrefFunctionLogAlignment = 3; 2588bcb0991SDimitry Andric PrefLoopLogAlignment = 2; 2590b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 2600b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 2610b57cec5SDimitry Andric break; 2620b57cec5SDimitry Andric case TSV110: 2630b57cec5SDimitry Andric CacheLineSize = 64; 2648bcb0991SDimitry Andric PrefFunctionLogAlignment = 4; 2658bcb0991SDimitry Andric PrefLoopLogAlignment = 2; 2660b57cec5SDimitry Andric break; 267e837bb5cSDimitry Andric case ThunderX3T110: 268e837bb5cSDimitry Andric CacheLineSize = 64; 269e837bb5cSDimitry Andric PrefFunctionLogAlignment = 4; 270e837bb5cSDimitry Andric PrefLoopLogAlignment = 2; 271e837bb5cSDimitry Andric MaxInterleaveFactor = 4; 272e837bb5cSDimitry Andric PrefetchDistance = 128; 273e837bb5cSDimitry Andric MinPrefetchStride = 1024; 274e837bb5cSDimitry Andric MaxPrefetchIterationsAhead = 4; 275e837bb5cSDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 276e837bb5cSDimitry Andric MinVectorRegisterBitWidth = 128; 277e837bb5cSDimitry Andric break; 2782a66634dSDimitry Andric case Ampere1: 279*bdd1243dSDimitry Andric case Ampere1A: 2802a66634dSDimitry Andric CacheLineSize = 64; 2812a66634dSDimitry Andric PrefFunctionLogAlignment = 6; 2822a66634dSDimitry Andric PrefLoopLogAlignment = 6; 2832a66634dSDimitry Andric MaxInterleaveFactor = 4; 2842a66634dSDimitry Andric break; 2850b57cec5SDimitry Andric } 2860b57cec5SDimitry Andric } 2870b57cec5SDimitry Andric 288*bdd1243dSDimitry Andric AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, 289*bdd1243dSDimitry Andric StringRef TuneCPU, StringRef FS, 290fe6060f1SDimitry Andric const TargetMachine &TM, bool LittleEndian, 291fe6060f1SDimitry Andric unsigned MinSVEVectorSizeInBitsOverride, 292*bdd1243dSDimitry Andric unsigned MaxSVEVectorSizeInBitsOverride, 293*bdd1243dSDimitry Andric bool StreamingSVEModeDisabled) 294349cc55cSDimitry Andric : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS), 2950b57cec5SDimitry Andric ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), 296*bdd1243dSDimitry Andric ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()), 2970b57cec5SDimitry Andric CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), 2980b57cec5SDimitry Andric IsLittle(LittleEndian), 299*bdd1243dSDimitry Andric StreamingSVEModeDisabled(StreamingSVEModeDisabled), 300fe6060f1SDimitry Andric MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), 301fe6060f1SDimitry Andric MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), 30204eeddc0SDimitry Andric InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)), 303349cc55cSDimitry Andric TLInfo(TM, *this) { 3040b57cec5SDimitry Andric if (AArch64::isX18ReservedByDefault(TT)) 3050b57cec5SDimitry Andric ReserveXRegister.set(18); 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering())); 3085ffd83dbSDimitry Andric InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering())); 3090b57cec5SDimitry Andric Legalizer.reset(new AArch64LegalizerInfo(*this)); 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo()); 3120b57cec5SDimitry Andric 3130b57cec5SDimitry Andric // FIXME: At this point, we can't rely on Subtarget having RBI. 3140b57cec5SDimitry Andric // It's awkward to mix passing RBI and the Subtarget; should we pass 3150b57cec5SDimitry Andric // TII/TRI as well? 3160b57cec5SDimitry Andric InstSelector.reset(createAArch64InstructionSelector( 3170b57cec5SDimitry Andric *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI)); 3180b57cec5SDimitry Andric 3190b57cec5SDimitry Andric RegBankInfo.reset(RBI); 320*bdd1243dSDimitry Andric 321*bdd1243dSDimitry Andric auto TRI = getRegisterInfo(); 322*bdd1243dSDimitry Andric StringSet<> ReservedRegNames; 323*bdd1243dSDimitry Andric ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end()); 324*bdd1243dSDimitry Andric for (unsigned i = 0; i < 29; ++i) { 325*bdd1243dSDimitry Andric if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i))) 326*bdd1243dSDimitry Andric ReserveXRegisterForRA.set(i); 327*bdd1243dSDimitry Andric } 328*bdd1243dSDimitry Andric // X30 is named LR, so we can't use TRI->getName to check X30. 329*bdd1243dSDimitry Andric if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR")) 330*bdd1243dSDimitry Andric ReserveXRegisterForRA.set(30); 331*bdd1243dSDimitry Andric // X29 is named FP, so we can't use TRI->getName to check X29. 332*bdd1243dSDimitry Andric if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP")) 333*bdd1243dSDimitry Andric ReserveXRegisterForRA.set(29); 3340b57cec5SDimitry Andric } 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric const CallLowering *AArch64Subtarget::getCallLowering() const { 3370b57cec5SDimitry Andric return CallLoweringInfo.get(); 3380b57cec5SDimitry Andric } 3390b57cec5SDimitry Andric 3405ffd83dbSDimitry Andric const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const { 3415ffd83dbSDimitry Andric return InlineAsmLoweringInfo.get(); 3425ffd83dbSDimitry Andric } 3435ffd83dbSDimitry Andric 3448bcb0991SDimitry Andric InstructionSelector *AArch64Subtarget::getInstructionSelector() const { 3450b57cec5SDimitry Andric return InstSelector.get(); 3460b57cec5SDimitry Andric } 3470b57cec5SDimitry Andric 3480b57cec5SDimitry Andric const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const { 3490b57cec5SDimitry Andric return Legalizer.get(); 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const { 3530b57cec5SDimitry Andric return RegBankInfo.get(); 3540b57cec5SDimitry Andric } 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric /// Find the target operand flags that describe how a global value should be 3570b57cec5SDimitry Andric /// referenced for the current subtarget. 3588bcb0991SDimitry Andric unsigned 3590b57cec5SDimitry Andric AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, 3600b57cec5SDimitry Andric const TargetMachine &TM) const { 3610b57cec5SDimitry Andric // MachO large model always goes via a GOT, simply to get a single 8-byte 3620b57cec5SDimitry Andric // absolute relocation on all global addresses. 3630b57cec5SDimitry Andric if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) 3640b57cec5SDimitry Andric return AArch64II::MO_GOT; 3650b57cec5SDimitry Andric 3660b57cec5SDimitry Andric if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) { 367*bdd1243dSDimitry Andric if (GV->hasDLLImportStorageClass()) { 368*bdd1243dSDimitry Andric if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy()) 369*bdd1243dSDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORTAUX; 3700b57cec5SDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT; 371*bdd1243dSDimitry Andric } 3720b57cec5SDimitry Andric if (getTargetTriple().isOSWindows()) 3730b57cec5SDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB; 3740b57cec5SDimitry Andric return AArch64II::MO_GOT; 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric // The small code model's direct accesses use ADRP, which cannot 3780b57cec5SDimitry Andric // necessarily produce the value 0 (if the code is above 4GB). 3790b57cec5SDimitry Andric // Same for the tiny code model, where we have a pc relative LDR. 3800b57cec5SDimitry Andric if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) && 3810b57cec5SDimitry Andric GV->hasExternalWeakLinkage()) 3820b57cec5SDimitry Andric return AArch64II::MO_GOT; 3830b57cec5SDimitry Andric 3848bcb0991SDimitry Andric // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate 3858bcb0991SDimitry Andric // that their nominal addresses are tagged and outside of the code model. In 3868bcb0991SDimitry Andric // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the 3878bcb0991SDimitry Andric // tag if necessary based on MO_TAGGED. 3888bcb0991SDimitry Andric if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType())) 3898bcb0991SDimitry Andric return AArch64II::MO_NC | AArch64II::MO_TAGGED; 3908bcb0991SDimitry Andric 3910b57cec5SDimitry Andric return AArch64II::MO_NO_FLAG; 3920b57cec5SDimitry Andric } 3930b57cec5SDimitry Andric 3948bcb0991SDimitry Andric unsigned AArch64Subtarget::classifyGlobalFunctionReference( 3950b57cec5SDimitry Andric const GlobalValue *GV, const TargetMachine &TM) const { 3960b57cec5SDimitry Andric // MachO large model always goes via a GOT, because we don't have the 3970b57cec5SDimitry Andric // relocations available to do anything else.. 3980b57cec5SDimitry Andric if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() && 3990b57cec5SDimitry Andric !GV->hasInternalLinkage()) 4000b57cec5SDimitry Andric return AArch64II::MO_GOT; 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric // NonLazyBind goes via GOT unless we know it's available locally. 4030b57cec5SDimitry Andric auto *F = dyn_cast<Function>(GV); 4040b57cec5SDimitry Andric if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) && 4050b57cec5SDimitry Andric !TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) 4060b57cec5SDimitry Andric return AArch64II::MO_GOT; 4070b57cec5SDimitry Andric 408*bdd1243dSDimitry Andric if (getTargetTriple().isOSWindows()) { 409*bdd1243dSDimitry Andric if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy() && 410*bdd1243dSDimitry Andric GV->hasDLLImportStorageClass()) { 411*bdd1243dSDimitry Andric // On Arm64EC, if we're calling a function directly, use MO_DLLIMPORT, 412*bdd1243dSDimitry Andric // not MO_DLLIMPORTAUX. 413*bdd1243dSDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT; 414*bdd1243dSDimitry Andric } 415*bdd1243dSDimitry Andric 416480093f4SDimitry Andric // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB. 417480093f4SDimitry Andric return ClassifyGlobalReference(GV, TM); 418*bdd1243dSDimitry Andric } 419480093f4SDimitry Andric 4200b57cec5SDimitry Andric return AArch64II::MO_NO_FLAG; 4210b57cec5SDimitry Andric } 4220b57cec5SDimitry Andric 4230b57cec5SDimitry Andric void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 4240b57cec5SDimitry Andric unsigned NumRegionInstrs) const { 4250b57cec5SDimitry Andric // LNT run (at least on Cyclone) showed reasonably significant gains for 4260b57cec5SDimitry Andric // bi-directional scheduling. 253.perlbmk. 4270b57cec5SDimitry Andric Policy.OnlyTopDown = false; 4280b57cec5SDimitry Andric Policy.OnlyBottomUp = false; 4290b57cec5SDimitry Andric // Enabling or Disabling the latency heuristic is a close call: It seems to 4300b57cec5SDimitry Andric // help nearly no benchmark on out-of-order architectures, on the other hand 4310b57cec5SDimitry Andric // it regresses register pressure on a few benchmarking. 4320b57cec5SDimitry Andric Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic; 4330b57cec5SDimitry Andric } 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric bool AArch64Subtarget::enableEarlyIfConversion() const { 4360b57cec5SDimitry Andric return EnableEarlyIfConvert; 4370b57cec5SDimitry Andric } 4380b57cec5SDimitry Andric 4390b57cec5SDimitry Andric bool AArch64Subtarget::supportsAddressTopByteIgnored() const { 4400b57cec5SDimitry Andric if (!UseAddressTopByteIgnored) 4410b57cec5SDimitry Andric return false; 4420b57cec5SDimitry Andric 44381ad6265SDimitry Andric if (TargetTriple.isDriverKit()) 44481ad6265SDimitry Andric return true; 4450b57cec5SDimitry Andric if (TargetTriple.isiOS()) { 4460eae32dcSDimitry Andric return TargetTriple.getiOSVersion() >= VersionTuple(8); 4470b57cec5SDimitry Andric } 4480b57cec5SDimitry Andric 4490b57cec5SDimitry Andric return false; 4500b57cec5SDimitry Andric } 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric std::unique_ptr<PBQPRAConstraint> 4530b57cec5SDimitry Andric AArch64Subtarget::getCustomPBQPConstraints() const { 4548bcb0991SDimitry Andric return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr; 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const { 4580b57cec5SDimitry Andric // We usually compute max call frame size after ISel. Do the computation now 4590b57cec5SDimitry Andric // if the .mir file didn't specify it. Note that this will probably give you 4600b57cec5SDimitry Andric // bogus values after PEI has eliminated the callframe setup/destroy pseudo 4610b57cec5SDimitry Andric // instructions, specify explicitly if you need it to be correct. 4620b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 4630b57cec5SDimitry Andric if (!MFI.isMaxCallFrameSizeComputed()) 4640b57cec5SDimitry Andric MFI.computeMaxCallFrameSize(MF); 4650b57cec5SDimitry Andric } 4665ffd83dbSDimitry Andric 467fe6060f1SDimitry Andric bool AArch64Subtarget::useAA() const { return UseAA; } 468*bdd1243dSDimitry Andric 469*bdd1243dSDimitry Andric bool AArch64Subtarget::forceStreamingCompatibleSVE() const { 470*bdd1243dSDimitry Andric if (ForceStreamingCompatibleSVE) { 471*bdd1243dSDimitry Andric assert(hasSVEorSME() && "Expected SVE to be available"); 472*bdd1243dSDimitry Andric return hasSVEorSME(); 473*bdd1243dSDimitry Andric } 474*bdd1243dSDimitry Andric return false; 475*bdd1243dSDimitry Andric } 476