1*0b57cec5SDimitry Andric //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This file implements the AArch64 specific subclass of TargetSubtarget. 10*0b57cec5SDimitry Andric // 11*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 12*0b57cec5SDimitry Andric 13*0b57cec5SDimitry Andric #include "AArch64Subtarget.h" 14*0b57cec5SDimitry Andric 15*0b57cec5SDimitry Andric #include "AArch64.h" 16*0b57cec5SDimitry Andric #include "AArch64CallLowering.h" 17*0b57cec5SDimitry Andric #include "AArch64InstrInfo.h" 18*0b57cec5SDimitry Andric #include "AArch64LegalizerInfo.h" 19*0b57cec5SDimitry Andric #include "AArch64PBQPRegAlloc.h" 20*0b57cec5SDimitry Andric #include "AArch64RegisterBankInfo.h" 21*0b57cec5SDimitry Andric #include "AArch64TargetMachine.h" 22*0b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h" 23*0b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" 24*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h" 25*0b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 26*0b57cec5SDimitry Andric #include "llvm/Support/TargetParser.h" 27*0b57cec5SDimitry Andric 28*0b57cec5SDimitry Andric using namespace llvm; 29*0b57cec5SDimitry Andric 30*0b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-subtarget" 31*0b57cec5SDimitry Andric 32*0b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR 33*0b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC 34*0b57cec5SDimitry Andric #include "AArch64GenSubtargetInfo.inc" 35*0b57cec5SDimitry Andric 36*0b57cec5SDimitry Andric static cl::opt<bool> 37*0b57cec5SDimitry Andric EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " 38*0b57cec5SDimitry Andric "converter pass"), cl::init(true), cl::Hidden); 39*0b57cec5SDimitry Andric 40*0b57cec5SDimitry Andric // If OS supports TBI, use this flag to enable it. 41*0b57cec5SDimitry Andric static cl::opt<bool> 42*0b57cec5SDimitry Andric UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " 43*0b57cec5SDimitry Andric "an address is ignored"), cl::init(false), cl::Hidden); 44*0b57cec5SDimitry Andric 45*0b57cec5SDimitry Andric static cl::opt<bool> 46*0b57cec5SDimitry Andric UseNonLazyBind("aarch64-enable-nonlazybind", 47*0b57cec5SDimitry Andric cl::desc("Call nonlazybind functions via direct GOT load"), 48*0b57cec5SDimitry Andric cl::init(false), cl::Hidden); 49*0b57cec5SDimitry Andric 50*0b57cec5SDimitry Andric AArch64Subtarget & 51*0b57cec5SDimitry Andric AArch64Subtarget::initializeSubtargetDependencies(StringRef FS, 52*0b57cec5SDimitry Andric StringRef CPUString) { 53*0b57cec5SDimitry Andric // Determine default and user-specified characteristics 54*0b57cec5SDimitry Andric 55*0b57cec5SDimitry Andric if (CPUString.empty()) 56*0b57cec5SDimitry Andric CPUString = "generic"; 57*0b57cec5SDimitry Andric 58*0b57cec5SDimitry Andric ParseSubtargetFeatures(CPUString, FS); 59*0b57cec5SDimitry Andric initializeProperties(); 60*0b57cec5SDimitry Andric 61*0b57cec5SDimitry Andric return *this; 62*0b57cec5SDimitry Andric } 63*0b57cec5SDimitry Andric 64*0b57cec5SDimitry Andric void AArch64Subtarget::initializeProperties() { 65*0b57cec5SDimitry Andric // Initialize CPU specific properties. We should add a tablegen feature for 66*0b57cec5SDimitry Andric // this in the future so we can specify it together with the subtarget 67*0b57cec5SDimitry Andric // features. 68*0b57cec5SDimitry Andric switch (ARMProcFamily) { 69*0b57cec5SDimitry Andric case Others: 70*0b57cec5SDimitry Andric break; 71*0b57cec5SDimitry Andric case CortexA35: 72*0b57cec5SDimitry Andric break; 73*0b57cec5SDimitry Andric case CortexA53: 74*0b57cec5SDimitry Andric PrefFunctionAlignment = 3; 75*0b57cec5SDimitry Andric break; 76*0b57cec5SDimitry Andric case CortexA55: 77*0b57cec5SDimitry Andric break; 78*0b57cec5SDimitry Andric case CortexA57: 79*0b57cec5SDimitry Andric MaxInterleaveFactor = 4; 80*0b57cec5SDimitry Andric PrefFunctionAlignment = 4; 81*0b57cec5SDimitry Andric break; 82*0b57cec5SDimitry Andric case CortexA72: 83*0b57cec5SDimitry Andric case CortexA73: 84*0b57cec5SDimitry Andric case CortexA75: 85*0b57cec5SDimitry Andric case CortexA76: 86*0b57cec5SDimitry Andric PrefFunctionAlignment = 4; 87*0b57cec5SDimitry Andric break; 88*0b57cec5SDimitry Andric case Cyclone: 89*0b57cec5SDimitry Andric CacheLineSize = 64; 90*0b57cec5SDimitry Andric PrefetchDistance = 280; 91*0b57cec5SDimitry Andric MinPrefetchStride = 2048; 92*0b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 3; 93*0b57cec5SDimitry Andric break; 94*0b57cec5SDimitry Andric case ExynosM1: 95*0b57cec5SDimitry Andric MaxInterleaveFactor = 4; 96*0b57cec5SDimitry Andric MaxJumpTableSize = 8; 97*0b57cec5SDimitry Andric PrefFunctionAlignment = 4; 98*0b57cec5SDimitry Andric PrefLoopAlignment = 3; 99*0b57cec5SDimitry Andric break; 100*0b57cec5SDimitry Andric case ExynosM3: 101*0b57cec5SDimitry Andric MaxInterleaveFactor = 4; 102*0b57cec5SDimitry Andric MaxJumpTableSize = 20; 103*0b57cec5SDimitry Andric PrefFunctionAlignment = 5; 104*0b57cec5SDimitry Andric PrefLoopAlignment = 4; 105*0b57cec5SDimitry Andric break; 106*0b57cec5SDimitry Andric case Falkor: 107*0b57cec5SDimitry Andric MaxInterleaveFactor = 4; 108*0b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 109*0b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 110*0b57cec5SDimitry Andric CacheLineSize = 128; 111*0b57cec5SDimitry Andric PrefetchDistance = 820; 112*0b57cec5SDimitry Andric MinPrefetchStride = 2048; 113*0b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 8; 114*0b57cec5SDimitry Andric break; 115*0b57cec5SDimitry Andric case Kryo: 116*0b57cec5SDimitry Andric MaxInterleaveFactor = 4; 117*0b57cec5SDimitry Andric VectorInsertExtractBaseCost = 2; 118*0b57cec5SDimitry Andric CacheLineSize = 128; 119*0b57cec5SDimitry Andric PrefetchDistance = 740; 120*0b57cec5SDimitry Andric MinPrefetchStride = 1024; 121*0b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 11; 122*0b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 123*0b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 124*0b57cec5SDimitry Andric break; 125*0b57cec5SDimitry Andric case Saphira: 126*0b57cec5SDimitry Andric MaxInterleaveFactor = 4; 127*0b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 128*0b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 129*0b57cec5SDimitry Andric break; 130*0b57cec5SDimitry Andric case ThunderX2T99: 131*0b57cec5SDimitry Andric CacheLineSize = 64; 132*0b57cec5SDimitry Andric PrefFunctionAlignment = 3; 133*0b57cec5SDimitry Andric PrefLoopAlignment = 2; 134*0b57cec5SDimitry Andric MaxInterleaveFactor = 4; 135*0b57cec5SDimitry Andric PrefetchDistance = 128; 136*0b57cec5SDimitry Andric MinPrefetchStride = 1024; 137*0b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 4; 138*0b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 139*0b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 140*0b57cec5SDimitry Andric break; 141*0b57cec5SDimitry Andric case ThunderX: 142*0b57cec5SDimitry Andric case ThunderXT88: 143*0b57cec5SDimitry Andric case ThunderXT81: 144*0b57cec5SDimitry Andric case ThunderXT83: 145*0b57cec5SDimitry Andric CacheLineSize = 128; 146*0b57cec5SDimitry Andric PrefFunctionAlignment = 3; 147*0b57cec5SDimitry Andric PrefLoopAlignment = 2; 148*0b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 149*0b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 150*0b57cec5SDimitry Andric break; 151*0b57cec5SDimitry Andric case TSV110: 152*0b57cec5SDimitry Andric CacheLineSize = 64; 153*0b57cec5SDimitry Andric PrefFunctionAlignment = 4; 154*0b57cec5SDimitry Andric PrefLoopAlignment = 2; 155*0b57cec5SDimitry Andric break; 156*0b57cec5SDimitry Andric } 157*0b57cec5SDimitry Andric } 158*0b57cec5SDimitry Andric 159*0b57cec5SDimitry Andric AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, 160*0b57cec5SDimitry Andric const std::string &FS, 161*0b57cec5SDimitry Andric const TargetMachine &TM, bool LittleEndian) 162*0b57cec5SDimitry Andric : AArch64GenSubtargetInfo(TT, CPU, FS), 163*0b57cec5SDimitry Andric ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), 164*0b57cec5SDimitry Andric CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), 165*0b57cec5SDimitry Andric IsLittle(LittleEndian), 166*0b57cec5SDimitry Andric TargetTriple(TT), FrameLowering(), 167*0b57cec5SDimitry Andric InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), 168*0b57cec5SDimitry Andric TLInfo(TM, *this) { 169*0b57cec5SDimitry Andric if (AArch64::isX18ReservedByDefault(TT)) 170*0b57cec5SDimitry Andric ReserveXRegister.set(18); 171*0b57cec5SDimitry Andric 172*0b57cec5SDimitry Andric CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering())); 173*0b57cec5SDimitry Andric Legalizer.reset(new AArch64LegalizerInfo(*this)); 174*0b57cec5SDimitry Andric 175*0b57cec5SDimitry Andric auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo()); 176*0b57cec5SDimitry Andric 177*0b57cec5SDimitry Andric // FIXME: At this point, we can't rely on Subtarget having RBI. 178*0b57cec5SDimitry Andric // It's awkward to mix passing RBI and the Subtarget; should we pass 179*0b57cec5SDimitry Andric // TII/TRI as well? 180*0b57cec5SDimitry Andric InstSelector.reset(createAArch64InstructionSelector( 181*0b57cec5SDimitry Andric *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI)); 182*0b57cec5SDimitry Andric 183*0b57cec5SDimitry Andric RegBankInfo.reset(RBI); 184*0b57cec5SDimitry Andric } 185*0b57cec5SDimitry Andric 186*0b57cec5SDimitry Andric const CallLowering *AArch64Subtarget::getCallLowering() const { 187*0b57cec5SDimitry Andric return CallLoweringInfo.get(); 188*0b57cec5SDimitry Andric } 189*0b57cec5SDimitry Andric 190*0b57cec5SDimitry Andric const InstructionSelector *AArch64Subtarget::getInstructionSelector() const { 191*0b57cec5SDimitry Andric return InstSelector.get(); 192*0b57cec5SDimitry Andric } 193*0b57cec5SDimitry Andric 194*0b57cec5SDimitry Andric const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const { 195*0b57cec5SDimitry Andric return Legalizer.get(); 196*0b57cec5SDimitry Andric } 197*0b57cec5SDimitry Andric 198*0b57cec5SDimitry Andric const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const { 199*0b57cec5SDimitry Andric return RegBankInfo.get(); 200*0b57cec5SDimitry Andric } 201*0b57cec5SDimitry Andric 202*0b57cec5SDimitry Andric /// Find the target operand flags that describe how a global value should be 203*0b57cec5SDimitry Andric /// referenced for the current subtarget. 204*0b57cec5SDimitry Andric unsigned char 205*0b57cec5SDimitry Andric AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, 206*0b57cec5SDimitry Andric const TargetMachine &TM) const { 207*0b57cec5SDimitry Andric // MachO large model always goes via a GOT, simply to get a single 8-byte 208*0b57cec5SDimitry Andric // absolute relocation on all global addresses. 209*0b57cec5SDimitry Andric if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) 210*0b57cec5SDimitry Andric return AArch64II::MO_GOT; 211*0b57cec5SDimitry Andric 212*0b57cec5SDimitry Andric if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) { 213*0b57cec5SDimitry Andric if (GV->hasDLLImportStorageClass()) 214*0b57cec5SDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT; 215*0b57cec5SDimitry Andric if (getTargetTriple().isOSWindows()) 216*0b57cec5SDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB; 217*0b57cec5SDimitry Andric return AArch64II::MO_GOT; 218*0b57cec5SDimitry Andric } 219*0b57cec5SDimitry Andric 220*0b57cec5SDimitry Andric // The small code model's direct accesses use ADRP, which cannot 221*0b57cec5SDimitry Andric // necessarily produce the value 0 (if the code is above 4GB). 222*0b57cec5SDimitry Andric // Same for the tiny code model, where we have a pc relative LDR. 223*0b57cec5SDimitry Andric if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) && 224*0b57cec5SDimitry Andric GV->hasExternalWeakLinkage()) 225*0b57cec5SDimitry Andric return AArch64II::MO_GOT; 226*0b57cec5SDimitry Andric 227*0b57cec5SDimitry Andric return AArch64II::MO_NO_FLAG; 228*0b57cec5SDimitry Andric } 229*0b57cec5SDimitry Andric 230*0b57cec5SDimitry Andric unsigned char AArch64Subtarget::classifyGlobalFunctionReference( 231*0b57cec5SDimitry Andric const GlobalValue *GV, const TargetMachine &TM) const { 232*0b57cec5SDimitry Andric // MachO large model always goes via a GOT, because we don't have the 233*0b57cec5SDimitry Andric // relocations available to do anything else.. 234*0b57cec5SDimitry Andric if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() && 235*0b57cec5SDimitry Andric !GV->hasInternalLinkage()) 236*0b57cec5SDimitry Andric return AArch64II::MO_GOT; 237*0b57cec5SDimitry Andric 238*0b57cec5SDimitry Andric // NonLazyBind goes via GOT unless we know it's available locally. 239*0b57cec5SDimitry Andric auto *F = dyn_cast<Function>(GV); 240*0b57cec5SDimitry Andric if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) && 241*0b57cec5SDimitry Andric !TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) 242*0b57cec5SDimitry Andric return AArch64II::MO_GOT; 243*0b57cec5SDimitry Andric 244*0b57cec5SDimitry Andric return AArch64II::MO_NO_FLAG; 245*0b57cec5SDimitry Andric } 246*0b57cec5SDimitry Andric 247*0b57cec5SDimitry Andric void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 248*0b57cec5SDimitry Andric unsigned NumRegionInstrs) const { 249*0b57cec5SDimitry Andric // LNT run (at least on Cyclone) showed reasonably significant gains for 250*0b57cec5SDimitry Andric // bi-directional scheduling. 253.perlbmk. 251*0b57cec5SDimitry Andric Policy.OnlyTopDown = false; 252*0b57cec5SDimitry Andric Policy.OnlyBottomUp = false; 253*0b57cec5SDimitry Andric // Enabling or Disabling the latency heuristic is a close call: It seems to 254*0b57cec5SDimitry Andric // help nearly no benchmark on out-of-order architectures, on the other hand 255*0b57cec5SDimitry Andric // it regresses register pressure on a few benchmarking. 256*0b57cec5SDimitry Andric Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic; 257*0b57cec5SDimitry Andric } 258*0b57cec5SDimitry Andric 259*0b57cec5SDimitry Andric bool AArch64Subtarget::enableEarlyIfConversion() const { 260*0b57cec5SDimitry Andric return EnableEarlyIfConvert; 261*0b57cec5SDimitry Andric } 262*0b57cec5SDimitry Andric 263*0b57cec5SDimitry Andric bool AArch64Subtarget::supportsAddressTopByteIgnored() const { 264*0b57cec5SDimitry Andric if (!UseAddressTopByteIgnored) 265*0b57cec5SDimitry Andric return false; 266*0b57cec5SDimitry Andric 267*0b57cec5SDimitry Andric if (TargetTriple.isiOS()) { 268*0b57cec5SDimitry Andric unsigned Major, Minor, Micro; 269*0b57cec5SDimitry Andric TargetTriple.getiOSVersion(Major, Minor, Micro); 270*0b57cec5SDimitry Andric return Major >= 8; 271*0b57cec5SDimitry Andric } 272*0b57cec5SDimitry Andric 273*0b57cec5SDimitry Andric return false; 274*0b57cec5SDimitry Andric } 275*0b57cec5SDimitry Andric 276*0b57cec5SDimitry Andric std::unique_ptr<PBQPRAConstraint> 277*0b57cec5SDimitry Andric AArch64Subtarget::getCustomPBQPConstraints() const { 278*0b57cec5SDimitry Andric return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr; 279*0b57cec5SDimitry Andric } 280*0b57cec5SDimitry Andric 281*0b57cec5SDimitry Andric void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const { 282*0b57cec5SDimitry Andric // We usually compute max call frame size after ISel. Do the computation now 283*0b57cec5SDimitry Andric // if the .mir file didn't specify it. Note that this will probably give you 284*0b57cec5SDimitry Andric // bogus values after PEI has eliminated the callframe setup/destroy pseudo 285*0b57cec5SDimitry Andric // instructions, specify explicitly if you need it to be correct. 286*0b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 287*0b57cec5SDimitry Andric if (!MFI.isMaxCallFrameSizeComputed()) 288*0b57cec5SDimitry Andric MFI.computeMaxCallFrameSize(MF); 289*0b57cec5SDimitry Andric } 290