10b57cec5SDimitry Andric //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements the AArch64 specific subclass of TargetSubtarget. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "AArch64Subtarget.h" 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "AArch64.h" 160b57cec5SDimitry Andric #include "AArch64InstrInfo.h" 170b57cec5SDimitry Andric #include "AArch64PBQPRegAlloc.h" 180b57cec5SDimitry Andric #include "AArch64TargetMachine.h" 195ffd83dbSDimitry Andric #include "GISel/AArch64CallLowering.h" 205ffd83dbSDimitry Andric #include "GISel/AArch64LegalizerInfo.h" 215ffd83dbSDimitry Andric #include "GISel/AArch64RegisterBankInfo.h" 220b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h" 230b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" 240b57cec5SDimitry Andric #include "llvm/CodeGen/MachineScheduler.h" 250b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 260b57cec5SDimitry Andric #include "llvm/Support/TargetParser.h" 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric using namespace llvm; 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-subtarget" 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR 330b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC 340b57cec5SDimitry Andric #include "AArch64GenSubtargetInfo.inc" 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric static cl::opt<bool> 370b57cec5SDimitry Andric EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " 380b57cec5SDimitry Andric "converter pass"), cl::init(true), cl::Hidden); 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric // If OS supports TBI, use this flag to enable it. 410b57cec5SDimitry Andric static cl::opt<bool> 420b57cec5SDimitry Andric UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " 430b57cec5SDimitry Andric "an address is ignored"), cl::init(false), cl::Hidden); 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric static cl::opt<bool> 460b57cec5SDimitry Andric UseNonLazyBind("aarch64-enable-nonlazybind", 470b57cec5SDimitry Andric cl::desc("Call nonlazybind functions via direct GOT load"), 480b57cec5SDimitry Andric cl::init(false), cl::Hidden); 490b57cec5SDimitry Andric 505ffd83dbSDimitry Andric static cl::opt<unsigned> SVEVectorBitsMax( 515ffd83dbSDimitry Andric "aarch64-sve-vector-bits-max", 525ffd83dbSDimitry Andric cl::desc("Assume SVE vector registers are at most this big, " 535ffd83dbSDimitry Andric "with zero meaning no maximum size is assumed."), 545ffd83dbSDimitry Andric cl::init(0), cl::Hidden); 555ffd83dbSDimitry Andric 565ffd83dbSDimitry Andric static cl::opt<unsigned> SVEVectorBitsMin( 575ffd83dbSDimitry Andric "aarch64-sve-vector-bits-min", 585ffd83dbSDimitry Andric cl::desc("Assume SVE vector registers are at least this big, " 595ffd83dbSDimitry Andric "with zero meaning no minimum size is assumed."), 605ffd83dbSDimitry Andric cl::init(0), cl::Hidden); 615ffd83dbSDimitry Andric 620b57cec5SDimitry Andric AArch64Subtarget & 630b57cec5SDimitry Andric AArch64Subtarget::initializeSubtargetDependencies(StringRef FS, 640b57cec5SDimitry Andric StringRef CPUString) { 650b57cec5SDimitry Andric // Determine default and user-specified characteristics 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric if (CPUString.empty()) 680b57cec5SDimitry Andric CPUString = "generic"; 690b57cec5SDimitry Andric 70*e8d8bef9SDimitry Andric ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS); 710b57cec5SDimitry Andric initializeProperties(); 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric return *this; 740b57cec5SDimitry Andric } 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric void AArch64Subtarget::initializeProperties() { 770b57cec5SDimitry Andric // Initialize CPU specific properties. We should add a tablegen feature for 780b57cec5SDimitry Andric // this in the future so we can specify it together with the subtarget 790b57cec5SDimitry Andric // features. 800b57cec5SDimitry Andric switch (ARMProcFamily) { 810b57cec5SDimitry Andric case Others: 820b57cec5SDimitry Andric break; 835ffd83dbSDimitry Andric case Carmel: 845ffd83dbSDimitry Andric CacheLineSize = 64; 855ffd83dbSDimitry Andric break; 860b57cec5SDimitry Andric case CortexA35: 870b57cec5SDimitry Andric break; 880b57cec5SDimitry Andric case CortexA53: 898bcb0991SDimitry Andric PrefFunctionLogAlignment = 3; 900b57cec5SDimitry Andric break; 910b57cec5SDimitry Andric case CortexA55: 920b57cec5SDimitry Andric break; 930b57cec5SDimitry Andric case CortexA57: 940b57cec5SDimitry Andric MaxInterleaveFactor = 4; 958bcb0991SDimitry Andric PrefFunctionLogAlignment = 4; 968bcb0991SDimitry Andric break; 978bcb0991SDimitry Andric case CortexA65: 988bcb0991SDimitry Andric PrefFunctionLogAlignment = 3; 990b57cec5SDimitry Andric break; 1000b57cec5SDimitry Andric case CortexA72: 1010b57cec5SDimitry Andric case CortexA73: 1020b57cec5SDimitry Andric case CortexA75: 1030b57cec5SDimitry Andric case CortexA76: 1045ffd83dbSDimitry Andric case CortexA77: 1055ffd83dbSDimitry Andric case CortexA78: 106*e8d8bef9SDimitry Andric case CortexA78C: 107*e8d8bef9SDimitry Andric case CortexR82: 1085ffd83dbSDimitry Andric case CortexX1: 1098bcb0991SDimitry Andric PrefFunctionLogAlignment = 4; 1100b57cec5SDimitry Andric break; 1115ffd83dbSDimitry Andric case A64FX: 1125ffd83dbSDimitry Andric CacheLineSize = 256; 113*e8d8bef9SDimitry Andric PrefFunctionLogAlignment = 3; 114*e8d8bef9SDimitry Andric PrefLoopLogAlignment = 2; 115*e8d8bef9SDimitry Andric MaxInterleaveFactor = 4; 116*e8d8bef9SDimitry Andric PrefetchDistance = 128; 117*e8d8bef9SDimitry Andric MinPrefetchStride = 1024; 118*e8d8bef9SDimitry Andric MaxPrefetchIterationsAhead = 4; 1195ffd83dbSDimitry Andric break; 120480093f4SDimitry Andric case AppleA7: 121480093f4SDimitry Andric case AppleA10: 122480093f4SDimitry Andric case AppleA11: 123480093f4SDimitry Andric case AppleA12: 124480093f4SDimitry Andric case AppleA13: 125*e8d8bef9SDimitry Andric case AppleA14: 1260b57cec5SDimitry Andric CacheLineSize = 64; 1270b57cec5SDimitry Andric PrefetchDistance = 280; 1280b57cec5SDimitry Andric MinPrefetchStride = 2048; 1290b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 3; 1300b57cec5SDimitry Andric break; 1310b57cec5SDimitry Andric case ExynosM3: 1320b57cec5SDimitry Andric MaxInterleaveFactor = 4; 1330b57cec5SDimitry Andric MaxJumpTableSize = 20; 1348bcb0991SDimitry Andric PrefFunctionLogAlignment = 5; 1358bcb0991SDimitry Andric PrefLoopLogAlignment = 4; 1360b57cec5SDimitry Andric break; 1370b57cec5SDimitry Andric case Falkor: 1380b57cec5SDimitry Andric MaxInterleaveFactor = 4; 1390b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 1400b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 1410b57cec5SDimitry Andric CacheLineSize = 128; 1420b57cec5SDimitry Andric PrefetchDistance = 820; 1430b57cec5SDimitry Andric MinPrefetchStride = 2048; 1440b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 8; 1450b57cec5SDimitry Andric break; 1460b57cec5SDimitry Andric case Kryo: 1470b57cec5SDimitry Andric MaxInterleaveFactor = 4; 1480b57cec5SDimitry Andric VectorInsertExtractBaseCost = 2; 1490b57cec5SDimitry Andric CacheLineSize = 128; 1500b57cec5SDimitry Andric PrefetchDistance = 740; 1510b57cec5SDimitry Andric MinPrefetchStride = 1024; 1520b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 11; 1530b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 1540b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 1550b57cec5SDimitry Andric break; 1568bcb0991SDimitry Andric case NeoverseE1: 1578bcb0991SDimitry Andric PrefFunctionLogAlignment = 3; 1588bcb0991SDimitry Andric break; 1598bcb0991SDimitry Andric case NeoverseN1: 160*e8d8bef9SDimitry Andric case NeoverseN2: 161*e8d8bef9SDimitry Andric case NeoverseV1: 1628bcb0991SDimitry Andric PrefFunctionLogAlignment = 4; 1638bcb0991SDimitry Andric break; 1640b57cec5SDimitry Andric case Saphira: 1650b57cec5SDimitry Andric MaxInterleaveFactor = 4; 1660b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 1670b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 1680b57cec5SDimitry Andric break; 1690b57cec5SDimitry Andric case ThunderX2T99: 1700b57cec5SDimitry Andric CacheLineSize = 64; 1718bcb0991SDimitry Andric PrefFunctionLogAlignment = 3; 1728bcb0991SDimitry Andric PrefLoopLogAlignment = 2; 1730b57cec5SDimitry Andric MaxInterleaveFactor = 4; 1740b57cec5SDimitry Andric PrefetchDistance = 128; 1750b57cec5SDimitry Andric MinPrefetchStride = 1024; 1760b57cec5SDimitry Andric MaxPrefetchIterationsAhead = 4; 1770b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 1780b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 1790b57cec5SDimitry Andric break; 1800b57cec5SDimitry Andric case ThunderX: 1810b57cec5SDimitry Andric case ThunderXT88: 1820b57cec5SDimitry Andric case ThunderXT81: 1830b57cec5SDimitry Andric case ThunderXT83: 1840b57cec5SDimitry Andric CacheLineSize = 128; 1858bcb0991SDimitry Andric PrefFunctionLogAlignment = 3; 1868bcb0991SDimitry Andric PrefLoopLogAlignment = 2; 1870b57cec5SDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 1880b57cec5SDimitry Andric MinVectorRegisterBitWidth = 128; 1890b57cec5SDimitry Andric break; 1900b57cec5SDimitry Andric case TSV110: 1910b57cec5SDimitry Andric CacheLineSize = 64; 1928bcb0991SDimitry Andric PrefFunctionLogAlignment = 4; 1938bcb0991SDimitry Andric PrefLoopLogAlignment = 2; 1940b57cec5SDimitry Andric break; 195e837bb5cSDimitry Andric case ThunderX3T110: 196e837bb5cSDimitry Andric CacheLineSize = 64; 197e837bb5cSDimitry Andric PrefFunctionLogAlignment = 4; 198e837bb5cSDimitry Andric PrefLoopLogAlignment = 2; 199e837bb5cSDimitry Andric MaxInterleaveFactor = 4; 200e837bb5cSDimitry Andric PrefetchDistance = 128; 201e837bb5cSDimitry Andric MinPrefetchStride = 1024; 202e837bb5cSDimitry Andric MaxPrefetchIterationsAhead = 4; 203e837bb5cSDimitry Andric // FIXME: remove this to enable 64-bit SLP if performance looks good. 204e837bb5cSDimitry Andric MinVectorRegisterBitWidth = 128; 205e837bb5cSDimitry Andric break; 2060b57cec5SDimitry Andric } 2070b57cec5SDimitry Andric } 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, 2100b57cec5SDimitry Andric const std::string &FS, 2110b57cec5SDimitry Andric const TargetMachine &TM, bool LittleEndian) 212*e8d8bef9SDimitry Andric : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), 2130b57cec5SDimitry Andric ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), 2140b57cec5SDimitry Andric CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), 2150b57cec5SDimitry Andric IsLittle(LittleEndian), 2160b57cec5SDimitry Andric TargetTriple(TT), FrameLowering(), 2170b57cec5SDimitry Andric InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), 2180b57cec5SDimitry Andric TLInfo(TM, *this) { 2190b57cec5SDimitry Andric if (AArch64::isX18ReservedByDefault(TT)) 2200b57cec5SDimitry Andric ReserveXRegister.set(18); 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering())); 2235ffd83dbSDimitry Andric InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering())); 2240b57cec5SDimitry Andric Legalizer.reset(new AArch64LegalizerInfo(*this)); 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo()); 2270b57cec5SDimitry Andric 2280b57cec5SDimitry Andric // FIXME: At this point, we can't rely on Subtarget having RBI. 2290b57cec5SDimitry Andric // It's awkward to mix passing RBI and the Subtarget; should we pass 2300b57cec5SDimitry Andric // TII/TRI as well? 2310b57cec5SDimitry Andric InstSelector.reset(createAArch64InstructionSelector( 2320b57cec5SDimitry Andric *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI)); 2330b57cec5SDimitry Andric 2340b57cec5SDimitry Andric RegBankInfo.reset(RBI); 2350b57cec5SDimitry Andric } 2360b57cec5SDimitry Andric 2370b57cec5SDimitry Andric const CallLowering *AArch64Subtarget::getCallLowering() const { 2380b57cec5SDimitry Andric return CallLoweringInfo.get(); 2390b57cec5SDimitry Andric } 2400b57cec5SDimitry Andric 2415ffd83dbSDimitry Andric const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const { 2425ffd83dbSDimitry Andric return InlineAsmLoweringInfo.get(); 2435ffd83dbSDimitry Andric } 2445ffd83dbSDimitry Andric 2458bcb0991SDimitry Andric InstructionSelector *AArch64Subtarget::getInstructionSelector() const { 2460b57cec5SDimitry Andric return InstSelector.get(); 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const { 2500b57cec5SDimitry Andric return Legalizer.get(); 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const { 2540b57cec5SDimitry Andric return RegBankInfo.get(); 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric /// Find the target operand flags that describe how a global value should be 2580b57cec5SDimitry Andric /// referenced for the current subtarget. 2598bcb0991SDimitry Andric unsigned 2600b57cec5SDimitry Andric AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, 2610b57cec5SDimitry Andric const TargetMachine &TM) const { 2620b57cec5SDimitry Andric // MachO large model always goes via a GOT, simply to get a single 8-byte 2630b57cec5SDimitry Andric // absolute relocation on all global addresses. 2640b57cec5SDimitry Andric if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) 2650b57cec5SDimitry Andric return AArch64II::MO_GOT; 2660b57cec5SDimitry Andric 2670b57cec5SDimitry Andric if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) { 2680b57cec5SDimitry Andric if (GV->hasDLLImportStorageClass()) 2690b57cec5SDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT; 2700b57cec5SDimitry Andric if (getTargetTriple().isOSWindows()) 2710b57cec5SDimitry Andric return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB; 2720b57cec5SDimitry Andric return AArch64II::MO_GOT; 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric // The small code model's direct accesses use ADRP, which cannot 2760b57cec5SDimitry Andric // necessarily produce the value 0 (if the code is above 4GB). 2770b57cec5SDimitry Andric // Same for the tiny code model, where we have a pc relative LDR. 2780b57cec5SDimitry Andric if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) && 2790b57cec5SDimitry Andric GV->hasExternalWeakLinkage()) 2800b57cec5SDimitry Andric return AArch64II::MO_GOT; 2810b57cec5SDimitry Andric 2828bcb0991SDimitry Andric // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate 2838bcb0991SDimitry Andric // that their nominal addresses are tagged and outside of the code model. In 2848bcb0991SDimitry Andric // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the 2858bcb0991SDimitry Andric // tag if necessary based on MO_TAGGED. 2868bcb0991SDimitry Andric if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType())) 2878bcb0991SDimitry Andric return AArch64II::MO_NC | AArch64II::MO_TAGGED; 2888bcb0991SDimitry Andric 2890b57cec5SDimitry Andric return AArch64II::MO_NO_FLAG; 2900b57cec5SDimitry Andric } 2910b57cec5SDimitry Andric 2928bcb0991SDimitry Andric unsigned AArch64Subtarget::classifyGlobalFunctionReference( 2930b57cec5SDimitry Andric const GlobalValue *GV, const TargetMachine &TM) const { 2940b57cec5SDimitry Andric // MachO large model always goes via a GOT, because we don't have the 2950b57cec5SDimitry Andric // relocations available to do anything else.. 2960b57cec5SDimitry Andric if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() && 2970b57cec5SDimitry Andric !GV->hasInternalLinkage()) 2980b57cec5SDimitry Andric return AArch64II::MO_GOT; 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric // NonLazyBind goes via GOT unless we know it's available locally. 3010b57cec5SDimitry Andric auto *F = dyn_cast<Function>(GV); 3020b57cec5SDimitry Andric if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) && 3030b57cec5SDimitry Andric !TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) 3040b57cec5SDimitry Andric return AArch64II::MO_GOT; 3050b57cec5SDimitry Andric 306480093f4SDimitry Andric // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB. 307480093f4SDimitry Andric if (getTargetTriple().isOSWindows()) 308480093f4SDimitry Andric return ClassifyGlobalReference(GV, TM); 309480093f4SDimitry Andric 3100b57cec5SDimitry Andric return AArch64II::MO_NO_FLAG; 3110b57cec5SDimitry Andric } 3120b57cec5SDimitry Andric 3130b57cec5SDimitry Andric void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 3140b57cec5SDimitry Andric unsigned NumRegionInstrs) const { 3150b57cec5SDimitry Andric // LNT run (at least on Cyclone) showed reasonably significant gains for 3160b57cec5SDimitry Andric // bi-directional scheduling. 253.perlbmk. 3170b57cec5SDimitry Andric Policy.OnlyTopDown = false; 3180b57cec5SDimitry Andric Policy.OnlyBottomUp = false; 3190b57cec5SDimitry Andric // Enabling or Disabling the latency heuristic is a close call: It seems to 3200b57cec5SDimitry Andric // help nearly no benchmark on out-of-order architectures, on the other hand 3210b57cec5SDimitry Andric // it regresses register pressure on a few benchmarking. 3220b57cec5SDimitry Andric Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic; 3230b57cec5SDimitry Andric } 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric bool AArch64Subtarget::enableEarlyIfConversion() const { 3260b57cec5SDimitry Andric return EnableEarlyIfConvert; 3270b57cec5SDimitry Andric } 3280b57cec5SDimitry Andric 3290b57cec5SDimitry Andric bool AArch64Subtarget::supportsAddressTopByteIgnored() const { 3300b57cec5SDimitry Andric if (!UseAddressTopByteIgnored) 3310b57cec5SDimitry Andric return false; 3320b57cec5SDimitry Andric 3330b57cec5SDimitry Andric if (TargetTriple.isiOS()) { 3340b57cec5SDimitry Andric unsigned Major, Minor, Micro; 3350b57cec5SDimitry Andric TargetTriple.getiOSVersion(Major, Minor, Micro); 3360b57cec5SDimitry Andric return Major >= 8; 3370b57cec5SDimitry Andric } 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric return false; 3400b57cec5SDimitry Andric } 3410b57cec5SDimitry Andric 3420b57cec5SDimitry Andric std::unique_ptr<PBQPRAConstraint> 3430b57cec5SDimitry Andric AArch64Subtarget::getCustomPBQPConstraints() const { 3448bcb0991SDimitry Andric return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr; 3450b57cec5SDimitry Andric } 3460b57cec5SDimitry Andric 3470b57cec5SDimitry Andric void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const { 3480b57cec5SDimitry Andric // We usually compute max call frame size after ISel. Do the computation now 3490b57cec5SDimitry Andric // if the .mir file didn't specify it. Note that this will probably give you 3500b57cec5SDimitry Andric // bogus values after PEI has eliminated the callframe setup/destroy pseudo 3510b57cec5SDimitry Andric // instructions, specify explicitly if you need it to be correct. 3520b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 3530b57cec5SDimitry Andric if (!MFI.isMaxCallFrameSizeComputed()) 3540b57cec5SDimitry Andric MFI.computeMaxCallFrameSize(MF); 3550b57cec5SDimitry Andric } 3565ffd83dbSDimitry Andric 3575ffd83dbSDimitry Andric unsigned AArch64Subtarget::getMaxSVEVectorSizeInBits() const { 3585ffd83dbSDimitry Andric assert(HasSVE && "Tried to get SVE vector length without SVE support!"); 3595ffd83dbSDimitry Andric assert(SVEVectorBitsMax % 128 == 0 && 3605ffd83dbSDimitry Andric "SVE requires vector length in multiples of 128!"); 3615ffd83dbSDimitry Andric assert((SVEVectorBitsMax >= SVEVectorBitsMin || SVEVectorBitsMax == 0) && 3625ffd83dbSDimitry Andric "Minimum SVE vector size should not be larger than its maximum!"); 3635ffd83dbSDimitry Andric if (SVEVectorBitsMax == 0) 3645ffd83dbSDimitry Andric return 0; 3655ffd83dbSDimitry Andric return (std::max(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128; 3665ffd83dbSDimitry Andric } 3675ffd83dbSDimitry Andric 3685ffd83dbSDimitry Andric unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const { 3695ffd83dbSDimitry Andric assert(HasSVE && "Tried to get SVE vector length without SVE support!"); 3705ffd83dbSDimitry Andric assert(SVEVectorBitsMin % 128 == 0 && 3715ffd83dbSDimitry Andric "SVE requires vector length in multiples of 128!"); 3725ffd83dbSDimitry Andric assert((SVEVectorBitsMax >= SVEVectorBitsMin || SVEVectorBitsMax == 0) && 3735ffd83dbSDimitry Andric "Minimum SVE vector size should not be larger than its maximum!"); 3745ffd83dbSDimitry Andric if (SVEVectorBitsMax == 0) 3755ffd83dbSDimitry Andric return (SVEVectorBitsMin / 128) * 128; 3765ffd83dbSDimitry Andric return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128; 3775ffd83dbSDimitry Andric } 378*e8d8bef9SDimitry Andric 379*e8d8bef9SDimitry Andric bool AArch64Subtarget::useSVEForFixedLengthVectors() const { 380*e8d8bef9SDimitry Andric // Prefer NEON unless larger SVE registers are available. 381*e8d8bef9SDimitry Andric return hasSVE() && getMinSVEVectorSizeInBits() >= 256; 382*e8d8bef9SDimitry Andric } 383