1*0b57cec5SDimitry Andric //===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This file implements the ARM specific subclass of TargetSubtargetInfo. 10*0b57cec5SDimitry Andric // 11*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 12*0b57cec5SDimitry Andric 13*0b57cec5SDimitry Andric #include "ARM.h" 14*0b57cec5SDimitry Andric 15*0b57cec5SDimitry Andric #include "ARMCallLowering.h" 16*0b57cec5SDimitry Andric #include "ARMLegalizerInfo.h" 17*0b57cec5SDimitry Andric #include "ARMRegisterBankInfo.h" 18*0b57cec5SDimitry Andric #include "ARMSubtarget.h" 19*0b57cec5SDimitry Andric #include "ARMFrameLowering.h" 20*0b57cec5SDimitry Andric #include "ARMInstrInfo.h" 21*0b57cec5SDimitry Andric #include "ARMSubtarget.h" 22*0b57cec5SDimitry Andric #include "ARMTargetMachine.h" 23*0b57cec5SDimitry Andric #include "MCTargetDesc/ARMMCTargetDesc.h" 24*0b57cec5SDimitry Andric #include "Thumb1FrameLowering.h" 25*0b57cec5SDimitry Andric #include "Thumb1InstrInfo.h" 26*0b57cec5SDimitry Andric #include "Thumb2InstrInfo.h" 27*0b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 28*0b57cec5SDimitry Andric #include "llvm/ADT/Triple.h" 29*0b57cec5SDimitry Andric #include "llvm/ADT/Twine.h" 30*0b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" 31*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 32*0b57cec5SDimitry Andric #include "llvm/IR/Function.h" 33*0b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 34*0b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h" 35*0b57cec5SDimitry Andric #include "llvm/MC/MCTargetOptions.h" 36*0b57cec5SDimitry Andric #include "llvm/Support/CodeGen.h" 37*0b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 38*0b57cec5SDimitry Andric #include "llvm/Support/TargetParser.h" 39*0b57cec5SDimitry Andric #include "llvm/Target/TargetOptions.h" 40*0b57cec5SDimitry Andric 41*0b57cec5SDimitry Andric using namespace llvm; 42*0b57cec5SDimitry Andric 43*0b57cec5SDimitry Andric #define DEBUG_TYPE "arm-subtarget" 44*0b57cec5SDimitry Andric 45*0b57cec5SDimitry Andric #define GET_SUBTARGETINFO_TARGET_DESC 46*0b57cec5SDimitry Andric #define GET_SUBTARGETINFO_CTOR 47*0b57cec5SDimitry Andric #include "ARMGenSubtargetInfo.inc" 48*0b57cec5SDimitry Andric 49*0b57cec5SDimitry Andric static cl::opt<bool> 50*0b57cec5SDimitry Andric UseFusedMulOps("arm-use-mulops", 51*0b57cec5SDimitry Andric cl::init(true), cl::Hidden); 52*0b57cec5SDimitry Andric 53*0b57cec5SDimitry Andric enum ITMode { 54*0b57cec5SDimitry Andric DefaultIT, 55*0b57cec5SDimitry Andric RestrictedIT, 56*0b57cec5SDimitry Andric NoRestrictedIT 57*0b57cec5SDimitry Andric }; 58*0b57cec5SDimitry Andric 59*0b57cec5SDimitry Andric static cl::opt<ITMode> 60*0b57cec5SDimitry Andric IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), 61*0b57cec5SDimitry Andric cl::ZeroOrMore, 62*0b57cec5SDimitry Andric cl::values(clEnumValN(DefaultIT, "arm-default-it", 63*0b57cec5SDimitry Andric "Generate IT block based on arch"), 64*0b57cec5SDimitry Andric clEnumValN(RestrictedIT, "arm-restrict-it", 65*0b57cec5SDimitry Andric "Disallow deprecated IT based on ARMv8"), 66*0b57cec5SDimitry Andric clEnumValN(NoRestrictedIT, "arm-no-restrict-it", 67*0b57cec5SDimitry Andric "Allow IT blocks based on ARMv7"))); 68*0b57cec5SDimitry Andric 69*0b57cec5SDimitry Andric /// ForceFastISel - Use the fast-isel, even for subtargets where it is not 70*0b57cec5SDimitry Andric /// currently supported (for testing only). 71*0b57cec5SDimitry Andric static cl::opt<bool> 72*0b57cec5SDimitry Andric ForceFastISel("arm-force-fast-isel", 73*0b57cec5SDimitry Andric cl::init(false), cl::Hidden); 74*0b57cec5SDimitry Andric 75*0b57cec5SDimitry Andric /// initializeSubtargetDependencies - Initializes using a CPU and feature string 76*0b57cec5SDimitry Andric /// so that we can use initializer lists for subtarget initialization. 77*0b57cec5SDimitry Andric ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, 78*0b57cec5SDimitry Andric StringRef FS) { 79*0b57cec5SDimitry Andric initializeEnvironment(); 80*0b57cec5SDimitry Andric initSubtargetFeatures(CPU, FS); 81*0b57cec5SDimitry Andric return *this; 82*0b57cec5SDimitry Andric } 83*0b57cec5SDimitry Andric 84*0b57cec5SDimitry Andric ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, 85*0b57cec5SDimitry Andric StringRef FS) { 86*0b57cec5SDimitry Andric ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS); 87*0b57cec5SDimitry Andric if (STI.isThumb1Only()) 88*0b57cec5SDimitry Andric return (ARMFrameLowering *)new Thumb1FrameLowering(STI); 89*0b57cec5SDimitry Andric 90*0b57cec5SDimitry Andric return new ARMFrameLowering(STI); 91*0b57cec5SDimitry Andric } 92*0b57cec5SDimitry Andric 93*0b57cec5SDimitry Andric ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, 94*0b57cec5SDimitry Andric const std::string &FS, 95*0b57cec5SDimitry Andric const ARMBaseTargetMachine &TM, bool IsLittle, 96*0b57cec5SDimitry Andric bool MinSize) 97*0b57cec5SDimitry Andric : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps), 98*0b57cec5SDimitry Andric CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle), 99*0b57cec5SDimitry Andric TargetTriple(TT), Options(TM.Options), TM(TM), 100*0b57cec5SDimitry Andric FrameLowering(initializeFrameLowering(CPU, FS)), 101*0b57cec5SDimitry Andric // At this point initializeSubtargetDependencies has been called so 102*0b57cec5SDimitry Andric // we can query directly. 103*0b57cec5SDimitry Andric InstrInfo(isThumb1Only() 104*0b57cec5SDimitry Andric ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) 105*0b57cec5SDimitry Andric : !isThumb() 106*0b57cec5SDimitry Andric ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) 107*0b57cec5SDimitry Andric : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), 108*0b57cec5SDimitry Andric TLInfo(TM, *this) { 109*0b57cec5SDimitry Andric 110*0b57cec5SDimitry Andric CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering())); 111*0b57cec5SDimitry Andric Legalizer.reset(new ARMLegalizerInfo(*this)); 112*0b57cec5SDimitry Andric 113*0b57cec5SDimitry Andric auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo()); 114*0b57cec5SDimitry Andric 115*0b57cec5SDimitry Andric // FIXME: At this point, we can't rely on Subtarget having RBI. 116*0b57cec5SDimitry Andric // It's awkward to mix passing RBI and the Subtarget; should we pass 117*0b57cec5SDimitry Andric // TII/TRI as well? 118*0b57cec5SDimitry Andric InstSelector.reset(createARMInstructionSelector( 119*0b57cec5SDimitry Andric *static_cast<const ARMBaseTargetMachine *>(&TM), *this, *RBI)); 120*0b57cec5SDimitry Andric 121*0b57cec5SDimitry Andric RegBankInfo.reset(RBI); 122*0b57cec5SDimitry Andric } 123*0b57cec5SDimitry Andric 124*0b57cec5SDimitry Andric const CallLowering *ARMSubtarget::getCallLowering() const { 125*0b57cec5SDimitry Andric return CallLoweringInfo.get(); 126*0b57cec5SDimitry Andric } 127*0b57cec5SDimitry Andric 128*0b57cec5SDimitry Andric const InstructionSelector *ARMSubtarget::getInstructionSelector() const { 129*0b57cec5SDimitry Andric return InstSelector.get(); 130*0b57cec5SDimitry Andric } 131*0b57cec5SDimitry Andric 132*0b57cec5SDimitry Andric const LegalizerInfo *ARMSubtarget::getLegalizerInfo() const { 133*0b57cec5SDimitry Andric return Legalizer.get(); 134*0b57cec5SDimitry Andric } 135*0b57cec5SDimitry Andric 136*0b57cec5SDimitry Andric const RegisterBankInfo *ARMSubtarget::getRegBankInfo() const { 137*0b57cec5SDimitry Andric return RegBankInfo.get(); 138*0b57cec5SDimitry Andric } 139*0b57cec5SDimitry Andric 140*0b57cec5SDimitry Andric bool ARMSubtarget::isXRaySupported() const { 141*0b57cec5SDimitry Andric // We don't currently suppport Thumb, but Windows requires Thumb. 142*0b57cec5SDimitry Andric return hasV6Ops() && hasARMOps() && !isTargetWindows(); 143*0b57cec5SDimitry Andric } 144*0b57cec5SDimitry Andric 145*0b57cec5SDimitry Andric void ARMSubtarget::initializeEnvironment() { 146*0b57cec5SDimitry Andric // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this 147*0b57cec5SDimitry Andric // directly from it, but we can try to make sure they're consistent when both 148*0b57cec5SDimitry Andric // available. 149*0b57cec5SDimitry Andric UseSjLjEH = (isTargetDarwin() && !isTargetWatchABI() && 150*0b57cec5SDimitry Andric Options.ExceptionModel == ExceptionHandling::None) || 151*0b57cec5SDimitry Andric Options.ExceptionModel == ExceptionHandling::SjLj; 152*0b57cec5SDimitry Andric assert((!TM.getMCAsmInfo() || 153*0b57cec5SDimitry Andric (TM.getMCAsmInfo()->getExceptionHandlingType() == 154*0b57cec5SDimitry Andric ExceptionHandling::SjLj) == UseSjLjEH) && 155*0b57cec5SDimitry Andric "inconsistent sjlj choice between CodeGen and MC"); 156*0b57cec5SDimitry Andric } 157*0b57cec5SDimitry Andric 158*0b57cec5SDimitry Andric void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { 159*0b57cec5SDimitry Andric if (CPUString.empty()) { 160*0b57cec5SDimitry Andric CPUString = "generic"; 161*0b57cec5SDimitry Andric 162*0b57cec5SDimitry Andric if (isTargetDarwin()) { 163*0b57cec5SDimitry Andric StringRef ArchName = TargetTriple.getArchName(); 164*0b57cec5SDimitry Andric ARM::ArchKind AK = ARM::parseArch(ArchName); 165*0b57cec5SDimitry Andric if (AK == ARM::ArchKind::ARMV7S) 166*0b57cec5SDimitry Andric // Default to the Swift CPU when targeting armv7s/thumbv7s. 167*0b57cec5SDimitry Andric CPUString = "swift"; 168*0b57cec5SDimitry Andric else if (AK == ARM::ArchKind::ARMV7K) 169*0b57cec5SDimitry Andric // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k. 170*0b57cec5SDimitry Andric // ARMv7k does not use SjLj exception handling. 171*0b57cec5SDimitry Andric CPUString = "cortex-a7"; 172*0b57cec5SDimitry Andric } 173*0b57cec5SDimitry Andric } 174*0b57cec5SDimitry Andric 175*0b57cec5SDimitry Andric // Insert the architecture feature derived from the target triple into the 176*0b57cec5SDimitry Andric // feature string. This is important for setting features that are implied 177*0b57cec5SDimitry Andric // based on the architecture version. 178*0b57cec5SDimitry Andric std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple, CPUString); 179*0b57cec5SDimitry Andric if (!FS.empty()) { 180*0b57cec5SDimitry Andric if (!ArchFS.empty()) 181*0b57cec5SDimitry Andric ArchFS = (Twine(ArchFS) + "," + FS).str(); 182*0b57cec5SDimitry Andric else 183*0b57cec5SDimitry Andric ArchFS = FS; 184*0b57cec5SDimitry Andric } 185*0b57cec5SDimitry Andric ParseSubtargetFeatures(CPUString, ArchFS); 186*0b57cec5SDimitry Andric 187*0b57cec5SDimitry Andric // FIXME: This used enable V6T2 support implicitly for Thumb2 mode. 188*0b57cec5SDimitry Andric // Assert this for now to make the change obvious. 189*0b57cec5SDimitry Andric assert(hasV6T2Ops() || !hasThumb2()); 190*0b57cec5SDimitry Andric 191*0b57cec5SDimitry Andric // Execute only support requires movt support 192*0b57cec5SDimitry Andric if (genExecuteOnly()) { 193*0b57cec5SDimitry Andric NoMovt = false; 194*0b57cec5SDimitry Andric assert(hasV8MBaselineOps() && "Cannot generate execute-only code for this target"); 195*0b57cec5SDimitry Andric } 196*0b57cec5SDimitry Andric 197*0b57cec5SDimitry Andric // Keep a pointer to static instruction cost data for the specified CPU. 198*0b57cec5SDimitry Andric SchedModel = getSchedModelForCPU(CPUString); 199*0b57cec5SDimitry Andric 200*0b57cec5SDimitry Andric // Initialize scheduling itinerary for the specified CPU. 201*0b57cec5SDimitry Andric InstrItins = getInstrItineraryForCPU(CPUString); 202*0b57cec5SDimitry Andric 203*0b57cec5SDimitry Andric // FIXME: this is invalid for WindowsCE 204*0b57cec5SDimitry Andric if (isTargetWindows()) 205*0b57cec5SDimitry Andric NoARM = true; 206*0b57cec5SDimitry Andric 207*0b57cec5SDimitry Andric if (isAAPCS_ABI()) 208*0b57cec5SDimitry Andric stackAlignment = 8; 209*0b57cec5SDimitry Andric if (isTargetNaCl() || isAAPCS16_ABI()) 210*0b57cec5SDimitry Andric stackAlignment = 16; 211*0b57cec5SDimitry Andric 212*0b57cec5SDimitry Andric // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: 213*0b57cec5SDimitry Andric // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as 214*0b57cec5SDimitry Andric // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation 215*0b57cec5SDimitry Andric // support in the assembler and linker to be used. This would need to be 216*0b57cec5SDimitry Andric // fixed to fully support tail calls in Thumb1. 217*0b57cec5SDimitry Andric // 218*0b57cec5SDimitry Andric // For ARMv8-M, we /do/ implement tail calls. Doing this is tricky for v8-M 219*0b57cec5SDimitry Andric // baseline, since the LDM/POP instruction on Thumb doesn't take LR. This 220*0b57cec5SDimitry Andric // means if we need to reload LR, it takes extra instructions, which outweighs 221*0b57cec5SDimitry Andric // the value of the tail call; but here we don't know yet whether LR is going 222*0b57cec5SDimitry Andric // to be used. We take the optimistic approach of generating the tail call and 223*0b57cec5SDimitry Andric // perhaps taking a hit if we need to restore the LR. 224*0b57cec5SDimitry Andric 225*0b57cec5SDimitry Andric // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 226*0b57cec5SDimitry Andric // but we need to make sure there are enough registers; the only valid 227*0b57cec5SDimitry Andric // registers are the 4 used for parameters. We don't currently do this 228*0b57cec5SDimitry Andric // case. 229*0b57cec5SDimitry Andric 230*0b57cec5SDimitry Andric SupportsTailCall = !isThumb() || hasV8MBaselineOps(); 231*0b57cec5SDimitry Andric 232*0b57cec5SDimitry Andric if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0)) 233*0b57cec5SDimitry Andric SupportsTailCall = false; 234*0b57cec5SDimitry Andric 235*0b57cec5SDimitry Andric switch (IT) { 236*0b57cec5SDimitry Andric case DefaultIT: 237*0b57cec5SDimitry Andric RestrictIT = hasV8Ops(); 238*0b57cec5SDimitry Andric break; 239*0b57cec5SDimitry Andric case RestrictedIT: 240*0b57cec5SDimitry Andric RestrictIT = true; 241*0b57cec5SDimitry Andric break; 242*0b57cec5SDimitry Andric case NoRestrictedIT: 243*0b57cec5SDimitry Andric RestrictIT = false; 244*0b57cec5SDimitry Andric break; 245*0b57cec5SDimitry Andric } 246*0b57cec5SDimitry Andric 247*0b57cec5SDimitry Andric // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. 248*0b57cec5SDimitry Andric const FeatureBitset &Bits = getFeatureBits(); 249*0b57cec5SDimitry Andric if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters 250*0b57cec5SDimitry Andric (Options.UnsafeFPMath || isTargetDarwin())) 251*0b57cec5SDimitry Andric UseNEONForSinglePrecisionFP = true; 252*0b57cec5SDimitry Andric 253*0b57cec5SDimitry Andric if (isRWPI()) 254*0b57cec5SDimitry Andric ReserveR9 = true; 255*0b57cec5SDimitry Andric 256*0b57cec5SDimitry Andric // FIXME: Teach TableGen to deal with these instead of doing it manually here. 257*0b57cec5SDimitry Andric switch (ARMProcFamily) { 258*0b57cec5SDimitry Andric case Others: 259*0b57cec5SDimitry Andric case CortexA5: 260*0b57cec5SDimitry Andric break; 261*0b57cec5SDimitry Andric case CortexA7: 262*0b57cec5SDimitry Andric LdStMultipleTiming = DoubleIssue; 263*0b57cec5SDimitry Andric break; 264*0b57cec5SDimitry Andric case CortexA8: 265*0b57cec5SDimitry Andric LdStMultipleTiming = DoubleIssue; 266*0b57cec5SDimitry Andric break; 267*0b57cec5SDimitry Andric case CortexA9: 268*0b57cec5SDimitry Andric LdStMultipleTiming = DoubleIssueCheckUnalignedAccess; 269*0b57cec5SDimitry Andric PreISelOperandLatencyAdjustment = 1; 270*0b57cec5SDimitry Andric break; 271*0b57cec5SDimitry Andric case CortexA12: 272*0b57cec5SDimitry Andric break; 273*0b57cec5SDimitry Andric case CortexA15: 274*0b57cec5SDimitry Andric MaxInterleaveFactor = 2; 275*0b57cec5SDimitry Andric PreISelOperandLatencyAdjustment = 1; 276*0b57cec5SDimitry Andric PartialUpdateClearance = 12; 277*0b57cec5SDimitry Andric break; 278*0b57cec5SDimitry Andric case CortexA17: 279*0b57cec5SDimitry Andric case CortexA32: 280*0b57cec5SDimitry Andric case CortexA35: 281*0b57cec5SDimitry Andric case CortexA53: 282*0b57cec5SDimitry Andric case CortexA55: 283*0b57cec5SDimitry Andric case CortexA57: 284*0b57cec5SDimitry Andric case CortexA72: 285*0b57cec5SDimitry Andric case CortexA73: 286*0b57cec5SDimitry Andric case CortexA75: 287*0b57cec5SDimitry Andric case CortexA76: 288*0b57cec5SDimitry Andric case CortexR4: 289*0b57cec5SDimitry Andric case CortexR4F: 290*0b57cec5SDimitry Andric case CortexR5: 291*0b57cec5SDimitry Andric case CortexR7: 292*0b57cec5SDimitry Andric case CortexM3: 293*0b57cec5SDimitry Andric case CortexR52: 294*0b57cec5SDimitry Andric break; 295*0b57cec5SDimitry Andric case Exynos: 296*0b57cec5SDimitry Andric LdStMultipleTiming = SingleIssuePlusExtras; 297*0b57cec5SDimitry Andric MaxInterleaveFactor = 4; 298*0b57cec5SDimitry Andric if (!isThumb()) 299*0b57cec5SDimitry Andric PrefLoopAlignment = 3; 300*0b57cec5SDimitry Andric break; 301*0b57cec5SDimitry Andric case Kryo: 302*0b57cec5SDimitry Andric break; 303*0b57cec5SDimitry Andric case Krait: 304*0b57cec5SDimitry Andric PreISelOperandLatencyAdjustment = 1; 305*0b57cec5SDimitry Andric break; 306*0b57cec5SDimitry Andric case Swift: 307*0b57cec5SDimitry Andric MaxInterleaveFactor = 2; 308*0b57cec5SDimitry Andric LdStMultipleTiming = SingleIssuePlusExtras; 309*0b57cec5SDimitry Andric PreISelOperandLatencyAdjustment = 1; 310*0b57cec5SDimitry Andric PartialUpdateClearance = 12; 311*0b57cec5SDimitry Andric break; 312*0b57cec5SDimitry Andric } 313*0b57cec5SDimitry Andric } 314*0b57cec5SDimitry Andric 315*0b57cec5SDimitry Andric bool ARMSubtarget::isTargetHardFloat() const { return TM.isTargetHardFloat(); } 316*0b57cec5SDimitry Andric 317*0b57cec5SDimitry Andric bool ARMSubtarget::isAPCS_ABI() const { 318*0b57cec5SDimitry Andric assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); 319*0b57cec5SDimitry Andric return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS; 320*0b57cec5SDimitry Andric } 321*0b57cec5SDimitry Andric bool ARMSubtarget::isAAPCS_ABI() const { 322*0b57cec5SDimitry Andric assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); 323*0b57cec5SDimitry Andric return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS || 324*0b57cec5SDimitry Andric TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; 325*0b57cec5SDimitry Andric } 326*0b57cec5SDimitry Andric bool ARMSubtarget::isAAPCS16_ABI() const { 327*0b57cec5SDimitry Andric assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); 328*0b57cec5SDimitry Andric return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; 329*0b57cec5SDimitry Andric } 330*0b57cec5SDimitry Andric 331*0b57cec5SDimitry Andric bool ARMSubtarget::isROPI() const { 332*0b57cec5SDimitry Andric return TM.getRelocationModel() == Reloc::ROPI || 333*0b57cec5SDimitry Andric TM.getRelocationModel() == Reloc::ROPI_RWPI; 334*0b57cec5SDimitry Andric } 335*0b57cec5SDimitry Andric bool ARMSubtarget::isRWPI() const { 336*0b57cec5SDimitry Andric return TM.getRelocationModel() == Reloc::RWPI || 337*0b57cec5SDimitry Andric TM.getRelocationModel() == Reloc::ROPI_RWPI; 338*0b57cec5SDimitry Andric } 339*0b57cec5SDimitry Andric 340*0b57cec5SDimitry Andric bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { 341*0b57cec5SDimitry Andric if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) 342*0b57cec5SDimitry Andric return true; 343*0b57cec5SDimitry Andric 344*0b57cec5SDimitry Andric // 32 bit macho has no relocation for a-b if a is undefined, even if b is in 345*0b57cec5SDimitry Andric // the section that is being relocated. This means we have to use o load even 346*0b57cec5SDimitry Andric // for GVs that are known to be local to the dso. 347*0b57cec5SDimitry Andric if (isTargetMachO() && TM.isPositionIndependent() && 348*0b57cec5SDimitry Andric (GV->isDeclarationForLinker() || GV->hasCommonLinkage())) 349*0b57cec5SDimitry Andric return true; 350*0b57cec5SDimitry Andric 351*0b57cec5SDimitry Andric return false; 352*0b57cec5SDimitry Andric } 353*0b57cec5SDimitry Andric 354*0b57cec5SDimitry Andric bool ARMSubtarget::isGVInGOT(const GlobalValue *GV) const { 355*0b57cec5SDimitry Andric return isTargetELF() && TM.isPositionIndependent() && 356*0b57cec5SDimitry Andric !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); 357*0b57cec5SDimitry Andric } 358*0b57cec5SDimitry Andric 359*0b57cec5SDimitry Andric unsigned ARMSubtarget::getMispredictionPenalty() const { 360*0b57cec5SDimitry Andric return SchedModel.MispredictPenalty; 361*0b57cec5SDimitry Andric } 362*0b57cec5SDimitry Andric 363*0b57cec5SDimitry Andric bool ARMSubtarget::enableMachineScheduler() const { 364*0b57cec5SDimitry Andric // The MachineScheduler can increase register usage, so we use more high 365*0b57cec5SDimitry Andric // registers and end up with more T2 instructions that cannot be converted to 366*0b57cec5SDimitry Andric // T1 instructions. At least until we do better at converting to thumb1 367*0b57cec5SDimitry Andric // instructions, on cortex-m at Oz where we are size-paranoid, don't use the 368*0b57cec5SDimitry Andric // Machine scheduler, relying on the DAG register pressure scheduler instead. 369*0b57cec5SDimitry Andric if (isMClass() && hasMinSize()) 370*0b57cec5SDimitry Andric return false; 371*0b57cec5SDimitry Andric // Enable the MachineScheduler before register allocation for subtargets 372*0b57cec5SDimitry Andric // with the use-misched feature. 373*0b57cec5SDimitry Andric return useMachineScheduler(); 374*0b57cec5SDimitry Andric } 375*0b57cec5SDimitry Andric 376*0b57cec5SDimitry Andric // This overrides the PostRAScheduler bit in the SchedModel for any CPU. 377*0b57cec5SDimitry Andric bool ARMSubtarget::enablePostRAScheduler() const { 378*0b57cec5SDimitry Andric if (disablePostRAScheduler()) 379*0b57cec5SDimitry Andric return false; 380*0b57cec5SDimitry Andric // Don't reschedule potential IT blocks. 381*0b57cec5SDimitry Andric return !isThumb1Only(); 382*0b57cec5SDimitry Andric } 383*0b57cec5SDimitry Andric 384*0b57cec5SDimitry Andric bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); } 385*0b57cec5SDimitry Andric 386*0b57cec5SDimitry Andric bool ARMSubtarget::useStride4VFPs() const { 387*0b57cec5SDimitry Andric // For general targets, the prologue can grow when VFPs are allocated with 388*0b57cec5SDimitry Andric // stride 4 (more vpush instructions). But WatchOS uses a compact unwind 389*0b57cec5SDimitry Andric // format which it's more important to get right. 390*0b57cec5SDimitry Andric return isTargetWatchABI() || 391*0b57cec5SDimitry Andric (useWideStrideVFP() && !OptMinSize); 392*0b57cec5SDimitry Andric } 393*0b57cec5SDimitry Andric 394*0b57cec5SDimitry Andric bool ARMSubtarget::useMovt() const { 395*0b57cec5SDimitry Andric // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit 396*0b57cec5SDimitry Andric // immediates as it is inherently position independent, and may be out of 397*0b57cec5SDimitry Andric // range otherwise. 398*0b57cec5SDimitry Andric return !NoMovt && hasV8MBaselineOps() && 399*0b57cec5SDimitry Andric (isTargetWindows() || !OptMinSize || genExecuteOnly()); 400*0b57cec5SDimitry Andric } 401*0b57cec5SDimitry Andric 402*0b57cec5SDimitry Andric bool ARMSubtarget::useFastISel() const { 403*0b57cec5SDimitry Andric // Enable fast-isel for any target, for testing only. 404*0b57cec5SDimitry Andric if (ForceFastISel) 405*0b57cec5SDimitry Andric return true; 406*0b57cec5SDimitry Andric 407*0b57cec5SDimitry Andric // Limit fast-isel to the targets that are or have been tested. 408*0b57cec5SDimitry Andric if (!hasV6Ops()) 409*0b57cec5SDimitry Andric return false; 410*0b57cec5SDimitry Andric 411*0b57cec5SDimitry Andric // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. 412*0b57cec5SDimitry Andric return TM.Options.EnableFastISel && 413*0b57cec5SDimitry Andric ((isTargetMachO() && !isThumb1Only()) || 414*0b57cec5SDimitry Andric (isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb())); 415*0b57cec5SDimitry Andric } 416*0b57cec5SDimitry Andric 417*0b57cec5SDimitry Andric unsigned ARMSubtarget::getGPRAllocationOrder(const MachineFunction &MF) const { 418*0b57cec5SDimitry Andric // The GPR register class has multiple possible allocation orders, with 419*0b57cec5SDimitry Andric // tradeoffs preferred by different sub-architectures and optimisation goals. 420*0b57cec5SDimitry Andric // The allocation orders are: 421*0b57cec5SDimitry Andric // 0: (the default tablegen order, not used) 422*0b57cec5SDimitry Andric // 1: r14, r0-r13 423*0b57cec5SDimitry Andric // 2: r0-r7 424*0b57cec5SDimitry Andric // 3: r0-r7, r12, lr, r8-r11 425*0b57cec5SDimitry Andric // Note that the register allocator will change this order so that 426*0b57cec5SDimitry Andric // callee-saved registers are used later, as they require extra work in the 427*0b57cec5SDimitry Andric // prologue/epilogue (though we sometimes override that). 428*0b57cec5SDimitry Andric 429*0b57cec5SDimitry Andric // For thumb1-only targets, only the low registers are allocatable. 430*0b57cec5SDimitry Andric if (isThumb1Only()) 431*0b57cec5SDimitry Andric return 2; 432*0b57cec5SDimitry Andric 433*0b57cec5SDimitry Andric // Allocate low registers first, so we can select more 16-bit instructions. 434*0b57cec5SDimitry Andric // We also (in ignoreCSRForAllocationOrder) override the default behaviour 435*0b57cec5SDimitry Andric // with regards to callee-saved registers, because pushing extra registers is 436*0b57cec5SDimitry Andric // much cheaper (in terms of code size) than using high registers. After 437*0b57cec5SDimitry Andric // that, we allocate r12 (doesn't need to be saved), lr (saving it means we 438*0b57cec5SDimitry Andric // can return with the pop, don't need an extra "bx lr") and then the rest of 439*0b57cec5SDimitry Andric // the high registers. 440*0b57cec5SDimitry Andric if (isThumb2() && MF.getFunction().hasMinSize()) 441*0b57cec5SDimitry Andric return 3; 442*0b57cec5SDimitry Andric 443*0b57cec5SDimitry Andric // Otherwise, allocate in the default order, using LR first because saving it 444*0b57cec5SDimitry Andric // allows a shorter epilogue sequence. 445*0b57cec5SDimitry Andric return 1; 446*0b57cec5SDimitry Andric } 447*0b57cec5SDimitry Andric 448*0b57cec5SDimitry Andric bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF, 449*0b57cec5SDimitry Andric unsigned PhysReg) const { 450*0b57cec5SDimitry Andric // To minimize code size in Thumb2, we prefer the usage of low regs (lower 451*0b57cec5SDimitry Andric // cost per use) so we can use narrow encoding. By default, caller-saved 452*0b57cec5SDimitry Andric // registers (e.g. lr, r12) are always allocated first, regardless of 453*0b57cec5SDimitry Andric // their cost per use. When optForMinSize, we prefer the low regs even if 454*0b57cec5SDimitry Andric // they are CSR because usually push/pop can be folded into existing ones. 455*0b57cec5SDimitry Andric return isThumb2() && MF.getFunction().hasMinSize() && 456*0b57cec5SDimitry Andric ARM::GPRRegClass.contains(PhysReg); 457*0b57cec5SDimitry Andric } 458