1 //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file declares the AArch64 specific subclass of TargetSubtarget. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H 14 #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H 15 16 #include "AArch64FrameLowering.h" 17 #include "AArch64ISelLowering.h" 18 #include "AArch64InstrInfo.h" 19 #include "AArch64PointerAuth.h" 20 #include "AArch64RegisterInfo.h" 21 #include "AArch64SelectionDAGInfo.h" 22 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 23 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" 24 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 25 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 26 #include "llvm/CodeGen/RegisterBankInfo.h" 27 #include "llvm/CodeGen/TargetSubtargetInfo.h" 28 #include "llvm/IR/DataLayout.h" 29 #include "llvm/TargetParser/Triple.h" 30 31 #define GET_SUBTARGETINFO_HEADER 32 #include "AArch64GenSubtargetInfo.inc" 33 34 namespace llvm { 35 class GlobalValue; 36 class StringRef; 37 38 class AArch64Subtarget final : public AArch64GenSubtargetInfo { 39 public: 40 enum ARMProcFamilyEnum : uint8_t { 41 Generic, 42 #define ARM_PROCESSOR_FAMILY(ENUM) ENUM, 43 #include "llvm/TargetParser/AArch64TargetParserDef.inc" 44 #undef ARM_PROCESSOR_FAMILY 45 }; 46 47 protected: 48 /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. 49 ARMProcFamilyEnum ARMProcFamily = Generic; 50 51 // Enable 64-bit vectorization in SLP. 52 unsigned MinVectorRegisterBitWidth = 64; 53 54 // Bool members corresponding to the SubtargetFeatures defined in tablegen 55 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ 56 bool ATTRIBUTE = DEFAULT; 57 #include "AArch64GenSubtargetInfo.inc" 58 59 unsigned EpilogueVectorizationMinVF = 16; 60 uint8_t MaxInterleaveFactor = 2; 61 uint8_t VectorInsertExtractBaseCost = 2; 62 uint16_t CacheLineSize = 0; 63 // Default scatter/gather overhead. 64 unsigned ScatterOverhead = 10; 65 unsigned GatherOverhead = 10; 66 uint16_t PrefetchDistance = 0; 67 uint16_t MinPrefetchStride = 1; 68 unsigned MaxPrefetchIterationsAhead = UINT_MAX; 69 Align PrefFunctionAlignment; 70 Align PrefLoopAlignment; 71 unsigned MaxBytesForLoopAlignment = 0; 72 unsigned MinimumJumpTableEntries = 4; 73 unsigned MaxJumpTableSize = 0; 74 75 // ReserveXRegister[i] - X#i is not available as a general purpose register. 76 BitVector ReserveXRegister; 77 78 // ReserveXRegisterForRA[i] - X#i is not available for register allocator. 79 BitVector ReserveXRegisterForRA; 80 81 // CustomCallUsedXRegister[i] - X#i call saved. 82 BitVector CustomCallSavedXRegs; 83 84 bool IsLittle; 85 86 bool IsStreaming; 87 bool IsStreamingCompatible; 88 std::optional<unsigned> StreamingHazardSize; 89 unsigned MinSVEVectorSizeInBits; 90 unsigned MaxSVEVectorSizeInBits; 91 unsigned VScaleForTuning = 1; 92 TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled; 93 94 bool EnableSubregLiveness; 95 96 /// TargetTriple - What processor and OS we're targeting. 97 Triple TargetTriple; 98 99 AArch64FrameLowering FrameLowering; 100 AArch64InstrInfo InstrInfo; 101 AArch64SelectionDAGInfo TSInfo; 102 AArch64TargetLowering TLInfo; 103 104 /// GlobalISel related APIs. 105 std::unique_ptr<CallLowering> CallLoweringInfo; 106 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; 107 std::unique_ptr<InstructionSelector> InstSelector; 108 std::unique_ptr<LegalizerInfo> Legalizer; 109 std::unique_ptr<RegisterBankInfo> RegBankInfo; 110 111 private: 112 /// initializeSubtargetDependencies - Initializes using CPUString and the 113 /// passed in feature string so that we can use initializer lists for 114 /// subtarget initialization. 115 AArch64Subtarget &initializeSubtargetDependencies(StringRef FS, 116 StringRef CPUString, 117 StringRef TuneCPUString, 118 bool HasMinSize); 119 120 /// Initialize properties based on the selected processor family. 121 void initializeProperties(bool HasMinSize); 122 123 public: 124 /// This constructor initializes the data members to match that 125 /// of the specified triple. 126 AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, 127 StringRef FS, const TargetMachine &TM, bool LittleEndian, 128 unsigned MinSVEVectorSizeInBitsOverride = 0, 129 unsigned MaxSVEVectorSizeInBitsOverride = 0, 130 bool IsStreaming = false, bool IsStreamingCompatible = false, 131 bool HasMinSize = false); 132 133 virtual unsigned getHwModeSet() const override; 134 135 // Getters for SubtargetFeatures defined in tablegen 136 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ 137 bool GETTER() const { return ATTRIBUTE; } 138 #include "AArch64GenSubtargetInfo.inc" 139 getSelectionDAGInfo()140 const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { 141 return &TSInfo; 142 } getFrameLowering()143 const AArch64FrameLowering *getFrameLowering() const override { 144 return &FrameLowering; 145 } getTargetLowering()146 const AArch64TargetLowering *getTargetLowering() const override { 147 return &TLInfo; 148 } getInstrInfo()149 const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } getRegisterInfo()150 const AArch64RegisterInfo *getRegisterInfo() const override { 151 return &getInstrInfo()->getRegisterInfo(); 152 } 153 const CallLowering *getCallLowering() const override; 154 const InlineAsmLowering *getInlineAsmLowering() const override; 155 InstructionSelector *getInstructionSelector() const override; 156 const LegalizerInfo *getLegalizerInfo() const override; 157 const RegisterBankInfo *getRegBankInfo() const override; getTargetTriple()158 const Triple &getTargetTriple() const { return TargetTriple; } enableMachineScheduler()159 bool enableMachineScheduler() const override { return true; } enablePostRAScheduler()160 bool enablePostRAScheduler() const override { return usePostRAScheduler(); } enableSubRegLiveness()161 bool enableSubRegLiveness() const override { return EnableSubregLiveness; } 162 163 bool enableMachinePipeliner() const override; useDFAforSMS()164 bool useDFAforSMS() const override { return false; } 165 166 /// Returns ARM processor family. 167 /// Avoid this function! CPU specifics should be kept local to this class 168 /// and preferably modeled with SubtargetFeatures or properties in 169 /// initializeProperties(). getProcFamily()170 ARMProcFamilyEnum getProcFamily() const { 171 return ARMProcFamily; 172 } 173 isXRaySupported()174 bool isXRaySupported() const override { return true; } 175 176 /// Returns true if the function has a streaming body. isStreaming()177 bool isStreaming() const { return IsStreaming; } 178 179 /// Returns true if the function has a streaming-compatible body. isStreamingCompatible()180 bool isStreamingCompatible() const { return IsStreamingCompatible; } 181 182 /// Returns the size of memory region that if accessed by both the CPU and 183 /// the SME unit could result in a hazard. 0 = disabled. getStreamingHazardSize()184 unsigned getStreamingHazardSize() const { 185 return StreamingHazardSize.value_or( 186 !hasSMEFA64() && hasSME() && hasSVE() ? 1024 : 0); 187 } 188 189 /// Returns true if the target has NEON and the function at runtime is known 190 /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE 191 /// mode, which disables NEON instructions). isNeonAvailable()192 bool isNeonAvailable() const { 193 return hasNEON() && 194 (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible())); 195 } 196 197 /// Returns true if the target has SVE and can use the full range of SVE 198 /// instructions, for example because it knows the function is known not to be 199 /// in streaming-SVE mode or when the target has FEAT_FA64 enabled. isSVEAvailable()200 bool isSVEAvailable() const { 201 return hasSVE() && 202 (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible())); 203 } 204 205 /// Returns true if the target has access to the streaming-compatible subset 206 /// of SVE instructions. isStreamingSVEAvailable()207 bool isStreamingSVEAvailable() const { return hasSME() && isStreaming(); } 208 209 /// Returns true if the target has access to either the full range of SVE 210 /// instructions, or the streaming-compatible subset of SVE instructions. isSVEorStreamingSVEAvailable()211 bool isSVEorStreamingSVEAvailable() const { 212 return hasSVE() || isStreamingSVEAvailable(); 213 } 214 getMinVectorRegisterBitWidth()215 unsigned getMinVectorRegisterBitWidth() const { 216 // Don't assume any minimum vector size when PSTATE.SM may not be 0, because 217 // we don't yet support streaming-compatible codegen support that we trust 218 // is safe for functions that may be executed in streaming-SVE mode. 219 // By returning '0' here, we disable vectorization. 220 if (!isSVEAvailable() && !isNeonAvailable()) 221 return 0; 222 return MinVectorRegisterBitWidth; 223 } 224 isXRegisterReserved(size_t i)225 bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; } isXRegisterReservedForRA(size_t i)226 bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; } getNumXRegisterReserved()227 unsigned getNumXRegisterReserved() const { 228 BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs()); 229 AllReservedX |= ReserveXRegister; 230 AllReservedX |= ReserveXRegisterForRA; 231 return AllReservedX.count(); 232 } isLRReservedForRA()233 bool isLRReservedForRA() const { return ReserveLRForRA; } isXRegCustomCalleeSaved(size_t i)234 bool isXRegCustomCalleeSaved(size_t i) const { 235 return CustomCallSavedXRegs[i]; 236 } hasCustomCallingConv()237 bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); } 238 239 /// Return true if the CPU supports any kind of instruction fusion. hasFusion()240 bool hasFusion() const { 241 return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || 242 hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() || 243 hasFuseAdrpAdd() || hasFuseLiterals(); 244 } 245 getEpilogueVectorizationMinVF()246 unsigned getEpilogueVectorizationMinVF() const { 247 return EpilogueVectorizationMinVF; 248 } getMaxInterleaveFactor()249 unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } 250 unsigned getVectorInsertExtractBaseCost() const; getCacheLineSize()251 unsigned getCacheLineSize() const override { return CacheLineSize; } getScatterOverhead()252 unsigned getScatterOverhead() const { return ScatterOverhead; } getGatherOverhead()253 unsigned getGatherOverhead() const { return GatherOverhead; } getPrefetchDistance()254 unsigned getPrefetchDistance() const override { return PrefetchDistance; } getMinPrefetchStride(unsigned NumMemAccesses,unsigned NumStridedMemAccesses,unsigned NumPrefetches,bool HasCall)255 unsigned getMinPrefetchStride(unsigned NumMemAccesses, 256 unsigned NumStridedMemAccesses, 257 unsigned NumPrefetches, 258 bool HasCall) const override { 259 return MinPrefetchStride; 260 } getMaxPrefetchIterationsAhead()261 unsigned getMaxPrefetchIterationsAhead() const override { 262 return MaxPrefetchIterationsAhead; 263 } getPrefFunctionAlignment()264 Align getPrefFunctionAlignment() const { 265 return PrefFunctionAlignment; 266 } getPrefLoopAlignment()267 Align getPrefLoopAlignment() const { return PrefLoopAlignment; } 268 getMaxBytesForLoopAlignment()269 unsigned getMaxBytesForLoopAlignment() const { 270 return MaxBytesForLoopAlignment; 271 } 272 getMaximumJumpTableSize()273 unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } getMinimumJumpTableEntries()274 unsigned getMinimumJumpTableEntries() const { 275 return MinimumJumpTableEntries; 276 } 277 278 /// CPU has TBI (top byte of addresses is ignored during HW address 279 /// translation) and OS enables it. 280 bool supportsAddressTopByteIgnored() const; 281 isLittleEndian()282 bool isLittleEndian() const { return IsLittle; } 283 isTargetDarwin()284 bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } isTargetIOS()285 bool isTargetIOS() const { return TargetTriple.isiOS(); } isTargetLinux()286 bool isTargetLinux() const { return TargetTriple.isOSLinux(); } isTargetWindows()287 bool isTargetWindows() const { return TargetTriple.isOSWindows(); } isTargetAndroid()288 bool isTargetAndroid() const { return TargetTriple.isAndroid(); } isTargetFuchsia()289 bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } isWindowsArm64EC()290 bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); } 291 isTargetCOFF()292 bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } isTargetELF()293 bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } isTargetMachO()294 bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } 295 isTargetILP32()296 bool isTargetILP32() const { 297 return TargetTriple.isArch32Bit() || 298 TargetTriple.getEnvironment() == Triple::GNUILP32; 299 } 300 301 bool useAA() const override; 302 addrSinkUsingGEPs()303 bool addrSinkUsingGEPs() const override { 304 // Keeping GEPs inbounds is important for exploiting AArch64 305 // addressing-modes in ILP32 mode. 306 return useAA() || isTargetILP32(); 307 } 308 useSmallAddressing()309 bool useSmallAddressing() const { 310 switch (TLInfo.getTargetMachine().getCodeModel()) { 311 case CodeModel::Kernel: 312 // Kernel is currently allowed only for Fuchsia targets, 313 // where it is the same as Small for almost all purposes. 314 case CodeModel::Small: 315 return true; 316 default: 317 return false; 318 } 319 } 320 321 /// Returns whether the operating system makes it safer to store sensitive 322 /// values in x16 and x17 as opposed to other registers. 323 bool isX16X17Safer() const; 324 325 /// ParseSubtargetFeatures - Parses features string setting specified 326 /// subtarget options. Definition of function is auto generated by tblgen. 327 void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); 328 329 /// ClassifyGlobalReference - Find the target operand flags that describe 330 /// how a global value should be referenced for the current subtarget. 331 unsigned ClassifyGlobalReference(const GlobalValue *GV, 332 const TargetMachine &TM) const; 333 334 unsigned classifyGlobalFunctionReference(const GlobalValue *GV, 335 const TargetMachine &TM) const; 336 337 /// This function is design to compatible with the function def in other 338 /// targets and escape build error about the virtual function def in base 339 /// class TargetSubtargetInfo. Updeate me if AArch64 target need to use it. 340 unsigned char classifyGlobalFunctionReference(const GlobalValue * GV)341 classifyGlobalFunctionReference(const GlobalValue *GV) const override { 342 return 0; 343 } 344 345 void overrideSchedPolicy(MachineSchedPolicy &Policy, 346 unsigned NumRegionInstrs) const override; 347 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, 348 SDep &Dep, 349 const TargetSchedModel *SchedModel) const override; 350 351 bool enableEarlyIfConversion() const override; 352 353 std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override; 354 isCallingConvWin64(CallingConv::ID CC,bool IsVarArg)355 bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const { 356 switch (CC) { 357 case CallingConv::C: 358 case CallingConv::Fast: 359 case CallingConv::Swift: 360 case CallingConv::SwiftTail: 361 return isTargetWindows(); 362 case CallingConv::PreserveNone: 363 return IsVarArg && isTargetWindows(); 364 case CallingConv::Win64: 365 return true; 366 default: 367 return false; 368 } 369 } 370 371 /// Return whether FrameLowering should always set the "extended frame 372 /// present" bit in FP, or set it based on a symbol in the runtime. swiftAsyncContextIsDynamicallySet()373 bool swiftAsyncContextIsDynamicallySet() const { 374 // Older OS versions (particularly system unwinders) are confused by the 375 // Swift extended frame, so when building code that might be run on them we 376 // must dynamically query the concurrency library to determine whether 377 // extended frames should be flagged as present. 378 const Triple &TT = getTargetTriple(); 379 380 unsigned Major = TT.getOSVersion().getMajor(); 381 switch(TT.getOS()) { 382 default: 383 return false; 384 case Triple::IOS: 385 case Triple::TvOS: 386 return Major < 15; 387 case Triple::WatchOS: 388 return Major < 8; 389 case Triple::MacOSX: 390 case Triple::Darwin: 391 return Major < 12; 392 } 393 } 394 395 void mirFileLoaded(MachineFunction &MF) const override; 396 397 // Return the known range for the bit length of SVE data registers. A value 398 // of 0 means nothing is known about that particular limit beyond what's 399 // implied by the architecture. getMaxSVEVectorSizeInBits()400 unsigned getMaxSVEVectorSizeInBits() const { 401 assert(isSVEorStreamingSVEAvailable() && 402 "Tried to get SVE vector length without SVE support!"); 403 return MaxSVEVectorSizeInBits; 404 } 405 getMinSVEVectorSizeInBits()406 unsigned getMinSVEVectorSizeInBits() const { 407 assert(isSVEorStreamingSVEAvailable() && 408 "Tried to get SVE vector length without SVE support!"); 409 return MinSVEVectorSizeInBits; 410 } 411 412 // Return the known bit length of SVE data registers. A value of 0 means the 413 // length is unknown beyond what's implied by the architecture. getSVEVectorSizeInBits()414 unsigned getSVEVectorSizeInBits() const { 415 assert(isSVEorStreamingSVEAvailable() && 416 "Tried to get SVE vector length without SVE support!"); 417 if (MinSVEVectorSizeInBits == MaxSVEVectorSizeInBits) 418 return MaxSVEVectorSizeInBits; 419 return 0; 420 } 421 useSVEForFixedLengthVectors()422 bool useSVEForFixedLengthVectors() const { 423 if (!isSVEorStreamingSVEAvailable()) 424 return false; 425 426 // Prefer NEON unless larger SVE registers are available. 427 return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256; 428 } 429 useSVEForFixedLengthVectors(EVT VT)430 bool useSVEForFixedLengthVectors(EVT VT) const { 431 if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector()) 432 return false; 433 return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock || 434 !isNeonAvailable(); 435 } 436 getVScaleForTuning()437 unsigned getVScaleForTuning() const { return VScaleForTuning; } 438 getSVETailFoldingDefaultOpts()439 TailFoldingOpts getSVETailFoldingDefaultOpts() const { 440 return DefaultSVETFOpts; 441 } 442 443 /// Returns true to use the addvl/inc/dec instructions, as opposed to separate 444 /// add + cnt instructions. 445 bool useScalarIncVL() const; 446 getChkStkName()447 const char* getChkStkName() const { 448 if (isWindowsArm64EC()) 449 return "#__chkstk_arm64ec"; 450 return "__chkstk"; 451 } 452 getSecurityCheckCookieName()453 const char* getSecurityCheckCookieName() const { 454 if (isWindowsArm64EC()) 455 return "#__security_check_cookie_arm64ec"; 456 return "__security_check_cookie"; 457 } 458 459 /// Choose a method of checking LR before performing a tail call. 460 AArch64PAuth::AuthCheckMethod 461 getAuthenticatedLRCheckMethod(const MachineFunction &MF) const; 462 463 /// Compute the integer discriminator for a given BlockAddress constant, if 464 /// blockaddress signing is enabled, or std::nullopt otherwise. 465 /// Blockaddress signing is controlled by the function attribute 466 /// "ptrauth-indirect-gotos" on the parent function. 467 /// Note that this assumes the discriminator is independent of the indirect 468 /// goto branch site itself, i.e., it's the same for all BlockAddresses in 469 /// a function. 470 std::optional<uint16_t> 471 getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const; 472 }; 473 } // End llvm namespace 474 475 #endif 476