1 //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file declares the AArch64 specific subclass of TargetSubtarget. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H 14 #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H 15 16 #include "AArch64FrameLowering.h" 17 #include "AArch64ISelLowering.h" 18 #include "AArch64InstrInfo.h" 19 #include "AArch64PointerAuth.h" 20 #include "AArch64RegisterInfo.h" 21 #include "AArch64SelectionDAGInfo.h" 22 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 23 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" 24 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 25 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 26 #include "llvm/CodeGen/RegisterBankInfo.h" 27 #include "llvm/CodeGen/TargetSubtargetInfo.h" 28 #include "llvm/IR/DataLayout.h" 29 30 #define GET_SUBTARGETINFO_HEADER 31 #include "AArch64GenSubtargetInfo.inc" 32 33 namespace llvm { 34 class GlobalValue; 35 class StringRef; 36 class Triple; 37 38 class AArch64Subtarget final : public AArch64GenSubtargetInfo { 39 public: 40 enum ARMProcFamilyEnum : uint8_t { 41 Others, 42 #define ARM_PROCESSOR_FAMILY(ENUM) ENUM, 43 #include "llvm/TargetParser/AArch64TargetParserDef.inc" 44 #undef ARM_PROCESSOR_FAMILY 45 }; 46 47 protected: 48 /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. 49 ARMProcFamilyEnum ARMProcFamily = Others; 50 51 // Enable 64-bit vectorization in SLP. 52 unsigned MinVectorRegisterBitWidth = 64; 53 54 // Bool members corresponding to the SubtargetFeatures defined in tablegen 55 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ 56 bool ATTRIBUTE = DEFAULT; 57 #include "AArch64GenSubtargetInfo.inc" 58 59 uint8_t MaxInterleaveFactor = 2; 60 uint8_t VectorInsertExtractBaseCost = 2; 61 uint16_t CacheLineSize = 0; 62 uint16_t PrefetchDistance = 0; 63 uint16_t MinPrefetchStride = 1; 64 unsigned MaxPrefetchIterationsAhead = UINT_MAX; 65 Align PrefFunctionAlignment; 66 Align PrefLoopAlignment; 67 unsigned MaxBytesForLoopAlignment = 0; 68 unsigned MinimumJumpTableEntries = 4; 69 unsigned MaxJumpTableSize = 0; 70 71 // ReserveXRegister[i] - X#i is not available as a general purpose register. 72 BitVector ReserveXRegister; 73 74 // ReserveXRegisterForRA[i] - X#i is not available for register allocator. 75 BitVector ReserveXRegisterForRA; 76 77 // CustomCallUsedXRegister[i] - X#i call saved. 78 BitVector CustomCallSavedXRegs; 79 80 bool IsLittle; 81 82 bool IsStreaming; 83 bool IsStreamingCompatible; 84 unsigned MinSVEVectorSizeInBits; 85 unsigned MaxSVEVectorSizeInBits; 86 unsigned VScaleForTuning = 2; 87 TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled; 88 89 /// TargetTriple - What processor and OS we're targeting. 90 Triple TargetTriple; 91 92 AArch64FrameLowering FrameLowering; 93 AArch64InstrInfo InstrInfo; 94 AArch64SelectionDAGInfo TSInfo; 95 AArch64TargetLowering TLInfo; 96 97 /// GlobalISel related APIs. 98 std::unique_ptr<CallLowering> CallLoweringInfo; 99 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; 100 std::unique_ptr<InstructionSelector> InstSelector; 101 std::unique_ptr<LegalizerInfo> Legalizer; 102 std::unique_ptr<RegisterBankInfo> RegBankInfo; 103 104 private: 105 /// initializeSubtargetDependencies - Initializes using CPUString and the 106 /// passed in feature string so that we can use initializer lists for 107 /// subtarget initialization. 108 AArch64Subtarget &initializeSubtargetDependencies(StringRef FS, 109 StringRef CPUString, 110 StringRef TuneCPUString, 111 bool HasMinSize); 112 113 /// Initialize properties based on the selected processor family. 114 void initializeProperties(bool HasMinSize); 115 116 public: 117 /// This constructor initializes the data members to match that 118 /// of the specified triple. 119 AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, 120 StringRef FS, const TargetMachine &TM, bool LittleEndian, 121 unsigned MinSVEVectorSizeInBitsOverride = 0, 122 unsigned MaxSVEVectorSizeInBitsOverride = 0, 123 bool IsStreaming = false, bool IsStreamingCompatible = false, 124 bool HasMinSize = false); 125 126 // Getters for SubtargetFeatures defined in tablegen 127 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ 128 bool GETTER() const { return ATTRIBUTE; } 129 #include "AArch64GenSubtargetInfo.inc" 130 131 const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { 132 return &TSInfo; 133 } 134 const AArch64FrameLowering *getFrameLowering() const override { 135 return &FrameLowering; 136 } 137 const AArch64TargetLowering *getTargetLowering() const override { 138 return &TLInfo; 139 } 140 const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } 141 const AArch64RegisterInfo *getRegisterInfo() const override { 142 return &getInstrInfo()->getRegisterInfo(); 143 } 144 const CallLowering *getCallLowering() const override; 145 const InlineAsmLowering *getInlineAsmLowering() const override; 146 InstructionSelector *getInstructionSelector() const override; 147 const LegalizerInfo *getLegalizerInfo() const override; 148 const RegisterBankInfo *getRegBankInfo() const override; 149 const Triple &getTargetTriple() const { return TargetTriple; } 150 bool enableMachineScheduler() const override { return true; } 151 bool enablePostRAScheduler() const override { return usePostRAScheduler(); } 152 153 bool enableMachinePipeliner() const override; 154 bool useDFAforSMS() const override { return false; } 155 156 /// Returns ARM processor family. 157 /// Avoid this function! CPU specifics should be kept local to this class 158 /// and preferably modeled with SubtargetFeatures or properties in 159 /// initializeProperties(). 160 ARMProcFamilyEnum getProcFamily() const { 161 return ARMProcFamily; 162 } 163 164 bool isXRaySupported() const override { return true; } 165 166 /// Returns true if the function has a streaming body. 167 bool isStreaming() const { return IsStreaming; } 168 169 /// Returns true if the function has a streaming-compatible body. 170 bool isStreamingCompatible() const { return IsStreamingCompatible; } 171 172 /// Returns true if the target has NEON and the function at runtime is known 173 /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE 174 /// mode, which disables NEON instructions). 175 bool isNeonAvailable() const { 176 return hasNEON() && 177 (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible())); 178 } 179 180 /// Returns true if the target has SVE and can use the full range of SVE 181 /// instructions, for example because it knows the function is known not to be 182 /// in streaming-SVE mode or when the target has FEAT_FA64 enabled. 183 bool isSVEAvailable() const { 184 return hasSVE() && 185 (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible())); 186 } 187 188 /// Returns true if the target has access to either the full range of SVE instructions, 189 /// or the streaming-compatible subset of SVE instructions. 190 bool isSVEorStreamingSVEAvailable() const { 191 return hasSVE() || (hasSME() && isStreaming()); 192 } 193 194 unsigned getMinVectorRegisterBitWidth() const { 195 // Don't assume any minimum vector size when PSTATE.SM may not be 0, because 196 // we don't yet support streaming-compatible codegen support that we trust 197 // is safe for functions that may be executed in streaming-SVE mode. 198 // By returning '0' here, we disable vectorization. 199 if (!isSVEAvailable() && !isNeonAvailable()) 200 return 0; 201 return MinVectorRegisterBitWidth; 202 } 203 204 bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; } 205 bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; } 206 unsigned getNumXRegisterReserved() const { 207 BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs()); 208 AllReservedX |= ReserveXRegister; 209 AllReservedX |= ReserveXRegisterForRA; 210 return AllReservedX.count(); 211 } 212 bool isLRReservedForRA() const { return ReserveLRForRA; } 213 bool isXRegCustomCalleeSaved(size_t i) const { 214 return CustomCallSavedXRegs[i]; 215 } 216 bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); } 217 218 /// Return true if the CPU supports any kind of instruction fusion. 219 bool hasFusion() const { 220 return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || 221 hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() || 222 hasFuseAdrpAdd() || hasFuseLiterals(); 223 } 224 225 unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } 226 unsigned getVectorInsertExtractBaseCost() const; 227 unsigned getCacheLineSize() const override { return CacheLineSize; } 228 unsigned getPrefetchDistance() const override { return PrefetchDistance; } 229 unsigned getMinPrefetchStride(unsigned NumMemAccesses, 230 unsigned NumStridedMemAccesses, 231 unsigned NumPrefetches, 232 bool HasCall) const override { 233 return MinPrefetchStride; 234 } 235 unsigned getMaxPrefetchIterationsAhead() const override { 236 return MaxPrefetchIterationsAhead; 237 } 238 Align getPrefFunctionAlignment() const { 239 return PrefFunctionAlignment; 240 } 241 Align getPrefLoopAlignment() const { return PrefLoopAlignment; } 242 243 unsigned getMaxBytesForLoopAlignment() const { 244 return MaxBytesForLoopAlignment; 245 } 246 247 unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } 248 unsigned getMinimumJumpTableEntries() const { 249 return MinimumJumpTableEntries; 250 } 251 252 /// CPU has TBI (top byte of addresses is ignored during HW address 253 /// translation) and OS enables it. 254 bool supportsAddressTopByteIgnored() const; 255 256 bool isLittleEndian() const { return IsLittle; } 257 258 bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } 259 bool isTargetIOS() const { return TargetTriple.isiOS(); } 260 bool isTargetLinux() const { return TargetTriple.isOSLinux(); } 261 bool isTargetWindows() const { return TargetTriple.isOSWindows(); } 262 bool isTargetAndroid() const { return TargetTriple.isAndroid(); } 263 bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } 264 bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); } 265 266 bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } 267 bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } 268 bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } 269 270 bool isTargetILP32() const { 271 return TargetTriple.isArch32Bit() || 272 TargetTriple.getEnvironment() == Triple::GNUILP32; 273 } 274 275 bool useAA() const override; 276 277 bool addrSinkUsingGEPs() const override { 278 // Keeping GEPs inbounds is important for exploiting AArch64 279 // addressing-modes in ILP32 mode. 280 return useAA() || isTargetILP32(); 281 } 282 283 bool useSmallAddressing() const { 284 switch (TLInfo.getTargetMachine().getCodeModel()) { 285 case CodeModel::Kernel: 286 // Kernel is currently allowed only for Fuchsia targets, 287 // where it is the same as Small for almost all purposes. 288 case CodeModel::Small: 289 return true; 290 default: 291 return false; 292 } 293 } 294 295 /// ParseSubtargetFeatures - Parses features string setting specified 296 /// subtarget options. Definition of function is auto generated by tblgen. 297 void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); 298 299 /// ClassifyGlobalReference - Find the target operand flags that describe 300 /// how a global value should be referenced for the current subtarget. 301 unsigned ClassifyGlobalReference(const GlobalValue *GV, 302 const TargetMachine &TM) const; 303 304 unsigned classifyGlobalFunctionReference(const GlobalValue *GV, 305 const TargetMachine &TM) const; 306 307 /// This function is design to compatible with the function def in other 308 /// targets and escape build error about the virtual function def in base 309 /// class TargetSubtargetInfo. Updeate me if AArch64 target need to use it. 310 unsigned char 311 classifyGlobalFunctionReference(const GlobalValue *GV) const override { 312 return 0; 313 } 314 315 void overrideSchedPolicy(MachineSchedPolicy &Policy, 316 unsigned NumRegionInstrs) const override; 317 void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, 318 SDep &Dep, 319 const TargetSchedModel *SchedModel) const override; 320 321 bool enableEarlyIfConversion() const override; 322 323 std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override; 324 325 bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const { 326 switch (CC) { 327 case CallingConv::C: 328 case CallingConv::Fast: 329 case CallingConv::Swift: 330 case CallingConv::SwiftTail: 331 return isTargetWindows(); 332 case CallingConv::PreserveNone: 333 return IsVarArg && isTargetWindows(); 334 case CallingConv::Win64: 335 return true; 336 default: 337 return false; 338 } 339 } 340 341 /// Return whether FrameLowering should always set the "extended frame 342 /// present" bit in FP, or set it based on a symbol in the runtime. 343 bool swiftAsyncContextIsDynamicallySet() const { 344 // Older OS versions (particularly system unwinders) are confused by the 345 // Swift extended frame, so when building code that might be run on them we 346 // must dynamically query the concurrency library to determine whether 347 // extended frames should be flagged as present. 348 const Triple &TT = getTargetTriple(); 349 350 unsigned Major = TT.getOSVersion().getMajor(); 351 switch(TT.getOS()) { 352 default: 353 return false; 354 case Triple::IOS: 355 case Triple::TvOS: 356 return Major < 15; 357 case Triple::WatchOS: 358 return Major < 8; 359 case Triple::MacOSX: 360 case Triple::Darwin: 361 return Major < 12; 362 } 363 } 364 365 void mirFileLoaded(MachineFunction &MF) const override; 366 367 // Return the known range for the bit length of SVE data registers. A value 368 // of 0 means nothing is known about that particular limit beyong what's 369 // implied by the architecture. 370 unsigned getMaxSVEVectorSizeInBits() const { 371 assert(isSVEorStreamingSVEAvailable() && 372 "Tried to get SVE vector length without SVE support!"); 373 return MaxSVEVectorSizeInBits; 374 } 375 376 unsigned getMinSVEVectorSizeInBits() const { 377 assert(isSVEorStreamingSVEAvailable() && 378 "Tried to get SVE vector length without SVE support!"); 379 return MinSVEVectorSizeInBits; 380 } 381 382 bool useSVEForFixedLengthVectors() const { 383 if (!isSVEorStreamingSVEAvailable()) 384 return false; 385 386 // Prefer NEON unless larger SVE registers are available. 387 return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256; 388 } 389 390 bool useSVEForFixedLengthVectors(EVT VT) const { 391 if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector()) 392 return false; 393 return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock || 394 !isNeonAvailable(); 395 } 396 397 unsigned getVScaleForTuning() const { return VScaleForTuning; } 398 399 TailFoldingOpts getSVETailFoldingDefaultOpts() const { 400 return DefaultSVETFOpts; 401 } 402 403 const char* getChkStkName() const { 404 if (isWindowsArm64EC()) 405 return "#__chkstk_arm64ec"; 406 return "__chkstk"; 407 } 408 409 const char* getSecurityCheckCookieName() const { 410 if (isWindowsArm64EC()) 411 return "#__security_check_cookie_arm64ec"; 412 return "__security_check_cookie"; 413 } 414 415 /// Choose a method of checking LR before performing a tail call. 416 AArch64PAuth::AuthCheckMethod 417 getAuthenticatedLRCheckMethod(const MachineFunction &MF) const; 418 419 /// Compute the integer discriminator for a given BlockAddress constant, if 420 /// blockaddress signing is enabled, or std::nullopt otherwise. 421 /// Blockaddress signing is controlled by the function attribute 422 /// "ptrauth-indirect-gotos" on the parent function. 423 /// Note that this assumes the discriminator is independent of the indirect 424 /// goto branch site itself, i.e., it's the same for all BlockAddresses in 425 /// a function. 426 std::optional<uint16_t> 427 getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const; 428 429 const PseudoSourceValue *getAddressCheckPSV() const { 430 return AddressCheckPSV.get(); 431 } 432 433 private: 434 /// Pseudo value representing memory load performed to check an address. 435 /// 436 /// This load operation is solely used for its side-effects: if the address 437 /// is not mapped (or not readable), it triggers CPU exception, otherwise 438 /// execution proceeds and the value is not used. 439 class AddressCheckPseudoSourceValue : public PseudoSourceValue { 440 public: 441 AddressCheckPseudoSourceValue(const TargetMachine &TM) 442 : PseudoSourceValue(TargetCustom, TM) {} 443 444 bool isConstant(const MachineFrameInfo *) const override { return false; } 445 bool isAliased(const MachineFrameInfo *) const override { return true; } 446 bool mayAlias(const MachineFrameInfo *) const override { return true; } 447 void printCustom(raw_ostream &OS) const override { OS << "AddressCheck"; } 448 }; 449 450 std::unique_ptr<AddressCheckPseudoSourceValue> AddressCheckPSV; 451 }; 452 } // End llvm namespace 453 454 #endif 455