1 //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file declares the AArch64 specific subclass of TargetSubtarget. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H 14 #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H 15 16 #include "AArch64FrameLowering.h" 17 #include "AArch64ISelLowering.h" 18 #include "AArch64InstrInfo.h" 19 #include "AArch64RegisterInfo.h" 20 #include "AArch64SelectionDAGInfo.h" 21 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 22 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" 23 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 24 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 25 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" 26 #include "llvm/CodeGen/TargetSubtargetInfo.h" 27 #include "llvm/IR/DataLayout.h" 28 #include <string> 29 30 #define GET_SUBTARGETINFO_HEADER 31 #include "AArch64GenSubtargetInfo.inc" 32 33 namespace llvm { 34 class GlobalValue; 35 class StringRef; 36 class Triple; 37 38 class AArch64Subtarget final : public AArch64GenSubtargetInfo { 39 public: 40 enum ARMProcFamilyEnum : uint8_t { 41 Others, 42 A64FX, 43 AppleA7, 44 AppleA10, 45 AppleA11, 46 AppleA12, 47 AppleA13, 48 Carmel, 49 CortexA35, 50 CortexA53, 51 CortexA55, 52 CortexA57, 53 CortexA65, 54 CortexA72, 55 CortexA73, 56 CortexA75, 57 CortexA76, 58 CortexA77, 59 CortexA78, 60 CortexX1, 61 ExynosM3, 62 Falkor, 63 Kryo, 64 NeoverseE1, 65 NeoverseN1, 66 Saphira, 67 ThunderX2T99, 68 ThunderX, 69 ThunderXT81, 70 ThunderXT83, 71 ThunderXT88, 72 TSV110, 73 ThunderX3T110 74 }; 75 76 protected: 77 /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. 78 ARMProcFamilyEnum ARMProcFamily = Others; 79 80 bool HasV8_1aOps = false; 81 bool HasV8_2aOps = false; 82 bool HasV8_3aOps = false; 83 bool HasV8_4aOps = false; 84 bool HasV8_5aOps = false; 85 bool HasV8_6aOps = false; 86 87 bool HasFPARMv8 = false; 88 bool HasNEON = false; 89 bool HasCrypto = false; 90 bool HasDotProd = false; 91 bool HasCRC = false; 92 bool HasLSE = false; 93 bool HasRAS = false; 94 bool HasRDM = false; 95 bool HasPerfMon = false; 96 bool HasFullFP16 = false; 97 bool HasFP16FML = false; 98 bool HasSPE = false; 99 100 // ARMv8.1 extensions 101 bool HasVH = false; 102 bool HasPAN = false; 103 bool HasLOR = false; 104 105 // ARMv8.2 extensions 106 bool HasPsUAO = false; 107 bool HasPAN_RWV = false; 108 bool HasCCPP = false; 109 110 // SVE extensions 111 bool HasSVE = false; 112 bool UseExperimentalZeroingPseudos = false; 113 114 // Armv8.2 Crypto extensions 115 bool HasSM4 = false; 116 bool HasSHA3 = false; 117 bool HasSHA2 = false; 118 bool HasAES = false; 119 120 // ARMv8.3 extensions 121 bool HasPA = false; 122 bool HasJS = false; 123 bool HasCCIDX = false; 124 bool HasComplxNum = false; 125 126 // ARMv8.4 extensions 127 bool HasNV = false; 128 bool HasRASv8_4 = false; 129 bool HasMPAM = false; 130 bool HasDIT = false; 131 bool HasTRACEV8_4 = false; 132 bool HasAM = false; 133 bool HasSEL2 = false; 134 bool HasPMU = false; 135 bool HasTLB_RMI = false; 136 bool HasFMI = false; 137 bool HasRCPC_IMMO = false; 138 139 bool HasLSLFast = false; 140 bool HasRCPC = false; 141 bool HasAggressiveFMA = false; 142 143 // Armv8.5-A Extensions 144 bool HasAlternativeNZCV = false; 145 bool HasFRInt3264 = false; 146 bool HasSpecRestrict = false; 147 bool HasSSBS = false; 148 bool HasSB = false; 149 bool HasPredRes = false; 150 bool HasCCDP = false; 151 bool HasBTI = false; 152 bool HasRandGen = false; 153 bool HasMTE = false; 154 bool HasTME = false; 155 156 // Armv8.6-A Extensions 157 bool HasBF16 = false; 158 bool HasMatMulInt8 = false; 159 bool HasMatMulFP32 = false; 160 bool HasMatMulFP64 = false; 161 bool HasAMVS = false; 162 bool HasFineGrainedTraps = false; 163 bool HasEnhancedCounterVirtualization = false; 164 165 // Arm SVE2 extensions 166 bool HasSVE2 = false; 167 bool HasSVE2AES = false; 168 bool HasSVE2SM4 = false; 169 bool HasSVE2SHA3 = false; 170 bool HasSVE2BitPerm = false; 171 172 // Future architecture extensions. 173 bool HasETE = false; 174 bool HasTRBE = false; 175 176 // HasZeroCycleRegMove - Has zero-cycle register mov instructions. 177 bool HasZeroCycleRegMove = false; 178 179 // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. 180 bool HasZeroCycleZeroing = false; 181 bool HasZeroCycleZeroingGP = false; 182 bool HasZeroCycleZeroingFP = false; 183 bool HasZeroCycleZeroingFPWorkaround = false; 184 185 // StrictAlign - Disallow unaligned memory accesses. 186 bool StrictAlign = false; 187 188 // NegativeImmediates - transform instructions with negative immediates 189 bool NegativeImmediates = true; 190 191 // Enable 64-bit vectorization in SLP. 192 unsigned MinVectorRegisterBitWidth = 64; 193 194 bool UseAA = false; 195 bool PredictableSelectIsExpensive = false; 196 bool BalanceFPOps = false; 197 bool CustomAsCheapAsMove = false; 198 bool ExynosAsCheapAsMove = false; 199 bool UsePostRAScheduler = false; 200 bool Misaligned128StoreIsSlow = false; 201 bool Paired128IsSlow = false; 202 bool STRQroIsSlow = false; 203 bool UseAlternateSExtLoadCVTF32Pattern = false; 204 bool HasArithmeticBccFusion = false; 205 bool HasArithmeticCbzFusion = false; 206 bool HasFuseAddress = false; 207 bool HasFuseAES = false; 208 bool HasFuseArithmeticLogic = false; 209 bool HasFuseCCSelect = false; 210 bool HasFuseCryptoEOR = false; 211 bool HasFuseLiterals = false; 212 bool DisableLatencySchedHeuristic = false; 213 bool UseRSqrt = false; 214 bool Force32BitJumpTables = false; 215 bool UseEL1ForTP = false; 216 bool UseEL2ForTP = false; 217 bool UseEL3ForTP = false; 218 bool AllowTaggedGlobals = false; 219 bool HardenSlsRetBr = false; 220 bool HardenSlsBlr = false; 221 uint8_t MaxInterleaveFactor = 2; 222 uint8_t VectorInsertExtractBaseCost = 3; 223 uint16_t CacheLineSize = 0; 224 uint16_t PrefetchDistance = 0; 225 uint16_t MinPrefetchStride = 1; 226 unsigned MaxPrefetchIterationsAhead = UINT_MAX; 227 unsigned PrefFunctionLogAlignment = 0; 228 unsigned PrefLoopLogAlignment = 0; 229 unsigned MaxJumpTableSize = 0; 230 unsigned WideningBaseCost = 0; 231 232 // ReserveXRegister[i] - X#i is not available as a general purpose register. 233 BitVector ReserveXRegister; 234 235 // CustomCallUsedXRegister[i] - X#i call saved. 236 BitVector CustomCallSavedXRegs; 237 238 bool IsLittle; 239 240 /// TargetTriple - What processor and OS we're targeting. 241 Triple TargetTriple; 242 243 AArch64FrameLowering FrameLowering; 244 AArch64InstrInfo InstrInfo; 245 AArch64SelectionDAGInfo TSInfo; 246 AArch64TargetLowering TLInfo; 247 248 /// GlobalISel related APIs. 249 std::unique_ptr<CallLowering> CallLoweringInfo; 250 std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; 251 std::unique_ptr<InstructionSelector> InstSelector; 252 std::unique_ptr<LegalizerInfo> Legalizer; 253 std::unique_ptr<RegisterBankInfo> RegBankInfo; 254 255 private: 256 /// initializeSubtargetDependencies - Initializes using CPUString and the 257 /// passed in feature string so that we can use initializer lists for 258 /// subtarget initialization. 259 AArch64Subtarget &initializeSubtargetDependencies(StringRef FS, 260 StringRef CPUString); 261 262 /// Initialize properties based on the selected processor family. 263 void initializeProperties(); 264 265 public: 266 /// This constructor initializes the data members to match that 267 /// of the specified triple. 268 AArch64Subtarget(const Triple &TT, const std::string &CPU, 269 const std::string &FS, const TargetMachine &TM, 270 bool LittleEndian); 271 272 const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { 273 return &TSInfo; 274 } 275 const AArch64FrameLowering *getFrameLowering() const override { 276 return &FrameLowering; 277 } 278 const AArch64TargetLowering *getTargetLowering() const override { 279 return &TLInfo; 280 } 281 const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } 282 const AArch64RegisterInfo *getRegisterInfo() const override { 283 return &getInstrInfo()->getRegisterInfo(); 284 } 285 const CallLowering *getCallLowering() const override; 286 const InlineAsmLowering *getInlineAsmLowering() const override; 287 InstructionSelector *getInstructionSelector() const override; 288 const LegalizerInfo *getLegalizerInfo() const override; 289 const RegisterBankInfo *getRegBankInfo() const override; 290 const Triple &getTargetTriple() const { return TargetTriple; } 291 bool enableMachineScheduler() const override { return true; } 292 bool enablePostRAScheduler() const override { 293 return UsePostRAScheduler; 294 } 295 296 /// Returns ARM processor family. 297 /// Avoid this function! CPU specifics should be kept local to this class 298 /// and preferably modeled with SubtargetFeatures or properties in 299 /// initializeProperties(). 300 ARMProcFamilyEnum getProcFamily() const { 301 return ARMProcFamily; 302 } 303 304 bool hasV8_1aOps() const { return HasV8_1aOps; } 305 bool hasV8_2aOps() const { return HasV8_2aOps; } 306 bool hasV8_3aOps() const { return HasV8_3aOps; } 307 bool hasV8_4aOps() const { return HasV8_4aOps; } 308 bool hasV8_5aOps() const { return HasV8_5aOps; } 309 310 bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } 311 312 bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; } 313 314 bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; } 315 316 bool hasZeroCycleZeroingFPWorkaround() const { 317 return HasZeroCycleZeroingFPWorkaround; 318 } 319 320 bool requiresStrictAlign() const { return StrictAlign; } 321 322 bool isXRaySupported() const override { return true; } 323 324 unsigned getMinVectorRegisterBitWidth() const { 325 return MinVectorRegisterBitWidth; 326 } 327 328 bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; } 329 unsigned getNumXRegisterReserved() const { return ReserveXRegister.count(); } 330 bool isXRegCustomCalleeSaved(size_t i) const { 331 return CustomCallSavedXRegs[i]; 332 } 333 bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); } 334 bool hasFPARMv8() const { return HasFPARMv8; } 335 bool hasNEON() const { return HasNEON; } 336 bool hasCrypto() const { return HasCrypto; } 337 bool hasDotProd() const { return HasDotProd; } 338 bool hasCRC() const { return HasCRC; } 339 bool hasLSE() const { return HasLSE; } 340 bool hasRAS() const { return HasRAS; } 341 bool hasRDM() const { return HasRDM; } 342 bool hasSM4() const { return HasSM4; } 343 bool hasSHA3() const { return HasSHA3; } 344 bool hasSHA2() const { return HasSHA2; } 345 bool hasAES() const { return HasAES; } 346 bool balanceFPOps() const { return BalanceFPOps; } 347 bool predictableSelectIsExpensive() const { 348 return PredictableSelectIsExpensive; 349 } 350 bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; } 351 bool hasExynosCheapAsMoveHandling() const { return ExynosAsCheapAsMove; } 352 bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; } 353 bool isPaired128Slow() const { return Paired128IsSlow; } 354 bool isSTRQroSlow() const { return STRQroIsSlow; } 355 bool useAlternateSExtLoadCVTF32Pattern() const { 356 return UseAlternateSExtLoadCVTF32Pattern; 357 } 358 bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } 359 bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } 360 bool hasFuseAddress() const { return HasFuseAddress; } 361 bool hasFuseAES() const { return HasFuseAES; } 362 bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; } 363 bool hasFuseCCSelect() const { return HasFuseCCSelect; } 364 bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; } 365 bool hasFuseLiterals() const { return HasFuseLiterals; } 366 367 /// Return true if the CPU supports any kind of instruction fusion. 368 bool hasFusion() const { 369 return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || 370 hasFuseAES() || hasFuseArithmeticLogic() || 371 hasFuseCCSelect() || hasFuseLiterals(); 372 } 373 374 bool hardenSlsRetBr() const { return HardenSlsRetBr; } 375 bool hardenSlsBlr() const { return HardenSlsBlr; } 376 377 bool useEL1ForTP() const { return UseEL1ForTP; } 378 bool useEL2ForTP() const { return UseEL2ForTP; } 379 bool useEL3ForTP() const { return UseEL3ForTP; } 380 381 bool useRSqrt() const { return UseRSqrt; } 382 bool force32BitJumpTables() const { return Force32BitJumpTables; } 383 unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } 384 unsigned getVectorInsertExtractBaseCost() const { 385 return VectorInsertExtractBaseCost; 386 } 387 unsigned getCacheLineSize() const override { return CacheLineSize; } 388 unsigned getPrefetchDistance() const override { return PrefetchDistance; } 389 unsigned getMinPrefetchStride(unsigned NumMemAccesses, 390 unsigned NumStridedMemAccesses, 391 unsigned NumPrefetches, 392 bool HasCall) const override { 393 return MinPrefetchStride; 394 } 395 unsigned getMaxPrefetchIterationsAhead() const override { 396 return MaxPrefetchIterationsAhead; 397 } 398 unsigned getPrefFunctionLogAlignment() const { 399 return PrefFunctionLogAlignment; 400 } 401 unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; } 402 403 unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } 404 405 unsigned getWideningBaseCost() const { return WideningBaseCost; } 406 407 bool useExperimentalZeroingPseudos() const { 408 return UseExperimentalZeroingPseudos; 409 } 410 411 /// CPU has TBI (top byte of addresses is ignored during HW address 412 /// translation) and OS enables it. 413 bool supportsAddressTopByteIgnored() const; 414 415 bool hasPerfMon() const { return HasPerfMon; } 416 bool hasFullFP16() const { return HasFullFP16; } 417 bool hasFP16FML() const { return HasFP16FML; } 418 bool hasSPE() const { return HasSPE; } 419 bool hasLSLFast() const { return HasLSLFast; } 420 bool hasSVE() const { return HasSVE; } 421 bool hasSVE2() const { return HasSVE2; } 422 bool hasRCPC() const { return HasRCPC; } 423 bool hasAggressiveFMA() const { return HasAggressiveFMA; } 424 bool hasAlternativeNZCV() const { return HasAlternativeNZCV; } 425 bool hasFRInt3264() const { return HasFRInt3264; } 426 bool hasSpecRestrict() const { return HasSpecRestrict; } 427 bool hasSSBS() const { return HasSSBS; } 428 bool hasSB() const { return HasSB; } 429 bool hasPredRes() const { return HasPredRes; } 430 bool hasCCDP() const { return HasCCDP; } 431 bool hasBTI() const { return HasBTI; } 432 bool hasRandGen() const { return HasRandGen; } 433 bool hasMTE() const { return HasMTE; } 434 bool hasTME() const { return HasTME; } 435 // Arm SVE2 extensions 436 bool hasSVE2AES() const { return HasSVE2AES; } 437 bool hasSVE2SM4() const { return HasSVE2SM4; } 438 bool hasSVE2SHA3() const { return HasSVE2SHA3; } 439 bool hasSVE2BitPerm() const { return HasSVE2BitPerm; } 440 bool hasMatMulInt8() const { return HasMatMulInt8; } 441 bool hasMatMulFP32() const { return HasMatMulFP32; } 442 bool hasMatMulFP64() const { return HasMatMulFP64; } 443 444 // Armv8.6-A Extensions 445 bool hasBF16() const { return HasBF16; } 446 bool hasFineGrainedTraps() const { return HasFineGrainedTraps; } 447 bool hasEnhancedCounterVirtualization() const { 448 return HasEnhancedCounterVirtualization; 449 } 450 451 bool isLittleEndian() const { return IsLittle; } 452 453 bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } 454 bool isTargetIOS() const { return TargetTriple.isiOS(); } 455 bool isTargetLinux() const { return TargetTriple.isOSLinux(); } 456 bool isTargetWindows() const { return TargetTriple.isOSWindows(); } 457 bool isTargetAndroid() const { return TargetTriple.isAndroid(); } 458 bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } 459 460 bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } 461 bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } 462 bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } 463 464 bool isTargetILP32() const { return TargetTriple.isArch32Bit(); } 465 466 bool useAA() const override { return UseAA; } 467 468 bool hasVH() const { return HasVH; } 469 bool hasPAN() const { return HasPAN; } 470 bool hasLOR() const { return HasLOR; } 471 472 bool hasPsUAO() const { return HasPsUAO; } 473 bool hasPAN_RWV() const { return HasPAN_RWV; } 474 bool hasCCPP() const { return HasCCPP; } 475 476 bool hasPA() const { return HasPA; } 477 bool hasJS() const { return HasJS; } 478 bool hasCCIDX() const { return HasCCIDX; } 479 bool hasComplxNum() const { return HasComplxNum; } 480 481 bool hasNV() const { return HasNV; } 482 bool hasRASv8_4() const { return HasRASv8_4; } 483 bool hasMPAM() const { return HasMPAM; } 484 bool hasDIT() const { return HasDIT; } 485 bool hasTRACEV8_4() const { return HasTRACEV8_4; } 486 bool hasAM() const { return HasAM; } 487 bool hasAMVS() const { return HasAMVS; } 488 bool hasSEL2() const { return HasSEL2; } 489 bool hasPMU() const { return HasPMU; } 490 bool hasTLB_RMI() const { return HasTLB_RMI; } 491 bool hasFMI() const { return HasFMI; } 492 bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } 493 494 bool addrSinkUsingGEPs() const override { 495 // Keeping GEPs inbounds is important for exploiting AArch64 496 // addressing-modes in ILP32 mode. 497 return useAA() || isTargetILP32(); 498 } 499 500 bool useSmallAddressing() const { 501 switch (TLInfo.getTargetMachine().getCodeModel()) { 502 case CodeModel::Kernel: 503 // Kernel is currently allowed only for Fuchsia targets, 504 // where it is the same as Small for almost all purposes. 505 case CodeModel::Small: 506 return true; 507 default: 508 return false; 509 } 510 } 511 512 /// ParseSubtargetFeatures - Parses features string setting specified 513 /// subtarget options. Definition of function is auto generated by tblgen. 514 void ParseSubtargetFeatures(StringRef CPU, StringRef FS); 515 516 /// ClassifyGlobalReference - Find the target operand flags that describe 517 /// how a global value should be referenced for the current subtarget. 518 unsigned ClassifyGlobalReference(const GlobalValue *GV, 519 const TargetMachine &TM) const; 520 521 unsigned classifyGlobalFunctionReference(const GlobalValue *GV, 522 const TargetMachine &TM) const; 523 524 void overrideSchedPolicy(MachineSchedPolicy &Policy, 525 unsigned NumRegionInstrs) const override; 526 527 bool enableEarlyIfConversion() const override; 528 529 bool enableAdvancedRASplitCost() const override { return true; } 530 531 std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override; 532 533 bool isCallingConvWin64(CallingConv::ID CC) const { 534 switch (CC) { 535 case CallingConv::C: 536 case CallingConv::Fast: 537 case CallingConv::Swift: 538 return isTargetWindows(); 539 case CallingConv::Win64: 540 return true; 541 default: 542 return false; 543 } 544 } 545 546 void mirFileLoaded(MachineFunction &MF) const override; 547 548 // Return the known range for the bit length of SVE data registers. A value 549 // of 0 means nothing is known about that particular limit beyong what's 550 // implied by the architecture. 551 unsigned getMaxSVEVectorSizeInBits() const; 552 unsigned getMinSVEVectorSizeInBits() const; 553 }; 554 } // End llvm namespace 555 556 #endif 557