1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H 15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H 16 17 #include "llvm/CodeGen/CallingConvLower.h" 18 #include "llvm/CodeGen/MachineFunction.h" 19 #include "llvm/CodeGen/SelectionDAG.h" 20 #include "llvm/CodeGen/TargetLowering.h" 21 #include "llvm/IR/CallingConv.h" 22 #include "llvm/IR/Instruction.h" 23 24 namespace llvm { 25 26 namespace AArch64 { 27 /// Possible values of current rounding mode, which is specified in bits 28 /// 23:22 of FPCR. 29 enum Rounding { 30 RN = 0, // Round to Nearest 31 RP = 1, // Round towards Plus infinity 32 RM = 2, // Round towards Minus infinity 33 RZ = 3, // Round towards Zero 34 rmMask = 3 // Bit mask selecting rounding mode 35 }; 36 37 // Bit position of rounding mode bits in FPCR. 38 const unsigned RoundingBitsPos = 22; 39 40 // Reserved bits should be preserved when modifying FPCR. 41 const uint64_t ReservedFPControlBits = 0xfffffffff80040f8; 42 43 // Registers used to pass function arguments. 44 ArrayRef<MCPhysReg> getGPRArgRegs(); 45 ArrayRef<MCPhysReg> getFPRArgRegs(); 46 47 /// Maximum allowed number of unprobed bytes above SP at an ABI 48 /// boundary. 49 const unsigned StackProbeMaxUnprobedStack = 1024; 50 51 /// Maximum number of iterations to unroll for a constant size probing loop. 
52 const unsigned StackProbeMaxLoopUnroll = 4; 53 54 } // namespace AArch64 55 56 namespace ARM64AS { 57 enum : unsigned { PTR32_SPTR = 270, PTR32_UPTR = 271, PTR64 = 272 }; 58 } 59 60 class AArch64Subtarget; 61 62 class AArch64TargetLowering : public TargetLowering { 63 public: 64 explicit AArch64TargetLowering(const TargetMachine &TM, 65 const AArch64Subtarget &STI); 66 67 /// Control the following reassociation of operands: (op (op x, c1), y) -> (op 68 /// (op x, y), c1) where N0 is (op x, c1) and N1 is y. 69 bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, 70 SDValue N1) const override; 71 72 /// Selects the correct CCAssignFn for a given CallingConvention value. 73 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; 74 75 /// Selects the correct CCAssignFn for a given CallingConvention value. 76 CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const; 77 78 /// Determine which of the bits specified in Mask are known to be either zero 79 /// or one and return them in the KnownZero/KnownOne bitsets. 80 void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, 81 const APInt &DemandedElts, 82 const SelectionDAG &DAG, 83 unsigned Depth = 0) const override; 84 85 unsigned ComputeNumSignBitsForTargetNode(SDValue Op, 86 const APInt &DemandedElts, 87 const SelectionDAG &DAG, 88 unsigned Depth) const override; 89 90 MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { 91 if ((AS == ARM64AS::PTR32_SPTR) || (AS == ARM64AS::PTR32_UPTR)) { 92 // These are 32-bit pointers created using the `__ptr32` extension or 93 // similar. They are handled by marking them as being in a different 94 // address space, and will be extended to 64-bits when used as the target 95 // of a load or store operation, or cast to a 64-bit pointer type. 96 return MVT::i32; 97 } else { 98 // Returning i64 unconditionally here (i.e. even for ILP32) means that the 99 // *DAG* representation of pointers will always be 64-bits. 
They will be 100 // truncated and extended when transferred to memory, but the 64-bit DAG 101 // allows us to use AArch64's addressing modes much more easily. 102 return MVT::i64; 103 } 104 } 105 getVectorIdxWidth(const DataLayout & DL)106 unsigned getVectorIdxWidth(const DataLayout &DL) const override { 107 // The VectorIdx type is i64, with both normal and ilp32. 108 return 64; 109 } 110 111 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, 112 const APInt &DemandedElts, 113 TargetLoweringOpt &TLO) const override; 114 115 MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; 116 117 /// Returns true if the target allows unaligned memory accesses of the 118 /// specified type. 119 bool allowsMisalignedMemoryAccesses( 120 EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), 121 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 122 unsigned *Fast = nullptr) const override; 123 /// LLT variant. 124 bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace, 125 Align Alignment, 126 MachineMemOperand::Flags Flags, 127 unsigned *Fast = nullptr) const override; 128 129 /// Provide custom lowering hooks for some operations. 130 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 131 132 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 133 134 /// This method returns a target specific FastISel object, or null if the 135 /// target does not support "fast" ISel. 136 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 137 const TargetLibraryInfo *libInfo) const override; 138 139 bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; 140 141 bool isFPImmLegal(const APFloat &Imm, EVT VT, 142 bool ForCodeSize) const override; 143 144 /// Return true if the given shuffle mask can be codegen'd directly, or if it 145 /// should be stack expanded. 146 bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; 147 148 /// Similar to isShuffleMaskLegal. 
Return true is the given 'select with zero' 149 /// shuffle mask can be codegen'd directly. 150 bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override; 151 152 /// Return the ISD::SETCC ValueType. 153 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, 154 EVT VT) const override; 155 156 SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; 157 158 MachineBasicBlock *EmitF128CSEL(MachineInstr &MI, 159 MachineBasicBlock *BB) const; 160 161 MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, 162 MachineBasicBlock *BB) const; 163 164 MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI, 165 MachineBasicBlock *MBB) const; 166 167 MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg, 168 MachineInstr &MI, 169 MachineBasicBlock *BB) const; 170 MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const; 171 MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, 172 MachineInstr &MI, MachineBasicBlock *BB) const; 173 MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, 174 unsigned Opcode, bool Op0IsDef) const; 175 MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; 176 MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI, 177 MachineBasicBlock *BB) const; 178 MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI, 179 MachineBasicBlock *BB) const; 180 MachineBasicBlock *EmitAllocateSMESaveBuffer(MachineInstr &MI, 181 MachineBasicBlock *BB) const; 182 MachineBasicBlock *EmitGetSMESaveSize(MachineInstr &MI, 183 MachineBasicBlock *BB) const; 184 185 MachineBasicBlock * 186 EmitInstrWithCustomInserter(MachineInstr &MI, 187 MachineBasicBlock *MBB) const override; 188 189 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, 190 MachineFunction &MF, 191 unsigned Intrinsic) const override; 192 193 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, 194 std::optional<unsigned> ByteOffset) const override; 195 196 
bool shouldRemoveRedundantExtend(SDValue Op) const override; 197 198 bool isTruncateFree(Type *Ty1, Type *Ty2) const override; 199 bool isTruncateFree(EVT VT1, EVT VT2) const override; 200 201 bool isProfitableToHoist(Instruction *I) const override; 202 203 bool isZExtFree(Type *Ty1, Type *Ty2) const override; 204 bool isZExtFree(EVT VT1, EVT VT2) const override; 205 bool isZExtFree(SDValue Val, EVT VT2) const override; 206 207 bool optimizeExtendOrTruncateConversion( 208 Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override; 209 210 bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override; 211 getMaxSupportedInterleaveFactor()212 unsigned getMaxSupportedInterleaveFactor() const override { return 4; } 213 214 bool lowerInterleavedLoad(LoadInst *LI, 215 ArrayRef<ShuffleVectorInst *> Shuffles, 216 ArrayRef<unsigned> Indices, 217 unsigned Factor) const override; 218 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, 219 unsigned Factor) const override; 220 221 bool lowerDeinterleaveIntrinsicToLoad( 222 LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override; 223 224 bool lowerInterleaveIntrinsicToStore( 225 StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override; 226 227 bool isLegalAddImmediate(int64_t) const override; 228 bool isLegalAddScalableImmediate(int64_t) const override; 229 bool isLegalICmpImmediate(int64_t) const override; 230 231 bool isMulAddWithConstProfitable(SDValue AddNode, 232 SDValue ConstNode) const override; 233 234 bool shouldConsiderGEPOffsetSplit() const override; 235 236 EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, 237 const AttributeList &FuncAttributes) const override; 238 239 LLT getOptimalMemOpLLT(const MemOp &Op, 240 const AttributeList &FuncAttributes) const override; 241 242 /// Return true if the addressing mode represented by AM is legal for this 243 /// target, for a load/store of the specified type. 
244 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, 245 unsigned AS, 246 Instruction *I = nullptr) const override; 247 248 int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, 249 int64_t MaxOffset) const override; 250 251 /// Return true if an FMA operation is faster than a pair of fmul and fadd 252 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method 253 /// returns true, otherwise fmuladd is expanded to fmul + fadd. 254 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 255 EVT VT) const override; 256 bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override; 257 258 bool generateFMAsInMachineCombiner(EVT VT, 259 CodeGenOptLevel OptLevel) const override; 260 261 /// Return true if the target has native support for 262 /// the specified value type and it is 'desirable' to use the type for the 263 /// given node type. 264 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; 265 266 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; 267 ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; 268 269 /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. 270 bool isDesirableToCommuteWithShift(const SDNode *N, 271 CombineLevel Level) const override; 272 isDesirableToPullExtFromShl(const MachineInstr & MI)273 bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override { 274 return false; 275 } 276 277 /// Returns false if N is a bit extraction pattern of (X >> C) & Mask. 278 bool isDesirableToCommuteXorWithShift(const SDNode *N) const override; 279 280 /// Return true if it is profitable to fold a pair of shifts into a mask. 
281 bool shouldFoldConstantShiftPairToMask(const SDNode *N, 282 CombineLevel Level) const override; 283 284 bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, 285 unsigned SelectOpcode, SDValue X, 286 SDValue Y) const override; 287 288 /// Returns true if it is beneficial to convert a load of a constant 289 /// to just the constant itself. 290 bool shouldConvertConstantLoadToIntImm(const APInt &Imm, 291 Type *Ty) const override; 292 293 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type 294 /// with this index. 295 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, 296 unsigned Index) const override; 297 shouldFormOverflowOp(unsigned Opcode,EVT VT,bool MathUsed)298 bool shouldFormOverflowOp(unsigned Opcode, EVT VT, 299 bool MathUsed) const override { 300 // Using overflow ops for overflow checks only should beneficial on 301 // AArch64. 302 return TargetLowering::shouldFormOverflowOp(Opcode, VT, true); 303 } 304 305 Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, 306 AtomicOrdering Ord) const override; 307 Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, 308 AtomicOrdering Ord) const override; 309 310 void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override; 311 312 bool isOpSuitableForLDPSTP(const Instruction *I) const; 313 bool isOpSuitableForLSE128(const Instruction *I) const; 314 bool isOpSuitableForRCPC3(const Instruction *I) const; 315 bool shouldInsertFencesForAtomic(const Instruction *I) const override; 316 bool 317 shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override; 318 319 TargetLoweringBase::AtomicExpansionKind 320 shouldExpandAtomicLoadInIR(LoadInst *LI) const override; 321 TargetLoweringBase::AtomicExpansionKind 322 shouldExpandAtomicStoreInIR(StoreInst *SI) const override; 323 TargetLoweringBase::AtomicExpansionKind 324 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; 325 326 
TargetLoweringBase::AtomicExpansionKind 327 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; 328 329 bool useLoadStackGuardNode(const Module &M) const override; 330 TargetLoweringBase::LegalizeTypeAction 331 getPreferredVectorAction(MVT VT) const override; 332 333 /// If the target has a standard location for the stack protector cookie, 334 /// returns the address of that location. Otherwise, returns nullptr. 335 Value *getIRStackGuard(IRBuilderBase &IRB) const override; 336 337 void insertSSPDeclarations(Module &M) const override; 338 Value *getSDagStackGuard(const Module &M) const override; 339 Function *getSSPStackGuardCheck(const Module &M) const override; 340 341 /// If the target has a standard location for the unsafe stack pointer, 342 /// returns the address of that location. Otherwise, returns nullptr. 343 Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override; 344 345 /// If a physical register, this returns the register that receives the 346 /// exception address on entry to an EH pad. 347 Register 348 getExceptionPointerRegister(const Constant *PersonalityFn) const override; 349 350 /// If a physical register, this returns the register that receives the 351 /// exception typeid on entry to a landing pad. 352 Register 353 getExceptionSelectorRegister(const Constant *PersonalityFn) const override; 354 355 bool isIntDivCheap(EVT VT, AttributeList Attr) const override; 356 357 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, 358 const MachineFunction &MF) const override; 359 isCheapToSpeculateCttz(Type *)360 bool isCheapToSpeculateCttz(Type *) const override { 361 return true; 362 } 363 isCheapToSpeculateCtlz(Type *)364 bool isCheapToSpeculateCtlz(Type *) const override { 365 return true; 366 } 367 368 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; 369 hasAndNotCompare(SDValue V)370 bool hasAndNotCompare(SDValue V) const override { 371 // We can use bics for any scalar. 
372 return V.getValueType().isScalarInteger(); 373 } 374 hasAndNot(SDValue Y)375 bool hasAndNot(SDValue Y) const override { 376 EVT VT = Y.getValueType(); 377 378 if (!VT.isVector()) 379 return hasAndNotCompare(Y); 380 381 if (VT.isScalableVector()) 382 return true; 383 384 return VT.getFixedSizeInBits() >= 64; // vector 'bic' 385 } 386 387 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 388 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 389 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 390 SelectionDAG &DAG) const override; 391 392 ShiftLegalizationStrategy 393 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, 394 unsigned ExpansionFactor) const override; 395 shouldTransformSignedTruncationCheck(EVT XVT,unsigned KeptBits)396 bool shouldTransformSignedTruncationCheck(EVT XVT, 397 unsigned KeptBits) const override { 398 // For vectors, we don't have a preference.. 399 if (XVT.isVector()) 400 return false; 401 402 auto VTIsOk = [](EVT VT) -> bool { 403 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || 404 VT == MVT::i64; 405 }; 406 407 // We are ok with KeptBitsVT being byte/word/dword, what SXT supports. 408 // XVT will be larger than KeptBitsVT. 
409 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits); 410 return VTIsOk(XVT) && VTIsOk(KeptBitsVT); 411 } 412 413 bool preferIncOfAddToSubOfNot(EVT VT) const override; 414 415 bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; 416 417 bool shouldExpandCmpUsingSelects(EVT VT) const override; 418 419 bool isComplexDeinterleavingSupported() const override; 420 bool isComplexDeinterleavingOperationSupported( 421 ComplexDeinterleavingOperation Operation, Type *Ty) const override; 422 423 Value *createComplexDeinterleavingIR( 424 IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, 425 ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, 426 Value *Accumulator = nullptr) const override; 427 supportSplitCSR(MachineFunction * MF)428 bool supportSplitCSR(MachineFunction *MF) const override { 429 return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && 430 MF->getFunction().hasFnAttribute(Attribute::NoUnwind); 431 } 432 void initializeSplitCSR(MachineBasicBlock *Entry) const override; 433 void insertCopiesSplitCSR( 434 MachineBasicBlock *Entry, 435 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; 436 supportSwiftError()437 bool supportSwiftError() const override { 438 return true; 439 } 440 supportPtrAuthBundles()441 bool supportPtrAuthBundles() const override { return true; } 442 supportKCFIBundles()443 bool supportKCFIBundles() const override { return true; } 444 445 MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, 446 MachineBasicBlock::instr_iterator &MBBI, 447 const TargetInstrInfo *TII) const override; 448 449 /// Enable aggressive FMA fusion on targets that want it. 450 bool enableAggressiveFMAFusion(EVT VT) const override; 451 aggressivelyPreferBuildVectorSources(EVT VecVT)452 bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override { 453 return true; 454 } 455 456 /// Returns the size of the platform's va_list object. 
457 unsigned getVaListSizeInBits(const DataLayout &DL) const override; 458 459 /// Returns true if \p VecTy is a legal interleaved access type. This 460 /// function checks the vector element type and the overall width of the 461 /// vector. 462 bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, 463 bool &UseScalable) const; 464 465 /// Returns the number of interleaved accesses that will be generated when 466 /// lowering accesses of the given type. 467 unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, 468 bool UseScalable) const; 469 470 MachineMemOperand::Flags getTargetMMOFlags( 471 const Instruction &I) const override; 472 473 bool functionArgumentNeedsConsecutiveRegisters( 474 Type *Ty, CallingConv::ID CallConv, bool isVarArg, 475 const DataLayout &DL) const override; 476 477 /// Used for exception handling on Win64. 478 bool needsFixedCatchObjects() const override; 479 480 bool fallBackToDAGISel(const Instruction &Inst) const override; 481 482 /// SVE code generation for fixed length vectors does not custom lower 483 /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to 484 /// merge. However, merging them creates a BUILD_VECTOR that is just as 485 /// illegal as the original, thus leading to an infinite legalisation loop. 486 /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal 487 /// vector types this override can be removed. 488 bool mergeStoresAfterLegalization(EVT VT) const override; 489 490 // If the platform/function should have a redzone, return the size in bytes. 
getRedZoneSize(const Function & F)491 unsigned getRedZoneSize(const Function &F) const { 492 if (F.hasFnAttribute(Attribute::NoRedZone)) 493 return 0; 494 return 128; 495 } 496 497 bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const; 498 EVT getPromotedVTForPredicate(EVT VT) const; 499 500 EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, 501 bool AllowUnknown = false) const override; 502 503 bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override; 504 505 bool 506 shouldExpandPartialReductionIntrinsic(const IntrinsicInst *I) const override; 507 508 bool shouldExpandCttzElements(EVT VT) const override; 509 510 bool shouldExpandVectorMatch(EVT VT, unsigned SearchSize) const override; 511 512 /// If a change in streaming mode is required on entry to/return from a 513 /// function call it emits and returns the corresponding SMSTART or SMSTOP 514 /// node. \p Condition should be one of the enum values from 515 /// AArch64SME::ToggleCondition. 516 SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable, 517 SDValue Chain, SDValue InGlue, unsigned Condition, 518 SDValue PStateSM = SDValue()) const; 519 isVScaleKnownToBeAPowerOfTwo()520 bool isVScaleKnownToBeAPowerOfTwo() const override { return true; } 521 522 // Normally SVE is only used for byte size vectors that do not fit within a 523 // NEON vector. This changes when OverrideNEON is true, allowing SVE to be 524 // used for 64bit and 128bit vectors as well. 525 bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const; 526 527 // Follow NEON ABI rules even when using SVE for fixed length vectors. 
528 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, 529 EVT VT) const override; 530 unsigned getNumRegistersForCallingConv(LLVMContext &Context, 531 CallingConv::ID CC, 532 EVT VT) const override; 533 unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, 534 CallingConv::ID CC, EVT VT, 535 EVT &IntermediateVT, 536 unsigned &NumIntermediates, 537 MVT &RegisterVT) const override; 538 539 /// True if stack clash protection is enabled for this functions. 540 bool hasInlineStackProbe(const MachineFunction &MF) const override; 541 542 /// In AArch64, true if FEAT_CPA is present. Allows pointer arithmetic 543 /// semantics to be preserved for instruction selection. 544 bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override; 545 546 private: 547 /// Keep a pointer to the AArch64Subtarget around so that we can 548 /// make the right decision when generating code for different targets. 549 const AArch64Subtarget *Subtarget; 550 551 bool isExtFreeImpl(const Instruction *Ext) const override; 552 553 void addTypeForNEON(MVT VT); 554 void addTypeForFixedLengthSVE(MVT VT); 555 void addDRType(MVT VT); 556 void addQRType(MVT VT); 557 558 bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override; 559 560 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, 561 bool isVarArg, 562 const SmallVectorImpl<ISD::InputArg> &Ins, 563 const SDLoc &DL, SelectionDAG &DAG, 564 SmallVectorImpl<SDValue> &InVals) const override; 565 566 void AdjustInstrPostInstrSelection(MachineInstr &MI, 567 SDNode *Node) const override; 568 569 SDValue LowerCall(CallLoweringInfo & /*CLI*/, 570 SmallVectorImpl<SDValue> &InVals) const override; 571 572 SDValue LowerCallResult(SDValue Chain, SDValue InGlue, 573 CallingConv::ID CallConv, bool isVarArg, 574 const SmallVectorImpl<CCValAssign> &RVLocs, 575 const SDLoc &DL, SelectionDAG &DAG, 576 SmallVectorImpl<SDValue> &InVals, bool isThisReturn, 577 SDValue ThisVal, bool 
RequiresSMChange) const; 578 579 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; 580 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; 581 SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const; 582 SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; 583 584 SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const; 585 SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const; 586 587 SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const; 588 589 SDValue LowerVECTOR_COMPRESS(SDValue Op, SelectionDAG &DAG) const; 590 591 SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; 592 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; 593 SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; 594 595 bool 596 isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const; 597 598 /// Finds the incoming stack arguments which overlap the given fixed stack 599 /// object and incorporates their load into the current chain. This prevents 600 /// an upcoming store from clobbering the stack argument before it's used. 
601 SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, 602 MachineFrameInfo &MFI, int ClobberedFI) const; 603 604 bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; 605 606 void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL, 607 SDValue &Chain) const; 608 609 bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, 610 bool isVarArg, 611 const SmallVectorImpl<ISD::OutputArg> &Outs, 612 LLVMContext &Context, const Type *RetTy) const override; 613 614 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 615 const SmallVectorImpl<ISD::OutputArg> &Outs, 616 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, 617 SelectionDAG &DAG) const override; 618 619 SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, 620 unsigned Flag) const; 621 SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, 622 unsigned Flag) const; 623 SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, 624 unsigned Flag) const; 625 SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, 626 unsigned Flag) const; 627 SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, 628 unsigned Flag) const; 629 template <class NodeTy> 630 SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; 631 template <class NodeTy> 632 SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; 633 template <class NodeTy> 634 SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; 635 template <class NodeTy> 636 SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; 637 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 638 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 639 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 640 SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 641 SDValue 
LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 642 SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase, 643 const SDLoc &DL, SelectionDAG &DAG) const; 644 SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL, 645 SelectionDAG &DAG) const; 646 SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 647 SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 648 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; 649 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; 650 SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; 651 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; 652 SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; 653 SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, 654 SDValue TVal, SDValue FVal, 655 iterator_range<SDNode::user_iterator> Users, 656 bool HasNoNans, const SDLoc &dl, 657 SelectionDAG &DAG) const; 658 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 659 SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 660 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; 661 SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; 662 SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const; 663 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; 664 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; 665 SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; 666 SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; 667 SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const; 668 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; 669 SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; 670 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; 671 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; 672 SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const; 673 SDValue 
LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 674 SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; 675 SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; 676 SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const; 677 SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const; 678 SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const; 679 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 680 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 681 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 682 SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const; 683 SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; 684 SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const; 685 SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const; 686 SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, 687 unsigned NewOp) const; 688 SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const; 689 SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const; 690 SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; 691 SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; 692 SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const; 693 SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const; 694 SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const; 695 SDValue LowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const; 696 SDValue LowerGET_ACTIVE_LANE_MASK(SDValue Op, SelectionDAG &DAG) const; 697 SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; 698 SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; 699 SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; 700 SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const; 701 SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; 702 SDValue LowerCTPOP_PARITY(SDValue Op, 
SelectionDAG &DAG) const; 703 SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; 704 SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const; 705 SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; 706 SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; 707 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; 708 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; 709 SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; 710 SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; 711 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; 712 SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; 713 SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const; 714 SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 715 SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 716 SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; 717 SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const; 718 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; 719 SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; 720 SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; 721 SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const; 722 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; 723 SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; 724 SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const; 725 SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 726 SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 727 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 728 729 SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const; 730 731 SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, 732 SelectionDAG &DAG) const; 733 SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op, 734 SelectionDAG &DAG) const; 735 SDValue 
LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const; 736 SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const; 737 SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const; 738 SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const; 739 SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp, 740 SelectionDAG &DAG) const; 741 SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const; 742 SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const; 743 SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const; 744 SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op, 745 SelectionDAG &DAG) const; 746 SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op, 747 SelectionDAG &DAG) const; 748 SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const; 749 SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const; 750 SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const; 751 SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op, 752 SelectionDAG &DAG) const; 753 SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const; 754 SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const; 755 SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const; 756 SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const; 757 SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op, 758 SelectionDAG &DAG) const; 759 SDValue LowerFixedLengthBuildVectorToSVE(SDValue Op, SelectionDAG &DAG) const; 760 761 SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, 762 SmallVectorImpl<SDNode *> &Created) const override; 763 SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, 764 SmallVectorImpl<SDNode *> &Created) const override; 765 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, 
                          int Enabled, int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  // Inline-assembly constraint support.
  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  /// Map an inline-asm memory constraint string to its enum value. "Q" is the
  /// only memory constraint this backend handles itself; everything else is
  /// deferred to the generic TargetLowering implementation.
  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    //        followed by llvm_unreachable so we'll leave them unimplemented in
    //        the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle Lowering flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  // Gather/scatter index widening and load-extension profitability hooks.
  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  // Pre-/post-indexed addressing: getIndexedAddressParts is the shared helper
  // behind the two overridden hooks below.
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                              SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
                       bool IsPre, MachineRegisterInfo &MRI) const override;

  // Result replacement for nodes with illegal result types; the specialized
  // Replace* helpers are called from ReplaceNodeResults.
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;
  void ReplaceGetActiveLaneMaskResults(SDNode *N,
                                       SmallVectorImpl<SDValue> &Results,
                                       SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned
                                             Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally useable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  // Returns the runtime value for PSTATE.SM by generating a call to
  // __arm_sme_state.
  SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
                             EVT VT) const;

  bool preferScalarizeSplat(SDNode *N) const override;

  unsigned getMinimumJumpTableEntries() const override;

  // Always use the "soft promotion" strategy for half (fp16) types.
  bool softPromoteHalfType() const override { return true; }

  // Scalarizing a binop is considered profitable only when its vector operand
  // is a SETCC.
  bool shouldScalarizeBinop(SDValue VecOp) const override {
    return VecOp.getOpcode() == ISD::SETCC;
  }
};

namespace AArch64 {
// Factory for the AArch64 FastISel instance used by the fast instruction
// selector.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif