//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate, and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
// _MERGE_OP<n>       The result value is a vector with inactive lanes equal
//                    to source operand OP<n>.
//
// _MERGE_ZERO        The result value is a vector with inactive lanes
//                    actively zeroed.
//
// _MERGE_PASSTHRU    The result value is a vector with inactive lanes equal
//                    to the last source operand, whose only purpose is being
//                    a passthru value.
//
// For other cases where no explicit action is needed to set the inactive
// lanes, or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
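// For example, among the nodes defined below: FADD_PRED is a predicated FADD
// that makes no guarantee about its inactive lanes, SMIN_MERGE_OP1 leaves the
// inactive lanes equal to its first source operand, SETCC_MERGE_ZERO zeroes
// them, and DUP_MERGE_PASSTHRU takes them from a trailing passthru operand.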
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  FCSEL, // Conditional move instruction.
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Arithmetic instructions
  ADD_PRED,
  FADD_PRED,
  SDIV_PRED,
  UDIV_PRED,
  FMA_PRED,
  SMIN_MERGE_OP1,
  UMIN_MERGE_OP1,
  SMAX_MERGE_OP1,
  UMAX_MERGE_OP1,
  SHL_MERGE_OP1,
  SRL_MERGE_OP1,
  SRA_MERGE_OP1,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left, right, falsecc, cc, flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector arithmetic negation
  NEG,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector saturating/rounding shifts by immediate
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise negation
  NOT,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors; it causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  REV,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  REINTERPRET_CAST,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

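  // A note on the gather/scatter nodes below: a trailing "S" on the load
  // mnemonic means the loaded elements are sign-extended, "FF" denotes a
  // first-faulting load, and "NT" a non-temporal access. The addressing-mode
  // suffixes follow the SVE forms: _SCALED scales the vector offsets by the
  // element size, _UXTW/_SXTW zero-/sign-extend 32-bit offsets, and _IMM uses
  // a vector base plus an immediate offset.
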
  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads.
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter stores.
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter stores.
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Strict (exception-raising) floating point comparison.
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates.
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits.
// FIXME: X86 also checks for CMOV here. Do we need something similar?
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg;
}

} // end anonymous namespace

class AArch64Subtarget;
class AArch64TargetMachine;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

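  // As a rough illustration (not the actual implementation, and with
  // simplified variable names), call lowering drives these through the
  // generic CCState machinery, e.g.:
  //
  //   SmallVector<CCValAssign, 16> ArgLocs;
  //   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  //   CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, IsVarArg));
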
  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// Returns true if a cast between SrcAS and DestAS is a noop.
  bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
    // Addrspacecasts are always noops.
    return true;
  }

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                           unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
  /// method returns true; otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

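  // Note: on AArch64 these are generally expected to return true for the
  // floating-point types with native FMADD/FMLA support, so separate fmul and
  // fadd nodes (and llvm.fmuladd) can be contracted into a single fused
  // multiply-add.
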
  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                              Value *Addr, AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilder<> &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const SelectionDAG &DAG) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
        Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

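  // The next two hooks encourage DAGCombine to form (and x, (not y)) patterns,
  // which map onto the scalar BIC/BICS and NEON BIC instructions.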
  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    return VT.getSizeInBits() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy,
                                    const DataLayout &DL) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                     const DataLayout &DL) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
                                                 CallingConv::ID CallConv,
                                                 bool isVarArg) const override;
  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override {
    return !useSVEForFixedLengthVectors();
  }

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
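  // The LowerXXX helpers below are typically reached, directly or indirectly,
  // from LowerOperation() (or from ReplaceNodeResults() for illegal result
  // types), based on the opcode of the node being lowered.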
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
                        RTLIB::Libcall Call) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  unsigned combineRepeatedFPDivisors() const override;

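  // Inline-assembly support: constraint classification, register-class
  // selection, and operand legalisation for AArch64 asm constraints (see also
  // the "Q" memory constraint handled in getInlineAsmMemConstraint below).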
  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool useSVEForFixedLengthVectors() const;
  bool useSVEForFixedLengthVectorVT(EVT VT) const;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif