//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
// _MERGE_OP<n>     The result value is a vector with inactive lanes equal
//                  to source operand OP<n>.
//
// _MERGE_ZERO      The result value is a vector with inactive lanes
//                  actively zeroed.
//
// _MERGE_PASSTHRU  The result value is a vector with inactive lanes equal
//                  to the last source operand, whose only purpose is to be
//                  a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an OBJC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
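  // (Typically these take the governing predicate as their first operand,
  // e.g. ADD_PRED(pg, op1, op2); lanes where pg is inactive are undefined.)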
  ABDS_PRED,
  ABDU_PRED,
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Unsigned Add Long Pairwise
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast.
  /// ISD::BITCAST is not natural in the big-endian world w.r.t. vectors,
  /// which causes additional REV instructions to be generated to compensate
  /// for the byte-swapping. But sometimes we do need to re-interpret the data
  /// in SIMD vector registers in big-endian mode without emitting such REV
  /// instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads (first-faulting).
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads (first-faulting).
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits, they're probably just qualifying a CopyFromReg.
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
         Opc != ISD::FREEZE;
}

} // end anonymous namespace

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits.
    // They will be truncated and extended when transferred to memory, but the
    // 64-bit DAG allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(const SDValue &AddNode,
                                   const SDValue &ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                       Type *Ty, unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to a float value size (128 bits) if the NoImplicitFloat
    // attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64bit and 128bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally useable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                              LLT Ty2) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif