//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value; for these, use the following naming convention:
//
// _MERGE_OP<n>     The result value is a vector with inactive lanes equal
//                  to source operand OP<n>.
//
// _MERGE_ZERO      The result value is a vector with inactive lanes
//                  actively zeroed.
//
// _MERGE_PASSTHRU  The result value is a vector with inactive lanes equal
//                  to the last source operand, whose only purpose is to be
//                  a passthru value.
//
// For other cases where no explicit action is needed to set the inactive
// lanes, or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
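// As an illustrative reading of this convention (using nodes defined below):
// FADD_PRED is a predicated FADD whose inactive lanes are undefined,
// SRAD_MERGE_OP1 takes its inactive lanes from source operand 1,
// SETCC_MERGE_ZERO actively zeroes them, and FABS_MERGE_PASSTHRU takes them
// from a trailing passthru operand.
//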
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left, right, falsecc, cc, flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by immediate (saturating/rounding variants)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Unsigned Add Long Pairwise
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads (first-faulting).
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads (first-faulting).
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads.
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits; they're probably just qualifying a CopyFromReg.
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
         Opc != ISD::FREEZE;
}

} // end anonymous namespace

namespace AArch64 {
/// Possible values of the current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of the rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
} // namespace AArch64
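
// Illustrative sketch only: given a raw FPCR value (the `FPCR` variable here
// is hypothetical, obtained however the caller reads the register), the
// current rounding mode can be decoded with the fields above, e.g.
//   AArch64::Rounding RM = static_cast<AArch64::Rounding>(
//       (FPCR >> AArch64::RoundingBitsPos) & AArch64::rmMask);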

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64 bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(const SDValue &AddNode,
                                   const SDValue &ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                       Type *Ty, unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge up to a float/vector value size (128 bits) if the
    // NoImplicitFloat attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;
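
  // Illustrative example of the interleaving pattern these hooks target (a
  // sketch, not an exhaustive description): a load of <16 x i32> whose result
  // is deinterleaved by shufflevectors into four <4 x i32> values can be
  // turned by lowerInterleavedLoad into a single NEON ld4, provided the factor
  // is within getMaxSupportedInterleaveFactor() and
  // isLegalInterleavedAccessType accepts the type.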

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op,
                                           SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op,
                                           SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op,
                                           SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op,
                                           SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                              LLT Ty2) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif