//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
// _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                     to source operand OP<n>.
//
// _MERGE_ZERO         The result value is a vector with inactive lanes
//                     actively zeroed.
//
// _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                     to the last source operand, whose only purpose is to
//                     provide a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//   _PRED
//
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  FCSEL, // Conditional move instruction.
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAXNM_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector arithmetic negation
  NEG,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Absolute difference
  UABD,
  SABD,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  REV,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads (first-faulting).
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads (first-faulting).
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP,

  // Pseudo for an ObjC call that gets emitted together with a special
  // `mov x29, x29` marker instruction.
  CALL_RVMARKER
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits, they're probably just qualifying a CopyFromReg.
// FIXME: X86 also checks for CMOV here. Do we need something similar?
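// For example, "ADD Wd, Wn, Wm" implicitly zeroes bits [63:32] of Xd, so an
// explicit zero-extension of such a node's result to 64 bits is redundant.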
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext;
}

} // end anonymous namespace

class AArch64Subtarget;
class AArch64TargetMachine;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64 bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
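  /// On AArch64 this is i32 for scalar compares and an integer vector with the
  /// same shape as the operands (e.g. v4i32 for v4f32) for vector compares.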
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                           unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
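  /// On AArch64 this is typically the case when the constant can be
  /// materialized with a short MOVZ/MOVK sequence rather than loaded from a
  /// constant pool.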
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                              Value *Addr, AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilder<> &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const SelectionDAG &DAG) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
        Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
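    // (BICS computes Rn AND NOT(Rm) and sets the condition flags, so a pattern
    // like (x & ~y) != 0 needs no separate compare instruction.)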
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    return VT.getSizeInBits() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy,
                                    const DataLayout &DL) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                     const DataLayout &DL) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
                                                 CallingConv::ID CallConv,
                                                 bool isVarArg) const override;
  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    //        followed by llvm_unreachable so we'll leave them unimplemented in
    //        the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  // Normally SVE is only used for fixed-length vectors that do not fit within
  // a NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
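  // For example, a 256-bit fixed-length vector such as v32i8 cannot be held in
  // a single NEON register and is therefore a candidate for SVE lowering,
  // whereas 64-bit and 128-bit vectors normally stay on NEON unless
  // OverrideNEON is set.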
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally useable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif