//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector (the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// given a defined value), please use the following naming convention:
//
//    _MERGE_OP<n>      The result value is a vector with inactive lanes equal
//                      to source operand OP<n>.
//
//    _MERGE_ZERO       The result value is a vector with inactive lanes
//                      actively zeroed.
//
//    _MERGE_PASSTHRU   The result value is a vector with inactive lanes equal
//                      to the last source operand, whose only purpose is to
//                      act as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
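// For example, FADD_PRED below follows the _PRED convention (its inactive
// lanes are left undefined), whereas FABS_MERGE_PASSTHRU takes the values of
// its inactive lanes from the trailing passthru operand.
//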
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR: form a PC-relative address.
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAXNM_PRED,
  FMINNM_PRED,
  FMAX_PRED,
  FMIN_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left, right, falsecc, cc, flags
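  // For example, a short-circuited check such as (a == 0 && b == c) can be
  // lowered to a CMP of 'a' followed by a CCMP of 'b' against 'c': the CCMP
  // only performs its comparison when 'cc' holds on the incoming 'flags', and
  // otherwise sets the flags directly from the 'falsecc' value.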
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
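  // Conceptually, each result bit is taken from Op1 where the corresponding
  // bit of the mask (Op0) is set and from Op2 otherwise, i.e. roughly
  // (Op0 & Op1) | (~Op0 & Op2).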
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Unsigned Add Long Pairwise
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP,
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits; they're probably just qualifying a CopyFromReg.
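// For example, a 32-bit ADD is a 'def32': writing its 32-bit result implicitly
// clears bits 63:32 of the corresponding X register, so a later zero-extension
// to i64 can usually be folded away. The opcodes excluded below provide no
// such guarantee.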
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
         Opc != ISD::FREEZE;
}

} // end anonymous namespace

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
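// For example, given a raw FPCR value (here assumed to be held in an unsigned
// integer named FPCR), the currently selected mode could be recovered as:
//   Rounding Mode = static_cast<Rounding>((FPCR >> RoundingBitsPos) & rmMask);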
} // namespace AArch64

class AArch64Subtarget;
class AArch64TargetMachine;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64 bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                       Type *Ty, unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const SelectionDAG &DAG) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
        Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    return VT.getSizeInBits() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

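  // For example, for a scalar i64 value and KeptBits == 32 this returns true,
  // since the resulting sign-extend-in-register pattern is covered by SXTW;
  // vector types always return false.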
  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy,
                                    const DataLayout &DL) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy,
                                     const DataLayout &DL) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64bit and 128bit vectors as well.
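  // For instance (assuming fixed-length SVE code generation is enabled for the
  // subtarget), a 256-bit fixed-length vector such as v8i32 would be handled
  // via SVE, whereas a 128-bit v4i32 would stay on NEON unless OverrideNEON is
  // true.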
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
                                             LLT Ty2) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif