//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value. Please stick to the following naming convention:
//
// _MERGE_OP<n>       The result value is a vector with inactive lanes equal
//                    to source operand OP<n>.
//
// _MERGE_ZERO        The result value is a vector with inactive lanes
//                    actively zeroed.
//
// _MERGE_PASSTHRU    The result value is a vector with inactive lanes equal
//                    to the last source operand, whose only purpose is to be
//                    a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  // Essentially like a normal COPY that works on GPRs, but cannot be
  // rematerialised by passes like the simple register coalescer. It's
  // required for SME when lowering calls because we cannot allow frame
  // index calculations using addvl to slip in between the smstart/smstop
  // and the bl instruction. The scalable vector length may change across
  // the smstart/smstop boundary.
  OBSCURE_COPY,
  SMSTART,
  SMSTOP,
  RESTORE_ZA,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,
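
  // Illustrative note (not an exhaustive definition): on a big-endian target,
  // (bitcast v2i64 (v4i32 X)) has to reorder bytes with REV instructions to
  // keep the in-memory layout consistent, whereas (NVCAST v2i64 (v4i32 X))
  // simply reinterprets the register contents as they are.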

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  PMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,
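
  // Informal summary of the suffixes used by the gather loads above and the
  // scatter stores below (see the SVE ISel patterns for the authoritative
  // semantics):
  //   _SCALED      offsets are scaled by the element size.
  //   _UXTW/_SXTW  32-bit offsets are zero-/sign-extended to 64 bits.
  //   _IMM         vector of bases plus an immediate offset.
  //   _MERGE_ZERO  inactive lanes of the result are zeroed.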

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  LDNP,
  STP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
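
  // Informal note: masks matching the patterns of dedicated instructions such
  // as ZIP1/ZIP2, UZP1/UZP2, TRN1/TRN2, REV or EXT are typically reported as
  // legal here and selected directly; other masks may be lowered via TBL or
  // expanded.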

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                 MachineInstr &MI, MachineBasicBlock *BB,
                                 bool HasTile) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool optimizeExtendOrTruncateConversion(Instruction *I,
                                          Loop *L) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
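
  // For reference (informal, not exhaustive), the legal modes broadly
  // correspond to the hardware addressing forms:
  //   [Xn]                              base register only
  //   [Xn, #uimm12]                     unsigned 12-bit immediate, scaled by
  //                                     the access size
  //   [Xn, #simm9]                      signed 9-bit unscaled immediate
  //   [Xn, Xm{, lsl #log2(size)}]       register offset, optionally shifted
  //   [Xn, Wm, (s|u)xtw {#log2(size)}]  extended 32-bit register offset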

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      Instruction *I, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;
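
  // Background note (informal): complex deinterleaving targets the FCADD and
  // FCMLA instructions, which operate on interleaved real/imaginary lanes
  // using rotations of 0/90/180/270 degrees (FCADD only supports 90 and 270);
  // the Rotation parameter above corresponds to those rotations.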
  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportKCFIBundles() const override { return true; }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call, this emits and returns the corresponding SMSTART or
  /// SMSTOP node. \p Entry tells whether this is before/after the Call,
  /// which is necessary because PSTATE.SM is only queried once.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
                              SDValue Chain, SDValue InFlag,
                              SDValue PStateSM, bool Entry) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override;

  // Normally SVE is only used for fixed-length vectors that do not fit within
  // a NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
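
  // Example (informal; assumes SVE fixed-length codegen is enabled, e.g. via
  // -msve-vector-bits=<N>): a fixed v32i8 (256 bits) cannot be held in a NEON
  // register and is lowered using SVE, whereas v16i8 keeps using NEON unless
  // OverrideNEON is true (e.g. when NEON is unavailable in streaming mode).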

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForStreamingSVE(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
                                  SelectionDAG &DAG) const;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<CCValAssign> &RVLocs,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op,
                                           SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op,
                                           SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op,
                                           SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op,
                                           SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }
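
  // Example of the "Q" constraint (illustrative user code, not part of this
  // interface): "Q" requires a memory operand addressed by a single base
  // register with no offset, as needed by exclusive loads/stores, e.g.
  //   asm volatile("ldxr %0, %1" : "=r"(Val) : "Q"(*Ptr));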

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                              SDValue &Offset, ISD::MemIndexedMode &AM,
                              bool &IsInc, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  // Returns the runtime value for PSTATE.SM. When the function is
  // streaming-compatible, this generates a call to __arm_sme_state.
  SDValue getPStateSM(SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs,
                      SDLoc DL, EVT VT) const;

  bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                              LLT Ty2) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif