//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>      The result value is a vector with inactive lanes equal
//                      to source operand OP<n>.
//
//    _MERGE_ZERO       The result value is a vector with inactive lanes
//                      actively zeroed.
//
//    _MERGE_PASSTHRU   The result value is a vector with inactive lanes equal
//                      to the last source operand, whose only purpose is to
//                      be a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an OBJC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  // Function call, authenticating the callee value first:
  // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands.
  AUTH_CALL,
  // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc,
  // operands.
  AUTH_TC_RETURN,

  // Authenticated variant of CALL_RVMARKER.
  AUTH_CALL_RVMARKER,

  COALESCER_BARRIER,

  VG_SAVE,
  VG_RESTORE,

  SMSTART,
  SMSTOP,
  RESTORE_ZA,
  RESTORE_ZT,
  SAVE_ZT,

  // A call with the callee in x16, i.e. "blr x16".
  CALL_ARM64EC_TO_X64,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.
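
  // As a rough illustration of the naming convention above: FADD_PRED leaves
  // inactive lanes undefined, SETCC_MERGE_ZERO zeroes them, SRAD_MERGE_OP1
  // takes them from source operand 1, and FNEG_MERGE_PASSTHRU takes them from
  // its trailing passthru operand.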

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // To avoid stack clash, allocation is performed by block and each block is
  // probed.
  PROBED_ALLOCA,

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,
  URSHR_I_PRED,

  // Vector narrowing shift by immediate (bottom)
  RSHRNB_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Round wide FP to narrow FP with inexact results to odd.
  FCVTXN,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Unsigned/signed sum Long across Vector
  UADDLV,
  SADDLV,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t vectors; which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  PMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,

  CTTZ_ELTS,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,
  GLD1Q_MERGE_ZERO,
  GLD1Q_INDEX_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,
  SST1Q_PRED,
  SST1Q_INDEX_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,
  ALLOCATE_ZA_BUFFER,
  INIT_TPIDR2OBJ,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // SME ZA loads and stores
  SME_ZA_LDR,
  SME_ZA_STR,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  LDIAPP,
  LDNP,
  STP,
  STILP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;

// Reserved bits should be preserved when modifying FPCR.
const uint64_t ReservedFPControlBits = 0xfffffffff80040f8;

// Registers used to pass function arguments.
ArrayRef<MCPhysReg> getGPRArgRegs();
ArrayRef<MCPhysReg> getFPRArgRegs();

/// Maximum allowed number of unprobed bytes above SP at an ABI
/// boundary.
const unsigned StackProbeMaxUnprobedStack = 1024;

/// Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                 MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Opcode, bool Op0IsDef) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool shouldRemoveRedundantExtend(SDValue Op) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool optimizeExtendOrTruncateConversion(
      Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalAddScalableImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                         int64_t MaxOffset) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOptLevel OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
    return false;
  }

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool isOpSuitableForLSE128(const Instruction *I) const;
  bool isOpSuitableForRCPC3(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool shouldExpandCmpUsingSelects() const override { return true; }

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportPtrAuthBundles() const override { return true; }

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Condition should be one of the enum values from
  /// AArch64SME::ToggleCondition.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
                              SDValue Chain, SDValue InGlue, unsigned Condition,
                              SDValue PStateSM = SDValue()) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64bit and 128bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // Follow NEON ABI rules even when using SVE for fixed length vectors.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;
  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  /// True if stack clash protection is enabled for this function.
  bool hasInlineStackProbe(const MachineFunction &MF) const override;

#ifndef NDEBUG
  void verifyTargetSDNode(const SDNode *N) const override;
#endif

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  llvm::BumpPtrAllocator BumpAlloc;
  llvm::StringSaver Saver{BumpAlloc};

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRType(MVT VT);
  void addQRType(MVT VT);

  bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<CCValAssign> &RVLocs,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal, bool RequiresSMChange) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT,
                                              AArch64PACKey::ID Key,
                                              SDValue Discriminator,
                                              SDValue AddrDiscriminator,
                                              SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op,
                              SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle Lowering flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                              SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
                       bool IsPre, MachineRegisterInfo &MRI) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally useable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  // Returns the runtime value for PSTATE.SM by generating a call to
  // __arm_sme_state.
  SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
                             EVT VT) const;

  bool preferScalarizeSplat(SDNode *N) const override;

  unsigned getMinimumJumpTableEntries() const override;

  bool softPromoteHalfType() const override { return true; }
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif