//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value; please use the following naming convention:
//
// _MERGE_OP<n>     The result value is a vector with inactive lanes equal
//                  to source operand OP<n>.
//
// _MERGE_ZERO      The result value is a vector with inactive lanes
//                  actively zeroed.
//
// _MERGE_PASSTHRU  The result value is a vector with inactive lanes equal
//                  to the last source operand, whose only purpose is to be
//                  a passthru value.
//
// For other cases where no explicit action is needed to set the inactive
// lanes, or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an Objective-C call that gets emitted together with a special
  // `mov x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  COALESCER_BARRIER,

  SMSTART,
  SMSTOP,
  RESTORE_ZA,
  RESTORE_ZT,
  SAVE_ZT,

  // A call with the callee in x16, i.e. "blr x16".
  CALL_ARM64EC_TO_X64,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // To avoid stack clash, allocation is performed by block and each block is
  // probed.
  PROBED_ALLOCA,

  // Predicated instructions where inactive lanes produce undefined results.
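  // For example, FADD_PRED below is the predicated form of a floating-point
  // add: only the lanes selected by the governing predicate are defined in
  // the result. Nodes that must define the inactive lanes instead use the
  // _MERGE_ZERO / _MERGE_PASSTHRU variants described above.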
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left, right, falsecc, cc, flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating-point comparison
  FCMP,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector narrowing shift by immediate (bottom)
  RSHRNB_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Unsigned/signed sum long across vector
  UADDLV,
  SADDLV,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte swapping. But sometimes we do
  /// need to reinterpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  PMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,

  CTTZ_ELTS,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-byte quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,
  GLD1Q_MERGE_ZERO,
  GLD1Q_INDEX_MERGE_ZERO,

  // Signed gather loads.
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,
  SST1Q_PRED,
  SST1Q_INDEX_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // SME ZA loads and stores
  SME_ZA_LDR,
  SME_ZA_STR,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  LDIAPP,
  LDNP,
  STP,
  STILP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;

// Registers used to pass function arguments.
ArrayRef<MCPhysReg> getGPRArgRegs();
ArrayRef<MCPhysReg> getFPRArgRegs();

/// Maximum allowed number of unprobed bytes above SP at an ABI boundary.
const unsigned StackProbeMaxUnprobedStack = 1024;

/// Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with
  /// zero' shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
                                            MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                 MachineInstr &MI, MachineBasicBlock *BB,
                                 bool HasTile) const;
  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Opcode, bool Op0IsDef) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool shouldRemoveRedundantExtend(SDValue Op) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool optimizeExtendOrTruncateConversion(
      Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
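  /// A typical example of a legal AArch64 addressing mode is a base register
  /// plus an unsigned, scaled 12-bit immediate offset, e.g. "ldr x0, [x1, #16]".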
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                         int64_t MaxOffset) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOptLevel OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
    return false;
  }

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
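    // Forwarding to the generic hook with MathUsed forced to true means the
    // overflow op is formed even when only the overflow result is consumed.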
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool isOpSuitableForLSE128(const Instruction *I) const;
  bool isOpSuitableForRCPC3(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to a float value size (128 bits) if the NoImplicitFloat
    // attribute is set.
    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
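    // Vector types are instead handled by hasAndNot() below, which currently
    // only accepts fixed-length vectors of at least 64 bits.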
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call, it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Entry tells whether this is before/after the call, which is
  /// necessary because PSTATE.SM is only queried once.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
                              SDValue Chain, SDValue InGlue,
                              SDValue PStateSM, bool Entry) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // Follow NEON ABI rules even when using SVE for fixed length vectors.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;
  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  /// True if stack clash protection is enabled for this function.
  bool hasInlineStackProbe(const MachineFunction &MF) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  llvm::BumpPtrAllocator BumpAlloc;
  llvm::StringSaver Saver{BumpAlloc};

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT, bool StreamingSVE);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
                                  SelectionDAG &DAG) const;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<CCValAssign> &RVLocs,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal, bool RequiresSMChange) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle lowering of flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                              SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
                       bool IsPre, MachineRegisterInfo &MRI) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally useable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  // Returns the runtime value for PSTATE.SM by generating a call to
  // __arm_sme_state.
  SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
                             EVT VT) const;

  bool preferScalarizeSplat(SDNode *N) const override;

  unsigned getMinimumJumpTableEntries() const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif