//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        act as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
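// For example (illustrative): FADD_PRED below is a predicated floating-point
// add whose inactive lanes are undefined, SETCC_MERGE_ZERO zeroes its
// inactive lanes, and FABS_MERGE_PASSTHRU supplies them from a trailing
// passthru operand.
//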
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  // Essentially like a normal COPY that works on GPRs, but cannot be
  // rematerialised by passes like the simple register coalescer. It's
  // required for SME when lowering calls because we cannot allow frame
  // index calculations using addvl to slip in between the smstart/smstop
  // and the bl instruction. The scalable vector length may change across
  // the smstart/smstop boundary.
  OBSCURE_COPY,
  SMSTART,
  SMSTOP,
  RESTORE_ZA,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
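  /// For example (illustrative only): in big-endian mode this allows a v4i32
  /// register to be reinterpreted as v8i16 without the REV instructions that
  /// a plain bitcast of the same value would require.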
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  PMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  LDIAPP,
  LDNP,
  STP,
  STILP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
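// For example (an illustrative sketch, not an interface defined here), the
// rounding mode held in a raw FPCR value could be decoded as:
//   Rounding RM = static_cast<Rounding>((FPCR >> RoundingBitsPos) & rmMask);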

// Registers used to pass function arguments.
const ArrayRef<MCPhysReg> getGPRArgRegs();
const ArrayRef<MCPhysReg> getFPRArgRegs();

} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                 MachineInstr &MI, MachineBasicBlock *BB,
                                 bool HasTile) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool shouldRemoveRedundantExtend(SDValue Op) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool optimizeExtendOrTruncateConversion(
      Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool isOpSuitableForLSE128(const Instruction *I) const;
  bool isOpSuitableForRCPC3(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call, it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Entry tells whether this is before/after the call, which is
  /// necessary because PSTATE.SM is only queried once.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
                              SDValue Chain, SDValue InGlue,
                              SDValue PStateSM, bool Entry) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT, bool StreamingSVE);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
                                  SelectionDAG &DAG) const;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<CCValAssign> &RVLocs,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle Lowering flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                      const SDLoc &DL,
                                      const AsmOperandInfo &Constraint,
                                      SelectionDAG &DAG) const override;

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                              SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally useable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
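  // For example (illustrative only): a cast from nxv2f32 to nxv2i64 may be
  // performed as a REINTERPRET_CAST from the unpacked nxv2f32 to the packed
  // nxv4f32, followed by a BITCAST from nxv4f32 to nxv2i64.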
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  // Returns the runtime value for PSTATE.SM. When the function is
  // streaming-compatible, this generates a call to __arm_sme_state.
  SDValue getPStateSM(SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs,
                      SDLoc DL, EVT VT) const;

  bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                              LLT Ty2) const override;

  bool preferScalarizeSplat(SDNode *N) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif