//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value; please stick to the following naming convention:
//
// _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                     to source operand OP<n>.
//
// _MERGE_ZERO         The result value is a vector with inactive lanes
//                     actively zeroed.
//
// _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                     to the last source operand, whose only purpose is to be
//                     a passthru value.
//
// For other cases where no explicit action is needed to set the inactive
// lanes, or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an Objective-C call that gets emitted together with a special
  // `mov x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
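  // (Illustrative example, not part of the original comment: a node such as
  // FADD_PRED takes a governing predicate followed by the two vector operands
  // of the addition, and lanes where the predicate is false hold an
  // unspecified value. By contrast, a _MERGE_PASSTHRU node such as
  // FABS_MERGE_PASSTHRU carries one extra final operand whose lanes are
  // returned wherever the predicate is false.)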
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller.
  ASSERT_ZEXT_BOOL,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) ->
  /// (op (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with
  /// zero' shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitMopa(unsigned Opc, unsigned BaseReg, MachineInstr &MI,
                              MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
                                            MachineInstr &MI,
                                            MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
                                         MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                       Type *Ty, unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
  /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                         CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
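    // (Illustrative note, not from the original source: MathUsed is ignored
    // and `true` is passed through below, so the generic hook is consulted as
    // if the arithmetic result were always used.)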
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge stores into a value wider than 64 bits (which would require
    // FP or vector registers) if the NoImplicitFloat attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
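    // (Illustrative note, not from the original source: the check below only
    // accepts fixed-width vectors of at least 64 bits, i.e. sizes that map to
    // the NEON BIC instruction; scalable vectors are rejected for now, as the
    // TODO above points out.)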
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<CCValAssign> &RVLocs,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                              unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // Normally SVE is only used for vectors that do not fit within a NEON
  // vector. This changes when OverrideNEON is true, allowing SVE to be used
  // for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                              LLT Ty2) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif