//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate, and the inactive lanes are explicitly
// defined with a value, please use the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        explicitly set to zero.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        be a passthru value.
//
// For other cases, where no explicit action is needed to set the inactive
// lanes, or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
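// For example, reading node names defined below through this convention:
// SRAD_MERGE_OP1's inactive lanes equal its source operand 1,
// SETCC_MERGE_ZERO's inactive lanes are zeroed, ABS_MERGE_PASSTHRU's inactive
// lanes come from its trailing passthru operand, and ADD_PRED leaves its
// inactive lanes undefined.
//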
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // PC-relative address (ADR instruction).
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left, right, falsecc, cc, flags.
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar extract
  EXTR,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector saturating and rounding shifts by immediate
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Vector halving addition
  SHADD,
  UHADD,

  // Vector rounding halving addition
  SRHADD,
  URHADD,

  // Unsigned Add Long Pairwise
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Vector bitwise insertion
  BIT,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
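  /// For example (illustrative): reinterpreting the lanes of a v2i64 register
  /// as v4i32 in big-endian mode can use NVCAST, where ISD::BITCAST would
  /// additionally imply the REV byte-swapping described above.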
  NVCAST,

  MRS, // MRS, also sets the flags via glue.

  SMULL,
  UMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTRUE,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-byte quantity out of i64s, and vice
  // versa.
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads (first-faulting).
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads (first-faulting).
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

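  // MTE tag-setting stores (a brief gloss, not from the original header):
  // STG/ST2G set allocation tags for one/two granules; STZG/STZ2G also zero
  // the tagged memory.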
  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  STP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits; they're probably just qualifying a CopyFromReg.
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
         Opc != ISD::FREEZE;
}
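
// For example, this predicate lets a zero-extension such as
// (i64 (zext (i32 X))) be selected as a plain SUBREG_TO_REG when isDef32
// holds for X's defining node, since the 32-bit instruction defining X has
// already zeroed bits [63:32]; the opcodes excluded above merely re-label an
// existing value rather than (re)defining it.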

} // end anonymous namespace

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
} // namespace AArch64
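
// For illustration (a sketch, not part of the original header): given a raw
// FPCR value, these constants decode the current rounding mode as
//   auto RM = static_cast<AArch64::Rounding>(
//       (FPCR >> AArch64::RoundingBitsPos) & AArch64::rmMask);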

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64 bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(const SDValue &AddNode,
                                   const SDValue &ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                       Type *Ty, unsigned AS) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true; otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz() const override {
    return true;
  }

  bool isCheapToSpeculateCtlz() const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are OK with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }
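
  // For example (illustrative): with the check above, keeping 8 bits of an
  // i32 value, i.e. testing x == (sext (trunc x to i8) to i32), is expected
  // to select to a compare with a sign-extended register operand such as
  // "cmp w0, w0, sxtb".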

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
    const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    //        followed by llvm_unreachable so we'll leave them unimplemented in
    //        the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;

  // Normally SVE is only used for fixed-length vectors that do not fit within
  // a NEON vector (128 bits). This changes when OverrideNEON is true, allowing
  // SVE to be used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
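  // For example (illustrative): nxv2f32 (unpacked) and nxv4f32 (packed) have
  // different bit lengths, so a "bitcast" from nxv2f32 to nxv2i64 may first
  // REINTERPRET_CAST to nxv4f32 and then BITCAST that to nxv2i64.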
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                              LLT Ty2) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif