1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16 
17 #include "AArch64.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/IR/CallingConv.h"
23 #include "llvm/IR/Instruction.h"
24 
25 namespace llvm {
26 
27 namespace AArch64ISD {
28 
29 // For predicated nodes where the result is a vector, the operation is
30 // controlled by a governing predicate and the inactive lanes are explicitly
31 // defined with a value, please stick to the following naming convention:
32 //
33 //    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
34 //                        to source operand OP<n>.
35 //
36 //    _MERGE_ZERO         The result value is a vector with inactive lanes
37 //                        actively zeroed.
38 //
39 //    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
40 //                        to the last source operand, whose only purpose is to
41 //                        provide a passthru value.
42 //
43 // For other cases where no explicit action is needed to set the inactive lanes,
44 // or when the result is not a vector and it is needed or helpful to
45 // distinguish a node from similar unpredicated nodes, use:
46 //
47 //    _PRED
48 //
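// As a concrete illustration of the convention (a sketch, not normative): a
// predicated node such as FADD_PRED below leaves its inactive result lanes
// undefined and is typically built as
//   SDValue Res = DAG.getNode(AArch64ISD::FADD_PRED, DL, VT, Pg, Op1, Op2);
// where Pg is the governing predicate, whereas FABS_MERGE_PASSTHRU carries one
// extra operand whose value fills the inactive lanes of the result.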
49 enum NodeType : unsigned {
50   FIRST_NUMBER = ISD::BUILTIN_OP_END,
51   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
52   CALL,         // Function call.
53 
54   // Pseudo for an ObjC call that gets emitted together with a special `mov
55   // x29, x29` marker instruction.
56   CALL_RVMARKER,
57 
58   CALL_BTI, // Function call followed by a BTI instruction.
59 
60   // Produces the full sequence of instructions for getting the thread pointer
61   // offset of a variable into X0, using the TLSDesc model.
62   TLSDESC_CALLSEQ,
63   ADRP,     // Page address of a TargetGlobalAddress operand.
64   ADR,      // ADR
65   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
66   LOADgot,  // Load from automatically generated descriptor (e.g. Global
67             // Offset Table, TLS record).
68   RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
69   BRCOND,   // Conditional branch instruction; "b.cond".
70   CSEL,
71   CSINV, // Conditional select invert.
72   CSNEG, // Conditional select negate.
73   CSINC, // Conditional select increment.
74 
75   // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
76   // ELF.
77   THREAD_POINTER,
78   ADC,
79   SBC, // adc, sbc instructions
80 
81   // Predicated instructions where inactive lanes produce undefined results.
82   ABDS_PRED,
83   ABDU_PRED,
84   FADD_PRED,
85   FDIV_PRED,
86   FMA_PRED,
87   FMAX_PRED,
88   FMAXNM_PRED,
89   FMIN_PRED,
90   FMINNM_PRED,
91   FMUL_PRED,
92   FSUB_PRED,
93   MUL_PRED,
94   MULHS_PRED,
95   MULHU_PRED,
96   SDIV_PRED,
97   SHL_PRED,
98   SMAX_PRED,
99   SMIN_PRED,
100   SRA_PRED,
101   SRL_PRED,
102   UDIV_PRED,
103   UMAX_PRED,
104   UMIN_PRED,
105 
106   // Unpredicated vector instructions
107   BIC,
108 
109   SRAD_MERGE_OP1,
110 
111   // Predicated instructions with the result of inactive lanes provided by the
112   // last operand.
113   FABS_MERGE_PASSTHRU,
114   FCEIL_MERGE_PASSTHRU,
115   FFLOOR_MERGE_PASSTHRU,
116   FNEARBYINT_MERGE_PASSTHRU,
117   FNEG_MERGE_PASSTHRU,
118   FRECPX_MERGE_PASSTHRU,
119   FRINT_MERGE_PASSTHRU,
120   FROUND_MERGE_PASSTHRU,
121   FROUNDEVEN_MERGE_PASSTHRU,
122   FSQRT_MERGE_PASSTHRU,
123   FTRUNC_MERGE_PASSTHRU,
124   FP_ROUND_MERGE_PASSTHRU,
125   FP_EXTEND_MERGE_PASSTHRU,
126   UINT_TO_FP_MERGE_PASSTHRU,
127   SINT_TO_FP_MERGE_PASSTHRU,
128   FCVTZU_MERGE_PASSTHRU,
129   FCVTZS_MERGE_PASSTHRU,
130   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
131   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
132   ABS_MERGE_PASSTHRU,
133   NEG_MERGE_PASSTHRU,
134 
135   SETCC_MERGE_ZERO,
136 
137   // Arithmetic instructions which write flags.
138   ADDS,
139   SUBS,
140   ADCS,
141   SBCS,
142   ANDS,
143 
144   // Conditional compares. Operands: left,right,falsecc,cc,flags
145   CCMP,
146   CCMN,
147   FCCMP,
148 
149   // Floating point comparison
150   FCMP,
151 
152   // Scalar extract
153   EXTR,
154 
155   // Scalar-to-vector duplication
156   DUP,
157   DUPLANE8,
158   DUPLANE16,
159   DUPLANE32,
160   DUPLANE64,
161   DUPLANE128,
162 
163   // Vector immediate moves
164   MOVI,
165   MOVIshift,
166   MOVIedit,
167   MOVImsl,
168   FMOV,
169   MVNIshift,
170   MVNImsl,
171 
172   // Vector immediate ops
173   BICi,
174   ORRi,
175 
176   // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
177   // element must be identical.
178   BSP,
179 
180   // Vector shuffles
181   ZIP1,
182   ZIP2,
183   UZP1,
184   UZP2,
185   TRN1,
186   TRN2,
187   REV16,
188   REV32,
189   REV64,
190   EXT,
191   SPLICE,
192 
193   // Vector shift by scalar
194   VSHL,
195   VLSHR,
196   VASHR,
197 
198   // Vector saturating/rounding shift by immediate
199   SQSHL_I,
200   UQSHL_I,
201   SQSHLU_I,
202   SRSHR_I,
203   URSHR_I,
204 
205   // Vector shift by constant and insert
206   VSLI,
207   VSRI,
208 
209   // Vector comparisons
210   CMEQ,
211   CMGE,
212   CMGT,
213   CMHI,
214   CMHS,
215   FCMEQ,
216   FCMGE,
217   FCMGT,
218 
219   // Vector zero comparisons
220   CMEQz,
221   CMGEz,
222   CMGTz,
223   CMLEz,
224   CMLTz,
225   FCMEQz,
226   FCMGEz,
227   FCMGTz,
228   FCMLEz,
229   FCMLTz,
230 
231   // Vector across-lanes addition
232   // Only the lower result lane is defined.
233   SADDV,
234   UADDV,
235 
236   // Add Pairwise of two vectors
237   ADDP,
238   // Add Long Pairwise
239   SADDLP,
240   UADDLP,
241 
242   // udot/sdot instructions
243   UDOT,
244   SDOT,
245 
246   // Vector across-lanes min/max
247   // Only the lower result lane is defined.
248   SMINV,
249   UMINV,
250   SMAXV,
251   UMAXV,
252 
253   SADDV_PRED,
254   UADDV_PRED,
255   SMAXV_PRED,
256   UMAXV_PRED,
257   SMINV_PRED,
258   UMINV_PRED,
259   ORV_PRED,
260   EORV_PRED,
261   ANDV_PRED,
262 
263   // Vector bitwise insertion
264   BIT,
265 
266   // Compare-and-branch
267   CBZ,
268   CBNZ,
269   TBZ,
270   TBNZ,
271 
272   // Tail calls
273   TC_RETURN,
274 
275   // Custom prefetch handling
276   PREFETCH,
277 
278   // {s|u}int to FP within a FP register.
279   SITOF,
280   UITOF,
281 
282   /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
283   /// world w.r.t. vectors, which causes additional REV instructions to be
284   /// generated to compensate for the byte-swapping. But sometimes we do
285   /// need to re-interpret the data in SIMD vector registers in big-endian
286   /// mode without emitting such REV instructions.
287   NVCAST,
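  // For example (illustrative): in big-endian mode an NVCAST can reinterpret a
  // v2i64 register as v4i32 while keeping the in-register lane layout, where a
  // plain ISD::BITCAST would additionally imply a byte swap (REV).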
288 
289   MRS, // MRS, also sets the flags via a glue.
290 
291   SMULL,
292   UMULL,
293 
294   // Reciprocal estimates and steps.
295   FRECPE,
296   FRECPS,
297   FRSQRTE,
298   FRSQRTS,
299 
300   SUNPKHI,
301   SUNPKLO,
302   UUNPKHI,
303   UUNPKLO,
304 
305   CLASTA_N,
306   CLASTB_N,
307   LASTA,
308   LASTB,
309   TBL,
310 
311   // Floating-point reductions.
312   FADDA_PRED,
313   FADDV_PRED,
314   FMAXV_PRED,
315   FMAXNMV_PRED,
316   FMINV_PRED,
317   FMINNMV_PRED,
318 
319   INSR,
320   PTEST,
321   PTRUE,
322 
323   BITREVERSE_MERGE_PASSTHRU,
324   BSWAP_MERGE_PASSTHRU,
325   REVH_MERGE_PASSTHRU,
326   REVW_MERGE_PASSTHRU,
327   CTLZ_MERGE_PASSTHRU,
328   CTPOP_MERGE_PASSTHRU,
329   DUP_MERGE_PASSTHRU,
330   INDEX_VECTOR,
331 
332   // Cast between vectors of the same element type that differ in length.
333   REINTERPRET_CAST,
334 
335   // Nodes to build an LD64B / ST64B 64-byte quantity out of i64, and vice versa
336   LS64_BUILD,
337   LS64_EXTRACT,
338 
339   LD1_MERGE_ZERO,
340   LD1S_MERGE_ZERO,
341   LDNF1_MERGE_ZERO,
342   LDNF1S_MERGE_ZERO,
343   LDFF1_MERGE_ZERO,
344   LDFF1S_MERGE_ZERO,
345   LD1RQ_MERGE_ZERO,
346   LD1RO_MERGE_ZERO,
347 
348   // Structured loads.
349   SVE_LD2_MERGE_ZERO,
350   SVE_LD3_MERGE_ZERO,
351   SVE_LD4_MERGE_ZERO,
352 
353   // Unsigned gather loads.
354   GLD1_MERGE_ZERO,
355   GLD1_SCALED_MERGE_ZERO,
356   GLD1_UXTW_MERGE_ZERO,
357   GLD1_SXTW_MERGE_ZERO,
358   GLD1_UXTW_SCALED_MERGE_ZERO,
359   GLD1_SXTW_SCALED_MERGE_ZERO,
360   GLD1_IMM_MERGE_ZERO,
361 
362   // Signed gather loads
363   GLD1S_MERGE_ZERO,
364   GLD1S_SCALED_MERGE_ZERO,
365   GLD1S_UXTW_MERGE_ZERO,
366   GLD1S_SXTW_MERGE_ZERO,
367   GLD1S_UXTW_SCALED_MERGE_ZERO,
368   GLD1S_SXTW_SCALED_MERGE_ZERO,
369   GLD1S_IMM_MERGE_ZERO,
370 
371   // First-faulting unsigned gather loads.
372   GLDFF1_MERGE_ZERO,
373   GLDFF1_SCALED_MERGE_ZERO,
374   GLDFF1_UXTW_MERGE_ZERO,
375   GLDFF1_SXTW_MERGE_ZERO,
376   GLDFF1_UXTW_SCALED_MERGE_ZERO,
377   GLDFF1_SXTW_SCALED_MERGE_ZERO,
378   GLDFF1_IMM_MERGE_ZERO,
379 
380   // First-faulting signed gather loads.
381   GLDFF1S_MERGE_ZERO,
382   GLDFF1S_SCALED_MERGE_ZERO,
383   GLDFF1S_UXTW_MERGE_ZERO,
384   GLDFF1S_SXTW_MERGE_ZERO,
385   GLDFF1S_UXTW_SCALED_MERGE_ZERO,
386   GLDFF1S_SXTW_SCALED_MERGE_ZERO,
387   GLDFF1S_IMM_MERGE_ZERO,
388 
389   // Non-temporal gather loads
390   GLDNT1_MERGE_ZERO,
391   GLDNT1_INDEX_MERGE_ZERO,
392   GLDNT1S_MERGE_ZERO,
393 
394   // Contiguous masked store.
395   ST1_PRED,
396 
397   // Scatter store
398   SST1_PRED,
399   SST1_SCALED_PRED,
400   SST1_UXTW_PRED,
401   SST1_SXTW_PRED,
402   SST1_UXTW_SCALED_PRED,
403   SST1_SXTW_SCALED_PRED,
404   SST1_IMM_PRED,
405 
406   // Non-temporal scatter store
407   SSTNT1_PRED,
408   SSTNT1_INDEX_PRED,
409 
410   // SME
411   RDSVL,
412   REVD_MERGE_PASSTHRU,
413 
414   // Asserts that a function argument (i32) is zero-extended to i8 by
415   // the caller
416   ASSERT_ZEXT_BOOL,
417 
418   // Strict (exception-raising) floating point comparison
419   STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
420   STRICT_FCMPE,
421 
422   // NEON Load/Store with post-increment base updates
423   LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
424   LD3post,
425   LD4post,
426   ST2post,
427   ST3post,
428   ST4post,
429   LD1x2post,
430   LD1x3post,
431   LD1x4post,
432   ST1x2post,
433   ST1x3post,
434   ST1x4post,
435   LD1DUPpost,
436   LD2DUPpost,
437   LD3DUPpost,
438   LD4DUPpost,
439   LD1LANEpost,
440   LD2LANEpost,
441   LD3LANEpost,
442   LD4LANEpost,
443   ST2LANEpost,
444   ST3LANEpost,
445   ST4LANEpost,
446 
447   STG,
448   STZG,
449   ST2G,
450   STZ2G,
451 
452   LDP,
453   STP,
454   STNP,
455 
456   // Memory Operations
457   MOPS_MEMSET,
458   MOPS_MEMSET_TAGGING,
459   MOPS_MEMCOPY,
460   MOPS_MEMMOVE,
461 };
462 
463 } // end namespace AArch64ISD
464 
465 namespace AArch64 {
466 /// Possible values of current rounding mode, which is specified in bits
467 /// 23:22 of FPCR.
468 enum Rounding {
469   RN = 0,    // Round to Nearest
470   RP = 1,    // Round towards Plus infinity
471   RM = 2,    // Round towards Minus infinity
472   RZ = 3,    // Round towards Zero
473   rmMask = 3 // Bit mask selecting rounding mode
474 };
475 
476 // Bit position of rounding mode bits in FPCR.
477 const unsigned RoundingBitsPos = 22;
478 } // namespace AArch64
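// Illustrative helper (a sketch, not part of the upstream interface; the name
// decodeFPCRRoundingMode is hypothetical and <cstdint> is assumed to be
// available transitively): decode the rounding-mode field of a raw FPCR value
// using the constants above.
inline AArch64::Rounding decodeFPCRRoundingMode(uint64_t FPCR) {
  // Bits 23:22 hold the rounding mode; shift them down and mask with rmMask.
  return static_cast<AArch64::Rounding>((FPCR >> AArch64::RoundingBitsPos) &
                                        AArch64::rmMask);
}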
479 
480 class AArch64Subtarget;
481 
482 class AArch64TargetLowering : public TargetLowering {
483 public:
484   explicit AArch64TargetLowering(const TargetMachine &TM,
485                                  const AArch64Subtarget &STI);
486 
487   /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
488   /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
489   bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
490                            SDValue N1) const override;
491 
492   /// Selects the correct CCAssignFn for a given CallingConvention value.
493   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
494 
495   /// Selects the correct CCAssignFn for a given CallingConvention value.
496   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
497 
498   /// Determine which of the bits specified in Mask are known to be either zero
499   /// or one and return them in the KnownZero/KnownOne bitsets.
500   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
501                                      const APInt &DemandedElts,
502                                      const SelectionDAG &DAG,
503                                      unsigned Depth = 0) const override;
504 
505   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
506     // Returning i64 unconditionally here (i.e. even for ILP32) means that the
507     // *DAG* representation of pointers will always be 64-bits. They will be
508     // truncated and extended when transferred to memory, but the 64-bit DAG
509     // allows us to use AArch64's addressing modes much more easily.
510     return MVT::getIntegerVT(64);
511   }
512 
513   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
514                                     const APInt &DemandedElts,
515                                     TargetLoweringOpt &TLO) const override;
516 
517   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
518 
519   /// Returns true if the target allows unaligned memory accesses of the
520   /// specified type.
521   bool allowsMisalignedMemoryAccesses(
522       EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
523       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
524       bool *Fast = nullptr) const override;
525   /// LLT variant.
526   bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
527                                       Align Alignment,
528                                       MachineMemOperand::Flags Flags,
529                                       bool *Fast = nullptr) const override;
530 
531   /// Provide custom lowering hooks for some operations.
532   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
533 
534   const char *getTargetNodeName(unsigned Opcode) const override;
535 
536   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
537 
538   /// This method returns a target specific FastISel object, or null if the
539   /// target does not support "fast" ISel.
540   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
541                            const TargetLibraryInfo *libInfo) const override;
542 
543   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
544 
545   bool isFPImmLegal(const APFloat &Imm, EVT VT,
546                     bool ForCodeSize) const override;
547 
548   /// Return true if the given shuffle mask can be codegen'd directly, or if it
549   /// should be stack expanded.
550   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
551 
552   /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
553   /// shuffle mask can be codegen'd directly.
554   bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
555 
556   /// Return the ISD::SETCC ValueType.
557   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
558                          EVT VT) const override;
559 
560   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
561 
562   MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
563                                   MachineBasicBlock *BB) const;
564 
565   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
566                                            MachineBasicBlock *BB) const;
567 
568   MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
569                                   MachineInstr &MI,
570                                   MachineBasicBlock *BB) const;
571   MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
572   MachineBasicBlock *EmitMopa(unsigned Opc, unsigned BaseReg, MachineInstr &MI,
573                               MachineBasicBlock *BB) const;
574   MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
575                                             MachineInstr &MI,
576                                             MachineBasicBlock *BB) const;
577   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
578   MachineBasicBlock *EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
579                                          MachineInstr &MI,
580                                          MachineBasicBlock *BB) const;
581 
582   MachineBasicBlock *
583   EmitInstrWithCustomInserter(MachineInstr &MI,
584                               MachineBasicBlock *MBB) const override;
585 
586   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
587                           MachineFunction &MF,
588                           unsigned Intrinsic) const override;
589 
590   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
591                              EVT NewVT) const override;
592 
593   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
594   bool isTruncateFree(EVT VT1, EVT VT2) const override;
595 
596   bool isProfitableToHoist(Instruction *I) const override;
597 
598   bool isZExtFree(Type *Ty1, Type *Ty2) const override;
599   bool isZExtFree(EVT VT1, EVT VT2) const override;
600   bool isZExtFree(SDValue Val, EVT VT2) const override;
601 
602   bool shouldSinkOperands(Instruction *I,
603                           SmallVectorImpl<Use *> &Ops) const override;
604 
605   bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
606 
607   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
608 
609   bool lowerInterleavedLoad(LoadInst *LI,
610                             ArrayRef<ShuffleVectorInst *> Shuffles,
611                             ArrayRef<unsigned> Indices,
612                             unsigned Factor) const override;
613   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
614                              unsigned Factor) const override;
615 
616   bool isLegalAddImmediate(int64_t) const override;
617   bool isLegalICmpImmediate(int64_t) const override;
618 
619   bool isMulAddWithConstProfitable(SDValue AddNode,
620                                    SDValue ConstNode) const override;
621 
622   bool shouldConsiderGEPOffsetSplit() const override;
623 
624   EVT getOptimalMemOpType(const MemOp &Op,
625                           const AttributeList &FuncAttributes) const override;
626 
627   LLT getOptimalMemOpLLT(const MemOp &Op,
628                          const AttributeList &FuncAttributes) const override;
629 
630   /// Return true if the addressing mode represented by AM is legal for this
631   /// target, for a load/store of the specified type.
632   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
633                              unsigned AS,
634                              Instruction *I = nullptr) const override;
635 
636   /// Return the cost of the scaling factor used in the addressing
637   /// mode represented by AM for this target, for a load/store
638   /// of the specified type.
639   /// If the AM is supported, the return value must be >= 0.
640   /// If the AM is not supported, it returns a negative value.
641   InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
642                                        Type *Ty, unsigned AS) const override;
643 
644   /// Return true if an FMA operation is faster than a pair of fmul and fadd
645   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
646   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
647   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
648                                   EVT VT) const override;
649   bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
650 
651   bool generateFMAsInMachineCombiner(EVT VT,
652                                      CodeGenOpt::Level OptLevel) const override;
653 
654   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
655 
656   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
657   bool isDesirableToCommuteWithShift(const SDNode *N,
658                                      CombineLevel Level) const override;
659 
660   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
661   bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
662 
663   /// Return true if it is profitable to fold a pair of shifts into a mask.
664   bool shouldFoldConstantShiftPairToMask(const SDNode *N,
665                                          CombineLevel Level) const override;
666 
667   /// Returns true if it is beneficial to convert a load of a constant
668   /// to just the constant itself.
669   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
670                                          Type *Ty) const override;
671 
672   /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
673   /// with this index.
674   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
675                                unsigned Index) const override;
676 
677   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
678                             bool MathUsed) const override {
679     // Using overflow ops for overflow checks only should be beneficial on
680     // AArch64.
681     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
682   }
683 
684   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
685                         AtomicOrdering Ord) const override;
686   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
687                               AtomicOrdering Ord) const override;
688 
689   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
690 
691   bool isOpSuitableForLDPSTP(const Instruction *I) const;
692   bool shouldInsertFencesForAtomic(const Instruction *I) const override;
693 
694   TargetLoweringBase::AtomicExpansionKind
695   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
696   TargetLoweringBase::AtomicExpansionKind
697   shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
698   TargetLoweringBase::AtomicExpansionKind
699   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
700 
701   TargetLoweringBase::AtomicExpansionKind
702   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
703 
704   bool useLoadStackGuardNode() const override;
705   TargetLoweringBase::LegalizeTypeAction
706   getPreferredVectorAction(MVT VT) const override;
707 
708   /// If the target has a standard location for the stack protector cookie,
709   /// returns the address of that location. Otherwise, returns nullptr.
710   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
711 
712   void insertSSPDeclarations(Module &M) const override;
713   Value *getSDagStackGuard(const Module &M) const override;
714   Function *getSSPStackGuardCheck(const Module &M) const override;
715 
716   /// If the target has a standard location for the unsafe stack pointer,
717   /// returns the address of that location. Otherwise, returns nullptr.
718   Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
719 
720   /// If a physical register, this returns the register that receives the
721   /// exception address on entry to an EH pad.
722   Register
723   getExceptionPointerRegister(const Constant *PersonalityFn) const override {
724     // FIXME: This is a guess. Has this been defined yet?
725     return AArch64::X0;
726   }
727 
728   /// If a physical register, this returns the register that receives the
729   /// exception typeid on entry to a landing pad.
730   Register
731   getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
732     // FIXME: This is a guess. Has this been defined yet?
733     return AArch64::X1;
734   }
735 
736   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
737 
738   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
739                         const MachineFunction &MF) const override {
740     // Do not merge to float value size (128 bits) if no implicit
741     // float attribute is set.
742 
743     bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
744 
745     if (NoFloat)
746       return (MemVT.getSizeInBits() <= 64);
747     return true;
748   }
749 
750   bool isCheapToSpeculateCttz() const override {
751     return true;
752   }
753 
754   bool isCheapToSpeculateCtlz() const override {
755     return true;
756   }
757 
758   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
759 
760   bool hasAndNotCompare(SDValue V) const override {
761     // We can use bics for any scalar.
762     return V.getValueType().isScalarInteger();
763   }
764 
765   bool hasAndNot(SDValue Y) const override {
766     EVT VT = Y.getValueType();
767 
768     if (!VT.isVector())
769       return hasAndNotCompare(Y);
770 
771     TypeSize TS = VT.getSizeInBits();
772     // TODO: We should be able to use bic/bif too for SVE.
773     return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
774   }
775 
776   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
777       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
778       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
779       SelectionDAG &DAG) const override;
780 
781   bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
782 
783   bool shouldTransformSignedTruncationCheck(EVT XVT,
784                                             unsigned KeptBits) const override {
785     // For vectors, we don't have a preference.
786     if (XVT.isVector())
787       return false;
788 
789     auto VTIsOk = [](EVT VT) -> bool {
790       return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
791              VT == MVT::i64;
792     };
793 
794     // We are ok with KeptBitsVT being byte/word/dword, which is what SXT supports.
795     // XVT will be larger than KeptBitsVT.
796     MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
797     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
798   }
799 
800   bool preferIncOfAddToSubOfNot(EVT VT) const override;
801 
802   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
803 
804   bool hasBitPreservingFPLogic(EVT VT) const override {
805     // FIXME: Is this always true? It should be true for vectors at least.
806     return VT == MVT::f32 || VT == MVT::f64;
807   }
808 
809   bool supportSplitCSR(MachineFunction *MF) const override {
810     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
811            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
812   }
813   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
814   void insertCopiesSplitCSR(
815       MachineBasicBlock *Entry,
816       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
817 
818   bool supportSwiftError() const override {
819     return true;
820   }
821 
822   /// Enable aggressive FMA fusion on targets that want it.
823   bool enableAggressiveFMAFusion(EVT VT) const override;
824 
825   /// Returns the size of the platform's va_list object.
826   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
827 
828   /// Returns true if \p VecTy is a legal interleaved access type. This
829   /// function checks the vector element type and the overall width of the
830   /// vector.
831   bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
832                                     bool &UseScalable) const;
833 
834   /// Returns the number of interleaved accesses that will be generated when
835   /// lowering accesses of the given type.
836   unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
837                                      bool UseScalable) const;
838 
839   MachineMemOperand::Flags getTargetMMOFlags(
840     const Instruction &I) const override;
841 
842   bool functionArgumentNeedsConsecutiveRegisters(
843       Type *Ty, CallingConv::ID CallConv, bool isVarArg,
844       const DataLayout &DL) const override;
845 
846   /// Used for exception handling on Win64.
847   bool needsFixedCatchObjects() const override;
848 
849   bool fallBackToDAGISel(const Instruction &Inst) const override;
850 
851   /// SVE code generation for fixed length vectors does not custom lower
852   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
853   /// merge. However, merging them creates a BUILD_VECTOR that is just as
854   /// illegal as the original, thus leading to an infinite legalisation loop.
855   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
856   /// vector types this override can be removed.
857   bool mergeStoresAfterLegalization(EVT VT) const override;
858 
859   // If the platform/function should have a redzone, return the size in bytes.
860   unsigned getRedZoneSize(const Function &F) const {
861     if (F.hasFnAttribute(Attribute::NoRedZone))
862       return 0;
863     return 128;
864   }
865 
866   bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
867   EVT getPromotedVTForPredicate(EVT VT) const;
868 
869   EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
870                              bool AllowUnknown = false) const override;
871 
872   bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
873 
874 private:
875   /// Keep a pointer to the AArch64Subtarget around so that we can
876   /// make the right decision when generating code for different targets.
877   const AArch64Subtarget *Subtarget;
878 
879   bool isExtFreeImpl(const Instruction *Ext) const override;
880 
881   void addTypeForNEON(MVT VT);
882   void addTypeForFixedLengthSVE(MVT VT);
883   void addDRTypeForNEON(MVT VT);
884   void addQRTypeForNEON(MVT VT);
885 
886   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
887                                bool isVarArg,
888                                const SmallVectorImpl<ISD::InputArg> &Ins,
889                                const SDLoc &DL, SelectionDAG &DAG,
890                                SmallVectorImpl<SDValue> &InVals) const override;
891 
892   SDValue LowerCall(CallLoweringInfo & /*CLI*/,
893                     SmallVectorImpl<SDValue> &InVals) const override;
894 
895   SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
896                           CallingConv::ID CallConv, bool isVarArg,
897                           const SmallVectorImpl<CCValAssign> &RVLocs,
898                           const SDLoc &DL, SelectionDAG &DAG,
899                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
900                           SDValue ThisVal) const;
901 
902   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
903   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
904   SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
905   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
906 
907   SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
908   SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
909 
910   SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
911 
912   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
913   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
914 
915   bool
916   isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
917 
918   /// Finds the incoming stack arguments which overlap the given fixed stack
919   /// object and incorporates their load into the current chain. This prevents
920   /// an upcoming store from clobbering the stack argument before it's used.
921   SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
922                               MachineFrameInfo &MFI, int ClobberedFI) const;
923 
924   bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
925 
926   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
927                            SDValue &Chain) const;
928 
929   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
930                       bool isVarArg,
931                       const SmallVectorImpl<ISD::OutputArg> &Outs,
932                       LLVMContext &Context) const override;
933 
934   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
935                       const SmallVectorImpl<ISD::OutputArg> &Outs,
936                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
937                       SelectionDAG &DAG) const override;
938 
939   SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
940                         unsigned Flag) const;
941   SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
942                         unsigned Flag) const;
943   SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
944                         unsigned Flag) const;
945   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
946                         unsigned Flag) const;
947   template <class NodeTy>
948   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
949   template <class NodeTy>
950   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
951   template <class NodeTy>
952   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
953   template <class NodeTy>
954   SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
955   SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
956   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
957   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
958   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
959   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
960   SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
961                                const SDLoc &DL, SelectionDAG &DAG) const;
962   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
963                                  SelectionDAG &DAG) const;
964   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
965   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
966   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
967   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
968   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
969   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
970                          SDValue TVal, SDValue FVal, const SDLoc &dl,
971                          SelectionDAG &DAG) const;
972   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
973   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
974   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
975   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
976   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
977   SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
978   SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
979   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
980   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
981   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
982   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
983   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
984   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
985   SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
986   SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
987   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
988   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
989   SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
990   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
991   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
992   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
993   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
994   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
995                               unsigned NewOp) const;
996   SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
997   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
998   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
999   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1000   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
1001   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
1002   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
1003   SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
1004   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
1005   SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
1006   SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
1007   SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
1008   SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
1009   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
1010   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1011   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1012   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1013   SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1014   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1015   SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1016   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1017   SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1018   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
1019   SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
1020   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
1021   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
1022   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
1023   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
1024   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1025   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
1026   SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
1027   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
1028   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1029   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
1030                                          SDValue &Size,
1031                                          SelectionDAG &DAG) const;
1032   SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
1033                              EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
1034 
1035   SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
1036                                                SelectionDAG &DAG) const;
1037   SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
1038                                                SelectionDAG &DAG) const;
1039   SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1040   SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1041   SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
1042   SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
1043   SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
1044                               SelectionDAG &DAG) const;
1045   SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
1046   SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
1047   SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
1048   SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
1049                                             SelectionDAG &DAG) const;
1050   SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
1051                                               SelectionDAG &DAG) const;
1052   SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1053   SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
1054   SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
1055   SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
1056                                              SelectionDAG &DAG) const;
1057   SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
1058   SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
1059   SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
1060   SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
1061   SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
1062                                               SelectionDAG &DAG) const;
1063 
1064   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1065                         SmallVectorImpl<SDNode *> &Created) const override;
1066   SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1067                         SmallVectorImpl<SDNode *> &Created) const override;
1068   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1069                           int &ExtraSteps, bool &UseOneConst,
1070                           bool Reciprocal) const override;
1071   SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1072                            int &ExtraSteps) const override;
1073   SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
1074                            const DenormalMode &Mode) const override;
1075   SDValue getSqrtResultForDenormInput(SDValue Operand,
1076                                       SelectionDAG &DAG) const override;
1077   unsigned combineRepeatedFPDivisors() const override;
1078 
1079   ConstraintType getConstraintType(StringRef Constraint) const override;
1080   Register getRegisterByName(const char* RegName, LLT VT,
1081                              const MachineFunction &MF) const override;
1082 
1083   /// Examine constraint string and operand type and determine a weight value.
1084   /// The operand object must already have been set up with the operand type.
1085   ConstraintWeight
1086   getSingleConstraintMatchWeight(AsmOperandInfo &info,
1087                                  const char *constraint) const override;
1088 
1089   std::pair<unsigned, const TargetRegisterClass *>
1090   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1091                                StringRef Constraint, MVT VT) const override;
1092 
1093   const char *LowerXConstraint(EVT ConstraintVT) const override;
1094 
1095   void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1096                                     std::vector<SDValue> &Ops,
1097                                     SelectionDAG &DAG) const override;
1098 
1099   unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1100     if (ConstraintCode == "Q")
1101       return InlineAsm::Constraint_Q;
1102     // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
1103     //        followed by llvm_unreachable so we'll leave them unimplemented in
1104     //        the backend for now.
1105     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1106   }
1107 
1108   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
1109   bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
1110   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1111   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1112   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1113   bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
1114                               ISD::MemIndexedMode &AM, bool &IsInc,
1115                               SelectionDAG &DAG) const;
1116   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
1117                                  ISD::MemIndexedMode &AM,
1118                                  SelectionDAG &DAG) const override;
1119   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1120                                   SDValue &Offset, ISD::MemIndexedMode &AM,
1121                                   SelectionDAG &DAG) const override;
1122 
1123   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1124                           SelectionDAG &DAG) const override;
1125   void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1126                              SelectionDAG &DAG) const;
1127   void ReplaceExtractSubVectorResults(SDNode *N,
1128                                       SmallVectorImpl<SDValue> &Results,
1129                                       SelectionDAG &DAG) const;
1130 
1131   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
1132 
1133   void finalizeLowering(MachineFunction &MF) const override;
1134 
1135   bool shouldLocalize(const MachineInstr &MI,
1136                       const TargetTransformInfo *TTI) const override;
1137 
1138   bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1139                                          const APInt &OriginalDemandedBits,
1140                                          const APInt &OriginalDemandedElts,
1141                                          KnownBits &Known,
1142                                          TargetLoweringOpt &TLO,
1143                                          unsigned Depth) const override;
1144 
1145   bool isTargetCanonicalConstantNode(SDValue Op) const override;
1146 
1147   // Normally SVE is only used for byte size vectors that do not fit within a
1148   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
1149   // used for 64bit and 128bit vectors as well.
1150   bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
1151 
1152   // With the exception of data-predicate transitions, no instructions are
1153   // required to cast between legal scalable vector types. However:
1154   //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
1155   //     is not universally useable.
1156   //  2. Most unpacked integer types are not legal and thus integer extends
1157   //     cannot be used to convert between unpacked and packed types.
1158   // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
1159   // to transition between unpacked and packed types of the same element type,
1160   // with BITCAST used otherwise.
1161   // This function does not handle predicate bitcasts.
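  // Illustrative example based on the description above: casting an unpacked
  // nxv2f32 to a packed nxv4i32 would use REINTERPRET_CAST for
  // nxv2f32 -> nxv4f32 (same element type, different packing) followed by a
  // plain BITCAST for nxv4f32 -> nxv4i32 (same bit length).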
1162   SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
1163 
1164   bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
1165                                               LLT Ty2) const override;
1166 };
1167 
1168 namespace AArch64 {
1169 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1170                          const TargetLibraryInfo *libInfo);
1171 } // end namespace AArch64
1172 
1173 } // end namespace llvm
1174 
1175 #endif
1176