1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16 
17 #include "AArch64.h"
18 #include "Utils/AArch64SMEAttributes.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/SelectionDAG.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/IR/CallingConv.h"
24 #include "llvm/IR/Instruction.h"
25 
26 namespace llvm {
27 
28 namespace AArch64ISD {
29 
30 // For predicated nodes where the result is a vector, the operation is
31 // controlled by a governing predicate, and the inactive lanes are explicitly
32 // defined with a value, use the following naming convention:
33 //
34 //    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
35 //                        to source operand OP<n>.
36 //
37 //    _MERGE_ZERO         The result value is a vector with inactive lanes
38 //                        actively zeroed.
39 //
40 //    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
41 //                        to the last source operand, whose only purpose is to
42 //                        act as a passthru value.
43 //
44 // For other cases where no explicit action is needed to set the inactive lanes,
45 // or when the result is not a vector and it is needed or helpful to
46 // distinguish a node from similar unpredicated nodes, use:
47 //
48 //    _PRED
49 //
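// As an illustration of this convention, using nodes declared below (purely
// descriptive; no new semantics implied):
//
//    FADD_PRED            Inactive lanes are undefined.
//    SRAD_MERGE_OP1       Inactive lanes take the value of source operand 1.
//    SETCC_MERGE_ZERO     Inactive lanes are zeroed.
//    FABS_MERGE_PASSTHRU  Inactive lanes take the value of the trailing
//                         passthru operand.
//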
50 enum NodeType : unsigned {
51   FIRST_NUMBER = ISD::BUILTIN_OP_END,
52   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
53   CALL,         // Function call.
54 
55   // Pseudo for an ObjC call that gets emitted together with a special `mov
56   // x29, x29` marker instruction.
57   CALL_RVMARKER,
58 
59   CALL_BTI, // Function call followed by a BTI instruction.
60 
61   // Essentially like a normal COPY that works on GPRs, but cannot be
62   // rematerialised by passes like the simple register coalescer. It's
63   // required for SME when lowering calls because we cannot allow frame
64   // index calculations using addvl to slip in between the smstart/smstop
65   // and the bl instruction. The scalable vector length may change across
66   // the smstart/smstop boundary.
67   OBSCURE_COPY,
68   SMSTART,
69   SMSTOP,
70   RESTORE_ZA,
71 
72   // Produces the full sequence of instructions for getting the thread pointer
73   // offset of a variable into X0, using the TLSDesc model.
74   TLSDESC_CALLSEQ,
75   ADRP,     // Page address of a TargetGlobalAddress operand.
76   ADR,      // ADR
77   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
78   LOADgot,  // Load from automatically generated descriptor (e.g. Global
79             // Offset Table, TLS record).
80   RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
81   BRCOND,   // Conditional branch instruction; "b.cond".
82   CSEL,
83   CSINV, // Conditional select invert.
84   CSNEG, // Conditional select negate.
85   CSINC, // Conditional select increment.
86 
87   // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
88   // ELF.
89   THREAD_POINTER,
90   ADC,
91   SBC, // adc, sbc instructions
92 
93   // Predicated instructions where inactive lanes produce undefined results.
94   ABDS_PRED,
95   ABDU_PRED,
96   FADD_PRED,
97   FDIV_PRED,
98   FMA_PRED,
99   FMAX_PRED,
100   FMAXNM_PRED,
101   FMIN_PRED,
102   FMINNM_PRED,
103   FMUL_PRED,
104   FSUB_PRED,
105   HADDS_PRED,
106   HADDU_PRED,
107   MUL_PRED,
108   MULHS_PRED,
109   MULHU_PRED,
110   RHADDS_PRED,
111   RHADDU_PRED,
112   SDIV_PRED,
113   SHL_PRED,
114   SMAX_PRED,
115   SMIN_PRED,
116   SRA_PRED,
117   SRL_PRED,
118   UDIV_PRED,
119   UMAX_PRED,
120   UMIN_PRED,
121 
122   // Unpredicated vector instructions
123   BIC,
124 
125   SRAD_MERGE_OP1,
126 
127   // Predicated instructions with the result of inactive lanes provided by the
128   // last operand.
129   FABS_MERGE_PASSTHRU,
130   FCEIL_MERGE_PASSTHRU,
131   FFLOOR_MERGE_PASSTHRU,
132   FNEARBYINT_MERGE_PASSTHRU,
133   FNEG_MERGE_PASSTHRU,
134   FRECPX_MERGE_PASSTHRU,
135   FRINT_MERGE_PASSTHRU,
136   FROUND_MERGE_PASSTHRU,
137   FROUNDEVEN_MERGE_PASSTHRU,
138   FSQRT_MERGE_PASSTHRU,
139   FTRUNC_MERGE_PASSTHRU,
140   FP_ROUND_MERGE_PASSTHRU,
141   FP_EXTEND_MERGE_PASSTHRU,
142   UINT_TO_FP_MERGE_PASSTHRU,
143   SINT_TO_FP_MERGE_PASSTHRU,
144   FCVTZU_MERGE_PASSTHRU,
145   FCVTZS_MERGE_PASSTHRU,
146   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
147   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
148   ABS_MERGE_PASSTHRU,
149   NEG_MERGE_PASSTHRU,
150 
151   SETCC_MERGE_ZERO,
152 
153   // Arithmetic instructions which write flags.
154   ADDS,
155   SUBS,
156   ADCS,
157   SBCS,
158   ANDS,
159 
160   // Conditional compares. Operands: left, right, falsecc, cc, flags
161   CCMP,
162   CCMN,
163   FCCMP,
164 
165   // Floating point comparison
166   FCMP,
167 
168   // Scalar extract
169   EXTR,
170 
171   // Scalar-to-vector duplication
172   DUP,
173   DUPLANE8,
174   DUPLANE16,
175   DUPLANE32,
176   DUPLANE64,
177   DUPLANE128,
178 
179   // Vector immediate moves
180   MOVI,
181   MOVIshift,
182   MOVIedit,
183   MOVImsl,
184   FMOV,
185   MVNIshift,
186   MVNImsl,
187 
188   // Vector immediate ops
189   BICi,
190   ORRi,
191 
192   // Vector bitwise select: similar to ISD::VSELECT, but the selection is done
193   // per bit, so the bits within a mask element need not all be identical.
194   BSP,
195 
196   // Vector shuffles
197   ZIP1,
198   ZIP2,
199   UZP1,
200   UZP2,
201   TRN1,
202   TRN2,
203   REV16,
204   REV32,
205   REV64,
206   EXT,
207   SPLICE,
208 
209   // Vector shift by scalar
210   VSHL,
211   VLSHR,
212   VASHR,
213 
214   // Vector saturating/rounding shifts by immediate
215   SQSHL_I,
216   UQSHL_I,
217   SQSHLU_I,
218   SRSHR_I,
219   URSHR_I,
220 
221   // Vector shift by constant and insert
222   VSLI,
223   VSRI,
224 
225   // Vector comparisons
226   CMEQ,
227   CMGE,
228   CMGT,
229   CMHI,
230   CMHS,
231   FCMEQ,
232   FCMGE,
233   FCMGT,
234 
235   // Vector zero comparisons
236   CMEQz,
237   CMGEz,
238   CMGTz,
239   CMLEz,
240   CMLTz,
241   FCMEQz,
242   FCMGEz,
243   FCMGTz,
244   FCMLEz,
245   FCMLTz,
246 
247   // Vector across-lanes addition
248   // Only the lower result lane is defined.
249   SADDV,
250   UADDV,
251 
252   // Add Pairwise of two vectors
253   ADDP,
254   // Add Long Pairwise
255   SADDLP,
256   UADDLP,
257 
258   // udot/sdot instructions
259   UDOT,
260   SDOT,
261 
262   // Vector across-lanes min/max
263   // Only the lower result lane is defined.
264   SMINV,
265   UMINV,
266   SMAXV,
267   UMAXV,
268 
269   SADDV_PRED,
270   UADDV_PRED,
271   SMAXV_PRED,
272   UMAXV_PRED,
273   SMINV_PRED,
274   UMINV_PRED,
275   ORV_PRED,
276   EORV_PRED,
277   ANDV_PRED,
278 
279   // Vector bitwise insertion
280   BIT,
281 
282   // Compare-and-branch
283   CBZ,
284   CBNZ,
285   TBZ,
286   TBNZ,
287 
288   // Tail calls
289   TC_RETURN,
290 
291   // Custom prefetch handling
292   PREFETCH,
293 
294   // {s|u}int to FP within a FP register.
295   SITOF,
296   UITOF,
297 
298   /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
299   /// world w.r.t. vectors, which causes additional REV instructions to be
300   /// generated to compensate for the byte-swapping. But sometimes we do
301   /// need to re-interpret the data in SIMD vector registers in big-endian
302   /// mode without emitting such REV instructions.
303   NVCAST,
304 
305   MRS, // MRS, also sets the flags via a glue.
306 
307   SMULL,
308   UMULL,
309 
310   PMULL,
311 
312   // Reciprocal estimates and steps.
313   FRECPE,
314   FRECPS,
315   FRSQRTE,
316   FRSQRTS,
317 
318   SUNPKHI,
319   SUNPKLO,
320   UUNPKHI,
321   UUNPKLO,
322 
323   CLASTA_N,
324   CLASTB_N,
325   LASTA,
326   LASTB,
327   TBL,
328 
329   // Floating-point reductions.
330   FADDA_PRED,
331   FADDV_PRED,
332   FMAXV_PRED,
333   FMAXNMV_PRED,
334   FMINV_PRED,
335   FMINNMV_PRED,
336 
337   INSR,
338   PTEST,
339   PTEST_ANY,
340   PTRUE,
341 
342   BITREVERSE_MERGE_PASSTHRU,
343   BSWAP_MERGE_PASSTHRU,
344   REVH_MERGE_PASSTHRU,
345   REVW_MERGE_PASSTHRU,
346   CTLZ_MERGE_PASSTHRU,
347   CTPOP_MERGE_PASSTHRU,
348   DUP_MERGE_PASSTHRU,
349   INDEX_VECTOR,
350 
351   // Cast between vectors of the same element type that differ in length.
352   REINTERPRET_CAST,
353 
354   // Nodes to build an LD64B / ST64B 64-byte quantity out of i64s, and vice versa
355   LS64_BUILD,
356   LS64_EXTRACT,
357 
358   LD1_MERGE_ZERO,
359   LD1S_MERGE_ZERO,
360   LDNF1_MERGE_ZERO,
361   LDNF1S_MERGE_ZERO,
362   LDFF1_MERGE_ZERO,
363   LDFF1S_MERGE_ZERO,
364   LD1RQ_MERGE_ZERO,
365   LD1RO_MERGE_ZERO,
366 
367   // Structured loads.
368   SVE_LD2_MERGE_ZERO,
369   SVE_LD3_MERGE_ZERO,
370   SVE_LD4_MERGE_ZERO,
371 
372   // Unsigned gather loads.
373   GLD1_MERGE_ZERO,
374   GLD1_SCALED_MERGE_ZERO,
375   GLD1_UXTW_MERGE_ZERO,
376   GLD1_SXTW_MERGE_ZERO,
377   GLD1_UXTW_SCALED_MERGE_ZERO,
378   GLD1_SXTW_SCALED_MERGE_ZERO,
379   GLD1_IMM_MERGE_ZERO,
380 
381   // Signed gather loads
382   GLD1S_MERGE_ZERO,
383   GLD1S_SCALED_MERGE_ZERO,
384   GLD1S_UXTW_MERGE_ZERO,
385   GLD1S_SXTW_MERGE_ZERO,
386   GLD1S_UXTW_SCALED_MERGE_ZERO,
387   GLD1S_SXTW_SCALED_MERGE_ZERO,
388   GLD1S_IMM_MERGE_ZERO,
389 
390   // First-faulting unsigned gather loads.
391   GLDFF1_MERGE_ZERO,
392   GLDFF1_SCALED_MERGE_ZERO,
393   GLDFF1_UXTW_MERGE_ZERO,
394   GLDFF1_SXTW_MERGE_ZERO,
395   GLDFF1_UXTW_SCALED_MERGE_ZERO,
396   GLDFF1_SXTW_SCALED_MERGE_ZERO,
397   GLDFF1_IMM_MERGE_ZERO,
398 
399   // First-faulting signed gather loads.
400   GLDFF1S_MERGE_ZERO,
401   GLDFF1S_SCALED_MERGE_ZERO,
402   GLDFF1S_UXTW_MERGE_ZERO,
403   GLDFF1S_SXTW_MERGE_ZERO,
404   GLDFF1S_UXTW_SCALED_MERGE_ZERO,
405   GLDFF1S_SXTW_SCALED_MERGE_ZERO,
406   GLDFF1S_IMM_MERGE_ZERO,
407 
408   // Non-temporal gather loads
409   GLDNT1_MERGE_ZERO,
410   GLDNT1_INDEX_MERGE_ZERO,
411   GLDNT1S_MERGE_ZERO,
412 
413   // Contiguous masked store.
414   ST1_PRED,
415 
416   // Scatter store
417   SST1_PRED,
418   SST1_SCALED_PRED,
419   SST1_UXTW_PRED,
420   SST1_SXTW_PRED,
421   SST1_UXTW_SCALED_PRED,
422   SST1_SXTW_SCALED_PRED,
423   SST1_IMM_PRED,
424 
425   // Non-temporal scatter store
426   SSTNT1_PRED,
427   SSTNT1_INDEX_PRED,
428 
429   // SME
430   RDSVL,
431   REVD_MERGE_PASSTHRU,
432 
433   // Asserts that a function argument (i32) is zero-extended to i8 by
434   // the caller
435   ASSERT_ZEXT_BOOL,
436 
437   // 128-bit system register accesses
438   // lo64, hi64, chain = MRRS(chain, sysregname)
439   MRRS,
440   // chain = MSRR(chain, sysregname, lo64, hi64)
441   MSRR,
442 
443   // Strict (exception-raising) floating point comparison
444   STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
445   STRICT_FCMPE,
446 
447   // NEON Load/Store with post-increment base updates
448   LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
449   LD3post,
450   LD4post,
451   ST2post,
452   ST3post,
453   ST4post,
454   LD1x2post,
455   LD1x3post,
456   LD1x4post,
457   ST1x2post,
458   ST1x3post,
459   ST1x4post,
460   LD1DUPpost,
461   LD2DUPpost,
462   LD3DUPpost,
463   LD4DUPpost,
464   LD1LANEpost,
465   LD2LANEpost,
466   LD3LANEpost,
467   LD4LANEpost,
468   ST2LANEpost,
469   ST3LANEpost,
470   ST4LANEpost,
471 
472   STG,
473   STZG,
474   ST2G,
475   STZ2G,
476 
477   LDP,
478   LDIAPP,
479   LDNP,
480   STP,
481   STILP,
482   STNP,
483 
484   // Memory Operations
485   MOPS_MEMSET,
486   MOPS_MEMSET_TAGGING,
487   MOPS_MEMCOPY,
488   MOPS_MEMMOVE,
489 };
490 
491 } // end namespace AArch64ISD
492 
493 namespace AArch64 {
494 /// Possible values of current rounding mode, which is specified in bits
495 /// 23:22 of FPCR.
496 enum Rounding {
497   RN = 0,    // Round to Nearest
498   RP = 1,    // Round towards Plus infinity
499   RM = 2,    // Round towards Minus infinity
500   RZ = 3,    // Round towards Zero
501   rmMask = 3 // Bit mask selecting rounding mode
502 };
503 
504 // Bit position of rounding mode bits in FPCR.
505 const unsigned RoundingBitsPos = 22;
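// For illustration (not part of the interface): the current mode can be read
// from an FPCR value as
//   (FPCR >> RoundingBitsPos) & rmMask
// which yields one of the Rounding values above.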
506 
507 // Registers used to pass function arguments.
508 const ArrayRef<MCPhysReg> getGPRArgRegs();
509 const ArrayRef<MCPhysReg> getFPRArgRegs();
510 
511 } // namespace AArch64
512 
513 class AArch64Subtarget;
514 
515 class AArch64TargetLowering : public TargetLowering {
516 public:
517   explicit AArch64TargetLowering(const TargetMachine &TM,
518                                  const AArch64Subtarget &STI);
519 
520   /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
521   /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
522   bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
523                            SDValue N1) const override;
524 
525   /// Selects the correct CCAssignFn for a given CallingConvention value.
526   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
527 
528   /// Selects the correct CCAssignFn for a given CallingConvention value.
529   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
530 
531   /// Determine which of the bits specified in Mask are known to be either zero
532   /// or one and return them in the KnownZero/KnownOne bitsets.
533   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
534                                      const APInt &DemandedElts,
535                                      const SelectionDAG &DAG,
536                                      unsigned Depth = 0) const override;
537 
538   unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
539                                            const APInt &DemandedElts,
540                                            const SelectionDAG &DAG,
541                                            unsigned Depth) const override;
542 
543   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
544     // Returning i64 unconditionally here (i.e. even for ILP32) means that the
545     // *DAG* representation of pointers will always be 64-bits. They will be
546     // truncated and extended when transferred to memory, but the 64-bit DAG
547     // allows us to use AArch64's addressing modes much more easily.
548     return MVT::getIntegerVT(64);
549   }
550 
551   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
552                                     const APInt &DemandedElts,
553                                     TargetLoweringOpt &TLO) const override;
554 
555   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
556 
557   /// Returns true if the target allows unaligned memory accesses of the
558   /// specified type.
559   bool allowsMisalignedMemoryAccesses(
560       EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
561       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
562       unsigned *Fast = nullptr) const override;
563   /// LLT variant.
564   bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
565                                       Align Alignment,
566                                       MachineMemOperand::Flags Flags,
567                                       unsigned *Fast = nullptr) const override;
568 
569   /// Provide custom lowering hooks for some operations.
570   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
571 
572   const char *getTargetNodeName(unsigned Opcode) const override;
573 
574   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
575 
576   /// This method returns a target specific FastISel object, or null if the
577   /// target does not support "fast" ISel.
578   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
579                            const TargetLibraryInfo *libInfo) const override;
580 
581   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
582 
583   bool isFPImmLegal(const APFloat &Imm, EVT VT,
584                     bool ForCodeSize) const override;
585 
586   /// Return true if the given shuffle mask can be codegen'd directly, or if it
587   /// should be stack expanded.
588   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
589 
590   /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
591   /// shuffle mask can be codegen'd directly.
592   bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
593 
594   /// Return the ISD::SETCC ValueType.
595   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
596                          EVT VT) const override;
597 
598   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
599 
600   MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
601                                   MachineBasicBlock *BB) const;
602 
603   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
604                                            MachineBasicBlock *BB) const;
605 
606   MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
607                                   MachineInstr &MI,
608                                   MachineBasicBlock *BB) const;
609   MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
610   MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
611                                  MachineInstr &MI, MachineBasicBlock *BB,
612                                  bool HasTile) const;
613   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
614 
615   MachineBasicBlock *
616   EmitInstrWithCustomInserter(MachineInstr &MI,
617                               MachineBasicBlock *MBB) const override;
618 
619   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
620                           MachineFunction &MF,
621                           unsigned Intrinsic) const override;
622 
623   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
624                              EVT NewVT) const override;
625 
626   bool shouldRemoveRedundantExtend(SDValue Op) const override;
627 
628   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
629   bool isTruncateFree(EVT VT1, EVT VT2) const override;
630 
631   bool isProfitableToHoist(Instruction *I) const override;
632 
633   bool isZExtFree(Type *Ty1, Type *Ty2) const override;
634   bool isZExtFree(EVT VT1, EVT VT2) const override;
635   bool isZExtFree(SDValue Val, EVT VT2) const override;
636 
637   bool shouldSinkOperands(Instruction *I,
638                           SmallVectorImpl<Use *> &Ops) const override;
639 
640   bool optimizeExtendOrTruncateConversion(
641       Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;
642 
643   bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;
644 
645   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
646 
647   bool lowerInterleavedLoad(LoadInst *LI,
648                             ArrayRef<ShuffleVectorInst *> Shuffles,
649                             ArrayRef<unsigned> Indices,
650                             unsigned Factor) const override;
651   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
652                              unsigned Factor) const override;
653 
654   bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
655                                         LoadInst *LI) const override;
656 
657   bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
658                                        StoreInst *SI) const override;
659 
660   bool isLegalAddImmediate(int64_t) const override;
661   bool isLegalICmpImmediate(int64_t) const override;
662 
663   bool isMulAddWithConstProfitable(SDValue AddNode,
664                                    SDValue ConstNode) const override;
665 
666   bool shouldConsiderGEPOffsetSplit() const override;
667 
668   EVT getOptimalMemOpType(const MemOp &Op,
669                           const AttributeList &FuncAttributes) const override;
670 
671   LLT getOptimalMemOpLLT(const MemOp &Op,
672                          const AttributeList &FuncAttributes) const override;
673 
674   /// Return true if the addressing mode represented by AM is legal for this
675   /// target, for a load/store of the specified type.
676   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
677                              unsigned AS,
678                              Instruction *I = nullptr) const override;
679 
680   /// Return true if an FMA operation is faster than a pair of fmul and fadd
681   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
682   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
683   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
684                                   EVT VT) const override;
685   bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
686 
687   bool generateFMAsInMachineCombiner(EVT VT,
688                                      CodeGenOpt::Level OptLevel) const override;
689 
690   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
691   ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
692 
693   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
694   bool isDesirableToCommuteWithShift(const SDNode *N,
695                                      CombineLevel Level) const override;
696 
697   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
698   bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
699 
700   /// Return true if it is profitable to fold a pair of shifts into a mask.
701   bool shouldFoldConstantShiftPairToMask(const SDNode *N,
702                                          CombineLevel Level) const override;
703 
704   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
705                                             EVT VT) const override;
706 
707   /// Returns true if it is beneficial to convert a load of a constant
708   /// to just the constant itself.
709   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
710                                          Type *Ty) const override;
711 
712   /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
713   /// with this index.
714   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
715                                unsigned Index) const override;
716 
717   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
718                             bool MathUsed) const override {
719     // Using overflow ops for overflow checks only should be beneficial on
720     // AArch64.
721     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
722   }
723 
724   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
725                         AtomicOrdering Ord) const override;
726   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
727                               AtomicOrdering Ord) const override;
728 
729   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
730 
731   bool isOpSuitableForLDPSTP(const Instruction *I) const;
732   bool isOpSuitableForLSE128(const Instruction *I) const;
733   bool isOpSuitableForRCPC3(const Instruction *I) const;
734   bool shouldInsertFencesForAtomic(const Instruction *I) const override;
735   bool
736   shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
737 
738   TargetLoweringBase::AtomicExpansionKind
739   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
740   TargetLoweringBase::AtomicExpansionKind
741   shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
742   TargetLoweringBase::AtomicExpansionKind
743   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
744 
745   TargetLoweringBase::AtomicExpansionKind
746   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
747 
748   bool useLoadStackGuardNode() const override;
749   TargetLoweringBase::LegalizeTypeAction
750   getPreferredVectorAction(MVT VT) const override;
751 
752   /// If the target has a standard location for the stack protector cookie,
753   /// returns the address of that location. Otherwise, returns nullptr.
754   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
755 
756   void insertSSPDeclarations(Module &M) const override;
757   Value *getSDagStackGuard(const Module &M) const override;
758   Function *getSSPStackGuardCheck(const Module &M) const override;
759 
760   /// If the target has a standard location for the unsafe stack pointer,
761   /// returns the address of that location. Otherwise, returns nullptr.
762   Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
763 
764   /// If a physical register, this returns the register that receives the
765   /// exception address on entry to an EH pad.
766   Register
767   getExceptionPointerRegister(const Constant *PersonalityFn) const override {
768     // FIXME: This is a guess. Has this been defined yet?
769     return AArch64::X0;
770   }
771 
772   /// If a physical register, this returns the register that receives the
773   /// exception typeid on entry to a landing pad.
774   Register
775   getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
776     // FIXME: This is a guess. Has this been defined yet?
777     return AArch64::X1;
778   }
779 
780   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
781 
782   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
783                         const MachineFunction &MF) const override {
784     // Do not merge stores to a 128-bit (FP/vector) value size when the
785     // NoImplicitFloat function attribute is set.
786 
787     bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
788 
789     if (NoFloat)
790       return (MemVT.getSizeInBits() <= 64);
791     return true;
792   }
793 
794   bool isCheapToSpeculateCttz(Type *) const override {
795     return true;
796   }
797 
798   bool isCheapToSpeculateCtlz(Type *) const override {
799     return true;
800   }
801 
802   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
803 
804   bool hasAndNotCompare(SDValue V) const override {
805     // We can use bics for any scalar.
806     return V.getValueType().isScalarInteger();
807   }
808 
809   bool hasAndNot(SDValue Y) const override {
810     EVT VT = Y.getValueType();
811 
812     if (!VT.isVector())
813       return hasAndNotCompare(Y);
814 
815     TypeSize TS = VT.getSizeInBits();
816     // TODO: We should be able to use bic/bif too for SVE.
817     return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
818   }
819 
820   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
821       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
822       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
823       SelectionDAG &DAG) const override;
824 
825   ShiftLegalizationStrategy
826   preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
827                                      unsigned ExpansionFactor) const override;
828 
829   bool shouldTransformSignedTruncationCheck(EVT XVT,
830                                             unsigned KeptBits) const override {
831     // For vectors, we don't have a preference.
832     if (XVT.isVector())
833       return false;
834 
835     auto VTIsOk = [](EVT VT) -> bool {
836       return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
837              VT == MVT::i64;
838     };
839 
840     // We are OK with KeptBitsVT being byte/word/dword, which is what SXT
841     // supports. XVT will be larger than KeptBitsVT.
842     MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
843     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
844   }
845 
846   bool preferIncOfAddToSubOfNot(EVT VT) const override;
847 
848   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
849 
850   bool isComplexDeinterleavingSupported() const override;
851   bool isComplexDeinterleavingOperationSupported(
852       ComplexDeinterleavingOperation Operation, Type *Ty) const override;
853 
854   Value *createComplexDeinterleavingIR(
855       IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
856       ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
857       Value *Accumulator = nullptr) const override;
858 
859   bool supportSplitCSR(MachineFunction *MF) const override {
860     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
861            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
862   }
863   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
864   void insertCopiesSplitCSR(
865       MachineBasicBlock *Entry,
866       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
867 
868   bool supportSwiftError() const override {
869     return true;
870   }
871 
872   bool supportKCFIBundles() const override { return true; }
873 
874   MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
875                               MachineBasicBlock::instr_iterator &MBBI,
876                               const TargetInstrInfo *TII) const override;
877 
878   /// Enable aggressive FMA fusion on targets that want it.
879   bool enableAggressiveFMAFusion(EVT VT) const override;
880 
881   /// Returns the size of the platform's va_list object.
882   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
883 
884   /// Returns true if \p VecTy is a legal interleaved access type. This
885   /// function checks the vector element type and the overall width of the
886   /// vector.
887   bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
888                                     bool &UseScalable) const;
889 
890   /// Returns the number of interleaved accesses that will be generated when
891   /// lowering accesses of the given type.
892   unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
893                                      bool UseScalable) const;
894 
895   MachineMemOperand::Flags getTargetMMOFlags(
896     const Instruction &I) const override;
897 
898   bool functionArgumentNeedsConsecutiveRegisters(
899       Type *Ty, CallingConv::ID CallConv, bool isVarArg,
900       const DataLayout &DL) const override;
901 
902   /// Used for exception handling on Win64.
903   bool needsFixedCatchObjects() const override;
904 
905   bool fallBackToDAGISel(const Instruction &Inst) const override;
906 
907   /// SVE code generation for fixed length vectors does not custom lower
908   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
909   /// merge. However, merging them creates a BUILD_VECTOR that is just as
910   /// illegal as the original, thus leading to an infinite legalisation loop.
911   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
912   /// vector types this override can be removed.
913   bool mergeStoresAfterLegalization(EVT VT) const override;
914 
915   // If the platform/function should have a redzone, return the size in bytes.
916   unsigned getRedZoneSize(const Function &F) const {
917     if (F.hasFnAttribute(Attribute::NoRedZone))
918       return 0;
919     return 128;
920   }
921 
922   bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
923   EVT getPromotedVTForPredicate(EVT VT) const;
924 
925   EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
926                              bool AllowUnknown = false) const override;
927 
928   bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
929 
930   /// If a change in streaming mode is required on entry to/return from a
931   /// function call, it emits and returns the corresponding SMSTART or SMSTOP node.
932   /// \p Entry tells whether this is before/after the Call, which is necessary
933   /// because PSTATE.SM is only queried once.
934   SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
935                               SDValue Chain, SDValue InGlue,
936                               SDValue PStateSM, bool Entry) const;
937 
938   bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
939 
940   // Normally SVE is only used for byte size vectors that do not fit within a
941   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
942   // used for 64-bit and 128-bit vectors as well.
943   bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
944 
945 private:
946   /// Keep a pointer to the AArch64Subtarget around so that we can
947   /// make the right decision when generating code for different targets.
948   const AArch64Subtarget *Subtarget;
949 
950   bool isExtFreeImpl(const Instruction *Ext) const override;
951 
952   void addTypeForNEON(MVT VT);
953   void addTypeForFixedLengthSVE(MVT VT, bool StreamingSVE);
954   void addDRTypeForNEON(MVT VT);
955   void addQRTypeForNEON(MVT VT);
956 
957   unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
958                                   SelectionDAG &DAG) const;
959 
960   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
961                                bool isVarArg,
962                                const SmallVectorImpl<ISD::InputArg> &Ins,
963                                const SDLoc &DL, SelectionDAG &DAG,
964                                SmallVectorImpl<SDValue> &InVals) const override;
965 
966   SDValue LowerCall(CallLoweringInfo & /*CLI*/,
967                     SmallVectorImpl<SDValue> &InVals) const override;
968 
969   SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
970                           CallingConv::ID CallConv, bool isVarArg,
971                           const SmallVectorImpl<CCValAssign> &RVLocs,
972                           const SDLoc &DL, SelectionDAG &DAG,
973                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
974                           SDValue ThisVal) const;
975 
976   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
977   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
978   SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
979   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
980 
981   SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
982   SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
983 
984   SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
985 
986   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
987   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
988   SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
989 
990   bool
991   isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
992 
993   /// Finds the incoming stack arguments which overlap the given fixed stack
994   /// object and incorporates their load into the current chain. This prevents
995   /// an upcoming store from clobbering the stack argument before it's used.
996   SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
997                               MachineFrameInfo &MFI, int ClobberedFI) const;
998 
999   bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
1000 
1001   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
1002                            SDValue &Chain) const;
1003 
1004   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1005                       bool isVarArg,
1006                       const SmallVectorImpl<ISD::OutputArg> &Outs,
1007                       LLVMContext &Context) const override;
1008 
1009   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1010                       const SmallVectorImpl<ISD::OutputArg> &Outs,
1011                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1012                       SelectionDAG &DAG) const override;
1013 
1014   SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
1015                         unsigned Flag) const;
1016   SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
1017                         unsigned Flag) const;
1018   SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
1019                         unsigned Flag) const;
1020   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
1021                         unsigned Flag) const;
1022   template <class NodeTy>
1023   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1024   template <class NodeTy>
1025   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1026   template <class NodeTy>
1027   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1028   template <class NodeTy>
1029   SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1030   SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1031   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1032   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1033   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1034   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1035   SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
1036                                const SDLoc &DL, SelectionDAG &DAG) const;
1037   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
1038                                  SelectionDAG &DAG) const;
1039   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1040   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1041   SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1042   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
1043   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1044   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
1045   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
1046                          SDValue TVal, SDValue FVal, const SDLoc &dl,
1047                          SelectionDAG &DAG) const;
1048   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1049   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
1050   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1051   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1052   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
1053   SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
1054   SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
1055   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1056   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
1057   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1058   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1059   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
1060   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1061   SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1062   SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1063   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1064   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1065   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1066   SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
1067   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
1068   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1069   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
1070   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
1071                               unsigned NewOp) const;
1072   SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
1073   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
1074   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1075   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1076   SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
1077   SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
1078   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
1079   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
1080   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
1081   SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
1082   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
1083   SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
1084   SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
1085   SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
1086   SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
1087   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
1088   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1089   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1090   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1091   SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1092   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1093   SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1094   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1095   SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1096   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
1097   SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
1098   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
1099   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
1100   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
1101   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
1102   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1103   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
1104   SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
1105   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
1106   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1107   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
1108                                          SDValue &Size,
1109                                          SelectionDAG &DAG) const;
1110   SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;
1111 
1112   SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
1113                                                SelectionDAG &DAG) const;
1114   SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
1115                                                SelectionDAG &DAG) const;
1116   SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1117   SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1118   SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
1119   SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
1120   SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
1121                               SelectionDAG &DAG) const;
1122   SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
1123   SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
1124   SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
1125   SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
1126                                             SelectionDAG &DAG) const;
1127   SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
1128                                               SelectionDAG &DAG) const;
1129   SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1130   SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
1131   SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
1132   SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
1133                                              SelectionDAG &DAG) const;
1134   SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
1135   SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
1136   SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
1137   SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
1138   SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
1139                                               SelectionDAG &DAG) const;
1140 
1141   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1142                         SmallVectorImpl<SDNode *> &Created) const override;
1143   SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1144                         SmallVectorImpl<SDNode *> &Created) const override;
1145   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1146                           int &ExtraSteps, bool &UseOneConst,
1147                           bool Reciprocal) const override;
1148   SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1149                            int &ExtraSteps) const override;
1150   SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
1151                            const DenormalMode &Mode) const override;
1152   SDValue getSqrtResultForDenormInput(SDValue Operand,
1153                                       SelectionDAG &DAG) const override;
1154   unsigned combineRepeatedFPDivisors() const override;
1155 
1156   ConstraintType getConstraintType(StringRef Constraint) const override;
1157   Register getRegisterByName(const char* RegName, LLT VT,
1158                              const MachineFunction &MF) const override;
1159 
1160   /// Examine constraint string and operand type and determine a weight value.
1161   /// The operand object must already have been set up with the operand type.
1162   ConstraintWeight
1163   getSingleConstraintMatchWeight(AsmOperandInfo &info,
1164                                  const char *constraint) const override;
1165 
1166   std::pair<unsigned, const TargetRegisterClass *>
1167   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1168                                StringRef Constraint, MVT VT) const override;
1169 
1170   const char *LowerXConstraint(EVT ConstraintVT) const override;
1171 
1172   void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1173                                     std::vector<SDValue> &Ops,
1174                                     SelectionDAG &DAG) const override;
1175 
1176   unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1177     if (ConstraintCode == "Q")
1178       return InlineAsm::Constraint_Q;
1179     // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
1180     //        followed by llvm_unreachable so we'll leave them unimplemented in
1181     //        the backend for now.
1182     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1183   }
1184 
1185   /// Handle Lowering flag assembly outputs.
1186   SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1187                                       const SDLoc &DL,
1188                                       const AsmOperandInfo &Constraint,
1189                                       SelectionDAG &DAG) const override;
1190 
1191   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
1192   bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
1193   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1194   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1195   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1196   bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1197                               SDValue &Offset, SelectionDAG &DAG) const;
1198   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
1199                                  ISD::MemIndexedMode &AM,
1200                                  SelectionDAG &DAG) const override;
1201   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1202                                   SDValue &Offset, ISD::MemIndexedMode &AM,
1203                                   SelectionDAG &DAG) const override;
1204 
1205   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1206                           SelectionDAG &DAG) const override;
1207   void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1208                              SelectionDAG &DAG) const;
1209   void ReplaceExtractSubVectorResults(SDNode *N,
1210                                       SmallVectorImpl<SDValue> &Results,
1211                                       SelectionDAG &DAG) const;
1212 
1213   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
1214 
1215   void finalizeLowering(MachineFunction &MF) const override;
1216 
1217   bool shouldLocalize(const MachineInstr &MI,
1218                       const TargetTransformInfo *TTI) const override;
1219 
1220   bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1221                                          const APInt &OriginalDemandedBits,
1222                                          const APInt &OriginalDemandedElts,
1223                                          KnownBits &Known,
1224                                          TargetLoweringOpt &TLO,
1225                                          unsigned Depth) const override;
1226 
1227   bool isTargetCanonicalConstantNode(SDValue Op) const override;
1228 
1229   // With the exception of data-predicate transitions, no instructions are
1230   // required to cast between legal scalable vector types. However:
1231   //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
1232   //     is not universally usable.
1233   //  2. Most unpacked integer types are not legal and thus integer extends
1234   //     cannot be used to convert between unpacked and packed types.
1235   // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
1236   // to transition between unpacked and packed types of the same element type,
1237   // with BITCAST used otherwise.
1238   // This function does not handle predicate bitcasts.
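  // As an illustration of the multiphase process (the concrete types here are
  // illustrative assumptions, not requirements of this interface): casting
  // nxv2i32 to nxv2i64 could use REINTERPRET_CAST between the unpacked
  // nxv2i32 and the packed nxv4i32 (same element type), with BITCAST then
  // handling nxv4i32 <-> nxv2i64 (same bit length).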
1239   SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
1240 
1241   // Returns the runtime value for PSTATE.SM. When the function is streaming-
1242   // compatible, this generates a call to __arm_sme_state.
1243   SDValue getPStateSM(SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs,
1244                       SDLoc DL, EVT VT) const;
1245 
1246   bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
1247                                               LLT Ty2) const override;
1248 
1249   bool preferScalarizeSplat(SDNode *N) const override;
1250 };
1251 
1252 namespace AArch64 {
1253 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1254                          const TargetLibraryInfo *libInfo);
1255 } // end namespace AArch64
1256 
1257 } // end namespace llvm
1258 
1259 #endif
1260