xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16 
17 #include "AArch64.h"
18 #include "Utils/AArch64SMEAttributes.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/SelectionDAG.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/IR/CallingConv.h"
24 #include "llvm/IR/Instruction.h"
25 
26 namespace llvm {
27 
28 namespace AArch64ISD {
29 
30 // For predicated nodes where the result is a vector, the operation is
31 // controlled by a governing predicate and the inactive lanes are explicitly
32 // defined with a value, please stick to the following naming convention:
33 //
34 //    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
35 //                        to source operand OP<n>.
36 //
37 //    _MERGE_ZERO         The result value is a vector with inactive lanes
38 //                        actively zeroed.
39 //
40 //    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
41 //                        to the last source operand whose only purpose is being
42 //                        a passthru value.
43 //
44 // For other cases where no explicit action is needed to set the inactive lanes,
45 // or when the result is not a vector and it is needed or helpful to
46 // distinguish a node from similar unpredicated nodes, use:
47 //
48 //    _PRED
49 //
50 enum NodeType : unsigned {
51   FIRST_NUMBER = ISD::BUILTIN_OP_END, // Numbering starts at ISD::BUILTIN_OP_END.
52   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
53   CALL,         // Function call.
54 
55   // Pseudo for an OBJC call that gets emitted together with a special `mov
56   // x29, x29` marker instruction.
57   CALL_RVMARKER,
58 
59   CALL_BTI, // Function call followed by a BTI instruction.
60 
61   // Function call, authenticating the callee value first:
62   // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands.
63   AUTH_CALL,
64   // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc,
65   // operands.
66   AUTH_TC_RETURN,
67 
68   // Authenticated variant of CALL_RVMARKER.
69   AUTH_CALL_RVMARKER,
70 
71   COALESCER_BARRIER,
72 
73   VG_SAVE,
74   VG_RESTORE,
75 
76   SMSTART,
77   SMSTOP,
78   RESTORE_ZA,
79   RESTORE_ZT,
80   SAVE_ZT,
81 
82   // A call with the callee in x16, i.e. "blr x16".
83   CALL_ARM64EC_TO_X64,
84 
85   // Produces the full sequence of instructions for getting the thread pointer
86   // offset of a variable into X0, using the TLSDesc model.
87   TLSDESC_CALLSEQ,
88   ADRP,     // Page address of a TargetGlobalAddress operand.
89   ADR,      // ADR
90   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
91   LOADgot,  // Load from automatically generated descriptor (e.g. Global
92             // Offset Table, TLS record).
93   RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
94   BRCOND,   // Conditional branch instruction; "b.cond".
95   CSEL,  // Conditional select.
96   CSINV, // Conditional select invert.
97   CSNEG, // Conditional select negate.
98   CSINC, // Conditional select increment.
99 
100   // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
101   // ELF.
102   THREAD_POINTER,
103   ADC,
104   SBC, // adc, sbc instructions
105 
106   // To avoid stack clash, allocation is performed by block and each block is
107   // probed.
108   PROBED_ALLOCA,
109 
110   // Predicated instructions where inactive lanes produce undefined results.
111   ABDS_PRED,
112   ABDU_PRED,
113   FADD_PRED,
114   FDIV_PRED,
115   FMA_PRED,
116   FMAX_PRED,
117   FMAXNM_PRED,
118   FMIN_PRED,
119   FMINNM_PRED,
120   FMUL_PRED,
121   FSUB_PRED,
122   HADDS_PRED,
123   HADDU_PRED,
124   MUL_PRED,
125   MULHS_PRED,
126   MULHU_PRED,
127   RHADDS_PRED,
128   RHADDU_PRED,
129   SDIV_PRED,
130   SHL_PRED,
131   SMAX_PRED,
132   SMIN_PRED,
133   SRA_PRED,
134   SRL_PRED,
135   UDIV_PRED,
136   UMAX_PRED,
137   UMIN_PRED,
138 
139   // Unpredicated vector instructions
140   BIC,
141 
142   SRAD_MERGE_OP1,
143 
144   // Predicated instructions with the result of inactive lanes provided by the
145   // last operand.
146   FABS_MERGE_PASSTHRU,
147   FCEIL_MERGE_PASSTHRU,
148   FFLOOR_MERGE_PASSTHRU,
149   FNEARBYINT_MERGE_PASSTHRU,
150   FNEG_MERGE_PASSTHRU,
151   FRECPX_MERGE_PASSTHRU,
152   FRINT_MERGE_PASSTHRU,
153   FROUND_MERGE_PASSTHRU,
154   FROUNDEVEN_MERGE_PASSTHRU,
155   FSQRT_MERGE_PASSTHRU,
156   FTRUNC_MERGE_PASSTHRU,
157   FP_ROUND_MERGE_PASSTHRU,
158   FP_EXTEND_MERGE_PASSTHRU,
159   UINT_TO_FP_MERGE_PASSTHRU,
160   SINT_TO_FP_MERGE_PASSTHRU,
161   FCVTZU_MERGE_PASSTHRU,
162   FCVTZS_MERGE_PASSTHRU,
163   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
164   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
165   ABS_MERGE_PASSTHRU,
166   NEG_MERGE_PASSTHRU,
167 
168   SETCC_MERGE_ZERO,
169 
170   // Arithmetic instructions which write flags.
171   ADDS,
172   SUBS,
173   ADCS,
174   SBCS,
175   ANDS,
176 
177   // Conditional compares. Operands: left,right,falsecc,cc,flags
178   CCMP,
179   CCMN,
180   FCCMP,
181 
182   // Floating point comparison
183   FCMP,
184 
185   // Scalar-to-vector duplication
186   DUP,
187   DUPLANE8,
188   DUPLANE16,
189   DUPLANE32,
190   DUPLANE64,
191   DUPLANE128,
192 
193   // Vector immediate moves
194   MOVI,
195   MOVIshift,
196   MOVIedit,
197   MOVImsl,
198   FMOV,
199   MVNIshift,
200   MVNImsl,
201 
202   // Vector immediate ops
203   BICi,
204   ORRi,
205 
206   // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
207   // element must be identical.
208   BSP,
209 
210   // Vector shuffles
211   ZIP1,
212   ZIP2,
213   UZP1,
214   UZP2,
215   TRN1,
216   TRN2,
217   REV16,
218   REV32,
219   REV64,
220   EXT,
221   SPLICE,
222 
223   // Vector shift by scalar
224   VSHL,
225   VLSHR,
226   VASHR,
227 
228   // Vector shift by scalar (again)
229   SQSHL_I,
230   UQSHL_I,
231   SQSHLU_I,
232   SRSHR_I,
233   URSHR_I,
234   URSHR_I_PRED,
235 
236   // Vector narrowing shift by immediate (bottom)
237   RSHRNB_I,
238 
239   // Vector shift by constant and insert
240   VSLI,
241   VSRI,
242 
243   // Vector comparisons
244   CMEQ,
245   CMGE,
246   CMGT,
247   CMHI,
248   CMHS,
249   FCMEQ,
250   FCMGE,
251   FCMGT,
252 
253   // Vector zero comparisons
254   CMEQz,
255   CMGEz,
256   CMGTz,
257   CMLEz,
258   CMLTz,
259   FCMEQz,
260   FCMGEz,
261   FCMGTz,
262   FCMLEz,
263   FCMLTz,
264 
265   // Round wide FP to narrow FP with inexact results to odd.
266   FCVTXN,
267 
268   // Vector across-lanes addition
269   // Only the lower result lane is defined.
270   SADDV,
271   UADDV,
272 
273   // Unsigned/signed sum long across vector
274   UADDLV,
275   SADDLV,
276 
277   // Add Pairwise of two vectors
278   ADDP,
279   // Add Long Pairwise
280   SADDLP,
281   UADDLP,
282 
283   // udot/sdot instructions
284   UDOT,
285   SDOT,
286 
287   // Vector across-lanes min/max
288   // Only the lower result lane is defined.
289   SMINV,
290   UMINV,
291   SMAXV,
292   UMAXV,
293 
294   SADDV_PRED,
295   UADDV_PRED,
296   SMAXV_PRED,
297   UMAXV_PRED,
298   SMINV_PRED,
299   UMINV_PRED,
300   ORV_PRED,
301   EORV_PRED,
302   ANDV_PRED,
303 
304   // Compare-and-branch
305   CBZ,
306   CBNZ,
307   TBZ,
308   TBNZ,
309 
310   // Tail calls
311   TC_RETURN,
312 
313   // Custom prefetch handling
314   PREFETCH,
315 
316   // {s|u}int to FP within a FP register.
317   SITOF,
318   UITOF,
319 
320   /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
321   /// world w.r.t vectors; which causes additional REV instructions to be
322   /// generated to compensate for the byte-swapping. But sometimes we do
323   /// need to re-interpret the data in SIMD vector registers in big-endian
324   /// mode without emitting such REV instructions.
325   NVCAST,
326 
327   MRS, // MRS, also sets the flags via a glue.
328 
329   SMULL,
330   UMULL,
331 
332   PMULL,
333 
334   // Reciprocal estimates and steps.
335   FRECPE,
336   FRECPS,
337   FRSQRTE,
338   FRSQRTS,
339 
340   SUNPKHI,
341   SUNPKLO,
342   UUNPKHI,
343   UUNPKLO,
344 
345   CLASTA_N,
346   CLASTB_N,
347   LASTA,
348   LASTB,
349   TBL,
350 
351   // Floating-point reductions.
352   FADDA_PRED,
353   FADDV_PRED,
354   FMAXV_PRED,
355   FMAXNMV_PRED,
356   FMINV_PRED,
357   FMINNMV_PRED,
358 
359   INSR,
360   PTEST,
361   PTEST_ANY,
362   PTRUE,
363 
364   CTTZ_ELTS,
365 
366   BITREVERSE_MERGE_PASSTHRU,
367   BSWAP_MERGE_PASSTHRU,
368   REVH_MERGE_PASSTHRU,
369   REVW_MERGE_PASSTHRU,
370   CTLZ_MERGE_PASSTHRU,
371   CTPOP_MERGE_PASSTHRU,
372   DUP_MERGE_PASSTHRU,
373   INDEX_VECTOR,
374 
375   // Cast between vectors of the same element type that differ in length.
376   REINTERPRET_CAST,
377 
378   // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
379   LS64_BUILD,
380   LS64_EXTRACT,
381 
382   LD1_MERGE_ZERO,
383   LD1S_MERGE_ZERO,
384   LDNF1_MERGE_ZERO,
385   LDNF1S_MERGE_ZERO,
386   LDFF1_MERGE_ZERO,
387   LDFF1S_MERGE_ZERO,
388   LD1RQ_MERGE_ZERO,
389   LD1RO_MERGE_ZERO,
390 
391   // Structured loads.
392   SVE_LD2_MERGE_ZERO,
393   SVE_LD3_MERGE_ZERO,
394   SVE_LD4_MERGE_ZERO,
395 
396   // Unsigned gather loads.
397   GLD1_MERGE_ZERO,
398   GLD1_SCALED_MERGE_ZERO,
399   GLD1_UXTW_MERGE_ZERO,
400   GLD1_SXTW_MERGE_ZERO,
401   GLD1_UXTW_SCALED_MERGE_ZERO,
402   GLD1_SXTW_SCALED_MERGE_ZERO,
403   GLD1_IMM_MERGE_ZERO,
404   GLD1Q_MERGE_ZERO,
405   GLD1Q_INDEX_MERGE_ZERO,
406 
407   // Signed gather loads
408   GLD1S_MERGE_ZERO,
409   GLD1S_SCALED_MERGE_ZERO,
410   GLD1S_UXTW_MERGE_ZERO,
411   GLD1S_SXTW_MERGE_ZERO,
412   GLD1S_UXTW_SCALED_MERGE_ZERO,
413   GLD1S_SXTW_SCALED_MERGE_ZERO,
414   GLD1S_IMM_MERGE_ZERO,
415 
416   // Unsigned first-faulting gather loads.
417   GLDFF1_MERGE_ZERO,
418   GLDFF1_SCALED_MERGE_ZERO,
419   GLDFF1_UXTW_MERGE_ZERO,
420   GLDFF1_SXTW_MERGE_ZERO,
421   GLDFF1_UXTW_SCALED_MERGE_ZERO,
422   GLDFF1_SXTW_SCALED_MERGE_ZERO,
423   GLDFF1_IMM_MERGE_ZERO,
424 
425   // Signed first-faulting gather loads.
426   GLDFF1S_MERGE_ZERO,
427   GLDFF1S_SCALED_MERGE_ZERO,
428   GLDFF1S_UXTW_MERGE_ZERO,
429   GLDFF1S_SXTW_MERGE_ZERO,
430   GLDFF1S_UXTW_SCALED_MERGE_ZERO,
431   GLDFF1S_SXTW_SCALED_MERGE_ZERO,
432   GLDFF1S_IMM_MERGE_ZERO,
433 
434   // Non-temporal gather loads
435   GLDNT1_MERGE_ZERO,
436   GLDNT1_INDEX_MERGE_ZERO,
437   GLDNT1S_MERGE_ZERO,
438 
439   // Contiguous masked store.
440   ST1_PRED,
441 
442   // Scatter store
443   SST1_PRED,
444   SST1_SCALED_PRED,
445   SST1_UXTW_PRED,
446   SST1_SXTW_PRED,
447   SST1_UXTW_SCALED_PRED,
448   SST1_SXTW_SCALED_PRED,
449   SST1_IMM_PRED,
450   SST1Q_PRED,
451   SST1Q_INDEX_PRED,
452 
453   // Non-temporal scatter store
454   SSTNT1_PRED,
455   SSTNT1_INDEX_PRED,
456 
457   // SME
458   RDSVL,
459   REVD_MERGE_PASSTHRU,
460   ALLOCATE_ZA_BUFFER,
461   INIT_TPIDR2OBJ,
462 
463   // Asserts that a function argument (i32) is zero-extended to i8 by
464   // the caller
465   ASSERT_ZEXT_BOOL,
466 
467   // 128-bit system register accesses
468   // lo64, hi64, chain = MRRS(chain, sysregname)
469   MRRS,
470   // chain = MSRR(chain, sysregname, lo64, hi64)
471   MSRR,
472 
473   // Strict (exception-raising) floating point comparison
474   STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
475   STRICT_FCMPE,
476 
477   // SME ZA loads and stores
478   SME_ZA_LDR,
479   SME_ZA_STR,
480 
481   // NEON Load/Store with post-increment base updates
482   LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
483   LD3post,
484   LD4post,
485   ST2post,
486   ST3post,
487   ST4post,
488   LD1x2post,
489   LD1x3post,
490   LD1x4post,
491   ST1x2post,
492   ST1x3post,
493   ST1x4post,
494   LD1DUPpost,
495   LD2DUPpost,
496   LD3DUPpost,
497   LD4DUPpost,
498   LD1LANEpost,
499   LD2LANEpost,
500   LD3LANEpost,
501   LD4LANEpost,
502   ST2LANEpost,
503   ST3LANEpost,
504   ST4LANEpost,
505 
506   STG,
507   STZG,
508   ST2G,
509   STZ2G,
510 
511   LDP,
512   LDIAPP,
513   LDNP,
514   STP,
515   STILP,
516   STNP,
517 
518   // Memory Operations
519   MOPS_MEMSET,
520   MOPS_MEMSET_TAGGING,
521   MOPS_MEMCOPY,
522   MOPS_MEMMOVE,
523 };
524 
525 } // end namespace AArch64ISD
526 
527 namespace AArch64 {
528 /// Possible values of current rounding mode, which is specified in bits
529 /// 23:22 of FPCR.
530 enum Rounding {
531   RN = 0,    // Round to Nearest
532   RP = 1,    // Round towards Plus infinity
533   RM = 2,    // Round towards Minus infinity
534   RZ = 3,    // Round towards Zero
535   rmMask = 3 // Two-bit mask selecting rounding mode (same value as RZ)
536 };
537 
538 // Bit position of rounding mode bits in FPCR (low bit of the 23:22 field).
539 const unsigned RoundingBitsPos = 22;
540 
541 // Reserved bits should be preserved when modifying FPCR.
542 const uint64_t ReservedFPControlBits = 0xfffffffff80040f8;
543 
544 // Registers used to pass function arguments (declared here, defined elsewhere).
545 ArrayRef<MCPhysReg> getGPRArgRegs();
546 ArrayRef<MCPhysReg> getFPRArgRegs();
547 
548 /// Maximum allowed number of unprobed bytes above SP at an ABI
549 /// boundary.
550 const unsigned StackProbeMaxUnprobedStack = 1024;
551 
552 /// Maximum number of iterations to unroll for a constant size probing loop.
553 const unsigned StackProbeMaxLoopUnroll = 4;
554 
555 } // namespace AArch64
556 
557 class AArch64Subtarget;
558 
559 class AArch64TargetLowering : public TargetLowering {
560 public:
561   explicit AArch64TargetLowering(const TargetMachine &TM,
562                                  const AArch64Subtarget &STI);
563 
564   /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
565   /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
566   bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
567                            SDValue N1) const override;
568 
569   /// Selects the correct CCAssignFn for a given CallingConvention value.
570   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
571 
572   /// Selects the correct CCAssignFn for a given CallingConvention value.
573   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
574 
575   /// Determine which bits of Op are known to be either zero or one and return
576   /// them in Known.
577   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
578                                      const APInt &DemandedElts,
579                                      const SelectionDAG &DAG,
580                                      unsigned Depth = 0) const override;
581 
582   unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
583                                            const APInt &DemandedElts,
584                                            const SelectionDAG &DAG,
585                                            unsigned Depth) const override;
586 
587   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
588     // Pointers are always modelled as 64-bit integers in the *DAG*, whatever
589     // DL and AS are (so ILP32 included). They are truncated and extended when
590     // they move to and from memory; keeping the DAG at 64 bits makes
591     // AArch64's addressing modes much easier to use.
592     return MVT::i64;
593   }
594 
595   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
596                                     const APInt &DemandedElts,
597                                     TargetLoweringOpt &TLO) const override;
598 
599   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
600 
601   /// Returns true if the target allows unaligned memory accesses of the
602   /// specified type.
603   bool allowsMisalignedMemoryAccesses(
604       EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
605       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
606       unsigned *Fast = nullptr) const override;
607   /// LLT variant.
608   bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
609                                       Align Alignment,
610                                       MachineMemOperand::Flags Flags,
611                                       unsigned *Fast = nullptr) const override;
612 
613   /// Provide custom lowering hooks for some operations.
614   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
615 
616   const char *getTargetNodeName(unsigned Opcode) const override;
617 
618   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
619 
620   /// This method returns a target specific FastISel object, or null if the
621   /// target does not support "fast" ISel.
622   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
623                            const TargetLibraryInfo *libInfo) const override;
624 
625   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
626 
627   bool isFPImmLegal(const APFloat &Imm, EVT VT,
628                     bool ForCodeSize) const override;
629 
630   /// Return true if the given shuffle mask can be codegen'd directly, or if it
631   /// should be stack expanded.
632   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
633 
634   /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
635   /// shuffle mask can be codegen'd directly.
636   bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
637 
638   /// Return the ISD::SETCC ValueType.
639   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
640                          EVT VT) const override;
641 
642   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
643 
644   MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
645                                   MachineBasicBlock *BB) const;
646 
647   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
648                                            MachineBasicBlock *BB) const;
649 
650   MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
651                                             MachineBasicBlock *MBB) const;
652 
653   MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
654                                   MachineInstr &MI,
655                                   MachineBasicBlock *BB) const;
656   MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
657   MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
658                                  MachineInstr &MI, MachineBasicBlock *BB) const;
659   MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
660                                  unsigned Opcode, bool Op0IsDef) const;
661   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
662   MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI,
663                                           MachineBasicBlock *BB) const;
664   MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI,
665                                           MachineBasicBlock *BB) const;
666 
667   MachineBasicBlock *
668   EmitInstrWithCustomInserter(MachineInstr &MI,
669                               MachineBasicBlock *MBB) const override;
670 
671   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
672                           MachineFunction &MF,
673                           unsigned Intrinsic) const override;
674 
675   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
676                              EVT NewVT) const override;
677 
678   bool shouldRemoveRedundantExtend(SDValue Op) const override;
679 
680   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
681   bool isTruncateFree(EVT VT1, EVT VT2) const override;
682 
683   bool isProfitableToHoist(Instruction *I) const override;
684 
685   bool isZExtFree(Type *Ty1, Type *Ty2) const override;
686   bool isZExtFree(EVT VT1, EVT VT2) const override;
687   bool isZExtFree(SDValue Val, EVT VT2) const override;
688 
689   bool shouldSinkOperands(Instruction *I,
690                           SmallVectorImpl<Use *> &Ops) const override;
691 
692   bool optimizeExtendOrTruncateConversion(
693       Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;
694 
695   bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;
696 
697   unsigned getMaxSupportedInterleaveFactor() const override { return 4; } // Constant: 4.
698 
699   bool lowerInterleavedLoad(LoadInst *LI,
700                             ArrayRef<ShuffleVectorInst *> Shuffles,
701                             ArrayRef<unsigned> Indices,
702                             unsigned Factor) const override;
703   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
704                              unsigned Factor) const override;
705 
706   bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
707                                         LoadInst *LI) const override;
708 
709   bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
710                                        StoreInst *SI) const override;
711 
712   bool isLegalAddImmediate(int64_t) const override;
713   bool isLegalAddScalableImmediate(int64_t) const override;
714   bool isLegalICmpImmediate(int64_t) const override;
715 
716   bool isMulAddWithConstProfitable(SDValue AddNode,
717                                    SDValue ConstNode) const override;
718 
719   bool shouldConsiderGEPOffsetSplit() const override;
720 
721   EVT getOptimalMemOpType(const MemOp &Op,
722                           const AttributeList &FuncAttributes) const override;
723 
724   LLT getOptimalMemOpLLT(const MemOp &Op,
725                          const AttributeList &FuncAttributes) const override;
726 
727   /// Return true if the addressing mode represented by AM is legal for this
728   /// target, for a load/store of the specified type.
729   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
730                              unsigned AS,
731                              Instruction *I = nullptr) const override;
732 
733   int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
734                                          int64_t MaxOffset) const override;
735 
736   /// Return true if an FMA operation is faster than a pair of fmul and fadd
737   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
738   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
739   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
740                                   EVT VT) const override;
741   bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
742 
743   bool generateFMAsInMachineCombiner(EVT VT,
744                                      CodeGenOptLevel OptLevel) const override;
745 
746   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
747   ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
748 
749   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
750   bool isDesirableToCommuteWithShift(const SDNode *N,
751                                      CombineLevel Level) const override;
752 
753   bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
754     return false; // Always declined; MI is not inspected.
755   }
756 
757   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
758   bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
759 
760   /// Return true if it is profitable to fold a pair of shifts into a mask.
761   bool shouldFoldConstantShiftPairToMask(const SDNode *N,
762                                          CombineLevel Level) const override;
763 
764   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
765                                             EVT VT) const override;
766 
767   /// Returns true if it is beneficial to convert a load of a constant
768   /// to just the constant itself.
769   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
770                                          Type *Ty) const override;
771 
772   /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
773   /// with this index.
774   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
775                                unsigned Index) const override;
776 
777   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
778                             bool MathUsed) const override {
779     // Using overflow ops for overflow checks only should be beneficial on
780     // AArch64, so the base query is made with MathUsed forced to true.
781     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
782   }
783 
784   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
785                         AtomicOrdering Ord) const override;
786   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
787                               AtomicOrdering Ord) const override;
788 
789   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
790 
791   bool isOpSuitableForLDPSTP(const Instruction *I) const;
792   bool isOpSuitableForLSE128(const Instruction *I) const;
793   bool isOpSuitableForRCPC3(const Instruction *I) const;
794   bool shouldInsertFencesForAtomic(const Instruction *I) const override;
795   bool
796   shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
797 
798   TargetLoweringBase::AtomicExpansionKind
799   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
800   TargetLoweringBase::AtomicExpansionKind
801   shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
802   TargetLoweringBase::AtomicExpansionKind
803   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
804 
805   TargetLoweringBase::AtomicExpansionKind
806   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
807 
808   bool useLoadStackGuardNode() const override;
809   TargetLoweringBase::LegalizeTypeAction
810   getPreferredVectorAction(MVT VT) const override;
811 
812   /// If the target has a standard location for the stack protector cookie,
813   /// returns the address of that location. Otherwise, returns nullptr.
814   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
815 
816   void insertSSPDeclarations(Module &M) const override;
817   Value *getSDagStackGuard(const Module &M) const override;
818   Function *getSSPStackGuardCheck(const Module &M) const override;
819 
820   /// If the target has a standard location for the unsafe stack pointer,
821   /// returns the address of that location. Otherwise, returns nullptr.
822   Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
823 
824   /// If a physical register, this returns the register that receives the
825   /// exception address on entry to an EH pad.
826   Register
827   getExceptionPointerRegister(const Constant *PersonalityFn) const override {
828     // FIXME: This is a guess. Has this been defined yet?
829     return AArch64::X0; // Same register whatever PersonalityFn is.
830   }
831 
832   /// If a physical register, this returns the register that receives the
833   /// exception typeid on entry to a landing pad.
834   Register
835   getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
836     // FIXME: This is a guess. Has this been defined yet?
837     return AArch64::X1; // Same register whatever PersonalityFn is.
838   }
839 
840   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
841 
842   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
843                         const MachineFunction &MF) const override {
844     // Functions without the noimplicitfloat attribute accept any merged
845     // store type.
846     if (!MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat))
847       return true;
848 
849     // Under noimplicitfloat, cap merged stores at 64 bits so they never
850     // reach the float value size (128 bits).
851     return MemVT.getSizeInBits() <= 64;
852   }
853 
854   bool isCheapToSpeculateCttz(Type *) const override {
855     return true; // Reported as cheap for every type; the argument is ignored.
856   }
857 
858   bool isCheapToSpeculateCtlz(Type *) const override {
859     return true; // Reported as cheap for every type; the argument is ignored.
860   }
861 
862   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
863 
864   bool hasAndNotCompare(SDValue V) const override {
865     // We can use bics for any scalar integer.
866     return V.getValueType().isScalarInteger();
867   }
868 
869   bool hasAndNot(SDValue Y) const override {
870     const EVT Ty = Y.getValueType();
871     if (!Ty.isVector()) // Non-vectors: same answer as hasAndNotCompare().
872       return hasAndNotCompare(Y);
873     // TODO: We should be able to use bic/bif too for SVE.
874     const TypeSize Bits = Ty.getSizeInBits();
875     if (Bits.isScalable())
876       return false;
877     return Bits.getFixedValue() >= 64; // vector 'bic'
878   }
879 
880   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
881       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
882       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
883       SelectionDAG &DAG) const override;
884 
885   ShiftLegalizationStrategy
886   preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
887                                      unsigned ExpansionFactor) const override;
888 
889   bool shouldTransformSignedTruncationCheck(EVT XVT,
890                                             unsigned KeptBits) const override {
891     // Accepts exactly the scalar integer types i8, i16, i32 and i64.
892     const auto IsOkWidth = [](EVT Ty) {
893       return Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32 ||
894              Ty == MVT::i64;
895     };
896 
897     // For vectors there is no preference, so the transform is declined.
898     if (XVT.isVector())
899       return false;
900 
901     // The kept width has to be byte/word/dword, which is what SXT supports.
902     // XVT will be larger than the kept width.
903     return IsOkWidth(XVT) && IsOkWidth(MVT::getIntegerVT(KeptBits));
904   }
905 
906   bool preferIncOfAddToSubOfNot(EVT VT) const override;
907 
908   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
909 
910   bool shouldExpandCmpUsingSelects() const override { return true; } // Always opts in.
911 
912   bool isComplexDeinterleavingSupported() const override;
913   bool isComplexDeinterleavingOperationSupported(
914       ComplexDeinterleavingOperation Operation, Type *Ty) const override;
915 
916   Value *createComplexDeinterleavingIR(
917       IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
918       ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
919       Value *Accumulator = nullptr) const override;
920 
921   bool supportSplitCSR(MachineFunction *MF) const override { // CXX_FAST_TLS and nounwind only.
922     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
923            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
924   }
925   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
926   void insertCopiesSplitCSR(
927       MachineBasicBlock *Entry,
928       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
929 
930   bool supportSwiftError() const override {
931     return true; // Unconditionally reported as supported.
932   }
933 
934   bool supportPtrAuthBundles() const override { return true; } // Unconditionally supported.
935 
936   bool supportKCFIBundles() const override { return true; } // Unconditionally supported.
937 
938   MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
939                               MachineBasicBlock::instr_iterator &MBBI,
940                               const TargetInstrInfo *TII) const override;
941 
942   /// Enable aggressive FMA fusion on targets that want it.
943   bool enableAggressiveFMAFusion(EVT VT) const override;
944 
945   /// Returns the size of the platform's va_list object.
946   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
947 
948   /// Returns true if \p VecTy is a legal interleaved access type. This
949   /// function checks the vector element type and the overall width of the
950   /// vector.
951   bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
952                                     bool &UseScalable) const;
953 
954   /// Returns the number of interleaved accesses that will be generated when
955   /// lowering accesses of the given type.
956   unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
957                                      bool UseScalable) const;
958 
959   MachineMemOperand::Flags getTargetMMOFlags(
960     const Instruction &I) const override;
961 
962   bool functionArgumentNeedsConsecutiveRegisters(
963       Type *Ty, CallingConv::ID CallConv, bool isVarArg,
964       const DataLayout &DL) const override;
965 
966   /// Used for exception handling on Win64.
967   bool needsFixedCatchObjects() const override;
968 
969   bool fallBackToDAGISel(const Instruction &Inst) const override;
970 
971   /// SVE code generation for fixed length vectors does not custom lower
972   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
973   /// merge. However, merging them creates a BUILD_VECTOR that is just as
974   /// illegal as the original, thus leading to an infinite legalisation loop.
975   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
976   /// vector types this override can be removed.
977   bool mergeStoresAfterLegalization(EVT VT) const override;
978 
979   // If the platform/function should have a redzone, return the size in bytes.
980   unsigned getRedZoneSize(const Function &F) const {
981     if (F.hasFnAttribute(Attribute::NoRedZone))
982       return 0;
983     return 128;
984   }
985 
986   bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
987   EVT getPromotedVTForPredicate(EVT VT) const;
988 
989   EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
990                              bool AllowUnknown = false) const override;
991 
992   bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
993 
994   bool shouldExpandCttzElements(EVT VT) const override;
995 
996   /// If a change in streaming mode is required on entry to/return from a
997   /// function call it emits and returns the corresponding SMSTART or SMSTOP
998   /// node. \p Condition should be one of the enum values from
999   /// AArch64SME::ToggleCondition.
1000   SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
1001                               SDValue Chain, SDValue InGlue, unsigned Condition,
1002                               SDValue PStateSM = SDValue()) const;
1003 
1004   bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
1005 
1006   // Normally SVE is only used for fixed length vectors whose byte size does
1007   // not fit within a NEON vector. This changes when OverrideNEON is true,
1008   // allowing SVE to be used for 64-bit and 128-bit vectors as well.
1009   bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
1010 
1011   // Follow NEON ABI rules even when using SVE for fixed length vectors.
1012   MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1013                                     EVT VT) const override;
1014   unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1015                                          CallingConv::ID CC,
1016                                          EVT VT) const override;
1017   unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
1018                                                 CallingConv::ID CC, EVT VT,
1019                                                 EVT &IntermediateVT,
1020                                                 unsigned &NumIntermediates,
1021                                                 MVT &RegisterVT) const override;
1022 
1023   /// True if stack clash protection is enabled for this function.
1024   bool hasInlineStackProbe(const MachineFunction &MF) const override;
1025 
1026 #ifndef NDEBUG
1027   void verifyTargetSDNode(const SDNode *N) const override;
1028 #endif
1029 
1030 private:
1031   /// Keep a pointer to the AArch64Subtarget around so that we can
1032   /// make the right decision when generating code for different targets.
1033   const AArch64Subtarget *Subtarget;
1034 
1035   llvm::BumpPtrAllocator BumpAlloc;
1036   llvm::StringSaver Saver{BumpAlloc};
1037 
1038   bool isExtFreeImpl(const Instruction *Ext) const override;
1039 
1040   void addTypeForNEON(MVT VT);
1041   void addTypeForFixedLengthSVE(MVT VT);
1042   void addDRType(MVT VT);
1043   void addQRType(MVT VT);
1044 
1045   bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;
1046 
1047   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
1048                                bool isVarArg,
1049                                const SmallVectorImpl<ISD::InputArg> &Ins,
1050                                const SDLoc &DL, SelectionDAG &DAG,
1051                                SmallVectorImpl<SDValue> &InVals) const override;
1052 
1053   void AdjustInstrPostInstrSelection(MachineInstr &MI,
1054                                      SDNode *Node) const override;
1055 
1056   SDValue LowerCall(CallLoweringInfo & /*CLI*/,
1057                     SmallVectorImpl<SDValue> &InVals) const override;
1058 
1059   SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1060                           CallingConv::ID CallConv, bool isVarArg,
1061                           const SmallVectorImpl<CCValAssign> &RVLocs,
1062                           const SDLoc &DL, SelectionDAG &DAG,
1063                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1064                           SDValue ThisVal, bool RequiresSMChange) const;
1065 
1066   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
1067   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
1068   SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
1069   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
1070 
1071   SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
1072   SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
1073 
1074   SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
1075 
1076   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1077   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1078   SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
1079 
1080   bool
1081   isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
1082 
1083   /// Finds the incoming stack arguments which overlap the given fixed stack
1084   /// object and incorporates their load into the current chain. This prevents
1085   /// an upcoming store from clobbering the stack argument before it's used.
1086   SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
1087                               MachineFrameInfo &MFI, int ClobberedFI) const;
1088 
1089   bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
1090 
1091   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
1092                            SDValue &Chain) const;
1093 
1094   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1095                       bool isVarArg,
1096                       const SmallVectorImpl<ISD::OutputArg> &Outs,
1097                       LLVMContext &Context) const override;
1098 
1099   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1100                       const SmallVectorImpl<ISD::OutputArg> &Outs,
1101                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1102                       SelectionDAG &DAG) const override;
1103 
1104   SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
1105                         unsigned Flag) const;
1106   SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
1107                         unsigned Flag) const;
1108   SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
1109                         unsigned Flag) const;
1110   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
1111                         unsigned Flag) const;
1112   SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
1113                         unsigned Flag) const;
1114   template <class NodeTy>
1115   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1116   template <class NodeTy>
1117   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1118   template <class NodeTy>
1119   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1120   template <class NodeTy>
1121   SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1122   SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1123   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1124   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1125   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1126   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1127   SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
1128                                const SDLoc &DL, SelectionDAG &DAG) const;
1129   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
1130                                  SelectionDAG &DAG) const;
1131   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1132   SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1133   SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT,
1134                                               AArch64PACKey::ID Key,
1135                                               SDValue Discriminator,
1136                                               SDValue AddrDiscriminator,
1137                                               SelectionDAG &DAG) const;
1138   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1139   SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1140   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
1141   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1142   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
1143   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
1144                          SDValue TVal, SDValue FVal, const SDLoc &dl,
1145                          SelectionDAG &DAG) const;
1146   SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1147   SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1148   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1149   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
1150   SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
1151   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1152   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1153   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
1154   SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
1155   SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
1156   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1157   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
1158   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1159   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1160   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
1161   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1162   SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1163   SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1164   SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
1165   SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
1166   SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
1167   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1168   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1169   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1170   SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
1171   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
1172   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1173   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
1174   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
1175                               unsigned NewOp) const;
1176   SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
1177   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
1178   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1179   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1180   SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
1181   SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
1182   SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const;
1183   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
1184   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
1185   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
1186   SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
1187   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
1188   SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
1189   SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
1190   SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
1191   SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
1192   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
1193   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1194   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1195   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1196   SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1197   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1198   SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1199   SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const;
1200   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1201   SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1202   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
1203   SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
1204   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
1205   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
1206   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
1207   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
1208   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1209   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
1210   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
1211   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1212   SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1213   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1214 
1215   SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;
1216 
1217   SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
1218                                                SelectionDAG &DAG) const;
1219   SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
1220                                                SelectionDAG &DAG) const;
1221   SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1222   SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1223   SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
1224   SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
1225   SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
1226                               SelectionDAG &DAG) const;
1227   SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
1228   SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
1229   SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
1230   SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
1231                                             SelectionDAG &DAG) const;
1232   SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
1233                                               SelectionDAG &DAG) const;
1234   SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1235   SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
1236   SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
1237   SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
1238                                              SelectionDAG &DAG) const;
1239   SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
1240   SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
1241   SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
1242   SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
1243   SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
1244                                               SelectionDAG &DAG) const;
1245 
1246   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1247                         SmallVectorImpl<SDNode *> &Created) const override;
1248   SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1249                         SmallVectorImpl<SDNode *> &Created) const override;
1250   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1251                           int &ExtraSteps, bool &UseOneConst,
1252                           bool Reciprocal) const override;
1253   SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1254                            int &ExtraSteps) const override;
1255   SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
1256                            const DenormalMode &Mode) const override;
1257   SDValue getSqrtResultForDenormInput(SDValue Operand,
1258                                       SelectionDAG &DAG) const override;
1259   unsigned combineRepeatedFPDivisors() const override;
1260 
1261   ConstraintType getConstraintType(StringRef Constraint) const override;
1262   Register getRegisterByName(const char* RegName, LLT VT,
1263                              const MachineFunction &MF) const override;
1264 
1265   /// Examine constraint string and operand type and determine a weight value.
1266   /// The operand object must already have been set up with the operand type.
1267   ConstraintWeight
1268   getSingleConstraintMatchWeight(AsmOperandInfo &info,
1269                                  const char *constraint) const override;
1270 
1271   std::pair<unsigned, const TargetRegisterClass *>
1272   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1273                                StringRef Constraint, MVT VT) const override;
1274 
1275   const char *LowerXConstraint(EVT ConstraintVT) const override;
1276 
1277   void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1278                                     std::vector<SDValue> &Ops,
1279                                     SelectionDAG &DAG) const override;
1280 
1281   InlineAsm::ConstraintCode
1282   getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1283     if (ConstraintCode == "Q")
1284       return InlineAsm::ConstraintCode::Q;
1285     // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
1286     //        followed by llvm_unreachable so we'll leave them unimplemented in
1287     //        the backend for now.
1288     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1289   }
1290 
1291   /// Handle lowering of flag assembly outputs.
1292   SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1293                                       const SDLoc &DL,
1294                                       const AsmOperandInfo &Constraint,
1295                                       SelectionDAG &DAG) const override;
1296 
1297   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
1298   bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
1299   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1300   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1301   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1302   bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1303                               SDValue &Offset, SelectionDAG &DAG) const;
1304   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
1305                                  ISD::MemIndexedMode &AM,
1306                                  SelectionDAG &DAG) const override;
1307   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1308                                   SDValue &Offset, ISD::MemIndexedMode &AM,
1309                                   SelectionDAG &DAG) const override;
1310   bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
1311                        bool IsPre, MachineRegisterInfo &MRI) const override;
1312 
1313   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1314                           SelectionDAG &DAG) const override;
1315   void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1316                              SelectionDAG &DAG) const;
1317   void ReplaceExtractSubVectorResults(SDNode *N,
1318                                       SmallVectorImpl<SDValue> &Results,
1319                                       SelectionDAG &DAG) const;
1320 
1321   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
1322 
1323   void finalizeLowering(MachineFunction &MF) const override;
1324 
1325   bool shouldLocalize(const MachineInstr &MI,
1326                       const TargetTransformInfo *TTI) const override;
1327 
1328   bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1329                                          const APInt &OriginalDemandedBits,
1330                                          const APInt &OriginalDemandedElts,
1331                                          KnownBits &Known,
1332                                          TargetLoweringOpt &TLO,
1333                                          unsigned Depth) const override;
1334 
1335   bool isTargetCanonicalConstantNode(SDValue Op) const override;
1336 
1337   // With the exception of data-predicate transitions, no instructions are
1338   // required to cast between legal scalable vector types. However:
1339   //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
1340   //     is not universally useable.
1341   //  2. Most unpacked integer types are not legal and thus integer extends
1342   //     cannot be used to convert between unpacked and packed types.
1343   // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
1344   // to transition between unpacked and packed types of the same element type,
1345   // with BITCAST used otherwise.
1346   // This function does not handle predicate bitcasts.
1347   SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
1348 
1349   // Returns the runtime value for PSTATE.SM by generating a call to
1350   // __arm_sme_state.
1351   SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
1352                              EVT VT) const;
1353 
1354   bool preferScalarizeSplat(SDNode *N) const override;
1355 
1356   unsigned getMinimumJumpTableEntries() const override;
1357 
1358   bool softPromoteHalfType() const override { return true; }
1359 };
1360 
1361 namespace AArch64 {
1362 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1363                          const TargetLibraryInfo *libInfo);
1364 } // end namespace AArch64
1365 
1366 } // end namespace llvm
1367 
1368 #endif
1369