xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16 
17 #include "AArch64.h"
18 #include "Utils/AArch64SMEAttributes.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/SelectionDAG.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/IR/CallingConv.h"
24 #include "llvm/IR/Instruction.h"
25 
26 namespace llvm {
27 
28 namespace AArch64ISD {
29 
30 // For predicated nodes where the result is a vector, the operation is
31 // controlled by a governing predicate and the inactive lanes are explicitly
32 // defined with a value, please stick the following naming convention:
33 //
34 //    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
35 //                        to source operand OP<n>.
36 //
37 //    _MERGE_ZERO         The result value is a vector with inactive lanes
38 //                        actively zeroed.
39 //
40 //    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
41 //                        to the last source operand which only purpose is being
42 //                        a passthru value.
43 //
44 // For other cases where no explicit action is needed to set the inactive lanes,
45 // or when the result is not a vector and it is needed or helpful to
46 // distinguish a node from similar unpredicated nodes, use:
47 //
48 //    _PRED
49 //
50 enum NodeType : unsigned {
51   FIRST_NUMBER = ISD::BUILTIN_OP_END,
52   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
53   CALL,         // Function call.
54 
55   // Pseudo for a OBJC call that gets emitted together with a special `mov
56   // x29, x29` marker instruction.
57   CALL_RVMARKER,
58 
59   CALL_BTI, // Function call followed by a BTI instruction.
60 
61   COALESCER_BARRIER,
62 
63   SMSTART,
64   SMSTOP,
65   RESTORE_ZA,
66   RESTORE_ZT,
67   SAVE_ZT,
68 
69   // A call with the callee in x16, i.e. "blr x16".
70   CALL_ARM64EC_TO_X64,
71 
72   // Produces the full sequence of instructions for getting the thread pointer
73   // offset of a variable into X0, using the TLSDesc model.
74   TLSDESC_CALLSEQ,
75   ADRP,     // Page address of a TargetGlobalAddress operand.
76   ADR,      // ADR
77   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
78   LOADgot,  // Load from automatically generated descriptor (e.g. Global
79             // Offset Table, TLS record).
80   RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
81   BRCOND,   // Conditional branch instruction; "b.cond".
82   CSEL,
83   CSINV, // Conditional select invert.
84   CSNEG, // Conditional select negate.
85   CSINC, // Conditional select increment.
86 
87   // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
88   // ELF.
89   THREAD_POINTER,
90   ADC,
91   SBC, // adc, sbc instructions
92 
93   // To avoid stack clash, allocation is performed by block and each block is
94   // probed.
95   PROBED_ALLOCA,
96 
97   // Predicated instructions where inactive lanes produce undefined results.
98   ABDS_PRED,
99   ABDU_PRED,
100   FADD_PRED,
101   FDIV_PRED,
102   FMA_PRED,
103   FMAX_PRED,
104   FMAXNM_PRED,
105   FMIN_PRED,
106   FMINNM_PRED,
107   FMUL_PRED,
108   FSUB_PRED,
109   HADDS_PRED,
110   HADDU_PRED,
111   MUL_PRED,
112   MULHS_PRED,
113   MULHU_PRED,
114   RHADDS_PRED,
115   RHADDU_PRED,
116   SDIV_PRED,
117   SHL_PRED,
118   SMAX_PRED,
119   SMIN_PRED,
120   SRA_PRED,
121   SRL_PRED,
122   UDIV_PRED,
123   UMAX_PRED,
124   UMIN_PRED,
125 
126   // Unpredicated vector instructions
127   BIC,
128 
129   SRAD_MERGE_OP1,
130 
131   // Predicated instructions with the result of inactive lanes provided by the
132   // last operand.
133   FABS_MERGE_PASSTHRU,
134   FCEIL_MERGE_PASSTHRU,
135   FFLOOR_MERGE_PASSTHRU,
136   FNEARBYINT_MERGE_PASSTHRU,
137   FNEG_MERGE_PASSTHRU,
138   FRECPX_MERGE_PASSTHRU,
139   FRINT_MERGE_PASSTHRU,
140   FROUND_MERGE_PASSTHRU,
141   FROUNDEVEN_MERGE_PASSTHRU,
142   FSQRT_MERGE_PASSTHRU,
143   FTRUNC_MERGE_PASSTHRU,
144   FP_ROUND_MERGE_PASSTHRU,
145   FP_EXTEND_MERGE_PASSTHRU,
146   UINT_TO_FP_MERGE_PASSTHRU,
147   SINT_TO_FP_MERGE_PASSTHRU,
148   FCVTZU_MERGE_PASSTHRU,
149   FCVTZS_MERGE_PASSTHRU,
150   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
151   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
152   ABS_MERGE_PASSTHRU,
153   NEG_MERGE_PASSTHRU,
154 
155   SETCC_MERGE_ZERO,
156 
157   // Arithmetic instructions which write flags.
158   ADDS,
159   SUBS,
160   ADCS,
161   SBCS,
162   ANDS,
163 
164   // Conditional compares. Operands: left,right,falsecc,cc,flags
165   CCMP,
166   CCMN,
167   FCCMP,
168 
169   // Floating point comparison
170   FCMP,
171 
172   // Scalar-to-vector duplication
173   DUP,
174   DUPLANE8,
175   DUPLANE16,
176   DUPLANE32,
177   DUPLANE64,
178   DUPLANE128,
179 
180   // Vector immedate moves
181   MOVI,
182   MOVIshift,
183   MOVIedit,
184   MOVImsl,
185   FMOV,
186   MVNIshift,
187   MVNImsl,
188 
189   // Vector immediate ops
190   BICi,
191   ORRi,
192 
193   // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
194   // element must be identical.
195   BSP,
196 
197   // Vector shuffles
198   ZIP1,
199   ZIP2,
200   UZP1,
201   UZP2,
202   TRN1,
203   TRN2,
204   REV16,
205   REV32,
206   REV64,
207   EXT,
208   SPLICE,
209 
210   // Vector shift by scalar
211   VSHL,
212   VLSHR,
213   VASHR,
214 
215   // Vector shift by scalar (again)
216   SQSHL_I,
217   UQSHL_I,
218   SQSHLU_I,
219   SRSHR_I,
220   URSHR_I,
221 
222   // Vector narrowing shift by immediate (bottom)
223   RSHRNB_I,
224 
225   // Vector shift by constant and insert
226   VSLI,
227   VSRI,
228 
229   // Vector comparisons
230   CMEQ,
231   CMGE,
232   CMGT,
233   CMHI,
234   CMHS,
235   FCMEQ,
236   FCMGE,
237   FCMGT,
238 
239   // Vector zero comparisons
240   CMEQz,
241   CMGEz,
242   CMGTz,
243   CMLEz,
244   CMLTz,
245   FCMEQz,
246   FCMGEz,
247   FCMGTz,
248   FCMLEz,
249   FCMLTz,
250 
251   // Vector across-lanes addition
252   // Only the lower result lane is defined.
253   SADDV,
254   UADDV,
255 
256   // Unsigned sum Long across Vector
257   UADDLV,
258   SADDLV,
259 
260   // Add Pairwise of two vectors
261   ADDP,
262   // Add Long Pairwise
263   SADDLP,
264   UADDLP,
265 
266   // udot/sdot instructions
267   UDOT,
268   SDOT,
269 
270   // Vector across-lanes min/max
271   // Only the lower result lane is defined.
272   SMINV,
273   UMINV,
274   SMAXV,
275   UMAXV,
276 
277   SADDV_PRED,
278   UADDV_PRED,
279   SMAXV_PRED,
280   UMAXV_PRED,
281   SMINV_PRED,
282   UMINV_PRED,
283   ORV_PRED,
284   EORV_PRED,
285   ANDV_PRED,
286 
287   // Vector bitwise insertion
288   BIT,
289 
290   // Compare-and-branch
291   CBZ,
292   CBNZ,
293   TBZ,
294   TBNZ,
295 
296   // Tail calls
297   TC_RETURN,
298 
299   // Custom prefetch handling
300   PREFETCH,
301 
302   // {s|u}int to FP within a FP register.
303   SITOF,
304   UITOF,
305 
306   /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
307   /// world w.r.t vectors; which causes additional REV instructions to be
308   /// generated to compensate for the byte-swapping. But sometimes we do
309   /// need to re-interpret the data in SIMD vector registers in big-endian
310   /// mode without emitting such REV instructions.
311   NVCAST,
312 
313   MRS, // MRS, also sets the flags via a glue.
314 
315   SMULL,
316   UMULL,
317 
318   PMULL,
319 
320   // Reciprocal estimates and steps.
321   FRECPE,
322   FRECPS,
323   FRSQRTE,
324   FRSQRTS,
325 
326   SUNPKHI,
327   SUNPKLO,
328   UUNPKHI,
329   UUNPKLO,
330 
331   CLASTA_N,
332   CLASTB_N,
333   LASTA,
334   LASTB,
335   TBL,
336 
337   // Floating-point reductions.
338   FADDA_PRED,
339   FADDV_PRED,
340   FMAXV_PRED,
341   FMAXNMV_PRED,
342   FMINV_PRED,
343   FMINNMV_PRED,
344 
345   INSR,
346   PTEST,
347   PTEST_ANY,
348   PTRUE,
349 
350   CTTZ_ELTS,
351 
352   BITREVERSE_MERGE_PASSTHRU,
353   BSWAP_MERGE_PASSTHRU,
354   REVH_MERGE_PASSTHRU,
355   REVW_MERGE_PASSTHRU,
356   CTLZ_MERGE_PASSTHRU,
357   CTPOP_MERGE_PASSTHRU,
358   DUP_MERGE_PASSTHRU,
359   INDEX_VECTOR,
360 
361   // Cast between vectors of the same element type but differ in length.
362   REINTERPRET_CAST,
363 
364   // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
365   LS64_BUILD,
366   LS64_EXTRACT,
367 
368   LD1_MERGE_ZERO,
369   LD1S_MERGE_ZERO,
370   LDNF1_MERGE_ZERO,
371   LDNF1S_MERGE_ZERO,
372   LDFF1_MERGE_ZERO,
373   LDFF1S_MERGE_ZERO,
374   LD1RQ_MERGE_ZERO,
375   LD1RO_MERGE_ZERO,
376 
377   // Structured loads.
378   SVE_LD2_MERGE_ZERO,
379   SVE_LD3_MERGE_ZERO,
380   SVE_LD4_MERGE_ZERO,
381 
382   // Unsigned gather loads.
383   GLD1_MERGE_ZERO,
384   GLD1_SCALED_MERGE_ZERO,
385   GLD1_UXTW_MERGE_ZERO,
386   GLD1_SXTW_MERGE_ZERO,
387   GLD1_UXTW_SCALED_MERGE_ZERO,
388   GLD1_SXTW_SCALED_MERGE_ZERO,
389   GLD1_IMM_MERGE_ZERO,
390   GLD1Q_MERGE_ZERO,
391   GLD1Q_INDEX_MERGE_ZERO,
392 
393   // Signed gather loads
394   GLD1S_MERGE_ZERO,
395   GLD1S_SCALED_MERGE_ZERO,
396   GLD1S_UXTW_MERGE_ZERO,
397   GLD1S_SXTW_MERGE_ZERO,
398   GLD1S_UXTW_SCALED_MERGE_ZERO,
399   GLD1S_SXTW_SCALED_MERGE_ZERO,
400   GLD1S_IMM_MERGE_ZERO,
401 
402   // Unsigned gather loads.
403   GLDFF1_MERGE_ZERO,
404   GLDFF1_SCALED_MERGE_ZERO,
405   GLDFF1_UXTW_MERGE_ZERO,
406   GLDFF1_SXTW_MERGE_ZERO,
407   GLDFF1_UXTW_SCALED_MERGE_ZERO,
408   GLDFF1_SXTW_SCALED_MERGE_ZERO,
409   GLDFF1_IMM_MERGE_ZERO,
410 
411   // Signed gather loads.
412   GLDFF1S_MERGE_ZERO,
413   GLDFF1S_SCALED_MERGE_ZERO,
414   GLDFF1S_UXTW_MERGE_ZERO,
415   GLDFF1S_SXTW_MERGE_ZERO,
416   GLDFF1S_UXTW_SCALED_MERGE_ZERO,
417   GLDFF1S_SXTW_SCALED_MERGE_ZERO,
418   GLDFF1S_IMM_MERGE_ZERO,
419 
420   // Non-temporal gather loads
421   GLDNT1_MERGE_ZERO,
422   GLDNT1_INDEX_MERGE_ZERO,
423   GLDNT1S_MERGE_ZERO,
424 
425   // Contiguous masked store.
426   ST1_PRED,
427 
428   // Scatter store
429   SST1_PRED,
430   SST1_SCALED_PRED,
431   SST1_UXTW_PRED,
432   SST1_SXTW_PRED,
433   SST1_UXTW_SCALED_PRED,
434   SST1_SXTW_SCALED_PRED,
435   SST1_IMM_PRED,
436   SST1Q_PRED,
437   SST1Q_INDEX_PRED,
438 
439   // Non-temporal scatter store
440   SSTNT1_PRED,
441   SSTNT1_INDEX_PRED,
442 
443   // SME
444   RDSVL,
445   REVD_MERGE_PASSTHRU,
446 
447   // Asserts that a function argument (i32) is zero-extended to i8 by
448   // the caller
449   ASSERT_ZEXT_BOOL,
450 
451   // 128-bit system register accesses
452   // lo64, hi64, chain = MRRS(chain, sysregname)
453   MRRS,
454   // chain = MSRR(chain, sysregname, lo64, hi64)
455   MSRR,
456 
457   // Strict (exception-raising) floating point comparison
458   STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
459   STRICT_FCMPE,
460 
461   // SME ZA loads and stores
462   SME_ZA_LDR,
463   SME_ZA_STR,
464 
465   // NEON Load/Store with post-increment base updates
466   LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
467   LD3post,
468   LD4post,
469   ST2post,
470   ST3post,
471   ST4post,
472   LD1x2post,
473   LD1x3post,
474   LD1x4post,
475   ST1x2post,
476   ST1x3post,
477   ST1x4post,
478   LD1DUPpost,
479   LD2DUPpost,
480   LD3DUPpost,
481   LD4DUPpost,
482   LD1LANEpost,
483   LD2LANEpost,
484   LD3LANEpost,
485   LD4LANEpost,
486   ST2LANEpost,
487   ST3LANEpost,
488   ST4LANEpost,
489 
490   STG,
491   STZG,
492   ST2G,
493   STZ2G,
494 
495   LDP,
496   LDIAPP,
497   LDNP,
498   STP,
499   STILP,
500   STNP,
501 
502   // Memory Operations
503   MOPS_MEMSET,
504   MOPS_MEMSET_TAGGING,
505   MOPS_MEMCOPY,
506   MOPS_MEMMOVE,
507 };
508 
509 } // end namespace AArch64ISD
510 
511 namespace AArch64 {
512 /// Possible values of current rounding mode, which is specified in bits
513 /// 23:22 of FPCR.
514 enum Rounding {
515   RN = 0,    // Round to Nearest
516   RP = 1,    // Round towards Plus infinity
517   RM = 2,    // Round towards Minus infinity
518   RZ = 3,    // Round towards Zero
519   rmMask = 3 // Bit mask selecting rounding mode
520 };
521 
522 // Bit position of rounding mode bits in FPCR.
523 const unsigned RoundingBitsPos = 22;
524 
525 // Registers used to pass function arguments.
526 ArrayRef<MCPhysReg> getGPRArgRegs();
527 ArrayRef<MCPhysReg> getFPRArgRegs();
528 
529 /// Maximum allowed number of unprobed bytes above SP at an ABI
530 /// boundary.
531 const unsigned StackProbeMaxUnprobedStack = 1024;
532 
533 /// Maximum number of iterations to unroll for a constant size probing loop.
534 const unsigned StackProbeMaxLoopUnroll = 4;
535 
536 } // namespace AArch64
537 
538 class AArch64Subtarget;
539 
540 class AArch64TargetLowering : public TargetLowering {
541 public:
542   explicit AArch64TargetLowering(const TargetMachine &TM,
543                                  const AArch64Subtarget &STI);
544 
545   /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
546   /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
547   bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
548                            SDValue N1) const override;
549 
550   /// Selects the correct CCAssignFn for a given CallingConvention value.
551   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
552 
553   /// Selects the correct CCAssignFn for returns of a given CallingConvention.
554   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
555 
556   /// Determine which bits of Op are known to be either zero or one and return
557   /// them in Known.
558   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
559                                      const APInt &DemandedElts,
560                                      const SelectionDAG &DAG,
561                                      unsigned Depth = 0) const override;
562 
563   unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
564                                            const APInt &DemandedElts,
565                                            const SelectionDAG &DAG,
566                                            unsigned Depth) const override;
567 
568   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
569     // Returning i64 unconditionally here (i.e. even for ILP32) means that the
570     // *DAG* representation of pointers will always be 64-bits. They will be
571     // truncated and extended when transferred to memory, but the 64-bit DAG
572     // allows us to use AArch64's addressing modes much more easily.
573     return MVT::getIntegerVT(64);
574   }
575 
576   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
577                                     const APInt &DemandedElts,
578                                     TargetLoweringOpt &TLO) const override;
579 
580   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
581 
582   /// Returns true if the target allows unaligned memory accesses of the
583   /// specified type.
584   bool allowsMisalignedMemoryAccesses(
585       EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
586       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
587       unsigned *Fast = nullptr) const override;
588   /// LLT variant.
589   bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
590                                       Align Alignment,
591                                       MachineMemOperand::Flags Flags,
592                                       unsigned *Fast = nullptr) const override;
593 
594   /// Provide custom lowering hooks for some operations.
595   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
596 
597   const char *getTargetNodeName(unsigned Opcode) const override;
598 
599   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
600 
601   /// This method returns a target specific FastISel object, or null if the
602   /// target does not support "fast" ISel.
603   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
604                            const TargetLibraryInfo *libInfo) const override;
605 
606   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
607 
608   bool isFPImmLegal(const APFloat &Imm, EVT VT,
609                     bool ForCodeSize) const override;
610 
611   /// Return true if the given shuffle mask can be codegen'd directly, or if it
612   /// should be stack expanded.
613   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
614 
615   /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
616   /// shuffle mask can be codegen'd directly.
617   bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
618 
619   /// Return the ISD::SETCC ValueType.
620   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
621                          EVT VT) const override;
622 
623   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
624 
625   MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
626                                   MachineBasicBlock *BB) const;
627 
628   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
629                                            MachineBasicBlock *BB) const;
630 
631   MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
632                                             MachineBasicBlock *MBB) const;
633 
634   MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
635                                   MachineInstr &MI,
636                                   MachineBasicBlock *BB) const;
637   MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
638   MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
639                                  MachineInstr &MI, MachineBasicBlock *BB,
640                                  bool HasTile) const;
641   MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
642                                  unsigned Opcode, bool Op0IsDef) const;
643   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
644 
645   MachineBasicBlock *
646   EmitInstrWithCustomInserter(MachineInstr &MI,
647                               MachineBasicBlock *MBB) const override;
648 
649   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
650                           MachineFunction &MF,
651                           unsigned Intrinsic) const override;
652 
653   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
654                              EVT NewVT) const override;
655 
656   bool shouldRemoveRedundantExtend(SDValue Op) const override;
657 
658   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
659   bool isTruncateFree(EVT VT1, EVT VT2) const override;
660 
661   bool isProfitableToHoist(Instruction *I) const override;
662 
663   bool isZExtFree(Type *Ty1, Type *Ty2) const override;
664   bool isZExtFree(EVT VT1, EVT VT2) const override;
665   bool isZExtFree(SDValue Val, EVT VT2) const override;
666 
667   bool shouldSinkOperands(Instruction *I,
668                           SmallVectorImpl<Use *> &Ops) const override;
669 
670   bool optimizeExtendOrTruncateConversion(
671       Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;
672 
673   bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;
674 
675   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
676 
677   bool lowerInterleavedLoad(LoadInst *LI,
678                             ArrayRef<ShuffleVectorInst *> Shuffles,
679                             ArrayRef<unsigned> Indices,
680                             unsigned Factor) const override;
681   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
682                              unsigned Factor) const override;
683 
684   bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
685                                         LoadInst *LI) const override;
686 
687   bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
688                                        StoreInst *SI) const override;
689 
690   bool isLegalAddImmediate(int64_t) const override;
691   bool isLegalICmpImmediate(int64_t) const override;
692 
693   bool isMulAddWithConstProfitable(SDValue AddNode,
694                                    SDValue ConstNode) const override;
695 
696   bool shouldConsiderGEPOffsetSplit() const override;
697 
698   EVT getOptimalMemOpType(const MemOp &Op,
699                           const AttributeList &FuncAttributes) const override;
700 
701   LLT getOptimalMemOpLLT(const MemOp &Op,
702                          const AttributeList &FuncAttributes) const override;
703 
704   /// Return true if the addressing mode represented by AM is legal for this
705   /// target, for a load/store of the specified type.
706   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
707                              unsigned AS,
708                              Instruction *I = nullptr) const override;
709 
710   int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
711                                          int64_t MaxOffset) const override;
712 
713   /// Return true if an FMA operation is faster than a pair of fmul and fadd
714   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
715   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
716   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
717                                   EVT VT) const override;
718   bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
719 
720   bool generateFMAsInMachineCombiner(EVT VT,
721                                      CodeGenOptLevel OptLevel) const override;
722 
723   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
724   ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
725 
726   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
727   bool isDesirableToCommuteWithShift(const SDNode *N,
728                                      CombineLevel Level) const override;
729 
730   bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
731     return false;
732   }
733 
734   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
735   bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
736 
737   /// Return true if it is profitable to fold a pair of shifts into a mask.
738   bool shouldFoldConstantShiftPairToMask(const SDNode *N,
739                                          CombineLevel Level) const override;
740 
741   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
742                                             EVT VT) const override;
743 
744   /// Returns true if it is beneficial to convert a load of a constant
745   /// to just the constant itself.
746   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
747                                          Type *Ty) const override;
748 
749   /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
750   /// with this index.
751   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
752                                unsigned Index) const override;
753 
754   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
755                             bool MathUsed) const override {
756     // Using overflow ops for overflow checks only should beneficial on
757     // AArch64.
758     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
759   }
760 
761   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
762                         AtomicOrdering Ord) const override;
763   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
764                               AtomicOrdering Ord) const override;
765 
766   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
767 
768   bool isOpSuitableForLDPSTP(const Instruction *I) const;
769   bool isOpSuitableForLSE128(const Instruction *I) const;
770   bool isOpSuitableForRCPC3(const Instruction *I) const;
771   bool shouldInsertFencesForAtomic(const Instruction *I) const override;
772   bool
773   shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
774 
775   TargetLoweringBase::AtomicExpansionKind
776   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
777   TargetLoweringBase::AtomicExpansionKind
778   shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
779   TargetLoweringBase::AtomicExpansionKind
780   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
781 
782   TargetLoweringBase::AtomicExpansionKind
783   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
784 
785   bool useLoadStackGuardNode() const override;
786   TargetLoweringBase::LegalizeTypeAction
787   getPreferredVectorAction(MVT VT) const override;
788 
789   /// If the target has a standard location for the stack protector cookie,
790   /// returns the address of that location. Otherwise, returns nullptr.
791   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
792 
793   void insertSSPDeclarations(Module &M) const override;
794   Value *getSDagStackGuard(const Module &M) const override;
795   Function *getSSPStackGuardCheck(const Module &M) const override;
796 
797   /// If the target has a standard location for the unsafe stack pointer,
798   /// returns the address of that location. Otherwise, returns nullptr.
799   Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
800 
801   /// If a physical register, this returns the register that receives the
802   /// exception address on entry to an EH pad.
803   Register
804   getExceptionPointerRegister(const Constant *PersonalityFn) const override {
805     // FIXME: This is a guess. Has this been defined yet?
806     return AArch64::X0;
807   }
808 
809   /// If a physical register, this returns the register that receives the
810   /// exception typeid on entry to a landing pad.
811   Register
812   getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
813     // FIXME: This is a guess. Has this been defined yet?
814     return AArch64::X1;
815   }
816 
817   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
818 
819   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
820                         const MachineFunction &MF) const override {
821     // Do not merge to float value size (128 bytes) if no implicit
822     // float attribute is set.
823 
824     bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
825 
826     if (NoFloat)
827       return (MemVT.getSizeInBits() <= 64);
828     return true;
829   }
830 
831   bool isCheapToSpeculateCttz(Type *) const override {
832     return true;
833   }
834 
835   bool isCheapToSpeculateCtlz(Type *) const override {
836     return true;
837   }
838 
839   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
840 
841   bool hasAndNotCompare(SDValue V) const override {
842     // We can use bics for any scalar.
843     return V.getValueType().isScalarInteger();
844   }
845 
846   bool hasAndNot(SDValue Y) const override {
847     EVT VT = Y.getValueType();
848 
849     if (!VT.isVector())
850       return hasAndNotCompare(Y);
851 
852     TypeSize TS = VT.getSizeInBits();
853     // TODO: We should be able to use bic/bif too for SVE.
854     return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
855   }
856 
857   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
858       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
859       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
860       SelectionDAG &DAG) const override;
861 
862   ShiftLegalizationStrategy
863   preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
864                                      unsigned ExpansionFactor) const override;
865 
866   bool shouldTransformSignedTruncationCheck(EVT XVT,
867                                             unsigned KeptBits) const override {
868     // For vectors, we don't have a preference..
869     if (XVT.isVector())
870       return false;
871 
872     auto VTIsOk = [](EVT VT) -> bool {
873       return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
874              VT == MVT::i64;
875     };
876 
877     // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
878     // XVT will be larger than KeptBitsVT.
879     MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
880     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
881   }
882 
883   bool preferIncOfAddToSubOfNot(EVT VT) const override;
884 
885   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
886 
887   bool isComplexDeinterleavingSupported() const override;
888   bool isComplexDeinterleavingOperationSupported(
889       ComplexDeinterleavingOperation Operation, Type *Ty) const override;
890 
891   Value *createComplexDeinterleavingIR(
892       IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
893       ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
894       Value *Accumulator = nullptr) const override;
895 
896   bool supportSplitCSR(MachineFunction *MF) const override {
897     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
898            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
899   }
900   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
901   void insertCopiesSplitCSR(
902       MachineBasicBlock *Entry,
903       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
904 
905   bool supportSwiftError() const override {
906     return true;
907   }
908 
909   bool supportKCFIBundles() const override { return true; }
910 
911   MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
912                               MachineBasicBlock::instr_iterator &MBBI,
913                               const TargetInstrInfo *TII) const override;
914 
915   /// Enable aggressive FMA fusion on targets that want it.
916   bool enableAggressiveFMAFusion(EVT VT) const override;
917 
918   /// Returns the size of the platform's va_list object.
919   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
920 
921   /// Returns true if \p VecTy is a legal interleaved access type. This
922   /// function checks the vector element type and the overall width of the
923   /// vector.
924   bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
925                                     bool &UseScalable) const;
926 
927   /// Returns the number of interleaved accesses that will be generated when
928   /// lowering accesses of the given type.
929   unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
930                                      bool UseScalable) const;
931 
932   MachineMemOperand::Flags getTargetMMOFlags(
933     const Instruction &I) const override;
934 
935   bool functionArgumentNeedsConsecutiveRegisters(
936       Type *Ty, CallingConv::ID CallConv, bool isVarArg,
937       const DataLayout &DL) const override;
938 
939   /// Used for exception handling on Win64.
940   bool needsFixedCatchObjects() const override;
941 
      /// Override that answers per IR instruction (\p Inst). Declaration only,
      /// so the criteria are not visible in this class body.
942   bool fallBackToDAGISel(const Instruction &Inst) const override;
943 
944   /// SVE code generation for fixed-length vectors does not custom lower
945   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
946   /// merge. However, merging them creates a BUILD_VECTOR that is just as
947   /// illegal as the original, thus leading to an infinite legalisation loop.
948   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
949   /// vector types, this override can be removed.
950   bool mergeStoresAfterLegalization(EVT VT) const override;
951 
952   // Red zone size in bytes for \p F: 0 if F is marked NoRedZone, else 128.
953   unsigned getRedZoneSize(const Function &F) const {
954     const bool RedZoneDisabled = F.hasFnAttribute(Attribute::NoRedZone);
955     // The attribute is the only input consulted here.
956     return RedZoneDisabled ? 0 : 128;
957   }
958 
959   bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
960   EVT getPromotedVTForPredicate(EVT VT) const;
      // The two helpers above are not marked `override`; the three declarations
      // below are. None of the five has a body inside the class.
961 
      // \p AllowUnknown defaults to false when the caller omits it.
962   EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
963                              bool AllowUnknown = false) const override;
964 
965   bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
966 
967   bool shouldExpandCttzElements(EVT VT) const override;
968 
969   /// If a change in streaming mode is required on entry to/return from a
970   /// function call, emit and return the corresponding SMSTART or SMSTOP node.
971   /// \p Entry tells whether this is before or after the call, which is
972   /// necessary because PSTATE.SM is only queried once.
      /// Note that \p DL is taken by value in this signature.
973   SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
974                               SDValue Chain, SDValue InGlue,
975                               SDValue PStateSM, bool Entry) const;
976 
977   bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
      // ^ Constant answer: returns true without consulting any state.
978 
979   // Normally SVE is only used for byte size vectors that do not fit within a
980   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
981   // used for 64-bit and 128-bit vectors as well.
      // OverrideNEON defaults to false when the caller omits it.
982   bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
983 
984   // Follow NEON ABI rules even when using SVE for fixed length vectors.
      // This note precedes three calling-convention overrides, each taking the
      // context, the calling convention and the value type.
985   MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
986                                     EVT VT) const override;
987   unsigned getNumRegistersForCallingConv(LLVMContext &Context,
988                                          CallingConv::ID CC,
989                                          EVT VT) const override;
      // IntermediateVT, NumIntermediates and RegisterVT are non-const
      // references, presumably outputs -- confirm against the definition.
990   unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
991                                                 CallingConv::ID CC, EVT VT,
992                                                 EVT &IntermediateVT,
993                                                 unsigned &NumIntermediates,
994                                                 MVT &RegisterVT) const override;
995 
996   /// True if stack clash protection is enabled for this function.
997   bool hasInlineStackProbe(const MachineFunction &MF) const override;
998 
999 private:
1000   /// Keep a pointer to the AArch64Subtarget around so that we can
1001   /// make the right decision when generating code for different targets.
1002   const AArch64Subtarget *Subtarget;
1003 
       // Saver is constructed over BumpAlloc, so BumpAlloc must stay declared
       // first: non-static members are initialised in declaration order.
1004   llvm::BumpPtrAllocator BumpAlloc;
1005   llvm::StringSaver Saver{BumpAlloc};
1006 
1007   bool isExtFreeImpl(const Instruction *Ext) const override;
1008 
       // The four add*Type* helpers below are the only non-const member
       // functions in this run, so they are allowed to modify the lowering
       // object. None has a body inside the class.
1009   void addTypeForNEON(MVT VT);
1010   void addTypeForFixedLengthSVE(MVT VT, bool StreamingSVE);
1011   void addDRTypeForNEON(MVT VT);
1012   void addQRTypeForNEON(MVT VT);
1013 
       // Chain is a non-const reference, so the callee can update it -- the
       // meaning of the unsigned result is not visible here (TODO confirm).
1014   unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
1015                                   SelectionDAG &DAG) const;
1016 
1017   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
1018                                bool isVarArg,
1019                                const SmallVectorImpl<ISD::InputArg> &Ins,
1020                                const SDLoc &DL, SelectionDAG &DAG,
1021                                SmallVectorImpl<SDValue> &InVals) const override;
1022 
1023   void AdjustInstrPostInstrSelection(MachineInstr &MI,
1024                                      SDNode *Node) const override;
1025 
       // The first parameter's name is commented out in this declaration.
1026   SDValue LowerCall(CallLoweringInfo & /*CLI*/,
1027                     SmallVectorImpl<SDValue> &InVals) const override;
1028 
       // Unlike the three declarations above, LowerCallResult is not marked
       // `override`. InVals is a non-const reference, presumably filled in by
       // the callee -- confirm against the definition.
1029   SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1030                           CallingConv::ID CallConv, bool isVarArg,
1031                           const SmallVectorImpl<CCValAssign> &RVLocs,
1032                           const SDLoc &DL, SelectionDAG &DAG,
1033                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1034                           SDValue ThisVal, bool RequiresSMChange) const;
1035 
1036   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
       // This run of helpers shares one shape: (SDValue Op, SelectionDAG &DAG)
       // in, SDValue out, const, not `override`, no body inside the class.
1037   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
1038   SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
1039   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
1040 
1041   SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
1042   SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
1043 
1044   SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
1045 
1046   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1047   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1048   SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
1049 
1050   bool
1051   isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
1052 
1053   /// Finds the incoming stack arguments which overlap the given fixed stack
1054   /// object and incorporates their load into the current chain. This prevents
1055   /// an upcoming store from clobbering the stack argument before it's used.
       /// The fixed stack object is identified by the integer \p ClobberedFI.
1056   SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
1057                               MachineFrameInfo &MFI, int ClobberedFI) const;
1058 
1059   bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
1060 
       // Chain is a non-const reference and the function returns void, so any
       // result is presumably delivered through Chain -- confirm.
1061   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
1062                            SDValue &Chain) const;
1063 
1064   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1065                       bool isVarArg,
1066                       const SmallVectorImpl<ISD::OutputArg> &Outs,
1067                       LLVMContext &Context) const override;
1068 
       // Both declarations in this pair are overrides that receive the return
       // values as a read-only list of ISD::OutputArg; LowerReturn additionally
       // receives the matching SDValues in OutVals. No bodies inside the class.
1069   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1070                       const SmallVectorImpl<ISD::OutputArg> &Outs,
1071                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1072                       SelectionDAG &DAG) const override;
1073 
1074   SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
1075                         unsigned Flag) const;
       // getTargetNode is overloaded five times; the overloads differ only in
       // the node type of the first parameter.
1076   SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
1077                         unsigned Flag) const;
1078   SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
1079                         unsigned Flag) const;
1080   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
1081                         unsigned Flag) const;
1082   SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
1083                         unsigned Flag) const;
       // The four member templates below are generic over the node type and
       // take a Flags argument that defaults to 0. They are presumably
       // instantiated with the node types accepted by getTargetNode -- confirm
       // against the definitions.
1084   template <class NodeTy>
1085   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1086   template <class NodeTy>
1087   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1088   template <class NodeTy>
1089   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1090   template <class NodeTy>
1091   SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
1092   SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
       // This declaration and those that follow, through
       // LowerDYNAMIC_STACKALLOC, are const, non-override members returning
       // SDValue, with no body inside the class. All take
       // (SDValue Op, SelectionDAG &DAG) except LowerELFTLSLocalExec,
       // LowerELFTLSDescCallSeq, the seven-argument LowerSELECT_CC overload and
       // LowerToPredicatedOp, which carry extra arguments.
1093   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1094   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1095   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1096   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1097   SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
1098                                const SDLoc &DL, SelectionDAG &DAG) const;
1099   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
1100                                  SelectionDAG &DAG) const;
1101   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1102   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1103   SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1104   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
1105   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1106   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
       // Second LowerSELECT_CC overload: receives the condition code, both
       // compared values and both selected values individually.
1107   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
1108                          SDValue TVal, SDValue FVal, const SDLoc &dl,
1109                          SelectionDAG &DAG) const;
1110   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1111   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
1112   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1113   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1114   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
1115   SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
1116   SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
1117   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1118   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
1119   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1120   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1121   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
1122   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1123   SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1124   SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1125   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1126   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1127   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1128   SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
1129   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
1130   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1131   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
       // NewOp is an unsigned opcode-like value supplied by the caller; its
       // permitted values are not visible here (TODO confirm).
1132   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
1133                               unsigned NewOp) const;
1134   SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
1135   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
1136   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1137   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
1138   SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
1139   SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
1140   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
1141   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
1142   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
1143   SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
1144   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
1145   SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
1146   SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
1147   SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
1148   SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
1149   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
1150   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1151   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1152   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1153   SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1154   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1155   SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1156   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1157   SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1158   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
1159   SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
1160   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
1161   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
1162   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
1163   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
1164   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1165   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
1166   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
1167   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1168   SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1169   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1170 
1171   SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;
1172 
       // The helpers below are const, non-override and return SDValue, with no
       // body inside the class. Most take (SDValue Op, SelectionDAG &DAG); the
       // three reduction helpers name their operand ScalarOp instead, and
       // LowerReductionToSVE also takes an explicit opcode.
1173   SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
1174                                                SelectionDAG &DAG) const;
1175   SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
1176                                                SelectionDAG &DAG) const;
1177   SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1178   SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1179   SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
1180   SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
1181   SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
1182                               SelectionDAG &DAG) const;
1183   SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
1184   SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
1185   SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
1186   SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
1187                                             SelectionDAG &DAG) const;
1188   SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
1189                                               SelectionDAG &DAG) const;
1190   SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1191   SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
1192   SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
1193   SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
1194                                              SelectionDAG &DAG) const;
1195   SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
1196   SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
1197   SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
1198   SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
1199   SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
1200                                               SelectionDAG &DAG) const;
1201 
1202   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1203                         SmallVectorImpl<SDNode *> &Created) const override;
       // Every declaration in this run is an override with no body inside the
       // class. Created (above and below) is a non-const list reference,
       // presumably appended to by the callee -- confirm.
1204   SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1205                         SmallVectorImpl<SDNode *> &Created) const override;
       // ExtraSteps and UseOneConst are non-const references, so the estimate
       // hooks can write to them.
1206   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1207                           int &ExtraSteps, bool &UseOneConst,
1208                           bool Reciprocal) const override;
1209   SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1210                            int &ExtraSteps) const override;
1211   SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
1212                            const DenormalMode &Mode) const override;
1213   SDValue getSqrtResultForDenormInput(SDValue Operand,
1214                                       SelectionDAG &DAG) const override;
1215   unsigned combineRepeatedFPDivisors() const override;
1216 
1217   ConstraintType getConstraintType(StringRef Constraint) const override;
       // Inline-asm and named-register overrides follow; all are declarations
       // without a body inside the class.
1218   Register getRegisterByName(const char* RegName, LLT VT,
1219                              const MachineFunction &MF) const override;
1220 
1221   /// Examine constraint string and operand type and determine a weight value.
1222   /// The operand object must already have been set up with the operand type.
1223   ConstraintWeight
1224   getSingleConstraintMatchWeight(AsmOperandInfo &info,
1225                                  const char *constraint) const override;
1226 
       // Returns a pair of an unsigned value and a register class pointer for
       // the given constraint string and value type.
1227   std::pair<unsigned, const TargetRegisterClass *>
1228   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1229                                StringRef Constraint, MVT VT) const override;
1230 
1231   const char *LowerXConstraint(EVT ConstraintVT) const override;
1232 
       // Ops is a non-const vector reference and the function returns void, so
       // results are presumably appended to Ops -- confirm.
1233   void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1234                                     std::vector<SDValue> &Ops,
1235                                     SelectionDAG &DAG) const override;
1236 
1237   InlineAsm::ConstraintCode
1238   getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1239     // "Q" is the only memory constraint given a dedicated code here; every
1240     // other string is delegated to the generic TargetLowering handling.
1241     // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
1242     //        followed by llvm_unreachable so we'll leave them unimplemented in
1243     //        the backend for now.
1244     return ConstraintCode == "Q"
                    ? InlineAsm::ConstraintCode::Q
                    : TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1245   }
1246 
1247   /// Handle lowering of flag assembly outputs.
       /// \p Chain and \p Flag are taken by non-const reference, so the callee
       /// can update both.
1248   SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1249                                       const SDLoc &DL,
1250                                       const AsmOperandInfo &Constraint,
1251                                       SelectionDAG &DAG) const override;
1252 
1253   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
1254   bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
1255   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1256   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1257   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
       // getIndexedAddressParts is the only member in this run that is not
       // marked `override`. It has the same parameters as the post-indexed
       // override below minus the ISD::MemIndexedMode argument. Base and Offset
       // are non-const references in all three *AddressParts functions.
1258   bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1259                               SDValue &Offset, SelectionDAG &DAG) const;
1260   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
1261                                  ISD::MemIndexedMode &AM,
1262                                  SelectionDAG &DAG) const override;
1263   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1264                                   SDValue &Offset, ISD::MemIndexedMode &AM,
1265                                   SelectionDAG &DAG) const override;
1266   bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
1267                        bool IsPre, MachineRegisterInfo &MRI) const override;
1268 
1269   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1270                           SelectionDAG &DAG) const override;
       // The two helpers below repeat the override's parameter list but are not
       // overrides themselves. All three return void and take Results by
       // non-const reference, so output presumably goes into Results -- confirm.
1271   void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1272                              SelectionDAG &DAG) const;
1273   void ReplaceExtractSubVectorResults(SDNode *N,
1274                                       SmallVectorImpl<SDValue> &Results,
1275                                       SelectionDAG &DAG) const;
1276 
1277   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
       // ^ Both parameters are unnamed in this declaration.
1278 
1279   void finalizeLowering(MachineFunction &MF) const override;
1280 
1281   bool shouldLocalize(const MachineInstr &MI,
1282                       const TargetTransformInfo *TTI) const override;
1283 
       // Known and TLO are non-const references, so the callee can write to
       // them; the demanded bits and elements are read-only inputs.
1284   bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1285                                          const APInt &OriginalDemandedBits,
1286                                          const APInt &OriginalDemandedElts,
1287                                          KnownBits &Known,
1288                                          TargetLoweringOpt &TLO,
1289                                          unsigned Depth) const override;
1290 
1291   bool isTargetCanonicalConstantNode(SDValue Op) const override;
1292 
1293   // With the exception of data-predicate transitions, no instructions are
1294   // required to cast between legal scalable vector types. However:
1295   //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
1296   //     is not universally usable.
1297   //  2. Most unpacked integer types are not legal and thus integer extends
1298   //     cannot be used to convert between unpacked and packed types.
1299   // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
1300   // to transition between unpacked and packed types of the same element type,
1301   // with BITCAST used otherwise.
1302   // This function does not handle predicate bitcasts.
1303   SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
1304 
1305   // Returns the runtime value for PSTATE.SM by generating a call to
1306   // __arm_sme_state.
       // Note that DL is taken by value in this signature.
1307   SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
1308                              EVT VT) const;
1309 
       // The remaining two members are overrides with no body inside the class.
1310   bool preferScalarizeSplat(SDNode *N) const override;
1311 
1312   unsigned getMinimumJumpTableEntries() const override;
1313 };
1314 
1315 namespace AArch64 {
     // Declared here only: takes the function lowering info by reference and the
     // target library info by pointer, and returns a FastISel pointer. Ownership
     // of the returned object is not stated in this header -- confirm at the
     // definition.
1316 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1317                          const TargetLibraryInfo *libInfo);
1318 } // end namespace AArch64
1319 
1320 } // end namespace llvm
1321 
1322 #endif
1323