1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16 
17 #include "AArch64.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/IR/CallingConv.h"
23 #include "llvm/IR/Instruction.h"
24 
25 namespace llvm {
26 
27 namespace AArch64ISD {
28 
29 // For predicated nodes where the result is a vector, the operation is
30 // controlled by a governing predicate and the inactive lanes are explicitly
31 // defined with a value. Please stick to the following naming convention:
32 //
33 //    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
34 //                        to source operand OP<n>.
35 //
36 //    _MERGE_ZERO         The result value is a vector with inactive lanes
37 //                        actively zeroed.
38 //
39 //    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
40 //                        to the last source operand, whose only purpose is
41 //                        to be a passthru value.
42 //
43 // For other cases where no explicit action is needed to set the inactive lanes,
44 // or when the result is not a vector and it is needed or helpful to
45 // distinguish a node from similar unpredicated nodes, use:
46 //
47 //    _PRED
48 //
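// As an illustrative sketch of how these conventions appear when building
// nodes (the operand orders and variable names below are assumptions made for
// the example, not normative definitions):
//
//    // Unary _MERGE_PASSTHRU op: inactive lanes take the trailing passthru.
//    SDValue Neg = DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, DL, VT,
//                              Pg, Op, PassThru);
//
//    // Binary _PRED op: inactive lanes produce undefined results.
//    SDValue Add = DAG.getNode(AArch64ISD::FADD_PRED, DL, VT, Pg, Op1, Op2);
//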
49 enum NodeType : unsigned {
50   FIRST_NUMBER = ISD::BUILTIN_OP_END,
51   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
52   CALL,         // Function call.
53 
54   // Pseudo for an Objective-C call that gets emitted together with a special
55   // `mov x29, x29` marker instruction.
56   CALL_RVMARKER,
57 
58   // Produces the full sequence of instructions for getting the thread pointer
59   // offset of a variable into X0, using the TLSDesc model.
60   TLSDESC_CALLSEQ,
61   ADRP,     // Page address of a TargetGlobalAddress operand.
62   ADR,      // ADR: form a PC-relative address.
63   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
64   LOADgot,  // Load from automatically generated descriptor (e.g. Global
65             // Offset Table, TLS record).
66   RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
67   BRCOND,   // Conditional branch instruction; "b.cond".
68   CSEL,
69   CSINV, // Conditional select invert.
70   CSNEG, // Conditional select negate.
71   CSINC, // Conditional select increment.
72 
73   // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
74   // ELF.
75   THREAD_POINTER,
76   ADC,
77   SBC, // adc, sbc instructions
78 
79   // Predicated instructions where inactive lanes produce undefined results.
80   ADD_PRED,
81   FADD_PRED,
82   FDIV_PRED,
83   FMA_PRED,
84   FMAXNM_PRED,
85   FMINNM_PRED,
86   FMAX_PRED,
87   FMIN_PRED,
88   FMUL_PRED,
89   FSUB_PRED,
90   MUL_PRED,
91   MULHS_PRED,
92   MULHU_PRED,
93   SDIV_PRED,
94   SHL_PRED,
95   SMAX_PRED,
96   SMIN_PRED,
97   SRA_PRED,
98   SRL_PRED,
99   SUB_PRED,
100   UDIV_PRED,
101   UMAX_PRED,
102   UMIN_PRED,
103 
104   // Unpredicated vector instructions
105   BIC,
106 
107   // Predicated instructions with the result of inactive lanes provided by the
108   // last operand.
109   FABS_MERGE_PASSTHRU,
110   FCEIL_MERGE_PASSTHRU,
111   FFLOOR_MERGE_PASSTHRU,
112   FNEARBYINT_MERGE_PASSTHRU,
113   FNEG_MERGE_PASSTHRU,
114   FRECPX_MERGE_PASSTHRU,
115   FRINT_MERGE_PASSTHRU,
116   FROUND_MERGE_PASSTHRU,
117   FROUNDEVEN_MERGE_PASSTHRU,
118   FSQRT_MERGE_PASSTHRU,
119   FTRUNC_MERGE_PASSTHRU,
120   FP_ROUND_MERGE_PASSTHRU,
121   FP_EXTEND_MERGE_PASSTHRU,
122   UINT_TO_FP_MERGE_PASSTHRU,
123   SINT_TO_FP_MERGE_PASSTHRU,
124   FCVTZU_MERGE_PASSTHRU,
125   FCVTZS_MERGE_PASSTHRU,
126   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
127   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
128   ABS_MERGE_PASSTHRU,
129   NEG_MERGE_PASSTHRU,
130 
131   SETCC_MERGE_ZERO,
132 
133   // Arithmetic instructions which write flags.
134   ADDS,
135   SUBS,
136   ADCS,
137   SBCS,
138   ANDS,
139 
140   // Conditional compares. Operands: left, right, falsecc, cc, flags
141   CCMP,
142   CCMN,
143   FCCMP,
144 
145   // Floating point comparison
146   FCMP,
147 
148   // Scalar extract
149   EXTR,
150 
151   // Scalar-to-vector duplication
152   DUP,
153   DUPLANE8,
154   DUPLANE16,
155   DUPLANE32,
156   DUPLANE64,
157 
158   // Vector immediate moves
159   MOVI,
160   MOVIshift,
161   MOVIedit,
162   MOVImsl,
163   FMOV,
164   MVNIshift,
165   MVNImsl,
166 
167   // Vector immediate ops
168   BICi,
169   ORRi,
170 
171   // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
172   // element must be identical.
173   BSP,
174 
175   // Vector shuffles
176   ZIP1,
177   ZIP2,
178   UZP1,
179   UZP2,
180   TRN1,
181   TRN2,
182   REV16,
183   REV32,
184   REV64,
185   EXT,
186   SPLICE,
187 
188   // Vector shift by scalar
189   VSHL,
190   VLSHR,
191   VASHR,
192 
193   // Vector saturating and rounding shifts by immediate
194   SQSHL_I,
195   UQSHL_I,
196   SQSHLU_I,
197   SRSHR_I,
198   URSHR_I,
199 
200   // Vector shift by constant and insert
201   VSLI,
202   VSRI,
203 
204   // Vector comparisons
205   CMEQ,
206   CMGE,
207   CMGT,
208   CMHI,
209   CMHS,
210   FCMEQ,
211   FCMGE,
212   FCMGT,
213 
214   // Vector zero comparisons
215   CMEQz,
216   CMGEz,
217   CMGTz,
218   CMLEz,
219   CMLTz,
220   FCMEQz,
221   FCMGEz,
222   FCMGTz,
223   FCMLEz,
224   FCMLTz,
225 
226   // Vector across-lanes addition
227   // Only the lower result lane is defined.
228   SADDV,
229   UADDV,
230 
231   // Vector halving addition
232   SHADD,
233   UHADD,
234 
235   // Vector rounding halving addition
236   SRHADD,
237   URHADD,
238 
239   // Unsigned Add Long Pairwise
240   UADDLP,
241 
242   // udot/sdot instructions
243   UDOT,
244   SDOT,
245 
246   // Vector across-lanes min/max
247   // Only the lower result lane is defined.
248   SMINV,
249   UMINV,
250   SMAXV,
251   UMAXV,
252 
253   SADDV_PRED,
254   UADDV_PRED,
255   SMAXV_PRED,
256   UMAXV_PRED,
257   SMINV_PRED,
258   UMINV_PRED,
259   ORV_PRED,
260   EORV_PRED,
261   ANDV_PRED,
262 
263   // Vector bitwise insertion
264   BIT,
265 
266   // Compare-and-branch
267   CBZ,
268   CBNZ,
269   TBZ,
270   TBNZ,
271 
272   // Tail calls
273   TC_RETURN,
274 
275   // Custom prefetch handling
276   PREFETCH,
277 
278   // {s|u}int to FP within a FP register.
279   SITOF,
280   UITOF,
281 
282   /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
283   /// world w.r.t. vectors, which causes additional REV instructions to be
284   /// generated to compensate for the byte-swapping. But sometimes we do
285   /// need to re-interpret the data in SIMD vector registers in big-endian
286   /// mode without emitting such REV instructions.
287   NVCAST,
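  // For example (an illustrative sketch), NVCAST can re-interpret a v2i64
  // value as v4i32 in-register without the byte swapping that an ISD::BITCAST
  // would imply on a big-endian target:
  //
  //    SDValue AsV4I32 = DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v4i32, Val);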
288 
289   MRS, // MRS, also sets the flags via a glue.
290 
291   SMULL,
292   UMULL,
293 
294   // Reciprocal estimates and steps.
295   FRECPE,
296   FRECPS,
297   FRSQRTE,
298   FRSQRTS,
299 
300   SUNPKHI,
301   SUNPKLO,
302   UUNPKHI,
303   UUNPKLO,
304 
305   CLASTA_N,
306   CLASTB_N,
307   LASTA,
308   LASTB,
309   TBL,
310 
311   // Floating-point reductions.
312   FADDA_PRED,
313   FADDV_PRED,
314   FMAXV_PRED,
315   FMAXNMV_PRED,
316   FMINV_PRED,
317   FMINNMV_PRED,
318 
319   INSR,
320   PTEST,
321   PTRUE,
322 
323   BITREVERSE_MERGE_PASSTHRU,
324   BSWAP_MERGE_PASSTHRU,
325   CTLZ_MERGE_PASSTHRU,
326   CTPOP_MERGE_PASSTHRU,
327   DUP_MERGE_PASSTHRU,
328   INDEX_VECTOR,
329 
330   // Cast between vectors of the same element type that differ in length.
331   REINTERPRET_CAST,
332 
333   // Nodes to build an LD64B / ST64B 64-byte quantity out of i64s, and vice versa
334   LS64_BUILD,
335   LS64_EXTRACT,
336 
337   LD1_MERGE_ZERO,
338   LD1S_MERGE_ZERO,
339   LDNF1_MERGE_ZERO,
340   LDNF1S_MERGE_ZERO,
341   LDFF1_MERGE_ZERO,
342   LDFF1S_MERGE_ZERO,
343   LD1RQ_MERGE_ZERO,
344   LD1RO_MERGE_ZERO,
345 
346   // Structured loads.
347   SVE_LD2_MERGE_ZERO,
348   SVE_LD3_MERGE_ZERO,
349   SVE_LD4_MERGE_ZERO,
350 
351   // Unsigned gather loads.
352   GLD1_MERGE_ZERO,
353   GLD1_SCALED_MERGE_ZERO,
354   GLD1_UXTW_MERGE_ZERO,
355   GLD1_SXTW_MERGE_ZERO,
356   GLD1_UXTW_SCALED_MERGE_ZERO,
357   GLD1_SXTW_SCALED_MERGE_ZERO,
358   GLD1_IMM_MERGE_ZERO,
359 
360   // Signed gather loads
361   GLD1S_MERGE_ZERO,
362   GLD1S_SCALED_MERGE_ZERO,
363   GLD1S_UXTW_MERGE_ZERO,
364   GLD1S_SXTW_MERGE_ZERO,
365   GLD1S_UXTW_SCALED_MERGE_ZERO,
366   GLD1S_SXTW_SCALED_MERGE_ZERO,
367   GLD1S_IMM_MERGE_ZERO,
368 
369   // Unsigned first-faulting gather loads.
370   GLDFF1_MERGE_ZERO,
371   GLDFF1_SCALED_MERGE_ZERO,
372   GLDFF1_UXTW_MERGE_ZERO,
373   GLDFF1_SXTW_MERGE_ZERO,
374   GLDFF1_UXTW_SCALED_MERGE_ZERO,
375   GLDFF1_SXTW_SCALED_MERGE_ZERO,
376   GLDFF1_IMM_MERGE_ZERO,
377 
378   // Signed first-faulting gather loads.
379   GLDFF1S_MERGE_ZERO,
380   GLDFF1S_SCALED_MERGE_ZERO,
381   GLDFF1S_UXTW_MERGE_ZERO,
382   GLDFF1S_SXTW_MERGE_ZERO,
383   GLDFF1S_UXTW_SCALED_MERGE_ZERO,
384   GLDFF1S_SXTW_SCALED_MERGE_ZERO,
385   GLDFF1S_IMM_MERGE_ZERO,
386 
387   // Non-temporal gather loads
388   GLDNT1_MERGE_ZERO,
389   GLDNT1_INDEX_MERGE_ZERO,
390   GLDNT1S_MERGE_ZERO,
391 
392   // Contiguous masked store.
393   ST1_PRED,
394 
395   // Scatter store
396   SST1_PRED,
397   SST1_SCALED_PRED,
398   SST1_UXTW_PRED,
399   SST1_SXTW_PRED,
400   SST1_UXTW_SCALED_PRED,
401   SST1_SXTW_SCALED_PRED,
402   SST1_IMM_PRED,
403 
404   // Non-temporal scatter store
405   SSTNT1_PRED,
406   SSTNT1_INDEX_PRED,
407 
408   // Strict (exception-raising) floating point comparison
409   STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
410   STRICT_FCMPE,
411 
412   // NEON Load/Store with post-increment base updates
413   LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
414   LD3post,
415   LD4post,
416   ST2post,
417   ST3post,
418   ST4post,
419   LD1x2post,
420   LD1x3post,
421   LD1x4post,
422   ST1x2post,
423   ST1x3post,
424   ST1x4post,
425   LD1DUPpost,
426   LD2DUPpost,
427   LD3DUPpost,
428   LD4DUPpost,
429   LD1LANEpost,
430   LD2LANEpost,
431   LD3LANEpost,
432   LD4LANEpost,
433   ST2LANEpost,
434   ST3LANEpost,
435   ST4LANEpost,
436 
437   STG,
438   STZG,
439   ST2G,
440   STZ2G,
441 
442   LDP,
443   STP,
444   STNP,
445 };
446 
447 } // end namespace AArch64ISD
448 
449 namespace {
450 
451 // Any instruction that defines a 32-bit result zeros out the high half of the
452 // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
453 // be copying from a truncate. But any other 32-bit operation will zero-extend
454 // up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
455 // 32 bits; they're probably just qualifying a CopyFromReg.
456 static inline bool isDef32(const SDNode &N) {
457   unsigned Opc = N.getOpcode();
458   return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
459          Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
460          Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
461          Opc != ISD::FREEZE;
462 }
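
// Illustrative use of the predicate above (a sketch; the surrounding selection
// code and names are assumed, not taken from this file): a 32-to-64-bit
// zero-extension can be skipped when the source already zeroes the upper half:
//
//   if (Src.getValueType() == MVT::i32 && isDef32(*Src.getNode()))
//     ; // Reuse Src via SUBREG_TO_REG instead of emitting an explicit zext.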
463 
464 } // end anonymous namespace
465 
466 namespace AArch64 {
467 /// Possible values of the current rounding mode, which is specified in bits
468 /// 23:22 of FPCR.
469 enum Rounding {
470   RN = 0,    // Round to Nearest
471   RP = 1,    // Round towards Plus infinity
472   RM = 2,    // Round towards Minus infinity
473   RZ = 3,    // Round towards Zero
474   rmMask = 3 // Bit mask selecting rounding mode
475 };
476 
477 // Bit position of rounding mode bits in FPCR.
478 const unsigned RoundingBitsPos = 22;
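
// Illustrative sketch only: decoding the rounding mode from a raw FPCR value
// using the constants above. The helper name is hypothetical and is not part
// of the upstream interface.
constexpr Rounding getRoundingModeFromFPCR(uint64_t FPCR) {
  // RMode occupies FPCR bits 23:22.
  return static_cast<Rounding>((FPCR >> RoundingBitsPos) & rmMask);
}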
479 } // namespace AArch64
480 
481 class AArch64Subtarget;
482 class AArch64TargetMachine;
483 
484 class AArch64TargetLowering : public TargetLowering {
485 public:
486   explicit AArch64TargetLowering(const TargetMachine &TM,
487                                  const AArch64Subtarget &STI);
488 
489   /// Selects the correct CCAssignFn for a given CallingConvention value.
490   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
491 
492   /// Selects the correct CCAssignFn for a given CallingConvention value.
493   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
494 
495   /// Determine which of the bits specified in Mask are known to be either zero
496   /// or one and return them in the KnownZero/KnownOne bitsets.
497   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
498                                      const APInt &DemandedElts,
499                                      const SelectionDAG &DAG,
500                                      unsigned Depth = 0) const override;
501 
502   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
503     // Returning i64 unconditionally here (i.e. even for ILP32) means that the
504     // *DAG* representation of pointers will always be 64 bits. They will be
505     // truncated and extended when transferred to memory, but the 64-bit DAG
506     // allows us to use AArch64's addressing modes much more easily.
507     return MVT::getIntegerVT(64);
508   }
509 
510   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
511                                     const APInt &DemandedElts,
512                                     TargetLoweringOpt &TLO) const override;
513 
514   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
515 
516   /// Returns true if the target allows unaligned memory accesses of the
517   /// specified type.
518   bool allowsMisalignedMemoryAccesses(
519       EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
520       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
521       bool *Fast = nullptr) const override;
522   /// LLT variant.
523   bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
524                                       Align Alignment,
525                                       MachineMemOperand::Flags Flags,
526                                       bool *Fast = nullptr) const override;
527 
528   /// Provide custom lowering hooks for some operations.
529   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
530 
531   const char *getTargetNodeName(unsigned Opcode) const override;
532 
533   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
534 
535   /// This method returns a target specific FastISel object, or null if the
536   /// target does not support "fast" ISel.
537   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
538                            const TargetLibraryInfo *libInfo) const override;
539 
540   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
541 
542   bool isFPImmLegal(const APFloat &Imm, EVT VT,
543                     bool ForCodeSize) const override;
544 
545   /// Return true if the given shuffle mask can be codegen'd directly, or if it
546   /// should be stack expanded.
547   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
548 
549   /// Return the ISD::SETCC ValueType.
550   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
551                          EVT VT) const override;
552 
553   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
554 
555   MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
556                                   MachineBasicBlock *BB) const;
557 
558   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
559                                            MachineBasicBlock *BB) const;
560 
561   MachineBasicBlock *
562   EmitInstrWithCustomInserter(MachineInstr &MI,
563                               MachineBasicBlock *MBB) const override;
564 
565   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
566                           MachineFunction &MF,
567                           unsigned Intrinsic) const override;
568 
569   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
570                              EVT NewVT) const override;
571 
572   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
573   bool isTruncateFree(EVT VT1, EVT VT2) const override;
574 
575   bool isProfitableToHoist(Instruction *I) const override;
576 
577   bool isZExtFree(Type *Ty1, Type *Ty2) const override;
578   bool isZExtFree(EVT VT1, EVT VT2) const override;
579   bool isZExtFree(SDValue Val, EVT VT2) const override;
580 
581   bool shouldSinkOperands(Instruction *I,
582                           SmallVectorImpl<Use *> &Ops) const override;
583 
584   bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
585 
586   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
587 
588   bool lowerInterleavedLoad(LoadInst *LI,
589                             ArrayRef<ShuffleVectorInst *> Shuffles,
590                             ArrayRef<unsigned> Indices,
591                             unsigned Factor) const override;
592   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
593                              unsigned Factor) const override;
594 
595   bool isLegalAddImmediate(int64_t) const override;
596   bool isLegalICmpImmediate(int64_t) const override;
597 
598   bool shouldConsiderGEPOffsetSplit() const override;
599 
600   EVT getOptimalMemOpType(const MemOp &Op,
601                           const AttributeList &FuncAttributes) const override;
602 
603   LLT getOptimalMemOpLLT(const MemOp &Op,
604                          const AttributeList &FuncAttributes) const override;
605 
606   /// Return true if the addressing mode represented by AM is legal for this
607   /// target, for a load/store of the specified type.
608   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
609                              unsigned AS,
610                              Instruction *I = nullptr) const override;
611 
612   /// Return the cost of the scaling factor used in the addressing
613   /// mode represented by AM for this target, for a load/store
614   /// of the specified type.
615   /// If the AM is supported, the return value must be >= 0.
616   /// If the AM is not supported, it returns a negative value.
617   InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
618                                        Type *Ty, unsigned AS) const override;
619 
620   /// Return true if an FMA operation is faster than a pair of fmul and fadd
621   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
622   /// returns true; otherwise fmuladd is expanded to fmul + fadd.
623   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
624                                   EVT VT) const override;
625   bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
626 
627   bool generateFMAsInMachineCombiner(EVT VT,
628                                      CodeGenOpt::Level OptLevel) const override;
629 
630   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
631 
632   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
633   bool isDesirableToCommuteWithShift(const SDNode *N,
634                                      CombineLevel Level) const override;
635 
636   /// Returns true if it is beneficial to convert a load of a constant
637   /// to just the constant itself.
638   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
639                                          Type *Ty) const override;
640 
641   /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
642   /// with this index.
643   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
644                                unsigned Index) const override;
645 
646   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
647                             bool MathUsed) const override {
648     // Using overflow ops for overflow checks only should be beneficial on
649     // AArch64.
650     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
651   }
652 
653   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
654                         AtomicOrdering Ord) const override;
655   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
656                               AtomicOrdering Ord) const override;
657 
658   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
659 
660   TargetLoweringBase::AtomicExpansionKind
661   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
662   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
663   TargetLoweringBase::AtomicExpansionKind
664   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
665 
666   TargetLoweringBase::AtomicExpansionKind
667   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
668 
669   bool useLoadStackGuardNode() const override;
670   TargetLoweringBase::LegalizeTypeAction
671   getPreferredVectorAction(MVT VT) const override;
672 
673   /// If the target has a standard location for the stack protector cookie,
674   /// returns the address of that location. Otherwise, returns nullptr.
675   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
676 
677   void insertSSPDeclarations(Module &M) const override;
678   Value *getSDagStackGuard(const Module &M) const override;
679   Function *getSSPStackGuardCheck(const Module &M) const override;
680 
681   /// If the target has a standard location for the unsafe stack pointer,
682   /// returns the address of that location. Otherwise, returns nullptr.
683   Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
684 
685   /// If a physical register, this returns the register that receives the
686   /// exception address on entry to an EH pad.
687   Register
688   getExceptionPointerRegister(const Constant *PersonalityFn) const override {
689     // FIXME: This is a guess. Has this been defined yet?
690     return AArch64::X0;
691   }
692 
693   /// If a physical register, this returns the register that receives the
694   /// exception typeid on entry to a landing pad.
695   Register
696   getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
697     // FIXME: This is a guess. Has this been defined yet?
698     return AArch64::X1;
699   }
700 
701   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
702 
703   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
704                         const SelectionDAG &DAG) const override {
705     // Do not merge to a float value size (128 bits) if the function has the
706     // NoImplicitFloat attribute set.
707 
708     bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
709         Attribute::NoImplicitFloat);
710 
711     if (NoFloat)
712       return (MemVT.getSizeInBits() <= 64);
713     return true;
714   }
715 
716   bool isCheapToSpeculateCttz() const override {
717     return true;
718   }
719 
720   bool isCheapToSpeculateCtlz() const override {
721     return true;
722   }
723 
724   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
725 
726   bool hasAndNotCompare(SDValue V) const override {
727     // We can use bics for any scalar.
728     return V.getValueType().isScalarInteger();
729   }
730 
731   bool hasAndNot(SDValue Y) const override {
732     EVT VT = Y.getValueType();
733 
734     if (!VT.isVector())
735       return hasAndNotCompare(Y);
736 
737     return VT.getSizeInBits() >= 64; // vector 'bic'
738   }
739 
740   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
741       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
742       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
743       SelectionDAG &DAG) const override;
744 
745   bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
746 
747   bool shouldTransformSignedTruncationCheck(EVT XVT,
748                                             unsigned KeptBits) const override {
749     // For vectors, we don't have a preference.
750     if (XVT.isVector())
751       return false;
752 
753     auto VTIsOk = [](EVT VT) -> bool {
754       return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
755              VT == MVT::i64;
756     };
757 
758     // We are OK with KeptBitsVT being byte/word/dword, which is what SXT supports.
759     // XVT will be larger than KeptBitsVT.
760     MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
761     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
762   }
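
  // An illustrative consequence of the hook above (a sketch): a check such as
  // "does this i64 value sign-fit in 8 bits" can keep KeptBits == 8 and be
  // lowered with a sign-extending compare, roughly
  //   cmp x0, w0, sxtb
  // rather than a shift-left/shift-right pair.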
763 
764   bool preferIncOfAddToSubOfNot(EVT VT) const override;
765 
766   bool hasBitPreservingFPLogic(EVT VT) const override {
767     // FIXME: Is this always true? It should be true for vectors at least.
768     return VT == MVT::f32 || VT == MVT::f64;
769   }
770 
771   bool supportSplitCSR(MachineFunction *MF) const override {
772     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
773            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
774   }
775   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
776   void insertCopiesSplitCSR(
777       MachineBasicBlock *Entry,
778       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
779 
780   bool supportSwiftError() const override {
781     return true;
782   }
783 
784   /// Enable aggressive FMA fusion on targets that want it.
785   bool enableAggressiveFMAFusion(EVT VT) const override;
786 
787   /// Returns the size of the platform's va_list object.
788   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
789 
790   /// Returns true if \p VecTy is a legal interleaved access type. This
791   /// function checks the vector element type and the overall width of the
792   /// vector.
793   bool isLegalInterleavedAccessType(VectorType *VecTy,
794                                     const DataLayout &DL) const;
795 
796   /// Returns the number of interleaved accesses that will be generated when
797   /// lowering accesses of the given type.
798   unsigned getNumInterleavedAccesses(VectorType *VecTy,
799                                      const DataLayout &DL) const;
800 
801   MachineMemOperand::Flags getTargetMMOFlags(
802     const Instruction &I) const override;
803 
804   bool functionArgumentNeedsConsecutiveRegisters(
805       Type *Ty, CallingConv::ID CallConv, bool isVarArg,
806       const DataLayout &DL) const override;
807 
808   /// Used for exception handling on Win64.
809   bool needsFixedCatchObjects() const override;
810 
811   bool fallBackToDAGISel(const Instruction &Inst) const override;
812 
813   /// SVE code generation for fixed length vectors does not custom lower
814   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
815   /// merge. However, merging them creates a BUILD_VECTOR that is just as
816   /// illegal as the original, thus leading to an infinite legalisation loop.
817   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
818   /// vector types this override can be removed.
819   bool mergeStoresAfterLegalization(EVT VT) const override;
820 
821   // If the platform/function should have a redzone, return the size in bytes.
822   unsigned getRedZoneSize(const Function &F) const {
823     if (F.hasFnAttribute(Attribute::NoRedZone))
824       return 0;
825     return 128;
826   }
827 
828   bool isAllActivePredicate(SDValue N) const;
829   EVT getPromotedVTForPredicate(EVT VT) const;
830 
831   EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
832                              bool AllowUnknown = false) const override;
833 
834 private:
835   /// Keep a pointer to the AArch64Subtarget around so that we can
836   /// make the right decision when generating code for different targets.
837   const AArch64Subtarget *Subtarget;
838 
839   bool isExtFreeImpl(const Instruction *Ext) const override;
840 
841   void addTypeForNEON(MVT VT);
842   void addTypeForFixedLengthSVE(MVT VT);
843   void addDRTypeForNEON(MVT VT);
844   void addQRTypeForNEON(MVT VT);
845 
846   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
847                                bool isVarArg,
848                                const SmallVectorImpl<ISD::InputArg> &Ins,
849                                const SDLoc &DL, SelectionDAG &DAG,
850                                SmallVectorImpl<SDValue> &InVals) const override;
851 
852   SDValue LowerCall(CallLoweringInfo & /*CLI*/,
853                     SmallVectorImpl<SDValue> &InVals) const override;
854 
855   SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
856                           CallingConv::ID CallConv, bool isVarArg,
857                           const SmallVectorImpl<ISD::InputArg> &Ins,
858                           const SDLoc &DL, SelectionDAG &DAG,
859                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
860                           SDValue ThisVal) const;
861 
862   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
863   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
864   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
865 
866   SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
867   SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
868 
869   SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
870 
871   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
872 
873   bool isEligibleForTailCallOptimization(
874       SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
875       const SmallVectorImpl<ISD::OutputArg> &Outs,
876       const SmallVectorImpl<SDValue> &OutVals,
877       const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
878 
879   /// Finds the incoming stack arguments which overlap the given fixed stack
880   /// object and incorporates their load into the current chain. This prevents
881   /// an upcoming store from clobbering the stack argument before it's used.
882   SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
883                               MachineFrameInfo &MFI, int ClobberedFI) const;
884 
885   bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
886 
887   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
888                            SDValue &Chain) const;
889 
890   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
891                       bool isVarArg,
892                       const SmallVectorImpl<ISD::OutputArg> &Outs,
893                       LLVMContext &Context) const override;
894 
895   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
896                       const SmallVectorImpl<ISD::OutputArg> &Outs,
897                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
898                       SelectionDAG &DAG) const override;
899 
900   SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
901                         unsigned Flag) const;
902   SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
903                         unsigned Flag) const;
904   SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
905                         unsigned Flag) const;
906   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
907                         unsigned Flag) const;
908   template <class NodeTy>
909   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
910   template <class NodeTy>
911   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
912   template <class NodeTy>
913   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
914   template <class NodeTy>
915   SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
916   SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
917   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
918   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
919   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
920   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
921   SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
922                                const SDLoc &DL, SelectionDAG &DAG) const;
923   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
924                                  SelectionDAG &DAG) const;
925   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
926   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
927   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
928   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
929   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
930   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
931                          SDValue TVal, SDValue FVal, const SDLoc &dl,
932                          SelectionDAG &DAG) const;
933   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
934   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
935   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
936   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
937   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
938   SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
939   SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
940   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
941   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
942   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
943   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
944   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
945   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
946   SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
947   SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
948   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
949   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
950   SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
951   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
952   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
953   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
954   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
955   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
956                               bool OverrideNEON = false) const;
957   SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
958   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
959   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
960   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
961   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
962   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
963   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
964   SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
965   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
966   SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
967   SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
968   SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
969   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
970   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
971   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
972   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
973   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
974   SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
975   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
976   SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
977   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
978   SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
979   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
980   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
981   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
982   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
983   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
984   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
985   SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
986   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
987   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
988   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
989                                          SDValue &Size,
990                                          SelectionDAG &DAG) const;
991   SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
992                              EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
993 
994   SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
995                                                SelectionDAG &DAG) const;
996   SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
997                                                SelectionDAG &DAG) const;
998   SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
999   SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1000   SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
1001   SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
1002   SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
1003                               SelectionDAG &DAG) const;
1004   SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
1005   SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
1006   SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
1007   SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
1008                                             SelectionDAG &DAG) const;
1009   SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
1010                                               SelectionDAG &DAG) const;
1011   SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1012   SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
1013   SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
1014   SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
1015                                              SelectionDAG &DAG) const;
1016   SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
1017   SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
1018   SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
1019   SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
1020   SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
1021                                               SelectionDAG &DAG) const;
1022 
1023   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1024                         SmallVectorImpl<SDNode *> &Created) const override;
1025   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1026                           int &ExtraSteps, bool &UseOneConst,
1027                           bool Reciprocal) const override;
1028   SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1029                            int &ExtraSteps) const override;
1030   SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
1031                            const DenormalMode &Mode) const override;
1032   SDValue getSqrtResultForDenormInput(SDValue Operand,
1033                                       SelectionDAG &DAG) const override;
1034   unsigned combineRepeatedFPDivisors() const override;
1035 
1036   ConstraintType getConstraintType(StringRef Constraint) const override;
1037   Register getRegisterByName(const char* RegName, LLT VT,
1038                              const MachineFunction &MF) const override;
1039 
1040   /// Examine constraint string and operand type and determine a weight value.
1041   /// The operand object must already have been set up with the operand type.
1042   ConstraintWeight
1043   getSingleConstraintMatchWeight(AsmOperandInfo &info,
1044                                  const char *constraint) const override;
1045 
1046   std::pair<unsigned, const TargetRegisterClass *>
1047   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1048                                StringRef Constraint, MVT VT) const override;
1049 
1050   const char *LowerXConstraint(EVT ConstraintVT) const override;
1051 
1052   void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1053                                     std::vector<SDValue> &Ops,
1054                                     SelectionDAG &DAG) const override;
1055 
1056   unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1057     if (ConstraintCode == "Q")
1058       return InlineAsm::Constraint_Q;
1059     // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
1060     //        followed by llvm_unreachable so we'll leave them unimplemented in
1061     //        the backend for now.
1062     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1063   }
1064 
1065   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
1066   bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
1067   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1068   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1069   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1070   bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
1071                               ISD::MemIndexedMode &AM, bool &IsInc,
1072                               SelectionDAG &DAG) const;
1073   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
1074                                  ISD::MemIndexedMode &AM,
1075                                  SelectionDAG &DAG) const override;
1076   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1077                                   SDValue &Offset, ISD::MemIndexedMode &AM,
1078                                   SelectionDAG &DAG) const override;
1079 
1080   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1081                           SelectionDAG &DAG) const override;
1082   void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1083                              SelectionDAG &DAG) const;
1084   void ReplaceExtractSubVectorResults(SDNode *N,
1085                                       SmallVectorImpl<SDValue> &Results,
1086                                       SelectionDAG &DAG) const;
1087 
1088   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
1089 
1090   void finalizeLowering(MachineFunction &MF) const override;
1091 
1092   bool shouldLocalize(const MachineInstr &MI,
1093                       const TargetTransformInfo *TTI) const override;
1094 
1095   bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1096                                          const APInt &OriginalDemandedBits,
1097                                          const APInt &OriginalDemandedElts,
1098                                          KnownBits &Known,
1099                                          TargetLoweringOpt &TLO,
1100                                          unsigned Depth) const override;
1101 
1102   // Normally SVE is only used for byte-sized vectors that do not fit within a
1103   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
1104   // used for 64-bit and 128-bit vectors as well.
1105   bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
1106 
1107   // With the exception of data-predicate transitions, no instructions are
1108   // required to cast between legal scalable vector types. However:
1109   //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
1110   //     is not universally usable.
1111   //  2. Most unpacked integer types are not legal and thus integer extends
1112   //     cannot be used to convert between unpacked and packed types.
1113   // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
1114   // to transition between unpacked and packed types of the same element type,
1115   // with BITCAST used otherwise.
1116   SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
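
  // An illustrative example of the multiphase casting described above (a
  // sketch): "bitcasting" an unpacked nxv2f32 to nxv2i64 may be expressed as
  //   nxv2f32 --REINTERPRET_CAST--> nxv4f32  (unpacked to packed, same
  //                                           element type)
  //   nxv4f32 --BITCAST-----------> nxv2i64  (same total bit width)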
1117 
1118   bool isConstantUnsignedBitfieldExtactLegal(unsigned Opc, LLT Ty1,
1119                                              LLT Ty2) const override;
1120 };
1121 
1122 namespace AArch64 {
1123 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1124                          const TargetLibraryInfo *libInfo);
1125 } // end namespace AArch64
1126 
1127 } // end namespace llvm
1128 
1129 #endif
1130