xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h (revision d56accc7c3dcc897489b6a07834763a03b9f3d68)
1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16 
17 #include "AArch64.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/SelectionDAG.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/IR/CallingConv.h"
23 #include "llvm/IR/Instruction.h"
24 
25 namespace llvm {
26 
27 namespace AArch64ISD {
28 
29 // For predicated nodes where the result is a vector, the operation is
30 // controlled by a governing predicate and the inactive lanes are explicitly
31 // defined with a value, please stick to the following naming convention:
32 //
33 //    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
34 //                        to source operand OP<n>.
35 //
36 //    _MERGE_ZERO         The result value is a vector with inactive lanes
37 //                        actively zeroed.
38 //
39 //    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
40 //                        to the last source operand, whose only purpose is to
41 //                        provide a passthru value.
42 //
43 // For other cases where no explicit action is needed to set the inactive lanes,
44 // or when the result is not a vector and it is needed or helpful to
45 // distinguish a node from similar unpredicated nodes, use:
46 //
47 //    _PRED
48 //
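// For example (an illustrative reading of the definitions below): FADD_PRED
// leaves inactive lanes undefined, SETCC_MERGE_ZERO zeroes them,
// SRAD_MERGE_OP1 copies them from source operand 1, and FNEG_MERGE_PASSTHRU
// takes them from its trailing passthru operand.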
49 enum NodeType : unsigned {
50   FIRST_NUMBER = ISD::BUILTIN_OP_END,
51   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
52   CALL,         // Function call.
53 
54   // Pseudo for an Objective-C call that gets emitted together with a special
55   // `mov x29, x29` marker instruction.
56   CALL_RVMARKER,
57 
58   // Produces the full sequence of instructions for getting the thread pointer
59   // offset of a variable into X0, using the TLSDesc model.
60   TLSDESC_CALLSEQ,
61   ADRP,     // Page address of a TargetGlobalAddress operand.
62   ADR,      // ADR: PC-relative address, used for the tiny code model.
63   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
64   LOADgot,  // Load from automatically generated descriptor (e.g. Global
65             // Offset Table, TLS record).
66   RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
67   BRCOND,   // Conditional branch instruction; "b.cond".
68   CSEL,
69   CSINV, // Conditional select invert.
70   CSNEG, // Conditional select negate.
71   CSINC, // Conditional select increment.
72 
73   // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
74   // ELF.
75   THREAD_POINTER,
76   ADC,
77   SBC, // adc, sbc instructions
78 
79   // Predicated instructions where inactive lanes produce undefined results.
80   ABDS_PRED,
81   ABDU_PRED,
82   ADD_PRED,
83   FADD_PRED,
84   FDIV_PRED,
85   FMA_PRED,
86   FMAX_PRED,
87   FMAXNM_PRED,
88   FMIN_PRED,
89   FMINNM_PRED,
90   FMUL_PRED,
91   FSUB_PRED,
92   MUL_PRED,
93   MULHS_PRED,
94   MULHU_PRED,
95   SDIV_PRED,
96   SHL_PRED,
97   SMAX_PRED,
98   SMIN_PRED,
99   SRA_PRED,
100   SRL_PRED,
101   SUB_PRED,
102   UDIV_PRED,
103   UMAX_PRED,
104   UMIN_PRED,
105 
106   // Unpredicated vector instructions
107   BIC,
108 
109   SRAD_MERGE_OP1,
110 
111   // Predicated instructions with the result of inactive lanes provided by the
112   // last operand.
113   FABS_MERGE_PASSTHRU,
114   FCEIL_MERGE_PASSTHRU,
115   FFLOOR_MERGE_PASSTHRU,
116   FNEARBYINT_MERGE_PASSTHRU,
117   FNEG_MERGE_PASSTHRU,
118   FRECPX_MERGE_PASSTHRU,
119   FRINT_MERGE_PASSTHRU,
120   FROUND_MERGE_PASSTHRU,
121   FROUNDEVEN_MERGE_PASSTHRU,
122   FSQRT_MERGE_PASSTHRU,
123   FTRUNC_MERGE_PASSTHRU,
124   FP_ROUND_MERGE_PASSTHRU,
125   FP_EXTEND_MERGE_PASSTHRU,
126   UINT_TO_FP_MERGE_PASSTHRU,
127   SINT_TO_FP_MERGE_PASSTHRU,
128   FCVTZU_MERGE_PASSTHRU,
129   FCVTZS_MERGE_PASSTHRU,
130   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
131   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
132   ABS_MERGE_PASSTHRU,
133   NEG_MERGE_PASSTHRU,
134 
135   SETCC_MERGE_ZERO,
136 
137   // Arithmetic instructions which write flags.
138   ADDS,
139   SUBS,
140   ADCS,
141   SBCS,
142   ANDS,
143 
144   // Conditional compares. Operands: left, right, falsecc, cc, flags
145   CCMP,
146   CCMN,
147   FCCMP,
148 
149   // Floating point comparison
150   FCMP,
151 
152   // Scalar extract
153   EXTR,
154 
155   // Scalar-to-vector duplication
156   DUP,
157   DUPLANE8,
158   DUPLANE16,
159   DUPLANE32,
160   DUPLANE64,
161 
162   // Vector immediate moves
163   MOVI,
164   MOVIshift,
165   MOVIedit,
166   MOVImsl,
167   FMOV,
168   MVNIshift,
169   MVNImsl,
170 
171   // Vector immediate ops
172   BICi,
173   ORRi,
174 
175   // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
176   // element must be identical.
177   BSP,
178 
179   // Vector shuffles
180   ZIP1,
181   ZIP2,
182   UZP1,
183   UZP2,
184   TRN1,
185   TRN2,
186   REV16,
187   REV32,
188   REV64,
189   EXT,
190   SPLICE,
191 
192   // Vector shift by scalar
193   VSHL,
194   VLSHR,
195   VASHR,
196 
197   // Vector shift by scalar (again)
198   SQSHL_I,
199   UQSHL_I,
200   SQSHLU_I,
201   SRSHR_I,
202   URSHR_I,
203 
204   // Vector shift by constant and insert
205   VSLI,
206   VSRI,
207 
208   // Vector comparisons
209   CMEQ,
210   CMGE,
211   CMGT,
212   CMHI,
213   CMHS,
214   FCMEQ,
215   FCMGE,
216   FCMGT,
217 
218   // Vector zero comparisons
219   CMEQz,
220   CMGEz,
221   CMGTz,
222   CMLEz,
223   CMLTz,
224   FCMEQz,
225   FCMGEz,
226   FCMGTz,
227   FCMLEz,
228   FCMLTz,
229 
230   // Vector across-lanes addition
231   // Only the lower result lane is defined.
232   SADDV,
233   UADDV,
234 
235   // Vector halving addition
236   SHADD,
237   UHADD,
238 
239   // Vector rounding halving addition
240   SRHADD,
241   URHADD,
242 
243   // Unsigned Add Long Pairwise
244   UADDLP,
245 
246   // udot/sdot instructions
247   UDOT,
248   SDOT,
249 
250   // Vector across-lanes min/max
251   // Only the lower result lane is defined.
252   SMINV,
253   UMINV,
254   SMAXV,
255   UMAXV,
256 
257   SADDV_PRED,
258   UADDV_PRED,
259   SMAXV_PRED,
260   UMAXV_PRED,
261   SMINV_PRED,
262   UMINV_PRED,
263   ORV_PRED,
264   EORV_PRED,
265   ANDV_PRED,
266 
267   // Vector bitwise insertion
268   BIT,
269 
270   // Compare-and-branch
271   CBZ,
272   CBNZ,
273   TBZ,
274   TBNZ,
275 
276   // Tail calls
277   TC_RETURN,
278 
279   // Custom prefetch handling
280   PREFETCH,
281 
282   // {s|u}int to FP within a FP register.
283   SITOF,
284   UITOF,
285 
286   /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
287   /// world w.r.t vectors; which causes additional REV instructions to be
288   /// generated to compensate for the byte-swapping. But sometimes we do
289   /// need to re-interpret the data in SIMD vector registers in big-endian
290   /// mode without emitting such REV instructions.
291   NVCAST,
292 
293   MRS, // MRS, also sets the flags via a glue.
294 
295   SMULL,
296   UMULL,
297 
298   // Reciprocal estimates and steps.
299   FRECPE,
300   FRECPS,
301   FRSQRTE,
302   FRSQRTS,
303 
304   SUNPKHI,
305   SUNPKLO,
306   UUNPKHI,
307   UUNPKLO,
308 
309   CLASTA_N,
310   CLASTB_N,
311   LASTA,
312   LASTB,
313   TBL,
314 
315   // Floating-point reductions.
316   FADDA_PRED,
317   FADDV_PRED,
318   FMAXV_PRED,
319   FMAXNMV_PRED,
320   FMINV_PRED,
321   FMINNMV_PRED,
322 
323   INSR,
324   PTEST,
325   PTRUE,
326 
327   BITREVERSE_MERGE_PASSTHRU,
328   BSWAP_MERGE_PASSTHRU,
329   REVH_MERGE_PASSTHRU,
330   REVW_MERGE_PASSTHRU,
331   CTLZ_MERGE_PASSTHRU,
332   CTPOP_MERGE_PASSTHRU,
333   DUP_MERGE_PASSTHRU,
334   INDEX_VECTOR,
335 
336   // Cast between vectors of the same element type but differing in length.
337   REINTERPRET_CAST,
338 
339   // Nodes to build an LD64B / ST64B 64-byte quantity out of i64, and vice versa
340   LS64_BUILD,
341   LS64_EXTRACT,
342 
343   LD1_MERGE_ZERO,
344   LD1S_MERGE_ZERO,
345   LDNF1_MERGE_ZERO,
346   LDNF1S_MERGE_ZERO,
347   LDFF1_MERGE_ZERO,
348   LDFF1S_MERGE_ZERO,
349   LD1RQ_MERGE_ZERO,
350   LD1RO_MERGE_ZERO,
351 
352   // Structured loads.
353   SVE_LD2_MERGE_ZERO,
354   SVE_LD3_MERGE_ZERO,
355   SVE_LD4_MERGE_ZERO,
356 
357   // Unsigned gather loads.
358   GLD1_MERGE_ZERO,
359   GLD1_SCALED_MERGE_ZERO,
360   GLD1_UXTW_MERGE_ZERO,
361   GLD1_SXTW_MERGE_ZERO,
362   GLD1_UXTW_SCALED_MERGE_ZERO,
363   GLD1_SXTW_SCALED_MERGE_ZERO,
364   GLD1_IMM_MERGE_ZERO,
365 
366   // Signed gather loads
367   GLD1S_MERGE_ZERO,
368   GLD1S_SCALED_MERGE_ZERO,
369   GLD1S_UXTW_MERGE_ZERO,
370   GLD1S_SXTW_MERGE_ZERO,
371   GLD1S_UXTW_SCALED_MERGE_ZERO,
372   GLD1S_SXTW_SCALED_MERGE_ZERO,
373   GLD1S_IMM_MERGE_ZERO,
374 
375   // First-faulting unsigned gather loads.
376   GLDFF1_MERGE_ZERO,
377   GLDFF1_SCALED_MERGE_ZERO,
378   GLDFF1_UXTW_MERGE_ZERO,
379   GLDFF1_SXTW_MERGE_ZERO,
380   GLDFF1_UXTW_SCALED_MERGE_ZERO,
381   GLDFF1_SXTW_SCALED_MERGE_ZERO,
382   GLDFF1_IMM_MERGE_ZERO,
383 
384   // First-faulting signed gather loads.
385   GLDFF1S_MERGE_ZERO,
386   GLDFF1S_SCALED_MERGE_ZERO,
387   GLDFF1S_UXTW_MERGE_ZERO,
388   GLDFF1S_SXTW_MERGE_ZERO,
389   GLDFF1S_UXTW_SCALED_MERGE_ZERO,
390   GLDFF1S_SXTW_SCALED_MERGE_ZERO,
391   GLDFF1S_IMM_MERGE_ZERO,
392 
393   // Non-temporal gather loads
394   GLDNT1_MERGE_ZERO,
395   GLDNT1_INDEX_MERGE_ZERO,
396   GLDNT1S_MERGE_ZERO,
397 
398   // Contiguous masked store.
399   ST1_PRED,
400 
401   // Scatter store
402   SST1_PRED,
403   SST1_SCALED_PRED,
404   SST1_UXTW_PRED,
405   SST1_SXTW_PRED,
406   SST1_UXTW_SCALED_PRED,
407   SST1_SXTW_SCALED_PRED,
408   SST1_IMM_PRED,
409 
410   // Non-temporal scatter store
411   SSTNT1_PRED,
412   SSTNT1_INDEX_PRED,
413 
414   // Asserts that a function argument (i32) is zero-extended to i8 by
415   // the caller
416   ASSERT_ZEXT_BOOL,
417 
418   // Strict (exception-raising) floating point comparison
419   STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
420   STRICT_FCMPE,
421 
422   // NEON Load/Store with post-increment base updates
423   LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
424   LD3post,
425   LD4post,
426   ST2post,
427   ST3post,
428   ST4post,
429   LD1x2post,
430   LD1x3post,
431   LD1x4post,
432   ST1x2post,
433   ST1x3post,
434   ST1x4post,
435   LD1DUPpost,
436   LD2DUPpost,
437   LD3DUPpost,
438   LD4DUPpost,
439   LD1LANEpost,
440   LD2LANEpost,
441   LD3LANEpost,
442   LD4LANEpost,
443   ST2LANEpost,
444   ST3LANEpost,
445   ST4LANEpost,
446 
447   STG,
448   STZG,
449   ST2G,
450   STZ2G,
451 
452   LDP,
453   STP,
454   STNP,
455 
456   // Memory Operations
457   MOPS_MEMSET,
458   MOPS_MEMSET_TAGGING,
459   MOPS_MEMCOPY,
460   MOPS_MEMMOVE,
461 };
462 
463 } // end namespace AArch64ISD
464 
465 namespace {
466 
467 // Any instruction that defines a 32-bit result zeros out the high half of the
468 // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
469 // be copying from a truncate. But any other 32-bit operation will zero-extend
470 // up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
471 // 32 bits; they're probably just qualifying a CopyFromReg.
472 static inline bool isDef32(const SDNode &N) {
473   unsigned Opc = N.getOpcode();
474   return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
475          Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
476          Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
477          Opc != ISD::FREEZE;
478 }
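// Usage sketch (an assumption for illustration, not code from this file): when
// selecting a zero-extend of a 32-bit value to 64 bits, the extend can be
// dropped if the producing node already zeroes the upper half, e.g.
//   if (Src.getValueType() == MVT::i32 && isDef32(*Src.getNode()))
//     /* the i32 -> i64 zero-extend is free; reuse the 32-bit result */;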
479 
480 } // end anonymous namespace
481 
482 namespace AArch64 {
483 /// Possible values of the current rounding mode, which is specified in bits
484 /// 23:22 of FPCR.
485 enum Rounding {
486   RN = 0,    // Round to Nearest
487   RP = 1,    // Round towards Plus infinity
488   RM = 2,    // Round towards Minus infinity
489   RZ = 3,    // Round towards Zero
490   rmMask = 3 // Bit mask selecting rounding mode
491 };
492 
493 // Bit position of rounding mode bits in FPCR.
494 const unsigned RoundingBitsPos = 22;
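
// A minimal decode sketch (illustration only; `Fpcr` is a hypothetical raw
// FPCR value, not something defined here):
//   Rounding RMode = static_cast<Rounding>((Fpcr >> RoundingBitsPos) & rmMask);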
495 } // namespace AArch64
496 
497 class AArch64Subtarget;
498 
499 class AArch64TargetLowering : public TargetLowering {
500 public:
501   explicit AArch64TargetLowering(const TargetMachine &TM,
502                                  const AArch64Subtarget &STI);
503 
504   /// Selects the correct CCAssignFn for a given CallingConvention value.
505   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
506 
507   /// Selects the correct CCAssignFn for a given CallingConvention value.
508   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
509 
510   /// Determine which of the bits specified in Mask are known to be either zero
511   /// or one and return them in the KnownZero/KnownOne bitsets.
512   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
513                                      const APInt &DemandedElts,
514                                      const SelectionDAG &DAG,
515                                      unsigned Depth = 0) const override;
516 
517   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
518     // Returning i64 unconditionally here (i.e. even for ILP32) means that the
519     // *DAG* representation of pointers will always be 64-bits. They will be
520     // truncated and extended when transferred to memory, but the 64-bit DAG
521     // allows us to use AArch64's addressing modes much more easily.
522     return MVT::getIntegerVT(64);
523   }
524 
525   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
526                                     const APInt &DemandedElts,
527                                     TargetLoweringOpt &TLO) const override;
528 
529   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
530 
531   /// Returns true if the target allows unaligned memory accesses of the
532   /// specified type.
533   bool allowsMisalignedMemoryAccesses(
534       EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
535       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
536       bool *Fast = nullptr) const override;
537   /// LLT variant.
538   bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
539                                       Align Alignment,
540                                       MachineMemOperand::Flags Flags,
541                                       bool *Fast = nullptr) const override;
542 
543   /// Provide custom lowering hooks for some operations.
544   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
545 
546   const char *getTargetNodeName(unsigned Opcode) const override;
547 
548   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
549 
550   /// This method returns a target specific FastISel object, or null if the
551   /// target does not support "fast" ISel.
552   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
553                            const TargetLibraryInfo *libInfo) const override;
554 
555   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
556 
557   bool isFPImmLegal(const APFloat &Imm, EVT VT,
558                     bool ForCodeSize) const override;
559 
560   /// Return true if the given shuffle mask can be codegen'd directly, or if it
561   /// should be stack expanded.
562   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
563 
564   /// Return the ISD::SETCC ValueType.
565   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
566                          EVT VT) const override;
567 
568   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
569 
570   MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
571                                   MachineBasicBlock *BB) const;
572 
573   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
574                                            MachineBasicBlock *BB) const;
575 
576   MachineBasicBlock *
577   EmitInstrWithCustomInserter(MachineInstr &MI,
578                               MachineBasicBlock *MBB) const override;
579 
580   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
581                           MachineFunction &MF,
582                           unsigned Intrinsic) const override;
583 
584   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
585                              EVT NewVT) const override;
586 
587   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
588   bool isTruncateFree(EVT VT1, EVT VT2) const override;
589 
590   bool isProfitableToHoist(Instruction *I) const override;
591 
592   bool isZExtFree(Type *Ty1, Type *Ty2) const override;
593   bool isZExtFree(EVT VT1, EVT VT2) const override;
594   bool isZExtFree(SDValue Val, EVT VT2) const override;
595 
596   bool shouldSinkOperands(Instruction *I,
597                           SmallVectorImpl<Use *> &Ops) const override;
598 
599   bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
600 
601   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
602 
603   bool lowerInterleavedLoad(LoadInst *LI,
604                             ArrayRef<ShuffleVectorInst *> Shuffles,
605                             ArrayRef<unsigned> Indices,
606                             unsigned Factor) const override;
607   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
608                              unsigned Factor) const override;
609 
610   bool isLegalAddImmediate(int64_t) const override;
611   bool isLegalICmpImmediate(int64_t) const override;
612 
613   bool isMulAddWithConstProfitable(const SDValue &AddNode,
614                                    const SDValue &ConstNode) const override;
615 
616   bool shouldConsiderGEPOffsetSplit() const override;
617 
618   EVT getOptimalMemOpType(const MemOp &Op,
619                           const AttributeList &FuncAttributes) const override;
620 
621   LLT getOptimalMemOpLLT(const MemOp &Op,
622                          const AttributeList &FuncAttributes) const override;
623 
624   /// Return true if the addressing mode represented by AM is legal for this
625   /// target, for a load/store of the specified type.
626   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
627                              unsigned AS,
628                              Instruction *I = nullptr) const override;
629 
630   /// Return the cost of the scaling factor used in the addressing
631   /// mode represented by AM for this target, for a load/store
632   /// of the specified type.
633   /// If the AM is supported, the return value must be >= 0.
634   /// If the AM is not supported, it returns a negative value.
635   InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
636                                        Type *Ty, unsigned AS) const override;
637 
638   /// Return true if an FMA operation is faster than a pair of fmul and fadd
639   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
640   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
641   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
642                                   EVT VT) const override;
643   bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
644 
645   bool generateFMAsInMachineCombiner(EVT VT,
646                                      CodeGenOpt::Level OptLevel) const override;
647 
648   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
649 
650   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
651   bool isDesirableToCommuteWithShift(const SDNode *N,
652                                      CombineLevel Level) const override;
653 
654   /// Returns true if it is beneficial to convert a load of a constant
655   /// to just the constant itself.
656   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
657                                          Type *Ty) const override;
658 
659   /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
660   /// with this index.
661   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
662                                unsigned Index) const override;
663 
664   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
665                             bool MathUsed) const override {
666     // Using overflow ops for overflow checks only should be beneficial on
667     // AArch64.
668     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
669   }
670 
671   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
672                         AtomicOrdering Ord) const override;
673   Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
674                               AtomicOrdering Ord) const override;
675 
676   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
677 
678   bool isOpSuitableForLDPSTP(const Instruction *I) const;
679   bool shouldInsertFencesForAtomic(const Instruction *I) const override;
680 
681   TargetLoweringBase::AtomicExpansionKind
682   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
683   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
684   TargetLoweringBase::AtomicExpansionKind
685   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
686 
687   TargetLoweringBase::AtomicExpansionKind
688   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
689 
690   bool useLoadStackGuardNode() const override;
691   TargetLoweringBase::LegalizeTypeAction
692   getPreferredVectorAction(MVT VT) const override;
693 
694   /// If the target has a standard location for the stack protector cookie,
695   /// returns the address of that location. Otherwise, returns nullptr.
696   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
697 
698   void insertSSPDeclarations(Module &M) const override;
699   Value *getSDagStackGuard(const Module &M) const override;
700   Function *getSSPStackGuardCheck(const Module &M) const override;
701 
702   /// If the target has a standard location for the unsafe stack pointer,
703   /// returns the address of that location. Otherwise, returns nullptr.
704   Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
705 
706   /// If a physical register, this returns the register that receives the
707   /// exception address on entry to an EH pad.
708   Register
709   getExceptionPointerRegister(const Constant *PersonalityFn) const override {
710     // FIXME: This is a guess. Has this been defined yet?
711     return AArch64::X0;
712   }
713 
714   /// If a physical register, this returns the register that receives the
715   /// exception typeid on entry to a landing pad.
716   Register
717   getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
718     // FIXME: This is a guess. Has this been defined yet?
719     return AArch64::X1;
720   }
721 
722   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
723 
724   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
725                         const MachineFunction &MF) const override {
726     // Do not merge to float value size (128 bits) if no implicit
727     // float attribute is set.
728 
729     bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
730 
731     if (NoFloat)
732       return (MemVT.getSizeInBits() <= 64);
733     return true;
734   }
735 
736   bool isCheapToSpeculateCttz() const override {
737     return true;
738   }
739 
740   bool isCheapToSpeculateCtlz() const override {
741     return true;
742   }
743 
744   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
745 
746   bool hasAndNotCompare(SDValue V) const override {
747     // We can use bics for any scalar.
748     return V.getValueType().isScalarInteger();
749   }
750 
751   bool hasAndNot(SDValue Y) const override {
752     EVT VT = Y.getValueType();
753 
754     if (!VT.isVector())
755       return hasAndNotCompare(Y);
756 
757     TypeSize TS = VT.getSizeInBits();
758     // TODO: We should be able to use bic/bif too for SVE.
759     return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
760   }
761 
762   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
763       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
764       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
765       SelectionDAG &DAG) const override;
766 
767   bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
768 
769   bool shouldTransformSignedTruncationCheck(EVT XVT,
770                                             unsigned KeptBits) const override {
771     // For vectors, we don't have a preference.
772     if (XVT.isVector())
773       return false;
774 
775     auto VTIsOk = [](EVT VT) -> bool {
776       return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
777              VT == MVT::i64;
778     };
779 
780     // We are ok with KeptBitsVT being byte/word/dword, which is what SXT supports.
781     // XVT will be larger than KeptBitsVT.
782     MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
783     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
784   }
785 
786   bool preferIncOfAddToSubOfNot(EVT VT) const override;
787 
788   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
789 
790   bool hasBitPreservingFPLogic(EVT VT) const override {
791     // FIXME: Is this always true? It should be true for vectors at least.
792     return VT == MVT::f32 || VT == MVT::f64;
793   }
794 
795   bool supportSplitCSR(MachineFunction *MF) const override {
796     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
797            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
798   }
799   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
800   void insertCopiesSplitCSR(
801       MachineBasicBlock *Entry,
802       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
803 
804   bool supportSwiftError() const override {
805     return true;
806   }
807 
808   /// Enable aggressive FMA fusion on targets that want it.
809   bool enableAggressiveFMAFusion(EVT VT) const override;
810 
811   /// Returns the size of the platform's va_list object.
812   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
813 
814   /// Returns true if \p VecTy is a legal interleaved access type. This
815   /// function checks the vector element type and the overall width of the
816   /// vector.
817   bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
818                                     bool &UseScalable) const;
819 
820   /// Returns the number of interleaved accesses that will be generated when
821   /// lowering accesses of the given type.
822   unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
823                                      bool UseScalable) const;
824 
825   MachineMemOperand::Flags getTargetMMOFlags(
826     const Instruction &I) const override;
827 
828   bool functionArgumentNeedsConsecutiveRegisters(
829       Type *Ty, CallingConv::ID CallConv, bool isVarArg,
830       const DataLayout &DL) const override;
831 
832   /// Used for exception handling on Win64.
833   bool needsFixedCatchObjects() const override;
834 
835   bool fallBackToDAGISel(const Instruction &Inst) const override;
836 
837   /// SVE code generation for fixed length vectors does not custom lower
838   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
839   /// merge. However, merging them creates a BUILD_VECTOR that is just as
840   /// illegal as the original, thus leading to an infinite legalisation loop.
841   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
842   /// vector types this override can be removed.
843   bool mergeStoresAfterLegalization(EVT VT) const override;
844 
845   // If the platform/function should have a redzone, return the size in bytes.
846   unsigned getRedZoneSize(const Function &F) const {
847     if (F.hasFnAttribute(Attribute::NoRedZone))
848       return 0;
849     return 128;
850   }
851 
852   bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
853   EVT getPromotedVTForPredicate(EVT VT) const;
854 
855   EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
856                              bool AllowUnknown = false) const override;
857 
858   bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
859 
860 private:
861   /// Keep a pointer to the AArch64Subtarget around so that we can
862   /// make the right decision when generating code for different targets.
863   const AArch64Subtarget *Subtarget;
864 
865   bool isExtFreeImpl(const Instruction *Ext) const override;
866 
867   void addTypeForNEON(MVT VT);
868   void addTypeForFixedLengthSVE(MVT VT);
869   void addDRTypeForNEON(MVT VT);
870   void addQRTypeForNEON(MVT VT);
871 
872   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
873                                bool isVarArg,
874                                const SmallVectorImpl<ISD::InputArg> &Ins,
875                                const SDLoc &DL, SelectionDAG &DAG,
876                                SmallVectorImpl<SDValue> &InVals) const override;
877 
878   SDValue LowerCall(CallLoweringInfo & /*CLI*/,
879                     SmallVectorImpl<SDValue> &InVals) const override;
880 
881   SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
882                           CallingConv::ID CallConv, bool isVarArg,
883                           const SmallVectorImpl<ISD::InputArg> &Ins,
884                           const SDLoc &DL, SelectionDAG &DAG,
885                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
886                           SDValue ThisVal) const;
887 
888   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
889   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
890   SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
891   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
892 
893   SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
894   SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
895 
896   SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
897 
898   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
899   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
900 
901   bool isEligibleForTailCallOptimization(
902       SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
903       const SmallVectorImpl<ISD::OutputArg> &Outs,
904       const SmallVectorImpl<SDValue> &OutVals,
905       const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
906 
907   /// Finds the incoming stack arguments which overlap the given fixed stack
908   /// object and incorporates their load into the current chain. This prevents
909   /// an upcoming store from clobbering the stack argument before it's used.
910   SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
911                               MachineFrameInfo &MFI, int ClobberedFI) const;
912 
913   bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
914 
915   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
916                            SDValue &Chain) const;
917 
918   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
919                       bool isVarArg,
920                       const SmallVectorImpl<ISD::OutputArg> &Outs,
921                       LLVMContext &Context) const override;
922 
923   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
924                       const SmallVectorImpl<ISD::OutputArg> &Outs,
925                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
926                       SelectionDAG &DAG) const override;
927 
928   SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
929                         unsigned Flag) const;
930   SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
931                         unsigned Flag) const;
932   SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
933                         unsigned Flag) const;
934   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
935                         unsigned Flag) const;
936   template <class NodeTy>
937   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
938   template <class NodeTy>
939   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
940   template <class NodeTy>
941   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
942   template <class NodeTy>
943   SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
944   SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
945   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
946   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
947   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
948   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
949   SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
950                                const SDLoc &DL, SelectionDAG &DAG) const;
951   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
952                                  SelectionDAG &DAG) const;
953   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
954   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
955   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
956   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
957   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
958   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
959                          SDValue TVal, SDValue FVal, const SDLoc &dl,
960                          SelectionDAG &DAG) const;
961   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
962   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
963   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
964   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
965   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
966   SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
967   SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
968   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
969   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
970   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
971   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
972   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
973   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
974   SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
975   SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
976   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
977   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
978   SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
979   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
980   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
981   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
982   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
983   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
984                               bool OverrideNEON = false) const;
985   SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
986   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
987   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
988   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
989   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
990   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
991   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
992   SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
993   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
994   SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
995   SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
996   SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
997   SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
998   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
999   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1000   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1001   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1002   SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1003   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1004   SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1005   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1006   SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1007   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
1008   SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
1009   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
1010   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
1011   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
1012   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
1013   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1014   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
1015   SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
1016   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
1017   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1018   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
1019                                          SDValue &Size,
1020                                          SelectionDAG &DAG) const;
1021   SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
1022                              EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
1023 
1024   SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
1025                                                SelectionDAG &DAG) const;
1026   SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
1027                                                SelectionDAG &DAG) const;
1028   SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1029   SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
1030   SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
1031   SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
1032   SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
1033                               SelectionDAG &DAG) const;
1034   SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
1035   SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
1036   SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
1037   SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
1038                                             SelectionDAG &DAG) const;
1039   SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
1040                                               SelectionDAG &DAG) const;
1041   SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
1042   SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
1043   SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
1044   SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
1045                                              SelectionDAG &DAG) const;
1046   SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
1047   SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
1048   SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
1049   SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
1050   SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
1051                                               SelectionDAG &DAG) const;
1052 
1053   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1054                         SmallVectorImpl<SDNode *> &Created) const override;
1055   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1056                           int &ExtraSteps, bool &UseOneConst,
1057                           bool Reciprocal) const override;
1058   SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1059                            int &ExtraSteps) const override;
1060   SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
1061                            const DenormalMode &Mode) const override;
1062   SDValue getSqrtResultForDenormInput(SDValue Operand,
1063                                       SelectionDAG &DAG) const override;
1064   unsigned combineRepeatedFPDivisors() const override;
1065 
1066   ConstraintType getConstraintType(StringRef Constraint) const override;
1067   Register getRegisterByName(const char* RegName, LLT VT,
1068                              const MachineFunction &MF) const override;
1069 
1070   /// Examine constraint string and operand type and determine a weight value.
1071   /// The operand object must already have been set up with the operand type.
1072   ConstraintWeight
1073   getSingleConstraintMatchWeight(AsmOperandInfo &info,
1074                                  const char *constraint) const override;
1075 
1076   std::pair<unsigned, const TargetRegisterClass *>
1077   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1078                                StringRef Constraint, MVT VT) const override;
1079 
1080   const char *LowerXConstraint(EVT ConstraintVT) const override;
1081 
1082   void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1083                                     std::vector<SDValue> &Ops,
1084                                     SelectionDAG &DAG) const override;
1085 
1086   unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1087     if (ConstraintCode == "Q")
1088       return InlineAsm::Constraint_Q;
1089     // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
1090     //        followed by llvm_unreachable so we'll leave them unimplemented in
1091     //        the backend for now.
1092     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1093   }
1094 
1095   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
1096   bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
1097   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
1098   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1099   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1100   bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
1101                               ISD::MemIndexedMode &AM, bool &IsInc,
1102                               SelectionDAG &DAG) const;
1103   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
1104                                  ISD::MemIndexedMode &AM,
1105                                  SelectionDAG &DAG) const override;
1106   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
1107                                   SDValue &Offset, ISD::MemIndexedMode &AM,
1108                                   SelectionDAG &DAG) const override;
1109 
1110   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1111                           SelectionDAG &DAG) const override;
1112   void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1113                              SelectionDAG &DAG) const;
1114   void ReplaceExtractSubVectorResults(SDNode *N,
1115                                       SmallVectorImpl<SDValue> &Results,
1116                                       SelectionDAG &DAG) const;
1117 
1118   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
1119 
1120   void finalizeLowering(MachineFunction &MF) const override;
1121 
1122   bool shouldLocalize(const MachineInstr &MI,
1123                       const TargetTransformInfo *TTI) const override;
1124 
1125   bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1126                                          const APInt &OriginalDemandedBits,
1127                                          const APInt &OriginalDemandedElts,
1128                                          KnownBits &Known,
1129                                          TargetLoweringOpt &TLO,
1130                                          unsigned Depth) const override;
1131 
1132   // Normally SVE is only used for vectors whose byte size does not fit within a
1133   // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
1134   // used for 64-bit and 128-bit vectors as well.
1135   bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
1136 
1137   // With the exception of data-predicate transitions, no instructions are
1138   // required to cast between legal scalable vector types. However:
1139   //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
1140   //     is not universally usable.
1141   //  2. Most unpacked integer types are not legal and thus integer extends
1142   //     cannot be used to convert between unpacked and packed types.
1143   // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
1144   // to transition between unpacked and packed types of the same element type,
1145   // with BITCAST used otherwise.
1146   SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
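  // Illustrative example (an assumption based on the description above):
  // casting an nxv2f32 value to nxv4i32 may first REINTERPRET_CAST the
  // unpacked nxv2f32 to the packed nxv4f32 (same element type, different
  // length) and then BITCAST nxv4f32 to nxv4i32 (same bit length).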
1147 
1148   bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
1149                                               LLT Ty2) const override;
1150 };
1151 
1152 namespace AArch64 {
1153 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1154                          const TargetLibraryInfo *libInfo);
1155 } // end namespace AArch64
1156 
1157 } // end namespace llvm
1158 
1159 #endif
1160