xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h (revision 6be3386466ab79a84b48429ae66244f21526d3df)
1 //==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
15 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
16 
17 #include "AArch64.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/IR/CallingConv.h"
22 #include "llvm/IR/Instruction.h"
23 
24 namespace llvm {
25 
26 namespace AArch64ISD {
27 
28 // For predicated nodes where the result is a vector, the operation is
29 // controlled by a governing predicate and the inactive lanes are explicitly
30 // defined with a value, use the following naming convention:
31 //
32 //    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
33 //                        to source operand OP<n>.
34 //
35 //    _MERGE_ZERO         The result value is a vector with inactive lanes
36 //                        zeroed.
37 //
38 //    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
39 //                        to the last source operand, whose only purpose is to
40 //                        act as a passthru value.
41 //
42 // For other cases where no explicit action is needed to set the inactive lanes,
43 // or when the result is not a vector and it is useful to distinguish a node
44 // from similar unpredicated nodes, use:
45 //
46 //    _PRED
47 // (An illustrative, non-normative sketch of these suffixes follows the enum.)
48 enum NodeType : unsigned {
49   FIRST_NUMBER = ISD::BUILTIN_OP_END,
50   WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
51   CALL,         // Function call.
52 
53   // Produces the full sequence of instructions for getting the thread pointer
54   // offset of a variable into X0, using the TLSDesc model.
55   TLSDESC_CALLSEQ,
56   ADRP,     // Page address of a TargetGlobalAddress operand.
57   ADR,      // ADR: form a PC-relative address.
58   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
59   LOADgot,  // Load from automatically generated descriptor (e.g. Global
60             // Offset Table, TLS record).
61   RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
62   BRCOND,   // Conditional branch instruction; "b.cond".
63   CSEL,
64   FCSEL, // Conditional move instruction.
65   CSINV, // Conditional select invert.
66   CSNEG, // Conditional select negate.
67   CSINC, // Conditional select increment.
68 
69   // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
70   // ELF.
71   THREAD_POINTER,
72   ADC,
73   SBC, // adc, sbc instructions
74 
75   // Arithmetic instructions
76   ADD_PRED,
77   FADD_PRED,
78   SDIV_PRED,
79   UDIV_PRED,
80   FMA_PRED,
81   SMIN_MERGE_OP1,
82   UMIN_MERGE_OP1,
83   SMAX_MERGE_OP1,
84   UMAX_MERGE_OP1,
85   SHL_MERGE_OP1,
86   SRL_MERGE_OP1,
87   SRA_MERGE_OP1,
88 
89   SETCC_MERGE_ZERO,
90 
91   // Arithmetic instructions which write flags.
92   ADDS,
93   SUBS,
94   ADCS,
95   SBCS,
96   ANDS,
97 
98   // Conditional compares. Operands: left, right, falsecc, cc, flags
99   CCMP,
100   CCMN,
101   FCCMP,
102 
103   // Floating point comparison
104   FCMP,
105 
106   // Scalar extract
107   EXTR,
108 
109   // Scalar-to-vector duplication
110   DUP,
111   DUPLANE8,
112   DUPLANE16,
113   DUPLANE32,
114   DUPLANE64,
115 
116   // Vector immediate moves
117   MOVI,
118   MOVIshift,
119   MOVIedit,
120   MOVImsl,
121   FMOV,
122   MVNIshift,
123   MVNImsl,
124 
125   // Vector immediate ops
126   BICi,
127   ORRi,
128 
129   // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
130   // element must be identical.
131   BSP,
132 
133   // Vector arithmetic negation
134   NEG,
135 
136   // Vector shuffles
137   ZIP1,
138   ZIP2,
139   UZP1,
140   UZP2,
141   TRN1,
142   TRN2,
143   REV16,
144   REV32,
145   REV64,
146   EXT,
147 
148   // Vector shift by scalar
149   VSHL,
150   VLSHR,
151   VASHR,
152 
153   // Vector saturating/rounding shift by immediate
154   SQSHL_I,
155   UQSHL_I,
156   SQSHLU_I,
157   SRSHR_I,
158   URSHR_I,
159 
160   // Vector shift by constant and insert
161   VSLI,
162   VSRI,
163 
164   // Vector comparisons
165   CMEQ,
166   CMGE,
167   CMGT,
168   CMHI,
169   CMHS,
170   FCMEQ,
171   FCMGE,
172   FCMGT,
173 
174   // Vector zero comparisons
175   CMEQz,
176   CMGEz,
177   CMGTz,
178   CMLEz,
179   CMLTz,
180   FCMEQz,
181   FCMGEz,
182   FCMGTz,
183   FCMLEz,
184   FCMLTz,
185 
186   // Vector across-lanes addition
187   // Only the lower result lane is defined.
188   SADDV,
189   UADDV,
190 
191   // Vector rounding halving addition
192   SRHADD,
193   URHADD,
194 
195   // Vector across-lanes min/max
196   // Only the lower result lane is defined.
197   SMINV,
198   UMINV,
199   SMAXV,
200   UMAXV,
201 
202   SMAXV_PRED,
203   UMAXV_PRED,
204   SMINV_PRED,
205   UMINV_PRED,
206   ORV_PRED,
207   EORV_PRED,
208   ANDV_PRED,
209 
210   // Vector bitwise negation
211   NOT,
212 
213   // Vector bitwise insertion
214   BIT,
215 
216   // Compare-and-branch
217   CBZ,
218   CBNZ,
219   TBZ,
220   TBNZ,
221 
222   // Tail calls
223   TC_RETURN,
224 
225   // Custom prefetch handling
226   PREFETCH,
227 
228   // {s|u}int to FP within a FP register.
229   SITOF,
230   UITOF,
231 
232   /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
233   /// world w.r.t. vectors, which causes additional REV instructions to be
234   /// generated to compensate for the byte swapping. But sometimes we do
235   /// need to reinterpret the data in SIMD vector registers in big-endian
236   /// mode without emitting such REV instructions.
237   NVCAST,
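  // Hedged illustration (added commentary, not taken from LLVM): in a
  // big-endian configuration, reinterpreting a v4i32 register as v8i16 can be
  // expressed as (v8i16 (NVCAST (v4i32 V))) with no extra instructions, where
  // an equivalent ISD::BITCAST would also have to emit REV instructions to
  // model the per-element byte swap.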
238 
239   SMULL,
240   UMULL,
241 
242   // Reciprocal estimates and steps.
243   FRECPE,
244   FRECPS,
245   FRSQRTE,
246   FRSQRTS,
247 
248   SUNPKHI,
249   SUNPKLO,
250   UUNPKHI,
251   UUNPKLO,
252 
253   CLASTA_N,
254   CLASTB_N,
255   LASTA,
256   LASTB,
257   REV,
258   TBL,
259 
260   // Floating-point reductions.
261   FADDA_PRED,
262   FADDV_PRED,
263   FMAXV_PRED,
264   FMAXNMV_PRED,
265   FMINV_PRED,
266   FMINNMV_PRED,
267 
268   INSR,
269   PTEST,
270   PTRUE,
271 
272   DUP_MERGE_PASSTHRU,
273   INDEX_VECTOR,
274 
275   REINTERPRET_CAST,
276 
277   LD1_MERGE_ZERO,
278   LD1S_MERGE_ZERO,
279   LDNF1_MERGE_ZERO,
280   LDNF1S_MERGE_ZERO,
281   LDFF1_MERGE_ZERO,
282   LDFF1S_MERGE_ZERO,
283   LD1RQ_MERGE_ZERO,
284   LD1RO_MERGE_ZERO,
285 
286   // Structured loads.
287   SVE_LD2_MERGE_ZERO,
288   SVE_LD3_MERGE_ZERO,
289   SVE_LD4_MERGE_ZERO,
290 
291   // Unsigned gather loads.
292   GLD1_MERGE_ZERO,
293   GLD1_SCALED_MERGE_ZERO,
294   GLD1_UXTW_MERGE_ZERO,
295   GLD1_SXTW_MERGE_ZERO,
296   GLD1_UXTW_SCALED_MERGE_ZERO,
297   GLD1_SXTW_SCALED_MERGE_ZERO,
298   GLD1_IMM_MERGE_ZERO,
299 
300   // Signed gather loads
301   GLD1S_MERGE_ZERO,
302   GLD1S_SCALED_MERGE_ZERO,
303   GLD1S_UXTW_MERGE_ZERO,
304   GLD1S_SXTW_MERGE_ZERO,
305   GLD1S_UXTW_SCALED_MERGE_ZERO,
306   GLD1S_SXTW_SCALED_MERGE_ZERO,
307   GLD1S_IMM_MERGE_ZERO,
308 
309   // First-faulting unsigned gather loads.
310   GLDFF1_MERGE_ZERO,
311   GLDFF1_SCALED_MERGE_ZERO,
312   GLDFF1_UXTW_MERGE_ZERO,
313   GLDFF1_SXTW_MERGE_ZERO,
314   GLDFF1_UXTW_SCALED_MERGE_ZERO,
315   GLDFF1_SXTW_SCALED_MERGE_ZERO,
316   GLDFF1_IMM_MERGE_ZERO,
317 
318   // First-faulting signed gather loads.
319   GLDFF1S_MERGE_ZERO,
320   GLDFF1S_SCALED_MERGE_ZERO,
321   GLDFF1S_UXTW_MERGE_ZERO,
322   GLDFF1S_SXTW_MERGE_ZERO,
323   GLDFF1S_UXTW_SCALED_MERGE_ZERO,
324   GLDFF1S_SXTW_SCALED_MERGE_ZERO,
325   GLDFF1S_IMM_MERGE_ZERO,
326 
327   // Non-temporal gather loads
328   GLDNT1_MERGE_ZERO,
329   GLDNT1_INDEX_MERGE_ZERO,
330   GLDNT1S_MERGE_ZERO,
331 
332   // Contiguous masked store.
333   ST1_PRED,
334 
335   // Scatter store
336   SST1_PRED,
337   SST1_SCALED_PRED,
338   SST1_UXTW_PRED,
339   SST1_SXTW_PRED,
340   SST1_UXTW_SCALED_PRED,
341   SST1_SXTW_SCALED_PRED,
342   SST1_IMM_PRED,
343 
344   // Non-temporal scatter store
345   SSTNT1_PRED,
346   SSTNT1_INDEX_PRED,
347 
348   // Strict (exception-raising) floating point comparison
349   STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
350   STRICT_FCMPE,
351 
352   // NEON Load/Store with post-increment base updates
353   LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
354   LD3post,
355   LD4post,
356   ST2post,
357   ST3post,
358   ST4post,
359   LD1x2post,
360   LD1x3post,
361   LD1x4post,
362   ST1x2post,
363   ST1x3post,
364   ST1x4post,
365   LD1DUPpost,
366   LD2DUPpost,
367   LD3DUPpost,
368   LD4DUPpost,
369   LD1LANEpost,
370   LD2LANEpost,
371   LD3LANEpost,
372   LD4LANEpost,
373   ST2LANEpost,
374   ST3LANEpost,
375   ST4LANEpost,
376 
377   STG,
378   STZG,
379   ST2G,
380   STZ2G,
381 
382   LDP,
383   STP,
384   STNP
385 };
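
// Hedged usage sketch of the suffix convention documented before this enum.
// This helper is not part of the interface: its name is invented for the
// example, and the operand orders shown are assumptions, not a specification.
static inline SDValue buildPredicatedNodeSketch(SelectionDAG &DAG,
                                                const SDLoc &DL, EVT VT,
                                                EVT PredVT, SDValue Pg,
                                                SDValue LHS, SDValue RHS) {
  // _MERGE_ZERO: inactive lanes of the compare result are defined to be zero.
  SDValue Cmp = DAG.getNode(SETCC_MERGE_ZERO, DL, PredVT, Pg, LHS, RHS,
                            DAG.getCondCode(ISD::SETGT));
  (void)Cmp;
  // _PRED: inactive lanes are left undefined; acceptable when the result is
  // only ever consumed under the same governing predicate.
  return DAG.getNode(ADD_PRED, DL, VT, Pg, LHS, RHS);
}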
386 
387 } // end namespace AArch64ISD
388 
389 namespace {
390 
391 // Any instruction that defines a 32-bit result zeros out the high half of the
392 // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
393 // be copying from a truncate. But any other 32-bit operation will zero-extend
394 // up to 64 bits.
395 // FIXME: X86 also checks for CMOV here. Do we need something similar?
396 static inline bool isDef32(const SDNode &N) {
397   unsigned Opc = N.getOpcode();
398   return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
399          Opc != ISD::CopyFromReg;
400 }
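
// Hedged usage sketch only: this helper is hypothetical and not part of the
// backend. It shows the kind of question isDef32 answers, namely whether a
// (zext i64 (op i32 ...)) can reuse the 32-bit result directly because the
// producing operation already zeroes bits [63:32].
static inline bool zextOfDef32IsFreeSketch(const SDNode &ZExt) {
  const SDValue &Src = ZExt.getOperand(0);
  return Src.getValueType() == MVT::i32 && isDef32(*Src.getNode());
}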
401 
402 } // end anonymous namespace
403 
404 class AArch64Subtarget;
405 class AArch64TargetMachine;
406 
407 class AArch64TargetLowering : public TargetLowering {
408 public:
409   explicit AArch64TargetLowering(const TargetMachine &TM,
410                                  const AArch64Subtarget &STI);
411 
412   /// Selects the correct CCAssignFn for a given CallingConvention value.
413   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
414 
415   /// Selects the correct CCAssignFn for return values of a given
416   /// CallingConvention value.
416   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
417 
418   /// Determine which of the bits specified in Mask are known to be either zero
419   /// or one and return them in the KnownZero/KnownOne bitsets.
420   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
421                                      const APInt &DemandedElts,
422                                      const SelectionDAG &DAG,
423                                      unsigned Depth = 0) const override;
424 
425   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
426     // Returning i64 unconditionally here (i.e. even for ILP32) means that the
427     // *DAG* representation of pointers will always be 64 bits. They will be
428     // truncated and extended when transferred to memory, but the 64-bit DAG
429     // allows us to use AArch64's addressing modes much more easily.
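    // Added illustration (an assumption about ILP32 specifically, not taken
    // from this file): a pointer loaded from memory shows up in the DAG as a
    // 32-bit load whose result is extended to i64, and is truncated back to
    // 32 bits when stored, while in-register address arithmetic stays 64-bit.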
430     return MVT::getIntegerVT(64);
431   }
432 
433   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
434                                     const APInt &DemandedElts,
435                                     TargetLoweringOpt &TLO) const override;
436 
437   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
438 
439   /// Returns true if the target allows unaligned memory accesses of the
440   /// specified type.
441   bool allowsMisalignedMemoryAccesses(
442       EVT VT, unsigned AddrSpace = 0, unsigned Align = 1,
443       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
444       bool *Fast = nullptr) const override;
445   /// LLT variant.
446   bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
447                                       Align Alignment,
448                                       MachineMemOperand::Flags Flags,
449                                       bool *Fast = nullptr) const override;
450 
451   /// Provide custom lowering hooks for some operations.
452   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
453 
454   const char *getTargetNodeName(unsigned Opcode) const override;
455 
456   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
457 
458   /// Returns true if a cast between SrcAS and DestAS is a noop.
459   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
460     // Addrspacecasts are always noops.
461     return true;
462   }
463 
464   /// This method returns a target specific FastISel object, or null if the
465   /// target does not support "fast" ISel.
466   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
467                            const TargetLibraryInfo *libInfo) const override;
468 
469   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
470 
471   bool isFPImmLegal(const APFloat &Imm, EVT VT,
472                     bool ForCodeSize) const override;
473 
474   /// Return true if the given shuffle mask can be codegen'd directly, or if it
475   /// should be stack expanded.
476   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
477 
478   /// Return the ISD::SETCC ValueType.
479   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
480                          EVT VT) const override;
481 
482   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
483 
484   MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
485                                   MachineBasicBlock *BB) const;
486 
487   MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
488                                            MachineBasicBlock *BB) const;
489 
490   MachineBasicBlock *
491   EmitInstrWithCustomInserter(MachineInstr &MI,
492                               MachineBasicBlock *MBB) const override;
493 
494   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
495                           MachineFunction &MF,
496                           unsigned Intrinsic) const override;
497 
498   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
499                              EVT NewVT) const override;
500 
501   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
502   bool isTruncateFree(EVT VT1, EVT VT2) const override;
503 
504   bool isProfitableToHoist(Instruction *I) const override;
505 
506   bool isZExtFree(Type *Ty1, Type *Ty2) const override;
507   bool isZExtFree(EVT VT1, EVT VT2) const override;
508   bool isZExtFree(SDValue Val, EVT VT2) const override;
509 
510   bool shouldSinkOperands(Instruction *I,
511                           SmallVectorImpl<Use *> &Ops) const override;
512 
513   bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
514 
515   unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
516 
517   bool lowerInterleavedLoad(LoadInst *LI,
518                             ArrayRef<ShuffleVectorInst *> Shuffles,
519                             ArrayRef<unsigned> Indices,
520                             unsigned Factor) const override;
521   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
522                              unsigned Factor) const override;
523 
524   bool isLegalAddImmediate(int64_t) const override;
525   bool isLegalICmpImmediate(int64_t) const override;
526 
527   bool shouldConsiderGEPOffsetSplit() const override;
528 
529   EVT getOptimalMemOpType(const MemOp &Op,
530                           const AttributeList &FuncAttributes) const override;
531 
532   LLT getOptimalMemOpLLT(const MemOp &Op,
533                          const AttributeList &FuncAttributes) const override;
534 
535   /// Return true if the addressing mode represented by AM is legal for this
536   /// target, for a load/store of the specified type.
537   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
538                              unsigned AS,
539                              Instruction *I = nullptr) const override;
540 
541   /// Return the cost of the scaling factor used in the addressing
542   /// mode represented by AM for this target, for a load/store
543   /// of the specified type.
544   /// If the AM is supported, the return value must be >= 0.
545   /// If the AM is not supported, it returns a negative value.
546   int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
547                            unsigned AS) const override;
548 
549   /// Return true if an FMA operation is faster than a pair of fmul and fadd
550   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
551   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
552   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
553                                   EVT VT) const override;
554   bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
555 
556   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
557 
558   /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
559   bool isDesirableToCommuteWithShift(const SDNode *N,
560                                      CombineLevel Level) const override;
561 
562   /// Returns true if it is beneficial to convert a load of a constant
563   /// to just the constant itself.
564   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
565                                          Type *Ty) const override;
566 
567   /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
568   /// with this index.
569   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
570                                unsigned Index) const override;
571 
572   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
573                             bool MathUsed) const override {
574     // Using overflow ops for overflow checks only should be beneficial on
575     // AArch64.
576     return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
577   }
578 
579   Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
580                         AtomicOrdering Ord) const override;
581   Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
582                               Value *Addr, AtomicOrdering Ord) const override;
583 
584   void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
585 
586   TargetLoweringBase::AtomicExpansionKind
587   shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
588   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
589   TargetLoweringBase::AtomicExpansionKind
590   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
591 
592   TargetLoweringBase::AtomicExpansionKind
593   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
594 
595   bool useLoadStackGuardNode() const override;
596   TargetLoweringBase::LegalizeTypeAction
597   getPreferredVectorAction(MVT VT) const override;
598 
599   /// If the target has a standard location for the stack protector cookie,
600   /// returns the address of that location. Otherwise, returns nullptr.
601   Value *getIRStackGuard(IRBuilder<> &IRB) const override;
602 
603   void insertSSPDeclarations(Module &M) const override;
604   Value *getSDagStackGuard(const Module &M) const override;
605   Function *getSSPStackGuardCheck(const Module &M) const override;
606 
607   /// If the target has a standard location for the unsafe stack pointer,
608   /// returns the address of that location. Otherwise, returns nullptr.
609   Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
610 
611   /// If a physical register, this returns the register that receives the
612   /// exception address on entry to an EH pad.
613   Register
614   getExceptionPointerRegister(const Constant *PersonalityFn) const override {
615     // FIXME: This is a guess. Has this been defined yet?
616     return AArch64::X0;
617   }
618 
619   /// If a physical register, this returns the register that receives the
620   /// exception typeid on entry to a landing pad.
621   Register
622   getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
623     // FIXME: This is a guess. Has this been defined yet?
624     return AArch64::X1;
625   }
626 
627   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
628 
629   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
630                         const SelectionDAG &DAG) const override {
631     // Do not merge to float value size (128 bits) if the noimplicitfloat
632     // attribute is set.
633 
634     bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
635         Attribute::NoImplicitFloat);
636 
637     if (NoFloat)
638       return (MemVT.getSizeInBits() <= 64);
639     return true;
640   }
641 
642   bool isCheapToSpeculateCttz() const override {
643     return true;
644   }
645 
646   bool isCheapToSpeculateCtlz() const override {
647     return true;
648   }
649 
650   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
651 
652   bool hasAndNotCompare(SDValue V) const override {
653     // We can use bics for any scalar.
654     return V.getValueType().isScalarInteger();
655   }
656 
657   bool hasAndNot(SDValue Y) const override {
658     EVT VT = Y.getValueType();
659 
660     if (!VT.isVector())
661       return hasAndNotCompare(Y);
662 
663     return VT.getSizeInBits() >= 64; // vector 'bic'
664   }
665 
666   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
667       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
668       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
669       SelectionDAG &DAG) const override;
670 
671   bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
672 
673   bool shouldTransformSignedTruncationCheck(EVT XVT,
674                                             unsigned KeptBits) const override {
675     // For vectors, we don't have a preference.
676     if (XVT.isVector())
677       return false;
678 
679     auto VTIsOk = [](EVT VT) -> bool {
680       return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
681              VT == MVT::i64;
682     };
683 
684     // We are OK with KeptBitsVT being byte/word/dword, which is what SXT supports.
685     // XVT will be larger than KeptBitsVT.
686     MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
687     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
688   }
689 
690   bool preferIncOfAddToSubOfNot(EVT VT) const override;
691 
692   bool hasBitPreservingFPLogic(EVT VT) const override {
693     // FIXME: Is this always true? It should be true for vectors at least.
694     return VT == MVT::f32 || VT == MVT::f64;
695   }
696 
697   bool supportSplitCSR(MachineFunction *MF) const override {
698     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
699            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
700   }
701   void initializeSplitCSR(MachineBasicBlock *Entry) const override;
702   void insertCopiesSplitCSR(
703       MachineBasicBlock *Entry,
704       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
705 
706   bool supportSwiftError() const override {
707     return true;
708   }
709 
710   /// Enable aggressive FMA fusion on targets that want it.
711   bool enableAggressiveFMAFusion(EVT VT) const override;
712 
713   /// Returns the size of the platform's va_list object.
714   unsigned getVaListSizeInBits(const DataLayout &DL) const override;
715 
716   /// Returns true if \p VecTy is a legal interleaved access type. This
717   /// function checks the vector element type and the overall width of the
718   /// vector.
719   bool isLegalInterleavedAccessType(VectorType *VecTy,
720                                     const DataLayout &DL) const;
721 
722   /// Returns the number of interleaved accesses that will be generated when
723   /// lowering accesses of the given type.
724   unsigned getNumInterleavedAccesses(VectorType *VecTy,
725                                      const DataLayout &DL) const;
726 
727   MachineMemOperand::Flags getTargetMMOFlags(
728     const Instruction &I) const override;
729 
730   bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
731                                                  CallingConv::ID CallConv,
732                                                  bool isVarArg) const override;
733   /// Used for exception handling on Win64.
734   bool needsFixedCatchObjects() const override;
735 
736   bool fallBackToDAGISel(const Instruction &Inst) const override;
737 
738   /// SVE code generation for fixed length vectors does not custom lower
739   /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
740   /// merge. However, merging them creates a BUILD_VECTOR that is just as
741   /// illegal as the original, thus leading to an infinite legalisation loop.
742   /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
743   /// vector types this override can be removed.
744   bool mergeStoresAfterLegalization(EVT VT) const override {
745     return !useSVEForFixedLengthVectors();
746   }
747 
748 private:
749   /// Keep a pointer to the AArch64Subtarget around so that we can
750   /// make the right decision when generating code for different targets.
751   const AArch64Subtarget *Subtarget;
752 
753   bool isExtFreeImpl(const Instruction *Ext) const override;
754 
755   void addTypeForNEON(MVT VT, MVT PromotedBitwiseVT);
756   void addTypeForFixedLengthSVE(MVT VT);
757   void addDRTypeForNEON(MVT VT);
758   void addQRTypeForNEON(MVT VT);
759 
760   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
761                                bool isVarArg,
762                                const SmallVectorImpl<ISD::InputArg> &Ins,
763                                const SDLoc &DL, SelectionDAG &DAG,
764                                SmallVectorImpl<SDValue> &InVals) const override;
765 
766   SDValue LowerCall(CallLoweringInfo & /*CLI*/,
767                     SmallVectorImpl<SDValue> &InVals) const override;
768 
769   SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
770                           CallingConv::ID CallConv, bool isVarArg,
771                           const SmallVectorImpl<ISD::InputArg> &Ins,
772                           const SDLoc &DL, SelectionDAG &DAG,
773                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
774                           SDValue ThisVal) const;
775 
776   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
777 
778   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
779 
780   bool isEligibleForTailCallOptimization(
781       SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
782       const SmallVectorImpl<ISD::OutputArg> &Outs,
783       const SmallVectorImpl<SDValue> &OutVals,
784       const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
785 
786   /// Finds the incoming stack arguments which overlap the given fixed stack
787   /// object and incorporates their load into the current chain. This prevents
788   /// an upcoming store from clobbering the stack argument before it's used.
789   SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
790                               MachineFrameInfo &MFI, int ClobberedFI) const;
791 
792   bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
793 
794   void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
795                            SDValue &Chain) const;
796 
797   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
798                       bool isVarArg,
799                       const SmallVectorImpl<ISD::OutputArg> &Outs,
800                       LLVMContext &Context) const override;
801 
802   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
803                       const SmallVectorImpl<ISD::OutputArg> &Outs,
804                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
805                       SelectionDAG &DAG) const override;
806 
807   SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
808                         unsigned Flag) const;
809   SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
810                         unsigned Flag) const;
811   SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
812                         unsigned Flag) const;
813   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
814                         unsigned Flag) const;
815   template <class NodeTy>
816   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
817   template <class NodeTy>
818   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
819   template <class NodeTy>
820   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
821   template <class NodeTy>
822   SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
823   SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
824   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
825   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
826   SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
827   SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
828   SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
829                                const SDLoc &DL, SelectionDAG &DAG) const;
830   SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
831                                  SelectionDAG &DAG) const;
832   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
833   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
834   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
835   SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
836   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
837   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
838                          SDValue TVal, SDValue FVal, const SDLoc &dl,
839                          SelectionDAG &DAG) const;
840   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
841   SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
842   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
843   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
844   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
845   SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
846   SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
847   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
848   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
849   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
850   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
851   SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
852   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
853   SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
854   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
855   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
856   SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
857   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
858   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
859   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
860   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
861   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
862                               unsigned NewOp) const;
863   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
864   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
865   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
866   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
867   SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
868   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
869   SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
870   SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
871                         RTLIB::Libcall Call) const;
872   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
873   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
874   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
875   SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
876   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
877   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
878   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
879   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
880   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
881   SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
882   SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
883   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
884   SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
885   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
886   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
887   SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
888                                          SDValue &Size,
889                                          SelectionDAG &DAG) const;
890   SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
891                              EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
892 
893   SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
894   SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
895   SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
896                                               SelectionDAG &DAG) const;
897 
898   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
899                         SmallVectorImpl<SDNode *> &Created) const override;
900   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
901                           int &ExtraSteps, bool &UseOneConst,
902                           bool Reciprocal) const override;
903   SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
904                            int &ExtraSteps) const override;
905   unsigned combineRepeatedFPDivisors() const override;
906 
907   ConstraintType getConstraintType(StringRef Constraint) const override;
908   Register getRegisterByName(const char* RegName, LLT VT,
909                              const MachineFunction &MF) const override;
910 
911   /// Examine constraint string and operand type and determine a weight value.
912   /// The operand object must already have been set up with the operand type.
913   ConstraintWeight
914   getSingleConstraintMatchWeight(AsmOperandInfo &info,
915                                  const char *constraint) const override;
916 
917   std::pair<unsigned, const TargetRegisterClass *>
918   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
919                                StringRef Constraint, MVT VT) const override;
920 
921   const char *LowerXConstraint(EVT ConstraintVT) const override;
922 
923   void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
924                                     std::vector<SDValue> &Ops,
925                                     SelectionDAG &DAG) const override;
926 
927   unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
928     if (ConstraintCode == "Q")
929       return InlineAsm::Constraint_Q;
930     // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
931     //        followed by llvm_unreachable so we'll leave them unimplemented in
932     //        the backend for now.
933     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
934   }
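
  // Hedged illustration (user-side, not part of this class): the "Q"
  // constraint requests an address formed from a single base register with no
  // offset, e.g.
  //   asm volatile("ldaxr %w0, %1" : "=r"(Val) : "Q"(*Ptr));
  // where Val and Ptr are hypothetical names; such operands map to
  // InlineAsm::Constraint_Q above.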
935 
936   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
937   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
938   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
939   bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
940                               ISD::MemIndexedMode &AM, bool &IsInc,
941                               SelectionDAG &DAG) const;
942   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
943                                  ISD::MemIndexedMode &AM,
944                                  SelectionDAG &DAG) const override;
945   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
946                                   SDValue &Offset, ISD::MemIndexedMode &AM,
947                                   SelectionDAG &DAG) const override;
948 
949   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
950                           SelectionDAG &DAG) const override;
951   void ReplaceExtractSubVectorResults(SDNode *N,
952                                       SmallVectorImpl<SDValue> &Results,
953                                       SelectionDAG &DAG) const;
954 
955   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
956 
957   void finalizeLowering(MachineFunction &MF) const override;
958 
959   bool shouldLocalize(const MachineInstr &MI,
960                       const TargetTransformInfo *TTI) const override;
961 
962   bool useSVEForFixedLengthVectors() const;
963   bool useSVEForFixedLengthVectorVT(EVT VT) const;
964 };
965 
966 namespace AArch64 {
967 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
968                          const TargetLibraryInfo *libInfo);
969 } // end namespace AArch64
970 
971 } // end namespace llvm
972 
973 #endif
974