xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.h (revision c1d255d3ffdbe447de3ab875bf4e7d7accc5bfc5)
1 //===- ARMISelLowering.h - ARM DAG Lowering Interface -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that ARM uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
15 #define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
16 
17 #include "MCTargetDesc/ARMBaseInfo.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/ISDOpcodes.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/SelectionDAGNodes.h"
24 #include "llvm/CodeGen/TargetLowering.h"
25 #include "llvm/CodeGen/ValueTypes.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/CallingConv.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/InlineAsm.h"
31 #include "llvm/Support/CodeGen.h"
32 #include "llvm/Support/MachineValueType.h"
33 #include <utility>
34 
35 namespace llvm {
36 
37 class ARMSubtarget;
38 class DataLayout;
39 class FastISel;
40 class FunctionLoweringInfo;
41 class GlobalValue;
42 class InstrItineraryData;
43 class Instruction;
44 class MachineBasicBlock;
45 class MachineInstr;
46 class SelectionDAG;
47 class TargetLibraryInfo;
48 class TargetMachine;
49 class TargetRegisterInfo;
50 class VectorType;
51 
52   namespace ARMISD {
53 
54     // ARM Specific DAG Nodes
55     enum NodeType : unsigned { // Opcode values for the ARM-specific DAG nodes.
56       // Start the numbering where the builtin ops and target ops leave off.
57       FIRST_NUMBER = ISD::BUILTIN_OP_END,
58 
59       Wrapper,      // Wrapper - A wrapper node for TargetConstantPool,
60                     // TargetExternalSymbol, and TargetGlobalAddress.
61       WrapperPIC,   // WrapperPIC - A wrapper node for TargetGlobalAddress in
62                     // PIC mode.
63       WrapperJT,    // WrapperJT - A wrapper node for TargetJumpTable.
64 
65       // Add pseudo op to model memcpy for struct byval.
66       COPY_STRUCT_BYVAL,
67 
68       CALL,         // Function call.
69       CALL_PRED,    // Function call that's predicable.
70       CALL_NOLINK,  // Function call with branch not branch-and-link.
71       tSECALL,      // CMSE non-secure function call.
72       BRCOND,       // Conditional branch.
73       BR_JT,        // Jumptable branch.
74       BR2_JT,       // Jumptable branch (2 level - jumptable entry is a jump).
75       RET_FLAG,     // Return with a flag operand.
76       SERET_FLAG,   // CMSE Entry function return with a flag operand.
77       INTRET_FLAG,  // Interrupt return with an LR-offset and a flag operand.
78 
79       PIC_ADD,      // Add with a PC operand and a PIC label.
80 
81       ASRL,         // MVE long arithmetic shift right.
82       LSRL,         // MVE long shift right.
83       LSLL,         // MVE long shift left.
84 
85       CMP,          // ARM compare instructions.
86       CMN,          // ARM CMN instructions.
87       CMPZ,         // ARM compare that sets only Z flag.
88       CMPFP,        // ARM VFP compare instruction, sets FPSCR.
89       CMPFPE,       // ARM VFP signalling compare instruction, sets FPSCR.
90       CMPFPw0,      // ARM VFP compare against zero instruction, sets FPSCR.
91       CMPFPEw0,     // ARM VFP signalling compare against zero instruction, sets FPSCR.
92       FMSTAT,       // ARM fmstat instruction.
93 
94       CMOV,         // ARM conditional move instructions.
95       SUBS,         // Flag-setting subtraction.
96 
97       SSAT,         // Signed saturation
98       USAT,         // Unsigned saturation
99 
100       BCC_i64,      // NOTE(review): undocumented; presumably a conditional branch on an i64 compare - confirm.
101 
102       SRL_FLAG,     // V,Flag = srl_flag X -> srl X, 1 + save carry out.
103       SRA_FLAG,     // V,Flag = sra_flag X -> sra X, 1 + save carry out.
104       RRX,          // V = RRX X, Flag     -> srl X, 1 + shift in carry flag.
105 
106       ADDC,         // Add with carry
107       ADDE,         // Add using carry
108       SUBC,         // Sub with carry
109       SUBE,         // Sub using carry
110       LSLS,         // Shift left producing carry
111 
112       VMOVRRD,      // double to two gprs.
113       VMOVDRR,      // Two gprs to double.
114       VMOVSR,       // move gpr to single, used for f32 literal constructed in a gpr
115 
116       EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
117       EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
118       EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
119 
120       TC_RETURN,    // Tail call return pseudo.
121 
122       THREAD_POINTER, // NOTE(review): undocumented; presumably yields the thread pointer - confirm.
123 
124       DYN_ALLOC,    // Dynamic allocation on the stack.
125 
126       MEMBARRIER_MCR, // Memory barrier (MCR)
127 
128       PRELOAD,      // Preload
129 
130       WIN__CHKSTK,  // Windows' __chkstk call to do stack probing.
131       WIN__DBZCHK,  // Windows' divide by zero check
132 
133       WLS,          // Low-overhead loops, While Loop Start
134       LOOP_DEC,     // Really a part of LE, performs the sub
135       LE,           // Low-overhead loops, Loop End
136 
137       PREDICATE_CAST, // Predicate cast for MVE i1 types
138       VECTOR_REG_CAST, // Reinterpret the current contents of a vector register
139 
140       VCMP,         // Vector compare.
141       VCMPZ,        // Vector compare to zero.
142       VTST,         // Vector test bits.
143 
144       // Vector shift by vector
145       VSHLs,        // ...left/right by signed
146       VSHLu,        // ...left/right by unsigned
147 
148       // Vector shift by immediate:
149       VSHLIMM,      // ...left
150       VSHRsIMM,     // ...right (signed)
151       VSHRuIMM,     // ...right (unsigned)
152 
153       // Vector rounding shift by immediate:
154       VRSHRsIMM,    // ...right (signed)
155       VRSHRuIMM,    // ...right (unsigned)
156       VRSHRNIMM,    // ...right narrow
157 
158       // Vector saturating shift by immediate:
159       VQSHLsIMM,    // ...left (signed)
160       VQSHLuIMM,    // ...left (unsigned)
161       VQSHLsuIMM,   // ...left (signed to unsigned)
162       VQSHRNsIMM,   // ...right narrow (signed)
163       VQSHRNuIMM,   // ...right narrow (unsigned)
164       VQSHRNsuIMM,  // ...right narrow (signed to unsigned)
165 
166       // Vector saturating rounding shift by immediate:
167       VQRSHRNsIMM,  // ...right narrow (signed)
168       VQRSHRNuIMM,  // ...right narrow (unsigned)
169       VQRSHRNsuIMM, // ...right narrow (signed to unsigned)
170 
171       // Vector shift and insert:
172       VSLIIMM,      // ...left
173       VSRIIMM,      // ...right
174 
175       // Vector get lane (VMOV scalar to ARM core register)
176       // (These are used for 8- and 16-bit element types only.)
177       VGETLANEu,    // zero-extend vector extract element
178       VGETLANEs,    // sign-extend vector extract element
179 
180       // Vector move immediate and move negated immediate:
181       VMOVIMM,
182       VMVNIMM,
183 
184       // Vector move f32 immediate:
185       VMOVFPIMM,
186 
187       // Move H <-> R, clearing top 16 bits
188       VMOVrh,
189       VMOVhr,
190 
191       // Vector duplicate:
192       VDUP,
193       VDUPLANE,
194 
195       // Vector shuffles:
196       VEXT,         // extract
197       VREV64,       // reverse elements within 64-bit doublewords
198       VREV32,       // reverse elements within 32-bit words
199       VREV16,       // reverse elements within 16-bit halfwords
200       VZIP,         // zip (interleave)
201       VUZP,         // unzip (deinterleave)
202       VTRN,         // transpose
203       VTBL1,        // 1-register shuffle with mask
204       VTBL2,        // 2-register shuffle with mask
205       VMOVN,        // MVE vmovn
206 
207       // MVE Saturating truncates
208       VQMOVNs,      // Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
209       VQMOVNu,      // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)
210 
211       // MVE float <> half converts
212       VCVTN,        // MVE vcvt f32 -> f16, truncating into either the bottom or top lanes
213       VCVTL,        // MVE vcvt f16 -> f32, extending from either the bottom or top lanes
214 
215       // Vector multiply long:
216       VMULLs,       // ...signed
217       VMULLu,       // ...unsigned
218 
219       VQDMULH,      // MVE vqdmulh instruction
220 
221       // MVE reductions
222       VADDVs,       // sign- or zero-extend the elements of a vector to i32,
223       VADDVu,       //   add them all together, and return an i32 of their sum
224       VADDVps,      // Same as VADDV[su] but with a v4i1 predicate mask
225       VADDVpu,
226       VADDLVs,      // sign- or zero-extend elements to i64 and sum, returning
227       VADDLVu,      //   the low and high 32-bit halves of the sum
228       VADDLVAs,     // Same as VADDLV[su] but also add an input accumulator
229       VADDLVAu,     //   provided as low and high halves
230       VADDLVps,     // Same as VADDLV[su] but with a v4i1 predicate mask
231       VADDLVpu,
232       VADDLVAps,    // Same as VADDLVA[su] but with a v4i1 predicate mask
233       VADDLVApu,
234       VMLAVs,       // sign- or zero-extend the elements of two vectors to i32, multiply them
235       VMLAVu,       //   and add the results together, returning an i32 of their sum
236       VMLAVps,      // Same as VMLAV[su] with a v4i1 predicate mask
237       VMLAVpu,
238       VMLALVs,      // Same as VMLAV but with i64, returning the low and
239       VMLALVu,      //   high 32-bit halves of the sum
240       VMLALVps,     // Same as VMLALV[su] with a v4i1 predicate mask
241       VMLALVpu,
242       VMLALVAs,     // Same as VMLALV but also add an input accumulator
243       VMLALVAu,     //   provided as low and high halves
244       VMLALVAps,    // Same as VMLALVA[su] with a v4i1 predicate mask
245       VMLALVApu,
246       VMINVu,        // Find minimum unsigned value of a vector and register
247       VMINVs,        // Find minimum signed value of a vector and register
248       VMAXVu,        // Find maximum unsigned value of a vector and register
249       VMAXVs,        // Find maximum signed value of a vector and register
250 
251       SMULWB,       // Signed multiply word by half word, bottom
252       SMULWT,       // Signed multiply word by half word, top
253       UMLAL,        // 64-bit Unsigned Accumulate Multiply
254       SMLAL,        // 64-bit Signed Accumulate Multiply
255       UMAAL,        // 64-bit Unsigned Accumulate Accumulate Multiply
256       SMLALBB,      // 64-bit signed accumulate multiply bottom, bottom 16
257       SMLALBT,      // 64-bit signed accumulate multiply bottom, top 16
258       SMLALTB,      // 64-bit signed accumulate multiply top, bottom 16
259       SMLALTT,      // 64-bit signed accumulate multiply top, top 16
260       SMLALD,       // Signed multiply accumulate long dual
261       SMLALDX,      // Signed multiply accumulate long dual exchange
262       SMLSLD,       // Signed multiply subtract long dual
263       SMLSLDX,      // Signed multiply subtract long dual exchange
264       SMMLAR,       // Signed multiply long, round and add
265       SMMLSR,       // Signed multiply long, subtract and round
266 
267       // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b stands for.
268       QADD8b,
269       QSUB8b,
270       QADD16b,
271       QSUB16b,
272 
273       // Operands of the standard BUILD_VECTOR node are not legalized, which
274       // is fine if BUILD_VECTORs are always lowered to shuffles or other
275       // operations, but for ARM some BUILD_VECTORs are legal as-is and their
276       // operands need to be legalized.  Define an ARM-specific version of
277       // BUILD_VECTOR for this purpose.
278       BUILD_VECTOR,
279 
280       // Bit-field insert
281       BFI,
282 
283       // Vector OR with immediate
284       VORRIMM,
285       // Vector AND with NOT of immediate
286       VBICIMM,
287 
288       // Pseudo vector bitwise select
289       VBSP,
290 
291       // Pseudo-instruction representing a memory copy using ldm/stm
292       // instructions.
293       MEMCPY,
294 
295       // V8.1MMainline condition select
296       CSINV, // Conditional select invert.
297       CSNEG, // Conditional select negate.
298       CSINC, // Conditional select increment.
299 
300       // Vector load N-element structure to all lanes:
301       VLD1DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, // Explicit value: this and every later enumerator count up from ISD::FIRST_TARGET_MEMORY_OPCODE.
302       VLD2DUP,
303       VLD3DUP,
304       VLD4DUP,
305 
306       // NEON loads with post-increment base updates:
307       VLD1_UPD,
308       VLD2_UPD,
309       VLD3_UPD,
310       VLD4_UPD,
311       VLD2LN_UPD,
312       VLD3LN_UPD,
313       VLD4LN_UPD,
314       VLD1DUP_UPD,
315       VLD2DUP_UPD,
316       VLD3DUP_UPD,
317       VLD4DUP_UPD,
318 
319       // NEON stores with post-increment base updates:
320       VST1_UPD,
321       VST2_UPD,
322       VST3_UPD,
323       VST4_UPD,
324       VST2LN_UPD,
325       VST3LN_UPD,
326       VST4LN_UPD,
327 
328       // Load/Store of dual registers
329       LDRD,
330       STRD
331     };
332 
333   } // end namespace ARMISD
334 
335   /// Define some predicates that are used for node matching.
336   namespace ARM {
337 
338     bool isBitFieldInvertedMask(unsigned v); // Declaration only; defined elsewhere. NOTE(review): presumably tests whether v is an inverted bit-field mask - confirm against the definition.
339 
340   } // end namespace ARM
341 
342   //===--------------------------------------------------------------------===//
343   //  ARMTargetLowering - ARM Implementation of the TargetLowering interface
344 
345   class ARMTargetLowering : public TargetLowering {
346   public:
347     explicit ARMTargetLowering(const TargetMachine &TM,
348                                const ARMSubtarget &STI);
349 
350     unsigned getJumpTableEncoding() const override;
351     bool useSoftFloat() const override;
352 
353     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
354 
355     /// ReplaceNodeResults - Replace the results of node with an illegal result
356     /// type with new values built out of custom code.
357     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
358                             SelectionDAG &DAG) const override;
359 
360     const char *getTargetNodeName(unsigned Opcode) const override;
361 
362     bool isSelectSupported(SelectSupportKind Kind) const override {
363       // Every select kind is reported as supported except ScalarCondVectorVal:
363       // ARM does not support scalar condition selects on vectors.
364       return (Kind != ScalarCondVectorVal);
365     }
366 
367     bool isReadOnly(const GlobalValue *GV) const;
368 
369     /// getSetCCResultType - Return the value type to use for ISD::SETCC.
370     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
371                            EVT VT) const override;
372 
373     MachineBasicBlock *
374     EmitInstrWithCustomInserter(MachineInstr &MI,
375                                 MachineBasicBlock *MBB) const override;
376 
377     void AdjustInstrPostInstrSelection(MachineInstr &MI,
378                                        SDNode *Node) const override;
379 
380     SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
381     SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;
382     SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
383     SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const;
384     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
385 
386     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
387                                            const APInt &OriginalDemandedBits,
388                                            const APInt &OriginalDemandedElts,
389                                            KnownBits &Known,
390                                            TargetLoweringOpt &TLO,
391                                            unsigned Depth) const override;
392 
393     bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
394 
395     /// allowsMisalignedMemoryAccesses - Returns true if the target allows
396     /// unaligned memory accesses of the specified type. Returns whether it
397     /// is "fast" through the Fast out-parameter.
398     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
399                                         unsigned Align,
400                                         MachineMemOperand::Flags Flags,
401                                         bool *Fast) const override;
402 
403     EVT getOptimalMemOpType(const MemOp &Op,
404                             const AttributeList &FuncAttributes) const override;
405 
406     bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
407     bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
408     bool isZExtFree(SDValue Val, EVT VT2) const override;
409     bool shouldSinkOperands(Instruction *I,
410                             SmallVectorImpl<Use *> &Ops) const override;
411     Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const override;
412 
413     bool isFNegFree(EVT VT) const override;
414 
415     bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
416 
417     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
418 
419 
420     /// isLegalAddressingMode - Return true if the addressing mode represented
421     /// by AM is legal for this target, for a load/store of the specified type.
422     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
423                                Type *Ty, unsigned AS,
424                                Instruction *I = nullptr) const override;
425 
426     /// getScalingFactorCost - Return the cost of the scaling used in
427     /// addressing mode represented by AM.
428     /// If the AM is supported, the return value must be >= 0.
429     /// If the AM is not supported, the return value must be negative.
430     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
431                              unsigned AS) const override;
432 
433     bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
434 
435     /// Returns true if the addressing mode represented by AM is legal
436     /// for the Thumb1 target, for a load/store of the specified type.
437     bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
438 
439     /// isLegalICmpImmediate - Return true if the specified immediate is legal
440     /// icmp immediate, that is the target has icmp instructions which can
441     /// compare a register against the immediate without having to materialize
442     /// the immediate into a register.
443     bool isLegalICmpImmediate(int64_t Imm) const override;
444 
445     /// isLegalAddImmediate - Return true if the specified immediate is legal
446     /// add immediate, that is the target has add instructions which can
447     /// add a register and the immediate without having to materialize
448     /// the immediate into a register.
449     bool isLegalAddImmediate(int64_t Imm) const override;
450 
451     /// getPreIndexedAddressParts - returns true by value, base pointer and
452     /// offset pointer and addressing mode by reference if the node's address
453     /// can be legally represented as pre-indexed load / store address.
454     bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
455                                    ISD::MemIndexedMode &AM,
456                                    SelectionDAG &DAG) const override;
457 
458     /// getPostIndexedAddressParts - returns true by value, base pointer and
459     /// offset pointer and addressing mode by reference if this node can be
460     /// combined with a load / store to form a post-indexed load / store.
461     bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
462                                     SDValue &Offset, ISD::MemIndexedMode &AM,
463                                     SelectionDAG &DAG) const override;
464 
465     void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
466                                        const APInt &DemandedElts,
467                                        const SelectionDAG &DAG,
468                                        unsigned Depth) const override;
469 
470     bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
471                                       const APInt &DemandedElts,
472                                       TargetLoweringOpt &TLO) const override;
473 
474     bool ExpandInlineAsm(CallInst *CI) const override;
475 
476     ConstraintType getConstraintType(StringRef Constraint) const override;
477 
478     /// Examine constraint string and operand type and determine a weight value.
479     /// The operand object must already have been set up with the operand type.
480     ConstraintWeight getSingleConstraintMatchWeight(
481       AsmOperandInfo &info, const char *constraint) const override;
482 
483     std::pair<unsigned, const TargetRegisterClass *>
484     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
485                                  StringRef Constraint, MVT VT) const override;
486 
487     const char *LowerXConstraint(EVT ConstraintVT) const override;
488 
489     /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
490     /// vector.  If it is invalid, don't add anything to Ops. If hasMemory is
491     /// true it means one of the asm constraints of the inline asm instruction
492     /// being processed is 'm'.
493     void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
494                                       std::vector<SDValue> &Ops,
495                                       SelectionDAG &DAG) const override;
496 
497     unsigned
498     getInlineAsmMemConstraint(StringRef ConstraintCode) const override { // Maps a memory constraint string to an InlineAsm::Constraint_* code.
499       if (ConstraintCode == "Q") // Single-letter codes handled here: "Q" and "o".
500         return InlineAsm::Constraint_Q;
501       else if (ConstraintCode == "o")
502         return InlineAsm::Constraint_o;
503       else if (ConstraintCode.size() == 2) { // Two-character codes: only those starting with 'U' are handled here.
504         if (ConstraintCode[0] == 'U') {
505           switch(ConstraintCode[1]) {
506           default: // Unrecognised second letter: leave the switch and use the base-class lookup below.
507             break;
508           case 'm':
509             return InlineAsm::Constraint_Um;
510           case 'n':
511             return InlineAsm::Constraint_Un;
512           case 'q':
513             return InlineAsm::Constraint_Uq;
514           case 's':
515             return InlineAsm::Constraint_Us;
516           case 't':
517             return InlineAsm::Constraint_Ut;
518           case 'v':
519             return InlineAsm::Constraint_Uv;
520           case 'y':
521             return InlineAsm::Constraint_Uy;
522           }
523         }
524       }
525       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); // Anything not matched above is delegated to TargetLowering.
526     }
527 
528     const ARMSubtarget* getSubtarget() const { // Accessor for the Subtarget pointer held by this object.
529       return Subtarget;
530     }
531 
532     /// getRegClassFor - Return the register class that should be used for the
533     /// specified value type.
534     const TargetRegisterClass *
535     getRegClassFor(MVT VT, bool isDivergent = false) const override;
536 
537     bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
538                                 unsigned &PrefAlign) const override;
539 
540     /// createFastISel - This method returns a target specific FastISel object,
541     /// or null if the target does not support "fast" ISel.
542     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
543                              const TargetLibraryInfo *libInfo) const override;
544 
545     Sched::Preference getSchedulingPreference(SDNode *N) const override;
546 
547     bool
548     isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
549     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
550 
551     /// isFPImmLegal - Returns true if the target can instruction select the
552     /// specified FP immediate natively. If false, the legalizer will
553     /// materialize the FP immediate as a load from a constant pool.
554     bool isFPImmLegal(const APFloat &Imm, EVT VT,
555                       bool ForCodeSize = false) const override;
556 
557     bool getTgtMemIntrinsic(IntrinsicInfo &Info,
558                             const CallInst &I,
559                             MachineFunction &MF,
560                             unsigned Intrinsic) const override;
561 
562     /// Returns true if it is beneficial to convert a load of a constant
563     /// to just the constant itself.
564     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
565                                            Type *Ty) const override;
566 
567     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
568     /// with this index.
569     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
570                                  unsigned Index) const override;
571 
572     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
573                               bool MathUsed) const override {
574       // Using overflow ops for overflow checks only should be beneficial on ARM, so the caller's MathUsed is ignored and true is always forwarded to the base class.
575       return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
576     }
577 
578     /// Returns true if an argument of type Ty needs to be passed in a
579     /// contiguous block of registers in calling convention CallConv.
580     bool functionArgumentNeedsConsecutiveRegisters(
581         Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
582 
583     /// If a physical register, this returns the register that receives the
584     /// exception address on entry to an EH pad.
585     Register
586     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
587 
588     /// If a physical register, this returns the register that receives the
589     /// exception typeid on entry to a landing pad.
590     Register
591     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
592 
593     Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
594     Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
595                           AtomicOrdering Ord) const override;
596     Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
597                                 Value *Addr, AtomicOrdering Ord) const override;
598 
599     void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
600 
601     Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
602                                   AtomicOrdering Ord) const override;
603     Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
604                                    AtomicOrdering Ord) const override;
605 
606     unsigned getMaxSupportedInterleaveFactor() const override;
607 
608     bool lowerInterleavedLoad(LoadInst *LI,
609                               ArrayRef<ShuffleVectorInst *> Shuffles,
610                               ArrayRef<unsigned> Indices,
611                               unsigned Factor) const override;
612     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
613                                unsigned Factor) const override;
614 
615     bool shouldInsertFencesForAtomic(const Instruction *I) const override;
616     TargetLoweringBase::AtomicExpansionKind
617     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
618     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
619     TargetLoweringBase::AtomicExpansionKind
620     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
621     TargetLoweringBase::AtomicExpansionKind
622     shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
623 
624     bool useLoadStackGuardNode() const override;
625 
626     void insertSSPDeclarations(Module &M) const override;
627     Value *getSDagStackGuard(const Module &M) const override;
628     Function *getSSPStackGuardCheck(const Module &M) const override;
629 
630     bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
631                                    unsigned &Cost) const override;
632 
633     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
634                           const SelectionDAG &DAG) const override {
635       // Do not merge to larger than i32. Only MemVT's bit size is checked; AddressSpace and DAG are not consulted.
636       return (MemVT.getSizeInBits() <= 32);
637     }
638 
639     bool isCheapToSpeculateCttz() const override;
640     bool isCheapToSpeculateCtlz() const override;
641 
642     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
643       return VT.isScalarInteger(); // The answer is exactly whatever VT.isScalarInteger() reports.
644     }
645 
646     bool supportSwiftError() const override {
647       return true; // Unconditional: no subtarget or configuration state is checked.
648     }
649 
650     bool hasStandaloneRem(EVT VT) const override {
651       return HasStandaloneRem; // VT is ignored; the answer comes from the HasStandaloneRem member.
652     }
653 
654     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
655 
656     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
657     CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
658 
659     /// Returns true if \p VecTy is a legal interleaved access type. This
660     /// function checks the vector element type and the overall width of the
661     /// vector.
662     bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy,
663                                       const DataLayout &DL) const;
664 
665     bool alignLoopsWithOptSize() const override;
666 
667     /// Returns the number of interleaved accesses that will be generated when
668     /// lowering accesses of the given type.
669     unsigned getNumInterleavedAccesses(VectorType *VecTy,
670                                        const DataLayout &DL) const;
671 
672     void finalizeLowering(MachineFunction &MF) const override;
673 
674     /// Return the correct alignment for the current calling convention.
675     Align getABIAlignmentForCallingConv(Type *ArgTy,
676                                         DataLayout DL) const override;
677 
678     bool isDesirableToCommuteWithShift(const SDNode *N,
679                                        CombineLevel Level) const override;
680 
681     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
682                                            CombineLevel Level) const override;
683 
684     bool preferIncOfAddToSubOfNot(EVT VT) const override;
685 
686   protected:
687     std::pair<const TargetRegisterClass *, uint8_t>
688     findRepresentativeClass(const TargetRegisterInfo *TRI,
689                             MVT VT) const override;
690 
691   private:
692     /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
693     /// make the right decision when generating code for different targets.
694     const ARMSubtarget *Subtarget;
695 
696     const TargetRegisterInfo *RegInfo;
697 
698     const InstrItineraryData *Itins;
699 
700     /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
701     unsigned ARMPCLabelIndex;
702 
703     // TODO: remove this, and have shouldInsertFencesForAtomic do the proper
704     // check.
705     bool InsertFencesForAtomic;
706 
707     bool HasStandaloneRem = true;
708 
709     void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
710     void addDRTypeForNEON(MVT VT);
711     void addQRTypeForNEON(MVT VT);
712     std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const;
713 
        /// Alias for a SmallVector of (unsigned, SDValue) pairs whose inline
        /// capacity argument is 8.
        /// NOTE(review): the unsigned is presumably a register number paired with
        /// the value to place in it — confirm at the use sites.
714     using RegsToPassVector = SmallVector<std::pair<unsigned, SDValue>, 8>;
715 
        /// Returns nothing. \p Arg, \p RegsToPass, \p VA, \p NextVA, \p StackPtr
        /// and \p MemOpChains are all non-const references, so any of them may be
        /// updated by the call.
        /// NOTE(review): presumably splits an f64 argument over the two locations
        /// \p VA and \p NextVA, using registers and/or memory — confirm in the
        /// definition.
716     void PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain,
717                           SDValue &Arg, RegsToPassVector &RegsToPass,
718                           CCValAssign &VA, CCValAssign &NextVA,
719                           SDValue &StackPtr,
720                           SmallVectorImpl<SDValue> &MemOpChains,
721                           ISD::ArgFlagsTy Flags) const;
        /// Returns an SDValue for the two assignments \p VA and \p NextVA; \p Root
        /// is taken by non-const reference.
        /// NOTE(review): presumably the incoming-argument counterpart of
        /// PassF64ArgInRegs — confirm in the definition.
722     SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
723                                  SDValue &Root, SelectionDAG &DAG,
724                                  const SDLoc &dl) const;
725 
        /// Maps \p CC, together with the variadic flag \p isVarArg, to a
        /// CallingConv::ID.
        /// NOTE(review): presumably the convention actually applied once
        /// target-specific adjustments are made — confirm in the definition.
726     CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC,
727                                             bool isVarArg) const;
        /// Returns a pointer to a CCAssignFn for \p CC.
        /// NOTE(review): \p Return presumably selects return-value assignment
        /// rather than argument assignment — confirm.
728     CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
729                                   bool isVarArg) const;
        /// Returns an SDValue computed from the chain, stack pointer, argument
        /// value, location \p VA and flags it is given.
        /// NOTE(review): presumably the store that places an outgoing argument in
        /// memory — confirm.
730     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
731                              const SDLoc &dl, SelectionDAG &DAG,
732                              const CCValAssign &VA,
733                              ISD::ArgFlagsTy Flags) const;
        // Per-operation helpers, continuing through LowerLOAD below. All of them
        // are const and take the SelectionDAG by reference; only declarations
        // appear in this header.
        // NOTE(review): which operation each helper handles is inferred from its
        // name — confirm at the dispatch site in the implementation.
734     SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
735     SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
736     SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
        // The two intrinsic helpers additionally receive the subtarget pointer.
737     SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG,
738                                     const ARMSubtarget *Subtarget) const;
739     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
740                                     const ARMSubtarget *Subtarget) const;
        // NOTE(review): going by the names, the following helpers produce
        // addresses (block, constant pool, global, TLS, GOT); the
        // Darwin/ELF/Windows variants are presumably chosen per platform —
        // confirm.
741     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
742     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
743     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
744     SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
745     SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
746     SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const;
747     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
        // The two LowerToTLS* helpers take the global-address node directly
        // instead of an SDValue; the second also takes the TLS model.
748     SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
749                                             SelectionDAG &DAG) const;
750     SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
751                                  SelectionDAG &DAG,
752                                  TLSModel::Model model) const;
753     SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
754     SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
755     SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
        // NOTE(review): going by the names, the following cover control flow,
        // overflow arithmetic, selects, frame queries and wide shifts — confirm.
756     SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
757     SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
758     SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
759     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
760     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
761     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
762     SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
763     SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
764     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
765     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
766     SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
767     SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
768     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
        // These two receive the subtarget through a parameter named ST.
769     SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
770                             const ARMSubtarget *ST) const;
771     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
772                               const ARMSubtarget *ST) const;
773     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
774     SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
775     SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
        // The Windows division helpers take a Signed flag. ExpandDIV_Windows
        // returns void and instead receives a Results vector by non-const
        // reference; LowerWindowsDIVLibCall receives Chain by non-const
        // reference.
        // NOTE(review): Results and Chain are presumably outputs — confirm.
776     SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
777     void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
778                            SmallVectorImpl<SDValue> &Results) const;
        // ExpandBITCAST and LowerREM take the node pointer N instead of an SDValue.
779     SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
780                           const ARMSubtarget *Subtarget) const;
781     SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
782                                    SDValue &Chain) const;
783     SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
784     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
785     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
786     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
787     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
788     SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
789     SDValue LowerFSETCC(SDValue Op, SelectionDAG &DAG) const;
        // lowerABS and LowerLOAD return void and take a node pointer plus a
        // Results vector by non-const reference.
        // NOTE(review): presumably they append the replacement values to Results
        // — confirm in the definitions.
790     void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
791                   SelectionDAG &DAG) const;
792     void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
793                    SelectionDAG &DAG) const;
794 
        /// Override of the inherited getRegisterByName hook. Takes the register
        /// name \p RegName as a C string, a low-level type \p VT and the machine
        /// function \p MF, and returns a Register.
        /// NOTE(review): the behaviour for an unrecognised name is not visible
        /// here — confirm in the definition.
795     Register getRegisterByName(const char* RegName, LLT VT,
796                                const MachineFunction &MF) const override;
797 
        /// Override of the inherited BuildSDIVPow2 hook for node \p N and the
        /// constant \p Divisor. Returns an SDValue; \p Created is a vector of
        /// SDNode pointers taken by non-const reference.
        /// NOTE(review): presumably builds a target-specific sequence for signed
        /// division by a power of two and records the new nodes in \p Created —
        /// confirm against the base-class documentation.
798     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
799                           SmallVectorImpl<SDNode *> &Created) const override;
800 
        /// Override of the inherited isFMAFasterThanFMulAndFAdd query for value
        /// type \p VT within the machine function \p MF.
        /// NOTE(review): going by the name, true presumably means a fused
        /// multiply-add beats a separate multiply and add for \p VT — confirm
        /// against the base-class documentation.
801     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
802                                     EVT VT) const override;
803 
        // Two helpers with identical parameter lists: each takes a value \p Val
        // together with a location type \p LocVT and a value type \p ValVT, and
        // returns an SDValue.
        // NOTE(review): "HPR" presumably names the half-precision register class,
        // making these a to/from pair of conversions — confirm in the
        // definitions.
804     SDValue MoveToHPR(const SDLoc &dl, SelectionDAG &DAG, MVT LocVT, MVT ValVT,
805                       SDValue Val) const;
806     SDValue MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG, MVT LocVT,
807                         MVT ValVT, SDValue Val) const;
808 
        /// Returns an SDValue for \p Op. Only the declaration is visible here.
        /// NOTE(review): going by the name, it presumably tries to re-express
        /// \p Op as a vector shuffle — confirm in the definition, including what
        /// is returned when that is not possible.
809     SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
810 
        /// Helper that is not marked override. It takes the chain, \p InFlag, the
        /// calling convention and variadic flag, the incoming-value descriptions
        /// \p Ins (const reference) and the vector \p InVals (non-const
        /// reference), plus \p isThisReturn and \p ThisVal.
        /// NOTE(review): presumably copies a call's results into \p InVals and
        /// returns the updated chain, with \p ThisVal used when \p isThisReturn
        /// is set — confirm in the definition.
811     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
812                             CallingConv::ID CallConv, bool isVarArg,
813                             const SmallVectorImpl<ISD::InputArg> &Ins,
814                             const SDLoc &dl, SelectionDAG &DAG,
815                             SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
816                             SDValue ThisVal) const;
817 
        /// Override of the inherited supportSplitCSR query. Answers true only when
        /// the function behind \p MF uses the CXX_FAST_TLS calling convention and
        /// also carries the NoUnwind attribute. \p MF is dereferenced without a
        /// null check, so callers must pass a valid pointer.
818     bool supportSplitCSR(MachineFunction *MF) const override {
          // The attribute is only looked up when the calling convention matches,
          // because && short-circuits.
819       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
820           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
821     }
822 
        // Overrides of the two inherited split-CSR hooks. The first receives the
        // entry block; the second receives the entry block and a const list of
        // exit blocks. Both return void.
        // NOTE(review): presumably only relevant when supportSplitCSR answers
        // true — confirm against the base-class documentation.
823     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
824     void insertCopiesSplitCSR(
825       MachineBasicBlock *Entry,
826       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
827 
        /// Override of the inherited splitValueIntoRegisterParts hook. Takes the
        /// value \p Val, a non-const pointer \p Parts with the count \p NumParts,
        /// the part type \p PartVT and an optional calling convention \p CC.
        /// NOTE(review): the bool presumably reports whether the target performed
        /// the split itself, writing the pieces through \p Parts — confirm
        /// against the base-class documentation.
828     bool
829     splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
830                                 SDValue *Parts, unsigned NumParts, MVT PartVT,
831                                 Optional<CallingConv::ID> CC) const override;
832 
        /// Override of the inherited joinRegisterPartsIntoValue hook. Takes a
        /// pointer-to-const \p Parts with the count \p NumParts, the part type
        /// \p PartVT, the requested value type \p ValueVT and an optional calling
        /// convention \p CC, and returns an SDValue.
        /// NOTE(review): what is returned when the target does not handle the
        /// join is not visible here — confirm against the base-class
        /// documentation.
833     SDValue
834     joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
835                                const SDValue *Parts, unsigned NumParts,
836                                MVT PartVT, EVT ValueVT,
837                                Optional<CallingConv::ID> CC) const override;
838 
        /// Override of the inherited LowerFormalArguments hook. \p Ins describes
        /// the incoming values (const reference) and \p InVals is a vector taken
        /// by non-const reference; the result is an SDValue.
        /// NOTE(review): presumably fills \p InVals with one value per entry of
        /// \p Ins and returns the new chain — confirm against the base-class
        /// documentation.
839     SDValue
840     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
841                          const SmallVectorImpl<ISD::InputArg> &Ins,
842                          const SDLoc &dl, SelectionDAG &DAG,
843                          SmallVectorImpl<SDValue> &InVals) const override;
844 
        /// Returns an int. \p CCInfo and \p Chain are non-const references, so the
        /// call may update them; \p ArgOffset is signed while \p ArgSize and
        /// \p InRegsParamRecordIdx are unsigned.
        /// NOTE(review): the returned int is presumably a frame index for the
        /// stored by-value argument — confirm in the definition.
845     int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &dl,
846                        SDValue &Chain, const Value *OrigArg,
847                        unsigned InRegsParamRecordIdx, int ArgOffset,
848                        unsigned ArgSize) const;
849 
        /// Returns nothing; \p CCInfo and \p Chain are non-const references, so
        /// the call may update them. \p ForceMutable defaults to false when
        /// omitted.
        /// NOTE(review): presumably saves the argument registers of a variadic
        /// function — confirm in the definition.
850     void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
851                               const SDLoc &dl, SDValue &Chain,
852                               unsigned ArgOffset, unsigned TotalArgRegsSaveSize,
853                               bool ForceMutable = false) const;
854 
        /// Override of the inherited LowerCall hook. The call description \p CLI
        /// and the vector \p InVals are both taken by non-const reference; the
        /// result is an SDValue.
        /// NOTE(review): presumably \p InVals receives the call's results and the
        /// returned value is the chain — confirm against the base-class
        /// documentation.
855     SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
856                       SmallVectorImpl<SDValue> &InVals) const override;
857 
858     /// HandleByVal - Target-specific cleanup for ByVal support.
        ///
        /// Override of an inherited hook. The parameters are unnamed in this
        /// declaration: a CCState pointer, an unsigned taken by non-const
        /// reference and an Align taken by value.
        /// NOTE(review): the unsigned is presumably the by-value argument's size,
        /// which the hook may adjust — confirm against the base-class
        /// documentation.
859     void HandleByVal(CCState *, unsigned &, Align) const override;
860 
861     /// IsEligibleForTailCallOptimization - Check whether the call is eligible
862     /// for tail call optimization. Targets which want to do tail call
863     /// optimization should implement this function.
        ///
        /// This declaration is not marked override. The inputs are the callee, its
        /// calling convention, the variadic flag, the struct-return flags of
        /// callee and caller, the outgoing arguments (\p Outs, \p OutVals), the
        /// incoming values (\p Ins) and \p isIndirect. The top-level const on
        /// \p isIndirect does not change the function's type as seen by callers.
864     bool IsEligibleForTailCallOptimization(
865         SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
866         bool isCalleeStructRet, bool isCallerStructRet,
867         const SmallVectorImpl<ISD::OutputArg> &Outs,
868         const SmallVectorImpl<SDValue> &OutVals,
869         const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
870         const bool isIndirect) const;
871 
        /// Override of the inherited CanLowerReturn query. Takes the calling
        /// convention, the machine function, the variadic flag, the outgoing
        /// return values \p Outs and the LLVM context, and answers with a bool.
        /// NOTE(review): presumably true when every entry of \p Outs can be
        /// assigned a return location — confirm against the base-class
        /// documentation.
872     bool CanLowerReturn(CallingConv::ID CallConv,
873                         MachineFunction &MF, bool isVarArg,
874                         const SmallVectorImpl<ISD::OutputArg> &Outs,
875                         LLVMContext &Context) const override;
876 
        /// Override of the inherited LowerReturn hook. \p Outs and \p OutVals are
        /// taken by const reference and describe the values being returned; the
        /// result is an SDValue.
        /// NOTE(review): presumably the node that performs the return — confirm
        /// against the base-class documentation.
877     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
878                         const SmallVectorImpl<ISD::OutputArg> &Outs,
879                         const SmallVectorImpl<SDValue> &OutVals,
880                         const SDLoc &dl, SelectionDAG &DAG) const override;
881 
        /// Override of the inherited isUsedByReturnOnly query for node \p N.
        /// \p Chain is a non-const reference, so the call can update it.
        /// NOTE(review): presumably true when the only use of \p N is a return,
        /// with \p Chain set to the chain to use — confirm against the base-class
        /// documentation.
882     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
883 
        /// Override of the inherited mayBeEmittedAsTailCall query for the call
        /// instruction \p CI.
        /// NOTE(review): the conditions checked are not visible here — confirm in
        /// the definition.
884     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
885 
        /// Override of the inherited shouldConsiderGEPOffsetSplit query; this
        /// target answers true unconditionally.
886     bool shouldConsiderGEPOffsetSplit() const override { return true; }
887 
        /// Bool query on value type \p VT; not marked override.
        /// NOTE(review): going by the name, true presumably means \p VT is a
        /// floating-point type the current subtarget cannot handle natively —
        /// confirm in the definition.
888     bool isUnsupportedFloatingType(EVT VT) const;
889 
        /// Returns an SDValue of type \p VT built from a false value, a true
        /// value, \p ARMcc, \p CCR and \p Cmp.
        /// NOTE(review): presumably a conditional move choosing between
        /// \p FalseVal and \p TrueVal — confirm in the definition.
890     SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
891                     SDValue ARMcc, SDValue CCR, SDValue Cmp,
892                     SelectionDAG &DAG) const;
        /// Returns an SDValue for comparing \p LHS with \p RHS under condition
        /// code \p CC. \p ARMcc is a non-const reference, so it can carry a
        /// further result back to the caller.
        /// NOTE(review): \p ARMcc presumably receives the ARM condition code —
        /// confirm.
893     SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
894                       SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
        /// Returns an SDValue for comparing \p LHS with \p RHS. \p Signaling
        /// defaults to false when omitted.
        /// NOTE(review): presumably a VFP floating-point compare, with
        /// \p Signaling selecting a signaling variant — confirm.
895     SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
896                       const SDLoc &dl, bool Signaling = false) const;
        /// Returns an SDValue derived from \p Cmp.
        /// NOTE(review): presumably a fresh copy of the comparison node — confirm.
897     SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
898 
        /// Returns an SDValue for \p Op. Only the declaration is visible here.
        /// NOTE(review): going by the name, it presumably rewrites a conditional
        /// branch that depends on a VFP compare; what is returned when no rewrite
        /// applies is not visible — confirm in the definition.
899     SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
900 
        /// Returns nothing. Takes the instruction \p MI by non-const reference, two
        /// block pointers (\p MBB and \p DispatchBB) and an int \p FI.
        /// NOTE(review): \p FI is presumably a frame index used by the
        /// setjmp/longjmp exception scheme — confirm in the definition.
901     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
902                                 MachineBasicBlock *DispatchBB, int FI) const;
903 
        /// Returns nothing. Takes the instruction \p MI by non-const reference and
        /// the block \p MBB.
        /// NOTE(review): presumably creates the dispatch block that
        /// SetupEntryBlockForSjLj refers to — confirm in the definition.
904     void EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const;
905 
        /// Bool-returning helper taking the instruction \p MI by non-const
        /// reference and the block \p BB.
        /// NOTE(review): the bool presumably reports whether \p MI was rewritten
        /// — confirm in the definition.
906     bool RemapAddSubWithFlags(MachineInstr &MI, MachineBasicBlock *BB) const;
907 
        /// Takes the instruction \p MI by non-const reference and the block
        /// \p MBB, and returns a MachineBasicBlock pointer.
        /// NOTE(review): presumably expands a by-value struct copy and returns
        /// the block in which code emission should continue — confirm in the
        /// definition.
908     MachineBasicBlock *EmitStructByval(MachineInstr &MI,
909                                        MachineBasicBlock *MBB) const;
910 
        // Two const helpers with the same shape: each takes an instruction by
        // non-const reference plus its block and returns a MachineBasicBlock
        // pointer.
        // NOTE(review): going by the names, they presumably expand a stack probe
        // (__chkstk) and a divide-by-zero check (__dbzchk) — confirm in the
        // definitions.
911     MachineBasicBlock *EmitLowered__chkstk(MachineInstr &MI,
912                                            MachineBasicBlock *MBB) const;
913     MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI,
914                                            MachineBasicBlock *MBB) const;
        // The remaining three helpers are non-const members, so they are allowed
        // to modify this object.
        // NOTE(review): going by the names, they presumably configure legal MVE
        // vector types and per-type legalization actions — confirm in the
        // definitions.
915     void addMVEVectorTypes(bool HasMVEFP);
916     void addAllExtLoads(const MVT From, const MVT To, LegalizeAction Action);
917     void setAllExpand(MVT VT);
918   };
919 
      /// Unscoped enumeration with four enumerators. None has an explicit value,
      /// so they are numbered 0 to 3 in declaration order.
      /// NOTE(review): going by the names, it presumably tells the
      /// modified-immediate helpers which instruction family (VMOV, VMVN, MVE
      /// VMVN, or other) an immediate is encoded for — confirm at the use sites.
920   enum VMOVModImmType {
921     VMOVModImm,
922     VMVNModImm,
923     MVEVMVNModImm,
924     OtherModImm
925   };
926 
927   namespace ARM {
928 
        /// Returns a FastISel pointer created from \p funcInfo (non-const
        /// reference) and \p libInfo (pointer to const). Only the declaration is
        /// visible here.
        /// NOTE(review): ownership of the returned object is not visible in this
        /// header — confirm at the call site.
929     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
930                              const TargetLibraryInfo *libInfo);
931 
932   } // end namespace ARM
933 
934 } // end namespace llvm
935 
936 #endif // LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
937