xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp (revision e92ffd9b626833ebdbf2742c8ffddc6cd94b963e)
1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the ARM target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARM.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/APSInt.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SelectionDAGISel.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/IR/CallingConv.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/Intrinsics.h"
32 #include "llvm/IR/IntrinsicsARM.h"
33 #include "llvm/IR/LLVMContext.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Target/TargetOptions.h"
38 
39 using namespace llvm;
40 
41 #define DEBUG_TYPE "arm-isel"
42 
43 static cl::opt<bool>
44 DisableShifterOp("disable-shifter-op", cl::Hidden,
45   cl::desc("Disable isel of shifter-op"),
46   cl::init(false));
47 
48 //===--------------------------------------------------------------------===//
49 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
50 /// instructions for SelectionDAG operations.
51 ///
52 namespace {
53 
54 class ARMDAGToDAGISel : public SelectionDAGISel {
55   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
56   /// make the right decision when generating code for different targets.
57   const ARMSubtarget *Subtarget;
58 
59 public:
60   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
61       : SelectionDAGISel(tm, OptLevel) {}
62 
63   bool runOnMachineFunction(MachineFunction &MF) override {
64     // Reset the subtarget each time through.
65     Subtarget = &MF.getSubtarget<ARMSubtarget>();
66     SelectionDAGISel::runOnMachineFunction(MF);
67     return true;
68   }
69 
70   StringRef getPassName() const override { return "ARM Instruction Selection"; }
71 
72   void PreprocessISelDAG() override;
73 
74   /// getI32Imm - Return a target constant of type i32 with the specified
75   /// value.
76   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
77     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
78   }
79 
80   void Select(SDNode *N) override;
81 
82   /// Return true as some complex patterns, like those that call
83   /// canExtractShiftFromMul can modify the DAG inplace.
84   bool ComplexPatternFuncMutatesDAG() const override { return true; }
85 
86   bool hasNoVMLxHazardUse(SDNode *N) const;
87   bool isShifterOpProfitable(const SDValue &Shift,
88                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
89   bool SelectRegShifterOperand(SDValue N, SDValue &A,
90                                SDValue &B, SDValue &C,
91                                bool CheckProfitability = true);
92   bool SelectImmShifterOperand(SDValue N, SDValue &A,
93                                SDValue &B, bool CheckProfitability = true);
94   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
95                                     SDValue &C) {
96     // Don't apply the profitability check
97     return SelectRegShifterOperand(N, A, B, C, false);
98   }
99   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
100     // Don't apply the profitability check
101     return SelectImmShifterOperand(N, A, B, false);
102   }
103   bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
104     if (!N.hasOneUse())
105       return false;
106     return SelectImmShifterOperand(N, A, B, false);
107   }
108 
109   bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
110 
111   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
112   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
113 
114   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
115     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
116     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
117     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
118     return true;
119   }
120 
121   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
122                              SDValue &Offset, SDValue &Opc);
123   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
124                              SDValue &Offset, SDValue &Opc);
125   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
126                              SDValue &Offset, SDValue &Opc);
127   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
128   bool SelectAddrMode3(SDValue N, SDValue &Base,
129                        SDValue &Offset, SDValue &Opc);
130   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
131                              SDValue &Offset, SDValue &Opc);
132   bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
133   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
134   bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
135   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
136   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
137 
138   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
139 
140   // Thumb Addressing Modes:
141   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
142   bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
143   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
144                                 SDValue &OffImm);
145   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
146                                  SDValue &OffImm);
147   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
148                                  SDValue &OffImm);
149   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
150                                  SDValue &OffImm);
151   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
152   template <unsigned Shift>
153   bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
154 
155   // Thumb 2 Addressing Modes:
156   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
157   template <unsigned Shift>
158   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
159   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
160                             SDValue &OffImm);
161   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
162                                  SDValue &OffImm);
163   template <unsigned Shift>
164   bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
165   bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
166                                   unsigned Shift);
167   template <unsigned Shift>
168   bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
169   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
170                              SDValue &OffReg, SDValue &ShImm);
171   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
172 
173   template<int Min, int Max>
174   bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
175 
176   inline bool is_so_imm(unsigned Imm) const {
177     return ARM_AM::getSOImmVal(Imm) != -1;
178   }
179 
180   inline bool is_so_imm_not(unsigned Imm) const {
181     return ARM_AM::getSOImmVal(~Imm) != -1;
182   }
183 
184   inline bool is_t2_so_imm(unsigned Imm) const {
185     return ARM_AM::getT2SOImmVal(Imm) != -1;
186   }
187 
188   inline bool is_t2_so_imm_not(unsigned Imm) const {
189     return ARM_AM::getT2SOImmVal(~Imm) != -1;
190   }
191 
192   // Include the pieces autogenerated from the target description.
193 #include "ARMGenDAGISel.inc"
194 
195 private:
196   void transferMemOperands(SDNode *Src, SDNode *Dst);
197 
198   /// Indexed (pre/post inc/dec) load matching code for ARM.
199   bool tryARMIndexedLoad(SDNode *N);
200   bool tryT1IndexedLoad(SDNode *N);
201   bool tryT2IndexedLoad(SDNode *N);
202   bool tryMVEIndexedLoad(SDNode *N);
203   bool tryFMULFixed(SDNode *N, SDLoc dl);
204   bool tryFP_TO_INT(SDNode *N, SDLoc dl);
205   bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
206                                              bool IsUnsigned,
207                                              bool FixedToFloat);
208 
209   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
210   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
211   /// loads of D registers and even subregs and odd subregs of Q registers.
212   /// For NumVecs <= 2, QOpcodes1 is not used.
213   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
214                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
215                  const uint16_t *QOpcodes1);
216 
217   /// SelectVST - Select NEON store intrinsics.  NumVecs should
218   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
219   /// stores of D registers and even subregs and odd subregs of Q registers.
220   /// For NumVecs <= 2, QOpcodes1 is not used.
221   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
222                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
223                  const uint16_t *QOpcodes1);
224 
225   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
226   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
227   /// load/store of D registers and Q registers.
228   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
229                        unsigned NumVecs, const uint16_t *DOpcodes,
230                        const uint16_t *QOpcodes);
231 
232   /// Helper functions for setting up clusters of MVE predication operands.
233   template <typename SDValueVector>
234   void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
235                             SDValue PredicateMask);
236   template <typename SDValueVector>
237   void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
238                             SDValue PredicateMask, SDValue Inactive);
239 
240   template <typename SDValueVector>
241   void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
242   template <typename SDValueVector>
243   void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);
244 
245   /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
246   void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);
247 
248   /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
249   void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
250                            bool HasSaturationOperand);
251 
252   /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
253   void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
254                          uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
255 
256   /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
257   /// vector lanes.
258   void SelectMVE_VSHLC(SDNode *N, bool Predicated);
259 
260   /// Select long MVE vector reductions with two vector operands
261   /// Stride is the number of vector element widths the instruction can operate
262   /// on:
263   /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
264   /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
265   /// Stride is used when addressing the OpcodesS array which contains multiple
266   /// opcodes for each element width.
267   /// TySize is the index into the list of element types listed above
268   void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
269                              const uint16_t *OpcodesS, const uint16_t *OpcodesU,
270                              size_t Stride, size_t TySize);
271 
272   /// Select a 64-bit MVE vector reduction with two vector operands
273   /// arm_mve_vmlldava_[predicated]
274   void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
275                          const uint16_t *OpcodesU);
276   /// Select a 72-bit MVE vector rounding reduction with two vector operands
277   /// int_arm_mve_vrmlldavha[_predicated]
278   void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
279                            const uint16_t *OpcodesU);
280 
281   /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
282   /// should be 2 or 4. The opcode array specifies the instructions
283   /// used for 8, 16 and 32-bit lane sizes respectively, and each
284   /// pointer points to a set of NumVecs sub-opcodes used for the
285   /// different stages (e.g. VLD20 versus VLD21) of each load family.
286   void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
287                      const uint16_t *const *Opcodes, bool HasWriteback);
288 
289   /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
290   /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
291   void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
292                        bool Wrapping, bool Predicated);
293 
294   /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
295   /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
296   /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
297   ///                     the accumulator and the immediate operand, i.e. 0
298   ///                     for CX1*, 1 for CX2*, 2 for CX3*
299   /// \arg \c HasAccum whether the instruction has an accumulator operand
300   void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
301                       bool HasAccum);
302 
303   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
304   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
305   /// for loading D registers.
306   void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
307                     unsigned NumVecs, const uint16_t *DOpcodes,
308                     const uint16_t *QOpcodes0 = nullptr,
309                     const uint16_t *QOpcodes1 = nullptr);
310 
311   /// Try to select SBFX/UBFX instructions for ARM.
312   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
313 
314   bool tryInsertVectorElt(SDNode *N);
315 
316   // Select special operations if node forms integer ABS pattern
317   bool tryABSOp(SDNode *N);
318 
319   bool tryReadRegister(SDNode *N);
320   bool tryWriteRegister(SDNode *N);
321 
322   bool tryInlineAsm(SDNode *N);
323 
324   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
325 
326   void SelectCMP_SWAP(SDNode *N);
327 
328   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
329   /// inline asm expressions.
330   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
331                                     std::vector<SDValue> &OutOps) override;
332 
333   // Form pairs of consecutive R, S, D, or Q registers.
334   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
335   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
336   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
337   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
338 
339   // Form sequences of 4 consecutive S, D, or Q registers.
340   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
341   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
342   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
343 
344   // Get the alignment operand for a NEON VLD or VST instruction.
345   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
346                         bool is64BitVector);
347 
348   /// Checks if N is a multiplication by a constant where we can extract out a
349   /// power of two from the constant so that it can be used in a shift, but only
350   /// if it simplifies the materialization of the constant. Returns true if it
351   /// is, and assigns to PowerOfTwo the power of two that should be extracted
352   /// out and to NewMulConst the new constant to be multiplied by.
353   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
354                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
355 
356   /// Replace N with M in CurDAG, in a way that also ensures that M gets
357   /// selected when N would have been selected.
358   void replaceDAGValue(const SDValue &N, SDValue M);
359 };
360 }
361 
362 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
363 /// operand. If so Imm will receive the 32-bit value.
364 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
365   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
366     Imm = cast<ConstantSDNode>(N)->getZExtValue();
367     return true;
368   }
369   return false;
370 }
371 
372 // isInt32Immediate - This method tests to see if a constant operand.
373 // If so Imm will receive the 32 bit value.
374 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
375   return isInt32Immediate(N.getNode(), Imm);
376 }
377 
378 // isOpcWithIntImmediate - This method tests to see if the node is a specific
379 // opcode and that it has a immediate integer right operand.
380 // If so Imm will receive the 32 bit value.
381 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
382   return N->getOpcode() == Opc &&
383          isInt32Immediate(N->getOperand(1).getNode(), Imm);
384 }
385 
386 /// Check whether a particular node is a constant value representable as
387 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
388 ///
389 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
390 static bool isScaledConstantInRange(SDValue Node, int Scale,
391                                     int RangeMin, int RangeMax,
392                                     int &ScaledConstant) {
393   assert(Scale > 0 && "Invalid scale!");
394 
395   // Check that this is a constant.
396   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
397   if (!C)
398     return false;
399 
400   ScaledConstant = (int) C->getZExtValue();
401   if ((ScaledConstant % Scale) != 0)
402     return false;
403 
404   ScaledConstant /= Scale;
405   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
406 }
407 
408 void ARMDAGToDAGISel::PreprocessISelDAG() {
409   if (!Subtarget->hasV6T2Ops())
410     return;
411 
412   bool isThumb2 = Subtarget->isThumb();
413   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
414        E = CurDAG->allnodes_end(); I != E; ) {
415     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
416 
417     if (N->getOpcode() != ISD::ADD)
418       continue;
419 
420     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
421     // leading zeros, followed by consecutive set bits, followed by 1 or 2
422     // trailing zeros, e.g. 1020.
423     // Transform the expression to
424     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
425     // of trailing zeros of c2. The left shift would be folded as an shifter
426     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
427     // node (UBFX).
428 
429     SDValue N0 = N->getOperand(0);
430     SDValue N1 = N->getOperand(1);
431     unsigned And_imm = 0;
432     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
433       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
434         std::swap(N0, N1);
435     }
436     if (!And_imm)
437       continue;
438 
439     // Check if the AND mask is an immediate of the form: 000.....1111111100
440     unsigned TZ = countTrailingZeros(And_imm);
441     if (TZ != 1 && TZ != 2)
442       // Be conservative here. Shifter operands aren't always free. e.g. On
443       // Swift, left shifter operand of 1 / 2 for free but others are not.
444       // e.g.
445       //  ubfx   r3, r1, #16, #8
446       //  ldr.w  r3, [r0, r3, lsl #2]
447       // vs.
448       //  mov.w  r9, #1020
449       //  and.w  r2, r9, r1, lsr #14
450       //  ldr    r2, [r0, r2]
451       continue;
452     And_imm >>= TZ;
453     if (And_imm & (And_imm + 1))
454       continue;
455 
456     // Look for (and (srl X, c1), c2).
457     SDValue Srl = N1.getOperand(0);
458     unsigned Srl_imm = 0;
459     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
460         (Srl_imm <= 2))
461       continue;
462 
463     // Make sure first operand is not a shifter operand which would prevent
464     // folding of the left shift.
465     SDValue CPTmp0;
466     SDValue CPTmp1;
467     SDValue CPTmp2;
468     if (isThumb2) {
469       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
470         continue;
471     } else {
472       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
473           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
474         continue;
475     }
476 
477     // Now make the transformation.
478     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
479                           Srl.getOperand(0),
480                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
481                                               MVT::i32));
482     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
483                          Srl,
484                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
485     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
486                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
487     CurDAG->UpdateNodeOperands(N, N0, N1);
488   }
489 }
490 
491 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
492 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
493 /// least on current ARM implementations) which should be avoidded.
494 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
495   if (OptLevel == CodeGenOpt::None)
496     return true;
497 
498   if (!Subtarget->hasVMLxHazards())
499     return true;
500 
501   if (!N->hasOneUse())
502     return false;
503 
504   SDNode *Use = *N->use_begin();
505   if (Use->getOpcode() == ISD::CopyToReg)
506     return true;
507   if (Use->isMachineOpcode()) {
508     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
509         CurDAG->getSubtarget().getInstrInfo());
510 
511     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
512     if (MCID.mayStore())
513       return true;
514     unsigned Opcode = MCID.getOpcode();
515     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
516       return true;
517     // vmlx feeding into another vmlx. We actually want to unfold
518     // the use later in the MLxExpansion pass. e.g.
519     // vmla
520     // vmla (stall 8 cycles)
521     //
522     // vmul (5 cycles)
523     // vadd (5 cycles)
524     // vmla
525     // This adds up to about 18 - 19 cycles.
526     //
527     // vmla
528     // vmul (stall 4 cycles)
529     // vadd adds up to about 14 cycles.
530     return TII->isFpMLxInstruction(Opcode);
531   }
532 
533   return false;
534 }
535 
536 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
537                                             ARM_AM::ShiftOpc ShOpcVal,
538                                             unsigned ShAmt) {
539   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
540     return true;
541   if (Shift.hasOneUse())
542     return true;
543   // R << 2 is free.
544   return ShOpcVal == ARM_AM::lsl &&
545          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
546 }
547 
548 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
549                                              unsigned MaxShift,
550                                              unsigned &PowerOfTwo,
551                                              SDValue &NewMulConst) const {
552   assert(N.getOpcode() == ISD::MUL);
553   assert(MaxShift > 0);
554 
555   // If the multiply is used in more than one place then changing the constant
556   // will make other uses incorrect, so don't.
557   if (!N.hasOneUse()) return false;
558   // Check if the multiply is by a constant
559   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
560   if (!MulConst) return false;
561   // If the constant is used in more than one place then modifying it will mean
562   // we need to materialize two constants instead of one, which is a bad idea.
563   if (!MulConst->hasOneUse()) return false;
564   unsigned MulConstVal = MulConst->getZExtValue();
565   if (MulConstVal == 0) return false;
566 
567   // Find the largest power of 2 that MulConstVal is a multiple of
568   PowerOfTwo = MaxShift;
569   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
570     --PowerOfTwo;
571     if (PowerOfTwo == 0) return false;
572   }
573 
574   // Only optimise if the new cost is better
575   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
576   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
577   unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
578   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
579   return NewCost < OldCost;
580 }
581 
582 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
583   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
584   ReplaceUses(N, M);
585 }
586 
587 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
588                                               SDValue &BaseReg,
589                                               SDValue &Opc,
590                                               bool CheckProfitability) {
591   if (DisableShifterOp)
592     return false;
593 
594   // If N is a multiply-by-constant and it's profitable to extract a shift and
595   // use it in a shifted operand do so.
596   if (N.getOpcode() == ISD::MUL) {
597     unsigned PowerOfTwo = 0;
598     SDValue NewMulConst;
599     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
600       HandleSDNode Handle(N);
601       SDLoc Loc(N);
602       replaceDAGValue(N.getOperand(1), NewMulConst);
603       BaseReg = Handle.getValue();
604       Opc = CurDAG->getTargetConstant(
605           ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
606       return true;
607     }
608   }
609 
610   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
611 
612   // Don't match base register only case. That is matched to a separate
613   // lower complexity pattern with explicit register operand.
614   if (ShOpcVal == ARM_AM::no_shift) return false;
615 
616   BaseReg = N.getOperand(0);
617   unsigned ShImmVal = 0;
618   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
619   if (!RHS) return false;
620   ShImmVal = RHS->getZExtValue() & 31;
621   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
622                                   SDLoc(N), MVT::i32);
623   return true;
624 }
625 
626 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
627                                               SDValue &BaseReg,
628                                               SDValue &ShReg,
629                                               SDValue &Opc,
630                                               bool CheckProfitability) {
631   if (DisableShifterOp)
632     return false;
633 
634   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
635 
636   // Don't match base register only case. That is matched to a separate
637   // lower complexity pattern with explicit register operand.
638   if (ShOpcVal == ARM_AM::no_shift) return false;
639 
640   BaseReg = N.getOperand(0);
641   unsigned ShImmVal = 0;
642   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
643   if (RHS) return false;
644 
645   ShReg = N.getOperand(1);
646   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
647     return false;
648   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
649                                   SDLoc(N), MVT::i32);
650   return true;
651 }
652 
653 // Determine whether an ISD::OR's operands are suitable to turn the operation
654 // into an addition, which often has more compact encodings.
655 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
656   assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
657   Out = N;
658   return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
659 }
660 
661 
662 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
663                                           SDValue &Base,
664                                           SDValue &OffImm) {
665   // Match simple R + imm12 operands.
666 
667   // Base only.
668   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
669       !CurDAG->isBaseWithConstantOffset(N)) {
670     if (N.getOpcode() == ISD::FrameIndex) {
671       // Match frame index.
672       int FI = cast<FrameIndexSDNode>(N)->getIndex();
673       Base = CurDAG->getTargetFrameIndex(
674           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
675       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
676       return true;
677     }
678 
679     if (N.getOpcode() == ARMISD::Wrapper &&
680         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
681         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
682         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
683       Base = N.getOperand(0);
684     } else
685       Base = N;
686     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
687     return true;
688   }
689 
690   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
691     int RHSC = (int)RHS->getSExtValue();
692     if (N.getOpcode() == ISD::SUB)
693       RHSC = -RHSC;
694 
695     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
696       Base   = N.getOperand(0);
697       if (Base.getOpcode() == ISD::FrameIndex) {
698         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
699         Base = CurDAG->getTargetFrameIndex(
700             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
701       }
702       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
703       return true;
704     }
705   }
706 
707   // Base only.
708   Base = N;
709   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
710   return true;
711 }
712 
713 
714 
715 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
716                                       SDValue &Opc) {
717   if (N.getOpcode() == ISD::MUL &&
718       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
719     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
720       // X * [3,5,9] -> X + X * [2,4,8] etc.
721       int RHSC = (int)RHS->getZExtValue();
722       if (RHSC & 1) {
723         RHSC = RHSC & ~1;
724         ARM_AM::AddrOpc AddSub = ARM_AM::add;
725         if (RHSC < 0) {
726           AddSub = ARM_AM::sub;
727           RHSC = - RHSC;
728         }
729         if (isPowerOf2_32(RHSC)) {
730           unsigned ShAmt = Log2_32(RHSC);
731           Base = Offset = N.getOperand(0);
732           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
733                                                             ARM_AM::lsl),
734                                           SDLoc(N), MVT::i32);
735           return true;
736         }
737       }
738     }
739   }
740 
741   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
742       // ISD::OR that is equivalent to an ISD::ADD.
743       !CurDAG->isBaseWithConstantOffset(N))
744     return false;
745 
746   // Leave simple R +/- imm12 operands for LDRi12
747   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
748     int RHSC;
749     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
750                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
751       return false;
752   }
753 
754   // Otherwise this is R +/- [possibly shifted] R.
755   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
756   ARM_AM::ShiftOpc ShOpcVal =
757     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
758   unsigned ShAmt = 0;
759 
760   Base   = N.getOperand(0);
761   Offset = N.getOperand(1);
762 
763   if (ShOpcVal != ARM_AM::no_shift) {
764     // Check to see if the RHS of the shift is a constant, if not, we can't fold
765     // it.
766     if (ConstantSDNode *Sh =
767            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
768       ShAmt = Sh->getZExtValue();
769       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
770         Offset = N.getOperand(1).getOperand(0);
771       else {
772         ShAmt = 0;
773         ShOpcVal = ARM_AM::no_shift;
774       }
775     } else {
776       ShOpcVal = ARM_AM::no_shift;
777     }
778   }
779 
780   // Try matching (R shl C) + (R).
781   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
782       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
783         N.getOperand(0).hasOneUse())) {
784     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
785     if (ShOpcVal != ARM_AM::no_shift) {
786       // Check to see if the RHS of the shift is a constant, if not, we can't
787       // fold it.
788       if (ConstantSDNode *Sh =
789           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
790         ShAmt = Sh->getZExtValue();
791         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
792           Offset = N.getOperand(0).getOperand(0);
793           Base = N.getOperand(1);
794         } else {
795           ShAmt = 0;
796           ShOpcVal = ARM_AM::no_shift;
797         }
798       } else {
799         ShOpcVal = ARM_AM::no_shift;
800       }
801     }
802   }
803 
804   // If Offset is a multiply-by-constant and it's profitable to extract a shift
805   // and use it in a shifted operand do so.
806   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
807     unsigned PowerOfTwo = 0;
808     SDValue NewMulConst;
809     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
810       HandleSDNode Handle(Offset);
811       replaceDAGValue(Offset.getOperand(1), NewMulConst);
812       Offset = Handle.getValue();
813       ShAmt = PowerOfTwo;
814       ShOpcVal = ARM_AM::lsl;
815     }
816   }
817 
818   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
819                                   SDLoc(N), MVT::i32);
820   return true;
821 }
822 
823 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
824                                             SDValue &Offset, SDValue &Opc) {
825   unsigned Opcode = Op->getOpcode();
826   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
827     ? cast<LoadSDNode>(Op)->getAddressingMode()
828     : cast<StoreSDNode>(Op)->getAddressingMode();
829   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
830     ? ARM_AM::add : ARM_AM::sub;
831   int Val;
832   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
833     return false;
834 
835   Offset = N;
836   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
837   unsigned ShAmt = 0;
838   if (ShOpcVal != ARM_AM::no_shift) {
839     // Check to see if the RHS of the shift is a constant, if not, we can't fold
840     // it.
841     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
842       ShAmt = Sh->getZExtValue();
843       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
844         Offset = N.getOperand(0);
845       else {
846         ShAmt = 0;
847         ShOpcVal = ARM_AM::no_shift;
848       }
849     } else {
850       ShOpcVal = ARM_AM::no_shift;
851     }
852   }
853 
854   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
855                                   SDLoc(N), MVT::i32);
856   return true;
857 }
858 
859 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
860                                             SDValue &Offset, SDValue &Opc) {
861   unsigned Opcode = Op->getOpcode();
862   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
863     ? cast<LoadSDNode>(Op)->getAddressingMode()
864     : cast<StoreSDNode>(Op)->getAddressingMode();
865   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
866     ? ARM_AM::add : ARM_AM::sub;
867   int Val;
868   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
869     if (AddSub == ARM_AM::sub) Val *= -1;
870     Offset = CurDAG->getRegister(0, MVT::i32);
871     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
872     return true;
873   }
874 
875   return false;
876 }
877 
878 
879 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
880                                             SDValue &Offset, SDValue &Opc) {
881   unsigned Opcode = Op->getOpcode();
882   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
883     ? cast<LoadSDNode>(Op)->getAddressingMode()
884     : cast<StoreSDNode>(Op)->getAddressingMode();
885   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
886     ? ARM_AM::add : ARM_AM::sub;
887   int Val;
888   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
889     Offset = CurDAG->getRegister(0, MVT::i32);
890     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
891                                                       ARM_AM::no_shift),
892                                     SDLoc(Op), MVT::i32);
893     return true;
894   }
895 
896   return false;
897 }
898 
899 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
900   Base = N;
901   return true;
902 }
903 
904 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
905                                       SDValue &Base, SDValue &Offset,
906                                       SDValue &Opc) {
907   if (N.getOpcode() == ISD::SUB) {
908     // X - C  is canonicalize to X + -C, no need to handle it here.
909     Base = N.getOperand(0);
910     Offset = N.getOperand(1);
911     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
912                                     MVT::i32);
913     return true;
914   }
915 
916   if (!CurDAG->isBaseWithConstantOffset(N)) {
917     Base = N;
918     if (N.getOpcode() == ISD::FrameIndex) {
919       int FI = cast<FrameIndexSDNode>(N)->getIndex();
920       Base = CurDAG->getTargetFrameIndex(
921           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
922     }
923     Offset = CurDAG->getRegister(0, MVT::i32);
924     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
925                                     MVT::i32);
926     return true;
927   }
928 
929   // If the RHS is +/- imm8, fold into addr mode.
930   int RHSC;
931   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
932                               -256 + 1, 256, RHSC)) { // 8 bits.
933     Base = N.getOperand(0);
934     if (Base.getOpcode() == ISD::FrameIndex) {
935       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
936       Base = CurDAG->getTargetFrameIndex(
937           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
938     }
939     Offset = CurDAG->getRegister(0, MVT::i32);
940 
941     ARM_AM::AddrOpc AddSub = ARM_AM::add;
942     if (RHSC < 0) {
943       AddSub = ARM_AM::sub;
944       RHSC = -RHSC;
945     }
946     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
947                                     MVT::i32);
948     return true;
949   }
950 
951   Base = N.getOperand(0);
952   Offset = N.getOperand(1);
953   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
954                                   MVT::i32);
955   return true;
956 }
957 
958 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
959                                             SDValue &Offset, SDValue &Opc) {
960   unsigned Opcode = Op->getOpcode();
961   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
962     ? cast<LoadSDNode>(Op)->getAddressingMode()
963     : cast<StoreSDNode>(Op)->getAddressingMode();
964   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
965     ? ARM_AM::add : ARM_AM::sub;
966   int Val;
967   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
968     Offset = CurDAG->getRegister(0, MVT::i32);
969     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
970                                     MVT::i32);
971     return true;
972   }
973 
974   Offset = N;
975   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
976                                   MVT::i32);
977   return true;
978 }
979 
980 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
981                                         bool FP16) {
982   if (!CurDAG->isBaseWithConstantOffset(N)) {
983     Base = N;
984     if (N.getOpcode() == ISD::FrameIndex) {
985       int FI = cast<FrameIndexSDNode>(N)->getIndex();
986       Base = CurDAG->getTargetFrameIndex(
987           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
988     } else if (N.getOpcode() == ARMISD::Wrapper &&
989                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
990                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
991                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
992       Base = N.getOperand(0);
993     }
994     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
995                                        SDLoc(N), MVT::i32);
996     return true;
997   }
998 
999   // If the RHS is +/- imm8, fold into addr mode.
1000   int RHSC;
1001   const int Scale = FP16 ? 2 : 4;
1002 
1003   if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
1004     Base = N.getOperand(0);
1005     if (Base.getOpcode() == ISD::FrameIndex) {
1006       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1007       Base = CurDAG->getTargetFrameIndex(
1008           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1009     }
1010 
1011     ARM_AM::AddrOpc AddSub = ARM_AM::add;
1012     if (RHSC < 0) {
1013       AddSub = ARM_AM::sub;
1014       RHSC = -RHSC;
1015     }
1016 
1017     if (FP16)
1018       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
1019                                          SDLoc(N), MVT::i32);
1020     else
1021       Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
1022                                          SDLoc(N), MVT::i32);
1023 
1024     return true;
1025   }
1026 
1027   Base = N;
1028 
1029   if (FP16)
1030     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
1031                                        SDLoc(N), MVT::i32);
1032   else
1033     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
1034                                        SDLoc(N), MVT::i32);
1035 
1036   return true;
1037 }
1038 
1039 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
1040                                       SDValue &Base, SDValue &Offset) {
1041   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
1042 }
1043 
1044 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
1045                                           SDValue &Base, SDValue &Offset) {
1046   return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
1047 }
1048 
1049 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
1050                                       SDValue &Align) {
1051   Addr = N;
1052 
1053   unsigned Alignment = 0;
1054 
1055   MemSDNode *MemN = cast<MemSDNode>(Parent);
1056 
1057   if (isa<LSBaseSDNode>(MemN) ||
1058       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
1059         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
1060        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
1061     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
1062     // The maximum alignment is equal to the memory size being referenced.
1063     unsigned MMOAlign = MemN->getAlignment();
1064     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
1065     if (MMOAlign >= MemSize && MemSize > 1)
1066       Alignment = MemSize;
1067   } else {
1068     // All other uses of addrmode6 are for intrinsics.  For now just record
1069     // the raw alignment value; it will be refined later based on the legal
1070     // alignment operands for the intrinsic.
1071     Alignment = MemN->getAlignment();
1072   }
1073 
1074   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
1075   return true;
1076 }
1077 
1078 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1079                                             SDValue &Offset) {
1080   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1081   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1082   if (AM != ISD::POST_INC)
1083     return false;
1084   Offset = N;
1085   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1086     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1087       Offset = CurDAG->getRegister(0, MVT::i32);
1088   }
1089   return true;
1090 }
1091 
1092 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1093                                        SDValue &Offset, SDValue &Label) {
1094   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1095     Offset = N.getOperand(0);
1096     SDValue N1 = N.getOperand(1);
1097     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1098                                       SDLoc(N), MVT::i32);
1099     return true;
1100   }
1101 
1102   return false;
1103 }
1104 
1105 
1106 //===----------------------------------------------------------------------===//
1107 //                         Thumb Addressing Modes
1108 //===----------------------------------------------------------------------===//
1109 
1110 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1111   // Negative numbers are difficult to materialise in thumb1. If we are
1112   // selecting the add of a negative, instead try to select ri with a zero
1113   // offset, so create the add node directly which will become a sub.
1114   if (N.getOpcode() != ISD::ADD)
1115     return false;
1116 
1117   // Look for an imm which is not legal for ld/st, but is legal for sub.
1118   if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1119     return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1120 
1121   return false;
1122 }
1123 
1124 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1125                                                 SDValue &Offset) {
1126   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1127     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1128     if (!NC || !NC->isNullValue())
1129       return false;
1130 
1131     Base = Offset = N;
1132     return true;
1133   }
1134 
1135   Base = N.getOperand(0);
1136   Offset = N.getOperand(1);
1137   return true;
1138 }
1139 
1140 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1141                                             SDValue &Offset) {
1142   if (shouldUseZeroOffsetLdSt(N))
1143     return false; // Select ri instead
1144   return SelectThumbAddrModeRRSext(N, Base, Offset);
1145 }
1146 
1147 bool
1148 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
1149                                           SDValue &Base, SDValue &OffImm) {
1150   if (shouldUseZeroOffsetLdSt(N)) {
1151     Base = N;
1152     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1153     return true;
1154   }
1155 
1156   if (!CurDAG->isBaseWithConstantOffset(N)) {
1157     if (N.getOpcode() == ISD::ADD) {
1158       return false; // We want to select register offset instead
1159     } else if (N.getOpcode() == ARMISD::Wrapper &&
1160         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1161         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1162         N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
1163         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1164       Base = N.getOperand(0);
1165     } else {
1166       Base = N;
1167     }
1168 
1169     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1170     return true;
1171   }
1172 
1173   // If the RHS is + imm5 * scale, fold into addr mode.
1174   int RHSC;
1175   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
1176     Base = N.getOperand(0);
1177     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1178     return true;
1179   }
1180 
1181   // Offset is too large, so use register offset instead.
1182   return false;
1183 }
1184 
1185 bool
1186 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
1187                                            SDValue &OffImm) {
1188   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
1189 }
1190 
1191 bool
1192 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
1193                                            SDValue &OffImm) {
1194   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
1195 }
1196 
1197 bool
1198 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
1199                                            SDValue &OffImm) {
1200   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
1201 }
1202 
1203 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
1204                                             SDValue &Base, SDValue &OffImm) {
1205   if (N.getOpcode() == ISD::FrameIndex) {
1206     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1207     // Only multiples of 4 are allowed for the offset, so the frame object
1208     // alignment must be at least 4.
1209     MachineFrameInfo &MFI = MF->getFrameInfo();
1210     if (MFI.getObjectAlign(FI) < Align(4))
1211       MFI.setObjectAlignment(FI, Align(4));
1212     Base = CurDAG->getTargetFrameIndex(
1213         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1214     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1215     return true;
1216   }
1217 
1218   if (!CurDAG->isBaseWithConstantOffset(N))
1219     return false;
1220 
1221   if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
1222     // If the RHS is + imm8 * scale, fold into addr mode.
1223     int RHSC;
1224     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
1225       Base = N.getOperand(0);
1226       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1227       // Make sure the offset is inside the object, or we might fail to
1228       // allocate an emergency spill slot. (An out-of-range access is UB, but
1229       // it could show up anyway.)
1230       MachineFrameInfo &MFI = MF->getFrameInfo();
1231       if (RHSC * 4 < MFI.getObjectSize(FI)) {
1232         // For LHS+RHS to result in an offset that's a multiple of 4 the object
1233         // indexed by the LHS must be 4-byte aligned.
1234         if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
1235           MFI.setObjectAlignment(FI, Align(4));
1236         if (MFI.getObjectAlign(FI) >= Align(4)) {
1237           Base = CurDAG->getTargetFrameIndex(
1238               FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1239           OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1240           return true;
1241         }
1242       }
1243     }
1244   }
1245 
1246   return false;
1247 }
1248 
1249 template <unsigned Shift>
1250 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
1251                                           SDValue &OffImm) {
1252   if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1253     int RHSC;
1254     if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1255                                 RHSC)) {
1256       Base = N.getOperand(0);
1257       if (N.getOpcode() == ISD::SUB)
1258         RHSC = -RHSC;
1259       OffImm =
1260           CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1261       return true;
1262     }
1263   }
1264 
1265   // Base only.
1266   Base = N;
1267   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1268   return true;
1269 }
1270 
1271 
1272 //===----------------------------------------------------------------------===//
1273 //                        Thumb 2 Addressing Modes
1274 //===----------------------------------------------------------------------===//
1275 
1276 
1277 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
1278                                             SDValue &Base, SDValue &OffImm) {
1279   // Match simple R + imm12 operands.
1280 
1281   // Base only.
1282   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1283       !CurDAG->isBaseWithConstantOffset(N)) {
1284     if (N.getOpcode() == ISD::FrameIndex) {
1285       // Match frame index.
1286       int FI = cast<FrameIndexSDNode>(N)->getIndex();
1287       Base = CurDAG->getTargetFrameIndex(
1288           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1289       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1290       return true;
1291     }
1292 
1293     if (N.getOpcode() == ARMISD::Wrapper &&
1294         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
1295         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
1296         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
1297       Base = N.getOperand(0);
1298       if (Base.getOpcode() == ISD::TargetConstantPool)
1299         return false;  // We want to select t2LDRpci instead.
1300     } else
1301       Base = N;
1302     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1303     return true;
1304   }
1305 
1306   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1307     if (SelectT2AddrModeImm8(N, Base, OffImm))
1308       // Let t2LDRi8 handle (R - imm8).
1309       return false;
1310 
1311     int RHSC = (int)RHS->getZExtValue();
1312     if (N.getOpcode() == ISD::SUB)
1313       RHSC = -RHSC;
1314 
1315     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
1316       Base   = N.getOperand(0);
1317       if (Base.getOpcode() == ISD::FrameIndex) {
1318         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1319         Base = CurDAG->getTargetFrameIndex(
1320             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1321       }
1322       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1323       return true;
1324     }
1325   }
1326 
1327   // Base only.
1328   Base = N;
1329   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1330   return true;
1331 }
1332 
1333 template <unsigned Shift>
1334 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
1335                                            SDValue &OffImm) {
1336   if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1337     int RHSC;
1338     if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
1339       Base = N.getOperand(0);
1340       if (Base.getOpcode() == ISD::FrameIndex) {
1341         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1342         Base = CurDAG->getTargetFrameIndex(
1343             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1344       }
1345 
1346       if (N.getOpcode() == ISD::SUB)
1347         RHSC = -RHSC;
1348       OffImm =
1349           CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1350       return true;
1351     }
1352   }
1353 
1354   // Base only.
1355   Base = N;
1356   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1357   return true;
1358 }
1359 
1360 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
1361                                            SDValue &Base, SDValue &OffImm) {
1362   // Match simple R - imm8 operands.
1363   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
1364       !CurDAG->isBaseWithConstantOffset(N))
1365     return false;
1366 
1367   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1368     int RHSC = (int)RHS->getSExtValue();
1369     if (N.getOpcode() == ISD::SUB)
1370       RHSC = -RHSC;
1371 
1372     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
1373       Base = N.getOperand(0);
1374       if (Base.getOpcode() == ISD::FrameIndex) {
1375         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1376         Base = CurDAG->getTargetFrameIndex(
1377             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1378       }
1379       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
1380       return true;
1381     }
1382   }
1383 
1384   return false;
1385 }
1386 
1387 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1388                                                  SDValue &OffImm){
1389   unsigned Opcode = Op->getOpcode();
1390   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1391     ? cast<LoadSDNode>(Op)->getAddressingMode()
1392     : cast<StoreSDNode>(Op)->getAddressingMode();
1393   int RHSC;
1394   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1395     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1396       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1397       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1398     return true;
1399   }
1400 
1401   return false;
1402 }
1403 
1404 template <unsigned Shift>
1405 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
1406                                            SDValue &OffImm) {
1407   if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
1408     int RHSC;
1409     if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
1410                                 RHSC)) {
1411       Base = N.getOperand(0);
1412       if (Base.getOpcode() == ISD::FrameIndex) {
1413         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1414         Base = CurDAG->getTargetFrameIndex(
1415             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1416       }
1417 
1418       if (N.getOpcode() == ISD::SUB)
1419         RHSC = -RHSC;
1420       OffImm =
1421           CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
1422       return true;
1423     }
1424   }
1425 
1426   // Base only.
1427   Base = N;
1428   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1429   return true;
1430 }
1431 
1432 template <unsigned Shift>
1433 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1434                                                  SDValue &OffImm) {
1435   return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
1436 }
1437 
1438 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
1439                                                  SDValue &OffImm,
1440                                                  unsigned Shift) {
1441   unsigned Opcode = Op->getOpcode();
1442   ISD::MemIndexedMode AM;
1443   switch (Opcode) {
1444   case ISD::LOAD:
1445     AM = cast<LoadSDNode>(Op)->getAddressingMode();
1446     break;
1447   case ISD::STORE:
1448     AM = cast<StoreSDNode>(Op)->getAddressingMode();
1449     break;
1450   case ISD::MLOAD:
1451     AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
1452     break;
1453   case ISD::MSTORE:
1454     AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
1455     break;
1456   default:
1457     llvm_unreachable("Unexpected Opcode for Imm7Offset");
1458   }
1459 
1460   int RHSC;
1461   // 7 bit constant, shifted by Shift.
1462   if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
1463     OffImm =
1464         ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1465             ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
1466             : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
1467                                         MVT::i32);
1468     return true;
1469   }
1470   return false;
1471 }
1472 
1473 template <int Min, int Max>
1474 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1475   int Val;
1476   if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1477     OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
1478     return true;
1479   }
1480   return false;
1481 }
1482 
1483 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
1484                                             SDValue &Base,
1485                                             SDValue &OffReg, SDValue &ShImm) {
1486   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
1487   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
1488     return false;
1489 
1490   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
1491   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1492     int RHSC = (int)RHS->getZExtValue();
1493     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
1494       return false;
1495     else if (RHSC < 0 && RHSC >= -255) // 8 bits
1496       return false;
1497   }
1498 
1499   // Look for (R + R) or (R + (R << [1,2,3])).
1500   unsigned ShAmt = 0;
1501   Base   = N.getOperand(0);
1502   OffReg = N.getOperand(1);
1503 
1504   // Swap if it is ((R << c) + R).
1505   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
1506   if (ShOpcVal != ARM_AM::lsl) {
1507     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
1508     if (ShOpcVal == ARM_AM::lsl)
1509       std::swap(Base, OffReg);
1510   }
1511 
1512   if (ShOpcVal == ARM_AM::lsl) {
1513     // Check to see if the RHS of the shift is a constant, if not, we can't fold
1514     // it.
1515     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
1516       ShAmt = Sh->getZExtValue();
1517       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
1518         OffReg = OffReg.getOperand(0);
1519       else {
1520         ShAmt = 0;
1521       }
1522     }
1523   }
1524 
1525   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
1526   // and use it in a shifted operand do so.
1527   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
1528     unsigned PowerOfTwo = 0;
1529     SDValue NewMulConst;
1530     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
1531       HandleSDNode Handle(OffReg);
1532       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
1533       OffReg = Handle.getValue();
1534       ShAmt = PowerOfTwo;
1535     }
1536   }
1537 
1538   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
1539 
1540   return true;
1541 }
1542 
1543 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1544                                                 SDValue &OffImm) {
1545   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1546   // instructions.
1547   Base = N;
1548   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1549 
1550   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1551     return true;
1552 
1553   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1554   if (!RHS)
1555     return true;
1556 
1557   uint32_t RHSC = (int)RHS->getZExtValue();
1558   if (RHSC > 1020 || RHSC % 4 != 0)
1559     return true;
1560 
1561   Base = N.getOperand(0);
1562   if (Base.getOpcode() == ISD::FrameIndex) {
1563     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1564     Base = CurDAG->getTargetFrameIndex(
1565         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1566   }
1567 
1568   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1569   return true;
1570 }
1571 
1572 //===--------------------------------------------------------------------===//
1573 
1574 /// getAL - Returns a ARMCC::AL immediate node.
1575 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1576   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1577 }
1578 
1579 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1580   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1581   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1582 }
1583 
1584 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1585   LoadSDNode *LD = cast<LoadSDNode>(N);
1586   ISD::MemIndexedMode AM = LD->getAddressingMode();
1587   if (AM == ISD::UNINDEXED)
1588     return false;
1589 
1590   EVT LoadedVT = LD->getMemoryVT();
1591   SDValue Offset, AMOpc;
1592   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1593   unsigned Opcode = 0;
1594   bool Match = false;
1595   if (LoadedVT == MVT::i32 && isPre &&
1596       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1597     Opcode = ARM::LDR_PRE_IMM;
1598     Match = true;
1599   } else if (LoadedVT == MVT::i32 && !isPre &&
1600       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1601     Opcode = ARM::LDR_POST_IMM;
1602     Match = true;
1603   } else if (LoadedVT == MVT::i32 &&
1604       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1605     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1606     Match = true;
1607 
1608   } else if (LoadedVT == MVT::i16 &&
1609              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1610     Match = true;
1611     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1612       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1613       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1614   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1615     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1616       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1617         Match = true;
1618         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1619       }
1620     } else {
1621       if (isPre &&
1622           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1623         Match = true;
1624         Opcode = ARM::LDRB_PRE_IMM;
1625       } else if (!isPre &&
1626                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1627         Match = true;
1628         Opcode = ARM::LDRB_POST_IMM;
1629       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1630         Match = true;
1631         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1632       }
1633     }
1634   }
1635 
1636   if (Match) {
1637     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1638       SDValue Chain = LD->getChain();
1639       SDValue Base = LD->getBasePtr();
1640       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1641                        CurDAG->getRegister(0, MVT::i32), Chain };
1642       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1643                                            MVT::Other, Ops);
1644       transferMemOperands(N, New);
1645       ReplaceNode(N, New);
1646       return true;
1647     } else {
1648       SDValue Chain = LD->getChain();
1649       SDValue Base = LD->getBasePtr();
1650       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1651                        CurDAG->getRegister(0, MVT::i32), Chain };
1652       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1653                                            MVT::Other, Ops);
1654       transferMemOperands(N, New);
1655       ReplaceNode(N, New);
1656       return true;
1657     }
1658   }
1659 
1660   return false;
1661 }
1662 
1663 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1664   LoadSDNode *LD = cast<LoadSDNode>(N);
1665   EVT LoadedVT = LD->getMemoryVT();
1666   ISD::MemIndexedMode AM = LD->getAddressingMode();
1667   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1668       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1669     return false;
1670 
1671   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1672   if (!COffs || COffs->getZExtValue() != 4)
1673     return false;
1674 
1675   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1676   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1677   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1678   // ISel.
1679   SDValue Chain = LD->getChain();
1680   SDValue Base = LD->getBasePtr();
1681   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1682                    CurDAG->getRegister(0, MVT::i32), Chain };
1683   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1684                                        MVT::i32, MVT::Other, Ops);
1685   transferMemOperands(N, New);
1686   ReplaceNode(N, New);
1687   return true;
1688 }
1689 
1690 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1691   LoadSDNode *LD = cast<LoadSDNode>(N);
1692   ISD::MemIndexedMode AM = LD->getAddressingMode();
1693   if (AM == ISD::UNINDEXED)
1694     return false;
1695 
1696   EVT LoadedVT = LD->getMemoryVT();
1697   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1698   SDValue Offset;
1699   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1700   unsigned Opcode = 0;
1701   bool Match = false;
1702   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1703     switch (LoadedVT.getSimpleVT().SimpleTy) {
1704     case MVT::i32:
1705       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1706       break;
1707     case MVT::i16:
1708       if (isSExtLd)
1709         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1710       else
1711         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1712       break;
1713     case MVT::i8:
1714     case MVT::i1:
1715       if (isSExtLd)
1716         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1717       else
1718         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1719       break;
1720     default:
1721       return false;
1722     }
1723     Match = true;
1724   }
1725 
1726   if (Match) {
1727     SDValue Chain = LD->getChain();
1728     SDValue Base = LD->getBasePtr();
1729     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1730                      CurDAG->getRegister(0, MVT::i32), Chain };
1731     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1732                                          MVT::Other, Ops);
1733     transferMemOperands(N, New);
1734     ReplaceNode(N, New);
1735     return true;
1736   }
1737 
1738   return false;
1739 }
1740 
1741 bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
1742   EVT LoadedVT;
1743   unsigned Opcode = 0;
1744   bool isSExtLd, isPre;
1745   Align Alignment;
1746   ARMVCC::VPTCodes Pred;
1747   SDValue PredReg;
1748   SDValue Chain, Base, Offset;
1749 
1750   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1751     ISD::MemIndexedMode AM = LD->getAddressingMode();
1752     if (AM == ISD::UNINDEXED)
1753       return false;
1754     LoadedVT = LD->getMemoryVT();
1755     if (!LoadedVT.isVector())
1756       return false;
1757 
1758     Chain = LD->getChain();
1759     Base = LD->getBasePtr();
1760     Offset = LD->getOffset();
1761     Alignment = LD->getAlign();
1762     isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1763     isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1764     Pred = ARMVCC::None;
1765     PredReg = CurDAG->getRegister(0, MVT::i32);
1766   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
1767     ISD::MemIndexedMode AM = LD->getAddressingMode();
1768     if (AM == ISD::UNINDEXED)
1769       return false;
1770     LoadedVT = LD->getMemoryVT();
1771     if (!LoadedVT.isVector())
1772       return false;
1773 
1774     Chain = LD->getChain();
1775     Base = LD->getBasePtr();
1776     Offset = LD->getOffset();
1777     Alignment = LD->getAlign();
1778     isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1779     isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1780     Pred = ARMVCC::Then;
1781     PredReg = LD->getMask();
1782   } else
1783     llvm_unreachable("Expected a Load or a Masked Load!");
1784 
1785   // We allow LE non-masked loads to change the type (for example use a vldrb.8
1786   // as opposed to a vldrw.32). This can allow extra addressing modes or
1787   // alignments for what is otherwise an equivalent instruction.
1788   bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);
1789 
1790   SDValue NewOffset;
1791   if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
1792       SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
1793     if (isSExtLd)
1794       Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
1795     else
1796       Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
1797   } else if (LoadedVT == MVT::v8i8 &&
1798              SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1799     if (isSExtLd)
1800       Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
1801     else
1802       Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
1803   } else if (LoadedVT == MVT::v4i8 &&
1804              SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
1805     if (isSExtLd)
1806       Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
1807     else
1808       Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
1809   } else if (Alignment >= Align(4) &&
1810              (CanChangeType || LoadedVT == MVT::v4i32 ||
1811               LoadedVT == MVT::v4f32) &&
1812              SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
1813     Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
1814   else if (Alignment >= Align(2) &&
1815            (CanChangeType || LoadedVT == MVT::v8i16 ||
1816             LoadedVT == MVT::v8f16) &&
1817            SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
1818     Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
1819   else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
1820            SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
1821     Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
1822   else
1823     return false;
1824 
1825   SDValue Ops[] = {Base, NewOffset,
1826                    CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
1827                    Chain};
1828   SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
1829                                        N->getValueType(0), MVT::Other, Ops);
1830   transferMemOperands(N, New);
1831   ReplaceUses(SDValue(N, 0), SDValue(New, 1));
1832   ReplaceUses(SDValue(N, 1), SDValue(New, 0));
1833   ReplaceUses(SDValue(N, 2), SDValue(New, 2));
1834   CurDAG->RemoveDeadNode(N);
1835   return true;
1836 }
1837 
1838 /// Form a GPRPair pseudo register from a pair of GPR regs.
1839 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1840   SDLoc dl(V0.getNode());
1841   SDValue RegClass =
1842     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1843   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1844   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1845   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1846   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1847 }
1848 
1849 /// Form a D register from a pair of S registers.
1850 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1851   SDLoc dl(V0.getNode());
1852   SDValue RegClass =
1853     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1854   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1855   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1856   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1857   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1858 }
1859 
1860 /// Form a quad register from a pair of D registers.
1861 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1862   SDLoc dl(V0.getNode());
1863   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1864                                                MVT::i32);
1865   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1866   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1867   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1868   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1869 }
1870 
1871 /// Form 4 consecutive D registers from a pair of Q registers.
1872 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1873   SDLoc dl(V0.getNode());
1874   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1875                                                MVT::i32);
1876   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1877   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1878   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1879   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1880 }
1881 
1882 /// Form 4 consecutive S registers.
1883 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1884                                    SDValue V2, SDValue V3) {
1885   SDLoc dl(V0.getNode());
1886   SDValue RegClass =
1887     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1888   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1889   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1890   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1891   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1892   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1893                                     V2, SubReg2, V3, SubReg3 };
1894   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1895 }
1896 
1897 /// Form 4 consecutive D registers.
1898 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1899                                    SDValue V2, SDValue V3) {
1900   SDLoc dl(V0.getNode());
1901   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1902                                                MVT::i32);
1903   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1904   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1905   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1906   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1907   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1908                                     V2, SubReg2, V3, SubReg3 };
1909   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1910 }
1911 
1912 /// Form 4 consecutive Q registers.
1913 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1914                                    SDValue V2, SDValue V3) {
1915   SDLoc dl(V0.getNode());
1916   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1917                                                MVT::i32);
1918   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1919   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1920   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1921   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1922   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1923                                     V2, SubReg2, V3, SubReg3 };
1924   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1925 }
1926 
1927 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1928 /// of a NEON VLD or VST instruction.  The supported values depend on the
1929 /// number of registers being loaded.
1930 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1931                                        unsigned NumVecs, bool is64BitVector) {
1932   unsigned NumRegs = NumVecs;
1933   if (!is64BitVector && NumVecs < 3)
1934     NumRegs *= 2;
1935 
1936   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1937   if (Alignment >= 32 && NumRegs == 4)
1938     Alignment = 32;
1939   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1940     Alignment = 16;
1941   else if (Alignment >= 8)
1942     Alignment = 8;
1943   else
1944     Alignment = 0;
1945 
1946   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1947 }
1948 
1949 static bool isVLDfixed(unsigned Opc)
1950 {
1951   switch (Opc) {
1952   default: return false;
1953   case ARM::VLD1d8wb_fixed : return true;
1954   case ARM::VLD1d16wb_fixed : return true;
1955   case ARM::VLD1d64Qwb_fixed : return true;
1956   case ARM::VLD1d32wb_fixed : return true;
1957   case ARM::VLD1d64wb_fixed : return true;
1958   case ARM::VLD1d8TPseudoWB_fixed : return true;
1959   case ARM::VLD1d16TPseudoWB_fixed : return true;
1960   case ARM::VLD1d32TPseudoWB_fixed : return true;
1961   case ARM::VLD1d64TPseudoWB_fixed : return true;
1962   case ARM::VLD1d8QPseudoWB_fixed : return true;
1963   case ARM::VLD1d16QPseudoWB_fixed : return true;
1964   case ARM::VLD1d32QPseudoWB_fixed : return true;
1965   case ARM::VLD1d64QPseudoWB_fixed : return true;
1966   case ARM::VLD1q8wb_fixed : return true;
1967   case ARM::VLD1q16wb_fixed : return true;
1968   case ARM::VLD1q32wb_fixed : return true;
1969   case ARM::VLD1q64wb_fixed : return true;
1970   case ARM::VLD1DUPd8wb_fixed : return true;
1971   case ARM::VLD1DUPd16wb_fixed : return true;
1972   case ARM::VLD1DUPd32wb_fixed : return true;
1973   case ARM::VLD1DUPq8wb_fixed : return true;
1974   case ARM::VLD1DUPq16wb_fixed : return true;
1975   case ARM::VLD1DUPq32wb_fixed : return true;
1976   case ARM::VLD2d8wb_fixed : return true;
1977   case ARM::VLD2d16wb_fixed : return true;
1978   case ARM::VLD2d32wb_fixed : return true;
1979   case ARM::VLD2q8PseudoWB_fixed : return true;
1980   case ARM::VLD2q16PseudoWB_fixed : return true;
1981   case ARM::VLD2q32PseudoWB_fixed : return true;
1982   case ARM::VLD2DUPd8wb_fixed : return true;
1983   case ARM::VLD2DUPd16wb_fixed : return true;
1984   case ARM::VLD2DUPd32wb_fixed : return true;
1985   case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1986   case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1987   case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1988   }
1989 }
1990 
1991 static bool isVSTfixed(unsigned Opc)
1992 {
1993   switch (Opc) {
1994   default: return false;
1995   case ARM::VST1d8wb_fixed : return true;
1996   case ARM::VST1d16wb_fixed : return true;
1997   case ARM::VST1d32wb_fixed : return true;
1998   case ARM::VST1d64wb_fixed : return true;
1999   case ARM::VST1q8wb_fixed : return true;
2000   case ARM::VST1q16wb_fixed : return true;
2001   case ARM::VST1q32wb_fixed : return true;
2002   case ARM::VST1q64wb_fixed : return true;
2003   case ARM::VST1d8TPseudoWB_fixed : return true;
2004   case ARM::VST1d16TPseudoWB_fixed : return true;
2005   case ARM::VST1d32TPseudoWB_fixed : return true;
2006   case ARM::VST1d64TPseudoWB_fixed : return true;
2007   case ARM::VST1d8QPseudoWB_fixed : return true;
2008   case ARM::VST1d16QPseudoWB_fixed : return true;
2009   case ARM::VST1d32QPseudoWB_fixed : return true;
2010   case ARM::VST1d64QPseudoWB_fixed : return true;
2011   case ARM::VST2d8wb_fixed : return true;
2012   case ARM::VST2d16wb_fixed : return true;
2013   case ARM::VST2d32wb_fixed : return true;
2014   case ARM::VST2q8PseudoWB_fixed : return true;
2015   case ARM::VST2q16PseudoWB_fixed : return true;
2016   case ARM::VST2q32PseudoWB_fixed : return true;
2017   }
2018 }
2019 
2020 // Get the register stride update opcode of a VLD/VST instruction that
2021 // is otherwise equivalent to the given fixed stride updating instruction.
2022 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
2023   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
2024     && "Incorrect fixed stride updating instruction.");
2025   switch (Opc) {
2026   default: break;
2027   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
2028   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
2029   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
2030   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
2031   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
2032   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
2033   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
2034   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
2035   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
2036   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
2037   case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
2038   case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
2039   case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
2040   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
2041   case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
2042   case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
2043   case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
2044   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
2045   case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
2046   case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
2047   case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
2048   case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
2049   case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
2050   case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
2051   case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
2052   case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
2053   case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
2054 
2055   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
2056   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
2057   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
2058   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
2059   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
2060   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
2061   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
2062   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
2063   case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
2064   case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
2065   case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
2066   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
2067   case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
2068   case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
2069   case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
2070   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
2071 
2072   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
2073   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
2074   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
2075   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
2076   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
2077   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
2078 
2079   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
2080   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
2081   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
2082   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
2083   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
2084   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
2085 
2086   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
2087   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
2088   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
2089   }
2090   return Opc; // If not one we handle, return it unchanged.
2091 }
2092 
2093 /// Returns true if the given increment is a Constant known to be equal to the
2094 /// access size performed by a NEON load/store. This means the "[rN]!" form can
2095 /// be used.
2096 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2097   auto C = dyn_cast<ConstantSDNode>(Inc);
2098   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2099 }
2100 
2101 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
2102                                 const uint16_t *DOpcodes,
2103                                 const uint16_t *QOpcodes0,
2104                                 const uint16_t *QOpcodes1) {
2105   assert(Subtarget->hasNEON());
2106   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
2107   SDLoc dl(N);
2108 
2109   SDValue MemAddr, Align;
2110   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2111                                    // nodes are not intrinsics.
2112   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2113   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2114     return;
2115 
2116   SDValue Chain = N->getOperand(0);
2117   EVT VT = N->getValueType(0);
2118   bool is64BitVector = VT.is64BitVector();
2119   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2120 
2121   unsigned OpcodeIndex;
2122   switch (VT.getSimpleVT().SimpleTy) {
2123   default: llvm_unreachable("unhandled vld type");
2124     // Double-register operations:
2125   case MVT::v8i8:  OpcodeIndex = 0; break;
2126   case MVT::v4f16:
2127   case MVT::v4bf16:
2128   case MVT::v4i16: OpcodeIndex = 1; break;
2129   case MVT::v2f32:
2130   case MVT::v2i32: OpcodeIndex = 2; break;
2131   case MVT::v1i64: OpcodeIndex = 3; break;
2132     // Quad-register operations:
2133   case MVT::v16i8: OpcodeIndex = 0; break;
2134   case MVT::v8f16:
2135   case MVT::v8bf16:
2136   case MVT::v8i16: OpcodeIndex = 1; break;
2137   case MVT::v4f32:
2138   case MVT::v4i32: OpcodeIndex = 2; break;
2139   case MVT::v2f64:
2140   case MVT::v2i64: OpcodeIndex = 3; break;
2141   }
2142 
2143   EVT ResTy;
2144   if (NumVecs == 1)
2145     ResTy = VT;
2146   else {
2147     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2148     if (!is64BitVector)
2149       ResTyElts *= 2;
2150     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2151   }
2152   std::vector<EVT> ResTys;
2153   ResTys.push_back(ResTy);
2154   if (isUpdating)
2155     ResTys.push_back(MVT::i32);
2156   ResTys.push_back(MVT::Other);
2157 
2158   SDValue Pred = getAL(CurDAG, dl);
2159   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2160   SDNode *VLd;
2161   SmallVector<SDValue, 7> Ops;
2162 
2163   // Double registers and VLD1/VLD2 quad registers are directly supported.
2164   if (is64BitVector || NumVecs <= 2) {
2165     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2166                     QOpcodes0[OpcodeIndex]);
2167     Ops.push_back(MemAddr);
2168     Ops.push_back(Align);
2169     if (isUpdating) {
2170       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2171       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2172       if (!IsImmUpdate) {
2173         // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
2174         // check for the opcode rather than the number of vector elements.
2175         if (isVLDfixed(Opc))
2176           Opc = getVLDSTRegisterUpdateOpcode(Opc);
2177         Ops.push_back(Inc);
2178       // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
2179       // the operands if not such an opcode.
2180       } else if (!isVLDfixed(Opc))
2181         Ops.push_back(Reg0);
2182     }
2183     Ops.push_back(Pred);
2184     Ops.push_back(Reg0);
2185     Ops.push_back(Chain);
2186     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2187 
2188   } else {
2189     // Otherwise, quad registers are loaded with two separate instructions,
2190     // where one loads the even registers and the other loads the odd registers.
2191     EVT AddrTy = MemAddr.getValueType();
2192 
2193     // Load the even subregs.  This is always an updating load, so that it
2194     // provides the address to the second load for the odd subregs.
2195     SDValue ImplDef =
2196       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2197     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2198     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2199                                           ResTy, AddrTy, MVT::Other, OpsA);
2200     Chain = SDValue(VLdA, 2);
2201 
2202     // Load the odd subregs.
2203     Ops.push_back(SDValue(VLdA, 1));
2204     Ops.push_back(Align);
2205     if (isUpdating) {
2206       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2207       assert(isa<ConstantSDNode>(Inc.getNode()) &&
2208              "only constant post-increment update allowed for VLD3/4");
2209       (void)Inc;
2210       Ops.push_back(Reg0);
2211     }
2212     Ops.push_back(SDValue(VLdA, 0));
2213     Ops.push_back(Pred);
2214     Ops.push_back(Reg0);
2215     Ops.push_back(Chain);
2216     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2217   }
2218 
2219   // Transfer memoperands.
2220   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2221   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2222 
2223   if (NumVecs == 1) {
2224     ReplaceNode(N, VLd);
2225     return;
2226   }
2227 
2228   // Extract out the subregisters.
2229   SDValue SuperReg = SDValue(VLd, 0);
2230   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2231                     ARM::qsub_3 == ARM::qsub_0 + 3,
2232                 "Unexpected subreg numbering");
2233   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2234   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2235     ReplaceUses(SDValue(N, Vec),
2236                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2237   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2238   if (isUpdating)
2239     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2240   CurDAG->RemoveDeadNode(N);
2241 }
2242 
2243 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
2244                                 const uint16_t *DOpcodes,
2245                                 const uint16_t *QOpcodes0,
2246                                 const uint16_t *QOpcodes1) {
2247   assert(Subtarget->hasNEON());
2248   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
2249   SDLoc dl(N);
2250 
2251   SDValue MemAddr, Align;
2252   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2253                                    // nodes are not intrinsics.
2254   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2255   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2256   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2257     return;
2258 
2259   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2260 
2261   SDValue Chain = N->getOperand(0);
2262   EVT VT = N->getOperand(Vec0Idx).getValueType();
2263   bool is64BitVector = VT.is64BitVector();
2264   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
2265 
2266   unsigned OpcodeIndex;
2267   switch (VT.getSimpleVT().SimpleTy) {
2268   default: llvm_unreachable("unhandled vst type");
2269     // Double-register operations:
2270   case MVT::v8i8:  OpcodeIndex = 0; break;
2271   case MVT::v4f16:
2272   case MVT::v4bf16:
2273   case MVT::v4i16: OpcodeIndex = 1; break;
2274   case MVT::v2f32:
2275   case MVT::v2i32: OpcodeIndex = 2; break;
2276   case MVT::v1i64: OpcodeIndex = 3; break;
2277     // Quad-register operations:
2278   case MVT::v16i8: OpcodeIndex = 0; break;
2279   case MVT::v8f16:
2280   case MVT::v8bf16:
2281   case MVT::v8i16: OpcodeIndex = 1; break;
2282   case MVT::v4f32:
2283   case MVT::v4i32: OpcodeIndex = 2; break;
2284   case MVT::v2f64:
2285   case MVT::v2i64: OpcodeIndex = 3; break;
2286   }
2287 
2288   std::vector<EVT> ResTys;
2289   if (isUpdating)
2290     ResTys.push_back(MVT::i32);
2291   ResTys.push_back(MVT::Other);
2292 
2293   SDValue Pred = getAL(CurDAG, dl);
2294   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2295   SmallVector<SDValue, 7> Ops;
2296 
2297   // Double registers and VST1/VST2 quad registers are directly supported.
2298   if (is64BitVector || NumVecs <= 2) {
2299     SDValue SrcReg;
2300     if (NumVecs == 1) {
2301       SrcReg = N->getOperand(Vec0Idx);
2302     } else if (is64BitVector) {
2303       // Form a REG_SEQUENCE to force register allocation.
2304       SDValue V0 = N->getOperand(Vec0Idx + 0);
2305       SDValue V1 = N->getOperand(Vec0Idx + 1);
2306       if (NumVecs == 2)
2307         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2308       else {
2309         SDValue V2 = N->getOperand(Vec0Idx + 2);
2310         // If it's a vst3, form a quad D-register and leave the last part as
2311         // an undef.
2312         SDValue V3 = (NumVecs == 3)
2313           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
2314           : N->getOperand(Vec0Idx + 3);
2315         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2316       }
2317     } else {
2318       // Form a QQ register.
2319       SDValue Q0 = N->getOperand(Vec0Idx);
2320       SDValue Q1 = N->getOperand(Vec0Idx + 1);
2321       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
2322     }
2323 
2324     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2325                     QOpcodes0[OpcodeIndex]);
2326     Ops.push_back(MemAddr);
2327     Ops.push_back(Align);
2328     if (isUpdating) {
2329       SDValue Inc = N->getOperand(AddrOpIdx + 1);
2330       bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
2331       if (!IsImmUpdate) {
2332         // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
2333         // check for the opcode rather than the number of vector elements.
2334         if (isVSTfixed(Opc))
2335           Opc = getVLDSTRegisterUpdateOpcode(Opc);
2336         Ops.push_back(Inc);
2337       }
2338       // VST1/VST2 fixed increment does not need Reg0 so only include it in
2339       // the operands if not such an opcode.
2340       else if (!isVSTfixed(Opc))
2341         Ops.push_back(Reg0);
2342     }
2343     Ops.push_back(SrcReg);
2344     Ops.push_back(Pred);
2345     Ops.push_back(Reg0);
2346     Ops.push_back(Chain);
2347     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2348 
2349     // Transfer memoperands.
2350     CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});
2351 
2352     ReplaceNode(N, VSt);
2353     return;
2354   }
2355 
2356   // Otherwise, quad registers are stored with two separate instructions,
2357   // where one stores the even registers and the other stores the odd registers.
2358 
2359   // Form the QQQQ REG_SEQUENCE.
2360   SDValue V0 = N->getOperand(Vec0Idx + 0);
2361   SDValue V1 = N->getOperand(Vec0Idx + 1);
2362   SDValue V2 = N->getOperand(Vec0Idx + 2);
2363   SDValue V3 = (NumVecs == 3)
2364     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2365     : N->getOperand(Vec0Idx + 3);
2366   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2367 
2368   // Store the even D registers.  This is always an updating store, so that it
2369   // provides the address to the second store for the odd subregs.
2370   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
2371   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2372                                         MemAddr.getValueType(),
2373                                         MVT::Other, OpsA);
2374   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
2375   Chain = SDValue(VStA, 1);
2376 
2377   // Store the odd D registers.
2378   Ops.push_back(SDValue(VStA, 0));
2379   Ops.push_back(Align);
2380   if (isUpdating) {
2381     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2382     assert(isa<ConstantSDNode>(Inc.getNode()) &&
2383            "only constant post-increment update allowed for VST3/4");
2384     (void)Inc;
2385     Ops.push_back(Reg0);
2386   }
2387   Ops.push_back(RegSeq);
2388   Ops.push_back(Pred);
2389   Ops.push_back(Reg0);
2390   Ops.push_back(Chain);
2391   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
2392                                         Ops);
2393   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
2394   ReplaceNode(N, VStB);
2395 }
2396 
2397 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
2398                                       unsigned NumVecs,
2399                                       const uint16_t *DOpcodes,
2400                                       const uint16_t *QOpcodes) {
2401   assert(Subtarget->hasNEON());
2402   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
2403   SDLoc dl(N);
2404 
2405   SDValue MemAddr, Align;
2406   bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
2407                                    // nodes are not intrinsics.
2408   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2409   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
2410   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2411     return;
2412 
2413   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2414 
2415   SDValue Chain = N->getOperand(0);
2416   unsigned Lane =
2417     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
2418   EVT VT = N->getOperand(Vec0Idx).getValueType();
2419   bool is64BitVector = VT.is64BitVector();
2420 
2421   unsigned Alignment = 0;
2422   if (NumVecs != 3) {
2423     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2424     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2425     if (Alignment > NumBytes)
2426       Alignment = NumBytes;
2427     if (Alignment < 8 && Alignment < NumBytes)
2428       Alignment = 0;
2429     // Alignment must be a power of two; make sure of that.
2430     Alignment = (Alignment & -Alignment);
2431     if (Alignment == 1)
2432       Alignment = 0;
2433   }
2434   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2435 
2436   unsigned OpcodeIndex;
2437   switch (VT.getSimpleVT().SimpleTy) {
2438   default: llvm_unreachable("unhandled vld/vst lane type");
2439     // Double-register operations:
2440   case MVT::v8i8:  OpcodeIndex = 0; break;
2441   case MVT::v4f16:
2442   case MVT::v4bf16:
2443   case MVT::v4i16: OpcodeIndex = 1; break;
2444   case MVT::v2f32:
2445   case MVT::v2i32: OpcodeIndex = 2; break;
2446     // Quad-register operations:
2447   case MVT::v8f16:
2448   case MVT::v8bf16:
2449   case MVT::v8i16: OpcodeIndex = 0; break;
2450   case MVT::v4f32:
2451   case MVT::v4i32: OpcodeIndex = 1; break;
2452   }
2453 
2454   std::vector<EVT> ResTys;
2455   if (IsLoad) {
2456     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2457     if (!is64BitVector)
2458       ResTyElts *= 2;
2459     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
2460                                       MVT::i64, ResTyElts));
2461   }
2462   if (isUpdating)
2463     ResTys.push_back(MVT::i32);
2464   ResTys.push_back(MVT::Other);
2465 
2466   SDValue Pred = getAL(CurDAG, dl);
2467   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2468 
2469   SmallVector<SDValue, 8> Ops;
2470   Ops.push_back(MemAddr);
2471   Ops.push_back(Align);
2472   if (isUpdating) {
2473     SDValue Inc = N->getOperand(AddrOpIdx + 1);
2474     bool IsImmUpdate =
2475         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2476     Ops.push_back(IsImmUpdate ? Reg0 : Inc);
2477   }
2478 
2479   SDValue SuperReg;
2480   SDValue V0 = N->getOperand(Vec0Idx + 0);
2481   SDValue V1 = N->getOperand(Vec0Idx + 1);
2482   if (NumVecs == 2) {
2483     if (is64BitVector)
2484       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
2485     else
2486       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
2487   } else {
2488     SDValue V2 = N->getOperand(Vec0Idx + 2);
2489     SDValue V3 = (NumVecs == 3)
2490       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
2491       : N->getOperand(Vec0Idx + 3);
2492     if (is64BitVector)
2493       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
2494     else
2495       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
2496   }
2497   Ops.push_back(SuperReg);
2498   Ops.push_back(getI32Imm(Lane, dl));
2499   Ops.push_back(Pred);
2500   Ops.push_back(Reg0);
2501   Ops.push_back(Chain);
2502 
2503   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
2504                                   QOpcodes[OpcodeIndex]);
2505   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2506   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
2507   if (!IsLoad) {
2508     ReplaceNode(N, VLdLn);
2509     return;
2510   }
2511 
2512   // Extract the subregisters.
2513   SuperReg = SDValue(VLdLn, 0);
2514   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2515                     ARM::qsub_3 == ARM::qsub_0 + 3,
2516                 "Unexpected subreg numbering");
2517   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2518   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2519     ReplaceUses(SDValue(N, Vec),
2520                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2521   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
2522   if (isUpdating)
2523     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
2524   CurDAG->RemoveDeadNode(N);
2525 }
2526 
2527 template <typename SDValueVector>
2528 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2529                                            SDValue PredicateMask) {
2530   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2531   Ops.push_back(PredicateMask);
2532 }
2533 
2534 template <typename SDValueVector>
2535 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2536                                            SDValue PredicateMask,
2537                                            SDValue Inactive) {
2538   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2539   Ops.push_back(PredicateMask);
2540   Ops.push_back(Inactive);
2541 }
2542 
2543 template <typename SDValueVector>
2544 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2545   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2546   Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2547 }
2548 
2549 template <typename SDValueVector>
2550 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2551                                                 EVT InactiveTy) {
2552   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2553   Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2554   Ops.push_back(SDValue(
2555       CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2556 }
2557 
2558 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2559                                    bool Predicated) {
2560   SDLoc Loc(N);
2561   SmallVector<SDValue, 8> Ops;
2562 
2563   uint16_t Opcode;
2564   switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2565   case 32:
2566     Opcode = Opcodes[0];
2567     break;
2568   case 64:
2569     Opcode = Opcodes[1];
2570     break;
2571   default:
2572     llvm_unreachable("bad vector element size in SelectMVE_WB");
2573   }
2574 
2575   Ops.push_back(N->getOperand(2)); // vector of base addresses
2576 
2577   int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2578   Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2579 
2580   if (Predicated)
2581     AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2582   else
2583     AddEmptyMVEPredicateToOps(Ops, Loc);
2584 
2585   Ops.push_back(N->getOperand(0)); // chain
2586 
2587   SmallVector<EVT, 8> VTs;
2588   VTs.push_back(N->getValueType(1));
2589   VTs.push_back(N->getValueType(0));
2590   VTs.push_back(N->getValueType(2));
2591 
2592   SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2593   ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2594   ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2595   ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2596   transferMemOperands(N, New);
2597   CurDAG->RemoveDeadNode(N);
2598 }
2599 
2600 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2601                                           bool Immediate,
2602                                           bool HasSaturationOperand) {
2603   SDLoc Loc(N);
2604   SmallVector<SDValue, 8> Ops;
2605 
2606   // Two 32-bit halves of the value to be shifted
2607   Ops.push_back(N->getOperand(1));
2608   Ops.push_back(N->getOperand(2));
2609 
2610   // The shift count
2611   if (Immediate) {
2612     int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2613     Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2614   } else {
2615     Ops.push_back(N->getOperand(3));
2616   }
2617 
2618   // The immediate saturation operand, if any
2619   if (HasSaturationOperand) {
2620     int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
2621     int SatBit = (SatOp == 64 ? 0 : 1);
2622     Ops.push_back(getI32Imm(SatBit, Loc));
2623   }
2624 
2625   // MVE scalar shifts are IT-predicable, so include the standard
2626   // predicate arguments.
2627   Ops.push_back(getAL(CurDAG, Loc));
2628   Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2629 
2630   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2631 }
2632 
2633 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
2634                                         uint16_t OpcodeWithNoCarry,
2635                                         bool Add, bool Predicated) {
2636   SDLoc Loc(N);
2637   SmallVector<SDValue, 8> Ops;
2638   uint16_t Opcode;
2639 
2640   unsigned FirstInputOp = Predicated ? 2 : 1;
2641 
2642   // Two input vectors and the input carry flag
2643   Ops.push_back(N->getOperand(FirstInputOp));
2644   Ops.push_back(N->getOperand(FirstInputOp + 1));
2645   SDValue CarryIn = N->getOperand(FirstInputOp + 2);
2646   ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
2647   uint32_t CarryMask = 1 << 29;
2648   uint32_t CarryExpected = Add ? 0 : CarryMask;
2649   if (CarryInConstant &&
2650       (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
2651     Opcode = OpcodeWithNoCarry;
2652   } else {
2653     Ops.push_back(CarryIn);
2654     Opcode = OpcodeWithCarry;
2655   }
2656 
2657   if (Predicated)
2658     AddMVEPredicateToOps(Ops, Loc,
2659                          N->getOperand(FirstInputOp + 3),  // predicate
2660                          N->getOperand(FirstInputOp - 1)); // inactive
2661   else
2662     AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2663 
2664   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2665 }
2666 
2667 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2668   SDLoc Loc(N);
2669   SmallVector<SDValue, 8> Ops;
2670 
2671   // One vector input, followed by a 32-bit word of bits to shift in
2672   // and then an immediate shift count
2673   Ops.push_back(N->getOperand(1));
2674   Ops.push_back(N->getOperand(2));
2675   int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
2676   Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2677 
2678   if (Predicated)
2679     AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2680   else
2681     AddEmptyMVEPredicateToOps(Ops, Loc);
2682 
2683   CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops));
2684 }
2685 
2686 static bool SDValueToConstBool(SDValue SDVal) {
2687   assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2688   ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2689   uint64_t Value = SDValConstant->getZExtValue();
2690   assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2691   return Value;
2692 }
2693 
2694 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
2695                                             const uint16_t *OpcodesS,
2696                                             const uint16_t *OpcodesU,
2697                                             size_t Stride, size_t TySize) {
2698   assert(TySize < Stride && "Invalid TySize");
2699   bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
2700   bool IsSub = SDValueToConstBool(N->getOperand(2));
2701   bool IsExchange = SDValueToConstBool(N->getOperand(3));
2702   if (IsUnsigned) {
2703     assert(!IsSub &&
2704            "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
2705     assert(!IsExchange &&
2706            "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
2707   }
2708 
2709   auto OpIsZero = [N](size_t OpNo) {
2710     if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
2711       if (OpConst->getZExtValue() == 0)
2712         return true;
2713     return false;
2714   };
2715 
2716   // If the input accumulator value is not zero, select an instruction with
2717   // accumulator, otherwise select an instruction without accumulator
2718   bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
2719 
2720   const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
2721   if (IsSub)
2722     Opcodes += 4 * Stride;
2723   if (IsExchange)
2724     Opcodes += 2 * Stride;
2725   if (IsAccum)
2726     Opcodes += Stride;
2727   uint16_t Opcode = Opcodes[TySize];
2728 
2729   SDLoc Loc(N);
2730   SmallVector<SDValue, 8> Ops;
2731   // Push the accumulator operands, if they are used
2732   if (IsAccum) {
2733     Ops.push_back(N->getOperand(4));
2734     Ops.push_back(N->getOperand(5));
2735   }
2736   // Push the two vector operands
2737   Ops.push_back(N->getOperand(6));
2738   Ops.push_back(N->getOperand(7));
2739 
2740   if (Predicated)
2741     AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
2742   else
2743     AddEmptyMVEPredicateToOps(Ops, Loc);
2744 
2745   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2746 }
2747 
2748 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2749                                         const uint16_t *OpcodesS,
2750                                         const uint16_t *OpcodesU) {
2751   EVT VecTy = N->getOperand(6).getValueType();
2752   size_t SizeIndex;
2753   switch (VecTy.getVectorElementType().getSizeInBits()) {
2754   case 16:
2755     SizeIndex = 0;
2756     break;
2757   case 32:
2758     SizeIndex = 1;
2759     break;
2760   default:
2761     llvm_unreachable("bad vector element size");
2762   }
2763 
2764   SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2765 }
2766 
2767 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2768                                           const uint16_t *OpcodesS,
2769                                           const uint16_t *OpcodesU) {
2770   assert(
2771       N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2772           32 &&
2773       "bad vector element size");
2774   SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2775 }
2776 
2777 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
2778                                     const uint16_t *const *Opcodes,
2779                                     bool HasWriteback) {
2780   EVT VT = N->getValueType(0);
2781   SDLoc Loc(N);
2782 
2783   const uint16_t *OurOpcodes;
2784   switch (VT.getVectorElementType().getSizeInBits()) {
2785   case 8:
2786     OurOpcodes = Opcodes[0];
2787     break;
2788   case 16:
2789     OurOpcodes = Opcodes[1];
2790     break;
2791   case 32:
2792     OurOpcodes = Opcodes[2];
2793     break;
2794   default:
2795     llvm_unreachable("bad vector element size in SelectMVE_VLD");
2796   }
2797 
2798   EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
2799   SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
2800   unsigned PtrOperand = HasWriteback ? 1 : 2;
2801 
2802   auto Data = SDValue(
2803       CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
2804   SDValue Chain = N->getOperand(0);
2805   // Add a MVE_VLDn instruction for each Vec, except the last
2806   for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2807     SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2808     auto LoadInst =
2809         CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2810     Data = SDValue(LoadInst, 0);
2811     Chain = SDValue(LoadInst, 1);
2812     transferMemOperands(N, LoadInst);
2813   }
2814   // The last may need a writeback on it
2815   if (HasWriteback)
2816     ResultTys = {DataTy, MVT::i32, MVT::Other};
2817   SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2818   auto LoadInst =
2819       CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2820   transferMemOperands(N, LoadInst);
2821 
2822   unsigned i;
2823   for (i = 0; i < NumVecs; i++)
2824     ReplaceUses(SDValue(N, i),
2825                 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2826                                                SDValue(LoadInst, 0)));
2827   if (HasWriteback)
2828     ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2829   ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2830   CurDAG->RemoveDeadNode(N);
2831 }
2832 
2833 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
2834                                       bool Wrapping, bool Predicated) {
2835   EVT VT = N->getValueType(0);
2836   SDLoc Loc(N);
2837 
2838   uint16_t Opcode;
2839   switch (VT.getScalarSizeInBits()) {
2840   case 8:
2841     Opcode = Opcodes[0];
2842     break;
2843   case 16:
2844     Opcode = Opcodes[1];
2845     break;
2846   case 32:
2847     Opcode = Opcodes[2];
2848     break;
2849   default:
2850     llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
2851   }
2852 
2853   SmallVector<SDValue, 8> Ops;
2854   unsigned OpIdx = 1;
2855 
2856   SDValue Inactive;
2857   if (Predicated)
2858     Inactive = N->getOperand(OpIdx++);
2859 
2860   Ops.push_back(N->getOperand(OpIdx++));     // base
2861   if (Wrapping)
2862     Ops.push_back(N->getOperand(OpIdx++));   // limit
2863 
2864   SDValue ImmOp = N->getOperand(OpIdx++);    // step
2865   int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
2866   Ops.push_back(getI32Imm(ImmValue, Loc));
2867 
2868   if (Predicated)
2869     AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
2870   else
2871     AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
2872 
2873   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
2874 }
2875 
2876 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
2877                                      size_t NumExtraOps, bool HasAccum) {
2878   bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
2879   SDLoc Loc(N);
2880   SmallVector<SDValue, 8> Ops;
2881 
2882   unsigned OpIdx = 1;
2883 
2884   // Convert and append the immediate operand designating the coprocessor.
2885   SDValue ImmCorpoc = N->getOperand(OpIdx++);
2886   uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue();
2887   Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2888 
2889   // For accumulating variants copy the low and high order parts of the
2890   // accumulator into a register pair and add it to the operand vector.
2891   if (HasAccum) {
2892     SDValue AccLo = N->getOperand(OpIdx++);
2893     SDValue AccHi = N->getOperand(OpIdx++);
2894     if (IsBigEndian)
2895       std::swap(AccLo, AccHi);
2896     Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2897   }
2898 
2899   // Copy extra operands as-is.
2900   for (size_t I = 0; I < NumExtraOps; I++)
2901     Ops.push_back(N->getOperand(OpIdx++));
2902 
2903   // Convert and append the immediate operand
2904   SDValue Imm = N->getOperand(OpIdx);
2905   uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
2906   Ops.push_back(getI32Imm(ImmVal, Loc));
2907 
2908   // Accumulating variants are IT-predicable, add predicate operands.
2909   if (HasAccum) {
2910     SDValue Pred = getAL(CurDAG, Loc);
2911     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2912     Ops.push_back(Pred);
2913     Ops.push_back(PredReg);
2914   }
2915 
2916   // Create the CDE intruction
2917   SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2918   SDValue ResultPair = SDValue(InstrNode, 0);
2919 
2920   // The original intrinsic had two outputs, and the output of the dual-register
2921   // CDE instruction is a register pair. We need to extract the two subregisters
2922   // and replace all uses of the original outputs with the extracted
2923   // subregisters.
2924   uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2925   if (IsBigEndian)
2926     std::swap(SubRegs[0], SubRegs[1]);
2927 
2928   for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2929     if (SDValue(N, ResIdx).use_empty())
2930       continue;
2931     SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2932                                                     MVT::i32, ResultPair);
2933     ReplaceUses(SDValue(N, ResIdx), SubReg);
2934   }
2935 
2936   CurDAG->RemoveDeadNode(N);
2937 }
2938 
2939 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2940                                    bool isUpdating, unsigned NumVecs,
2941                                    const uint16_t *DOpcodes,
2942                                    const uint16_t *QOpcodes0,
2943                                    const uint16_t *QOpcodes1) {
2944   assert(Subtarget->hasNEON());
2945   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2946   SDLoc dl(N);
2947 
2948   SDValue MemAddr, Align;
2949   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2950   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2951     return;
2952 
2953   SDValue Chain = N->getOperand(0);
2954   EVT VT = N->getValueType(0);
2955   bool is64BitVector = VT.is64BitVector();
2956 
2957   unsigned Alignment = 0;
2958   if (NumVecs != 3) {
2959     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2960     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2961     if (Alignment > NumBytes)
2962       Alignment = NumBytes;
2963     if (Alignment < 8 && Alignment < NumBytes)
2964       Alignment = 0;
2965     // Alignment must be a power of two; make sure of that.
2966     Alignment = (Alignment & -Alignment);
2967     if (Alignment == 1)
2968       Alignment = 0;
2969   }
2970   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2971 
2972   unsigned OpcodeIndex;
2973   switch (VT.getSimpleVT().SimpleTy) {
2974   default: llvm_unreachable("unhandled vld-dup type");
2975   case MVT::v8i8:
2976   case MVT::v16i8: OpcodeIndex = 0; break;
2977   case MVT::v4i16:
2978   case MVT::v8i16:
2979   case MVT::v4f16:
2980   case MVT::v8f16:
2981   case MVT::v4bf16:
2982   case MVT::v8bf16:
2983                   OpcodeIndex = 1; break;
2984   case MVT::v2f32:
2985   case MVT::v2i32:
2986   case MVT::v4f32:
2987   case MVT::v4i32: OpcodeIndex = 2; break;
2988   case MVT::v1f64:
2989   case MVT::v1i64: OpcodeIndex = 3; break;
2990   }
2991 
2992   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2993   if (!is64BitVector)
2994     ResTyElts *= 2;
2995   EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2996 
2997   std::vector<EVT> ResTys;
2998   ResTys.push_back(ResTy);
2999   if (isUpdating)
3000     ResTys.push_back(MVT::i32);
3001   ResTys.push_back(MVT::Other);
3002 
3003   SDValue Pred = getAL(CurDAG, dl);
3004   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3005 
3006   SmallVector<SDValue, 6> Ops;
3007   Ops.push_back(MemAddr);
3008   Ops.push_back(Align);
3009   unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
3010                  : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
3011                                   : QOpcodes1[OpcodeIndex];
3012   if (isUpdating) {
3013     SDValue Inc = N->getOperand(2);
3014     bool IsImmUpdate =
3015         isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
3016     if (IsImmUpdate) {
3017       if (!isVLDfixed(Opc))
3018         Ops.push_back(Reg0);
3019     } else {
3020       if (isVLDfixed(Opc))
3021         Opc = getVLDSTRegisterUpdateOpcode(Opc);
3022       Ops.push_back(Inc);
3023     }
3024   }
3025   if (is64BitVector || NumVecs == 1) {
3026     // Double registers and VLD1 quad registers are directly supported.
3027   } else if (NumVecs == 2) {
3028     const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
3029     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3030                                           MVT::Other, OpsA);
3031     Chain = SDValue(VLdA, 1);
3032   } else {
3033     SDValue ImplDef = SDValue(
3034         CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
3035     const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
3036     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
3037                                           MVT::Other, OpsA);
3038     Ops.push_back(SDValue(VLdA, 0));
3039     Chain = SDValue(VLdA, 1);
3040   }
3041 
3042   Ops.push_back(Pred);
3043   Ops.push_back(Reg0);
3044   Ops.push_back(Chain);
3045 
3046   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
3047 
3048   // Transfer memoperands.
3049   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3050   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
3051 
3052   // Extract the subregisters.
3053   if (NumVecs == 1) {
3054     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
3055   } else {
3056     SDValue SuperReg = SDValue(VLdDup, 0);
3057     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
3058     unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
3059     for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
3060       ReplaceUses(SDValue(N, Vec),
3061                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
3062     }
3063   }
3064   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
3065   if (isUpdating)
3066     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
3067   CurDAG->RemoveDeadNode(N);
3068 }
3069 
3070 bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
3071   if (!Subtarget->hasMVEIntegerOps())
3072     return false;
3073 
3074   SDLoc dl(N);
3075 
3076   // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
3077   // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
3078   // inserts of the correct type:
3079   SDValue Ins1 = SDValue(N, 0);
3080   SDValue Ins2 = N->getOperand(0);
3081   EVT VT = Ins1.getValueType();
3082   if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
3083       !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
3084       !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
3085       (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
3086     return false;
3087 
3088   unsigned Lane1 = Ins1.getConstantOperandVal(2);
3089   unsigned Lane2 = Ins2.getConstantOperandVal(2);
3090   if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
3091     return false;
3092 
3093   // If the inserted values will be able to use T/B already, leave it to the
3094   // existing tablegen patterns. For example VCVTT/VCVTB.
3095   SDValue Val1 = Ins1.getOperand(1);
3096   SDValue Val2 = Ins2.getOperand(1);
3097   if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
3098     return false;
3099 
3100   // Check if the inserted values are both extracts.
3101   if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3102        Val1.getOpcode() == ARMISD::VGETLANEu) &&
3103       (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
3104        Val2.getOpcode() == ARMISD::VGETLANEu) &&
3105       isa<ConstantSDNode>(Val1.getOperand(1)) &&
3106       isa<ConstantSDNode>(Val2.getOperand(1)) &&
3107       (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
3108        Val1.getOperand(0).getValueType() == MVT::v8i16) &&
3109       (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
3110        Val2.getOperand(0).getValueType() == MVT::v8i16)) {
3111     unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
3112     unsigned ExtractLane2 = Val2.getConstantOperandVal(1);
3113 
3114     // If the two extracted lanes are from the same place and adjacent, this
3115     // simplifies into a f32 lane move.
3116     if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
3117         ExtractLane1 == ExtractLane2 + 1) {
3118       SDValue NewExt = CurDAG->getTargetExtractSubreg(
3119           ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
3120       SDValue NewIns = CurDAG->getTargetInsertSubreg(
3121           ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
3122           NewExt);
3123       ReplaceUses(Ins1, NewIns);
3124       return true;
3125     }
3126 
3127     // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
3128     // extracting odd lanes.
3129     if (VT == MVT::v8i16) {
3130       SDValue Inp1 = CurDAG->getTargetExtractSubreg(
3131           ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
3132       SDValue Inp2 = CurDAG->getTargetExtractSubreg(
3133           ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
3134       if (ExtractLane1 % 2 != 0)
3135         Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
3136       if (ExtractLane2 % 2 != 0)
3137         Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
3138       SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
3139       SDValue NewIns =
3140           CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3141                                         Ins2.getOperand(0), SDValue(VINS, 0));
3142       ReplaceUses(Ins1, NewIns);
3143       return true;
3144     }
3145   }
3146 
3147   // The inserted values are not extracted - if they are f16 then insert them
3148   // directly using a VINS.
3149   if (VT == MVT::v8f16) {
3150     SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
3151     SDValue NewIns =
3152         CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
3153                                       Ins2.getOperand(0), SDValue(VINS, 0));
3154     ReplaceUses(Ins1, NewIns);
3155     return true;
3156   }
3157 
3158   return false;
3159 }
3160 
3161 bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
3162                                                             SDNode *FMul,
3163                                                             bool IsUnsigned,
3164                                                             bool FixedToFloat) {
3165   auto Type = N->getValueType(0);
3166   unsigned ScalarBits = Type.getScalarSizeInBits();
3167   if (ScalarBits > 32)
3168     return false;
3169 
3170   SDNodeFlags FMulFlags = FMul->getFlags();
3171   // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3172   // allowed in 16 bit unsigned floats
3173   if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
3174     return false;
3175 
3176   SDValue ImmNode = FMul->getOperand(1);
3177   SDValue VecVal = FMul->getOperand(0);
3178   if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
3179       VecVal->getOpcode() == ISD::SINT_TO_FP)
3180     VecVal = VecVal->getOperand(0);
3181 
3182   if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
3183     return false;
3184 
3185   if (ImmNode.getOpcode() == ISD::BITCAST) {
3186     if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3187       return false;
3188     ImmNode = ImmNode.getOperand(0);
3189   }
3190 
3191   if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
3192     return false;
3193 
3194   APFloat ImmAPF(0.0f);
3195   switch (ImmNode.getOpcode()) {
3196   case ARMISD::VMOVIMM:
3197   case ARMISD::VDUP: {
3198     if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
3199       return false;
3200     unsigned Imm = ImmNode.getConstantOperandVal(0);
3201     if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
3202       Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
3203     ImmAPF =
3204         APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
3205                 APInt(ScalarBits, Imm));
3206     break;
3207   }
3208   case ARMISD::VMOVFPIMM: {
3209     ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));
3210     break;
3211   }
3212   default:
3213     return false;
3214   }
3215 
3216   // Where n is the number of fractional bits, multiplying by 2^n will convert
3217   // from float to fixed and multiplying by 2^-n will convert from fixed to
3218   // float. Taking log2 of the factor (after taking the inverse in the case of
3219   // float to fixed) will give n.
3220   APFloat ToConvert = ImmAPF;
3221   if (FixedToFloat) {
3222     if (!ImmAPF.getExactInverse(&ToConvert))
3223       return false;
3224   }
3225   APSInt Converted(64, 0);
3226   bool IsExact;
3227   ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
3228                              &IsExact);
3229   if (!IsExact || !Converted.isPowerOf2())
3230     return false;
3231 
3232   unsigned FracBits = Converted.logBase2();
3233   if (FracBits > ScalarBits)
3234     return false;
3235 
3236   SmallVector<SDValue, 3> Ops{
3237       VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
3238   AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);
3239 
3240   unsigned int Opcode;
3241   switch (ScalarBits) {
3242   case 16:
3243     if (FixedToFloat)
3244       Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
3245     else
3246       Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3247     break;
3248   case 32:
3249     if (FixedToFloat)
3250       Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
3251     else
3252       Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3253     break;
3254   default:
3255     llvm_unreachable("unexpected number of scalar bits");
3256     break;
3257   }
3258 
3259   ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
3260   return true;
3261 }
3262 
3263 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3264   // Transform a floating-point to fixed-point conversion to a VCVT
3265   if (!Subtarget->hasMVEFloatOps())
3266     return false;
3267   EVT Type = N->getValueType(0);
3268   if (!Type.isVector())
3269     return false;
3270   unsigned int ScalarBits = Type.getScalarSizeInBits();
3271 
3272   bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT;
3273   SDNode *Node = N->getOperand(0).getNode();
3274 
3275   // floating-point to fixed-point with one fractional bit gets turned into an
3276   // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3277   if (Node->getOpcode() == ISD::FADD) {
3278     if (Node->getOperand(0) != Node->getOperand(1))
3279       return false;
3280     SDNodeFlags Flags = Node->getFlags();
3281     // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3282     // allowed in 16 bit unsigned floats
3283     if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3284       return false;
3285 
3286     unsigned Opcode;
3287     switch (ScalarBits) {
3288     case 16:
3289       Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3290       break;
3291     case 32:
3292       Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3293       break;
3294     }
3295     SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3296                                 CurDAG->getConstant(1, dl, MVT::i32)};
3297     AddEmptyMVEPredicateToOps(Ops, dl, Type);
3298 
3299     ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3300     return true;
3301   }
3302 
3303   if (Node->getOpcode() != ISD::FMUL)
3304     return false;
3305 
3306   return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3307 }
3308 
3309 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3310   // Transform a fixed-point to floating-point conversion to a VCVT
3311   if (!Subtarget->hasMVEFloatOps())
3312     return false;
3313   auto Type = N->getValueType(0);
3314   if (!Type.isVector())
3315     return false;
3316 
3317   auto LHS = N->getOperand(0);
3318   if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3319     return false;
3320 
3321   return transformFixedFloatingPointConversion(
3322       N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3323 }
3324 
3325 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
3326   if (!Subtarget->hasV6T2Ops())
3327     return false;
3328 
3329   unsigned Opc = isSigned
3330     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
3331     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
3332   SDLoc dl(N);
3333 
3334   // For unsigned extracts, check for a shift right and mask
3335   unsigned And_imm = 0;
3336   if (N->getOpcode() == ISD::AND) {
3337     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
3338 
3339       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
3340       if (And_imm & (And_imm + 1))
3341         return false;
3342 
3343       unsigned Srl_imm = 0;
3344       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
3345                                 Srl_imm)) {
3346         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3347 
3348         // Mask off the unnecessary bits of the AND immediate; normally
3349         // DAGCombine will do this, but that might not happen if
3350         // targetShrinkDemandedConstant chooses a different immediate.
3351         And_imm &= -1U >> Srl_imm;
3352 
3353         // Note: The width operand is encoded as width-1.
3354         unsigned Width = countTrailingOnes(And_imm) - 1;
3355         unsigned LSB = Srl_imm;
3356 
3357         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3358 
3359         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
3360           // It's cheaper to use a right shift to extract the top bits.
3361           if (Subtarget->isThumb()) {
3362             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
3363             SDValue Ops[] = { N->getOperand(0).getOperand(0),
3364                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3365                               getAL(CurDAG, dl), Reg0, Reg0 };
3366             CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3367             return true;
3368           }
3369 
3370           // ARM models shift instructions as MOVsi with shifter operand.
3371           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
3372           SDValue ShOpc =
3373             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
3374                                       MVT::i32);
3375           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
3376                             getAL(CurDAG, dl), Reg0, Reg0 };
3377           CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
3378           return true;
3379         }
3380 
3381         assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3382         SDValue Ops[] = { N->getOperand(0).getOperand(0),
3383                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3384                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
3385                           getAL(CurDAG, dl), Reg0 };
3386         CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3387         return true;
3388       }
3389     }
3390     return false;
3391   }
3392 
3393   // Otherwise, we're looking for a shift of a shift
3394   unsigned Shl_imm = 0;
3395   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
3396     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
3397     unsigned Srl_imm = 0;
3398     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
3399       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3400       // Note: The width operand is encoded as width-1.
3401       unsigned Width = 32 - Srl_imm - 1;
3402       int LSB = Srl_imm - Shl_imm;
3403       if (LSB < 0)
3404         return false;
3405       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3406       assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3407       SDValue Ops[] = { N->getOperand(0).getOperand(0),
3408                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3409                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
3410                         getAL(CurDAG, dl), Reg0 };
3411       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3412       return true;
3413     }
3414   }
3415 
3416   // Or we are looking for a shift of an and, with a mask operand
3417   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
3418       isShiftedMask_32(And_imm)) {
3419     unsigned Srl_imm = 0;
3420     unsigned LSB = countTrailingZeros(And_imm);
3421     // Shift must be the same as the ands lsb
3422     if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
3423       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
3424       unsigned MSB = 31 - countLeadingZeros(And_imm);
3425       // Note: The width operand is encoded as width-1.
3426       unsigned Width = MSB - LSB;
3427       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3428       assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
3429       SDValue Ops[] = { N->getOperand(0).getOperand(0),
3430                         CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
3431                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
3432                         getAL(CurDAG, dl), Reg0 };
3433       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3434       return true;
3435     }
3436   }
3437 
3438   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
3439     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
3440     unsigned LSB = 0;
3441     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
3442         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
3443       return false;
3444 
3445     if (LSB + Width > 32)
3446       return false;
3447 
3448     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3449     assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
3450     SDValue Ops[] = { N->getOperand(0).getOperand(0),
3451                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
3452                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
3453                       getAL(CurDAG, dl), Reg0 };
3454     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3455     return true;
3456   }
3457 
3458   return false;
3459 }
3460 
3461 /// Target-specific DAG combining for ISD::XOR.
3462 /// Target-independent combining lowers SELECT_CC nodes of the form
3463 /// select_cc setg[ge] X,  0,  X, -X
3464 /// select_cc setgt    X, -1,  X, -X
3465 /// select_cc setl[te] X,  0, -X,  X
3466 /// select_cc setlt    X,  1, -X,  X
3467 /// which represent Integer ABS into:
3468 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
3469 /// ARM instruction selection detects the latter and matches it to
3470 /// ARM::ABS or ARM::t2ABS machine node.
3471 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3472   SDValue XORSrc0 = N->getOperand(0);
3473   SDValue XORSrc1 = N->getOperand(1);
3474   EVT VT = N->getValueType(0);
3475 
3476   if (Subtarget->isThumb1Only())
3477     return false;
3478 
3479   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
3480     return false;
3481 
3482   SDValue ADDSrc0 = XORSrc0.getOperand(0);
3483   SDValue ADDSrc1 = XORSrc0.getOperand(1);
3484   SDValue SRASrc0 = XORSrc1.getOperand(0);
3485   SDValue SRASrc1 = XORSrc1.getOperand(1);
3486   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
3487   EVT XType = SRASrc0.getValueType();
3488   unsigned Size = XType.getSizeInBits() - 1;
3489 
3490   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
3491       XType.isInteger() && SRAConstant != nullptr &&
3492       Size == SRAConstant->getZExtValue()) {
3493     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3494     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
3495     return true;
3496   }
3497 
3498   return false;
3499 }
3500 
3501 /// We've got special pseudo-instructions for these
3502 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3503   unsigned Opcode;
3504   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3505   if (MemTy == MVT::i8)
3506     Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3507   else if (MemTy == MVT::i16)
3508     Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3509   else if (MemTy == MVT::i32)
3510     Opcode = ARM::CMP_SWAP_32;
3511   else
3512     llvm_unreachable("Unknown AtomicCmpSwap type");
3513 
3514   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3515                    N->getOperand(0)};
3516   SDNode *CmpSwap = CurDAG->getMachineNode(
3517       Opcode, SDLoc(N),
3518       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3519 
3520   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3521   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3522 
3523   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3524   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3525   CurDAG->RemoveDeadNode(N);
3526 }
3527 
3528 static Optional<std::pair<unsigned, unsigned>>
3529 getContiguousRangeOfSetBits(const APInt &A) {
3530   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
3531   unsigned LastOne = A.countTrailingZeros();
3532   if (A.countPopulation() != (FirstOne - LastOne + 1))
3533     return Optional<std::pair<unsigned,unsigned>>();
3534   return std::make_pair(FirstOne, LastOne);
3535 }
3536 
3537 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
3538   assert(N->getOpcode() == ARMISD::CMPZ);
3539   SwitchEQNEToPLMI = false;
3540 
3541   if (!Subtarget->isThumb())
3542     // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
3543     // LSR don't exist as standalone instructions - they need the barrel shifter.
3544     return;
3545 
3546   // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
3547   SDValue And = N->getOperand(0);
3548   if (!And->hasOneUse())
3549     return;
3550 
3551   SDValue Zero = N->getOperand(1);
3552   if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
3553       And->getOpcode() != ISD::AND)
3554     return;
3555   SDValue X = And.getOperand(0);
3556   auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
3557 
3558   if (!C)
3559     return;
3560   auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
3561   if (!Range)
3562     return;
3563 
3564   // There are several ways to lower this:
3565   SDNode *NewN;
3566   SDLoc dl(N);
3567 
3568   auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
3569     if (Subtarget->isThumb2()) {
3570       Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
3571       SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3572                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3573                         CurDAG->getRegister(0, MVT::i32) };
3574       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3575     } else {
3576       SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
3577                        CurDAG->getTargetConstant(Imm, dl, MVT::i32),
3578                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3579       return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3580     }
3581   };
3582 
3583   if (Range->second == 0) {
3584     //  1. Mask includes the LSB -> Simply shift the top N bits off
3585     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3586     ReplaceNode(And.getNode(), NewN);
3587   } else if (Range->first == 31) {
3588     //  2. Mask includes the MSB -> Simply shift the bottom N bits off
3589     NewN = EmitShift(ARM::tLSRri, X, Range->second);
3590     ReplaceNode(And.getNode(), NewN);
3591   } else if (Range->first == Range->second) {
3592     //  3. Only one bit is set. We can shift this into the sign bit and use a
3593     //     PL/MI comparison.
3594     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3595     ReplaceNode(And.getNode(), NewN);
3596 
3597     SwitchEQNEToPLMI = true;
3598   } else if (!Subtarget->hasV6T2Ops()) {
3599     //  4. Do a double shift to clear bottom and top bits, but only in
3600     //     thumb-1 mode as in thumb-2 we can use UBFX.
3601     NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
3602     NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
3603                      Range->second + (31 - Range->first));
3604     ReplaceNode(And.getNode(), NewN);
3605   }
3606 
3607 }
3608 
3609 void ARMDAGToDAGISel::Select(SDNode *N) {
3610   SDLoc dl(N);
3611 
3612   if (N->isMachineOpcode()) {
3613     N->setNodeId(-1);
3614     return;   // Already selected.
3615   }
3616 
3617   switch (N->getOpcode()) {
3618   default: break;
3619   case ISD::STORE: {
3620     // For Thumb1, match an sp-relative store in C++. This is a little
3621     // unfortunate, but I don't think I can make the chain check work
3622     // otherwise.  (The chain of the store has to be the same as the chain
3623     // of the CopyFromReg, or else we can't replace the CopyFromReg with
3624     // a direct reference to "SP".)
3625     //
3626     // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3627     // a different addressing mode from other four-byte stores.
3628     //
3629     // This pattern usually comes up with call arguments.
3630     StoreSDNode *ST = cast<StoreSDNode>(N);
3631     SDValue Ptr = ST->getBasePtr();
3632     if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3633       int RHSC = 0;
3634       if (Ptr.getOpcode() == ISD::ADD &&
3635           isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3636         Ptr = Ptr.getOperand(0);
3637 
3638       if (Ptr.getOpcode() == ISD::CopyFromReg &&
3639           cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3640           Ptr.getOperand(0) == ST->getChain()) {
3641         SDValue Ops[] = {ST->getValue(),
3642                          CurDAG->getRegister(ARM::SP, MVT::i32),
3643                          CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3644                          getAL(CurDAG, dl),
3645                          CurDAG->getRegister(0, MVT::i32),
3646                          ST->getChain()};
3647         MachineSDNode *ResNode =
3648             CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3649         MachineMemOperand *MemOp = ST->getMemOperand();
3650         CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3651         ReplaceNode(N, ResNode);
3652         return;
3653       }
3654     }
3655     break;
3656   }
3657   case ISD::WRITE_REGISTER:
3658     if (tryWriteRegister(N))
3659       return;
3660     break;
3661   case ISD::READ_REGISTER:
3662     if (tryReadRegister(N))
3663       return;
3664     break;
3665   case ISD::INLINEASM:
3666   case ISD::INLINEASM_BR:
3667     if (tryInlineAsm(N))
3668       return;
3669     break;
3670   case ISD::XOR:
3671     // Select special operations if XOR node forms integer ABS pattern
3672     if (tryABSOp(N))
3673       return;
3674     // Other cases are autogenerated.
3675     break;
3676   case ISD::Constant: {
3677     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
3678     // If we can't materialize the constant we need to use a literal pool
3679     if (ConstantMaterializationCost(Val, Subtarget) > 2) {
3680       SDValue CPIdx = CurDAG->getTargetConstantPool(
3681           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3682           TLI->getPointerTy(CurDAG->getDataLayout()));
3683 
3684       SDNode *ResNode;
3685       if (Subtarget->isThumb()) {
3686         SDValue Ops[] = {
3687           CPIdx,
3688           getAL(CurDAG, dl),
3689           CurDAG->getRegister(0, MVT::i32),
3690           CurDAG->getEntryNode()
3691         };
3692         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3693                                          Ops);
3694       } else {
3695         SDValue Ops[] = {
3696           CPIdx,
3697           CurDAG->getTargetConstant(0, dl, MVT::i32),
3698           getAL(CurDAG, dl),
3699           CurDAG->getRegister(0, MVT::i32),
3700           CurDAG->getEntryNode()
3701         };
3702         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3703                                          Ops);
3704       }
3705       // Annotate the Node with memory operand information so that MachineInstr
3706       // queries work properly. This e.g. gives the register allocation the
3707       // required information for rematerialization.
3708       MachineFunction& MF = CurDAG->getMachineFunction();
3709       MachineMemOperand *MemOp =
3710           MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3711                                   MachineMemOperand::MOLoad, 4, Align(4));
3712 
3713       CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3714 
3715       ReplaceNode(N, ResNode);
3716       return;
3717     }
3718 
3719     // Other cases are autogenerated.
3720     break;
3721   }
3722   case ISD::FrameIndex: {
3723     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3724     int FI = cast<FrameIndexSDNode>(N)->getIndex();
3725     SDValue TFI = CurDAG->getTargetFrameIndex(
3726         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3727     if (Subtarget->isThumb1Only()) {
3728       // Set the alignment of the frame object to 4, to avoid having to generate
3729       // more than one ADD
3730       MachineFrameInfo &MFI = MF->getFrameInfo();
3731       if (MFI.getObjectAlign(FI) < Align(4))
3732         MFI.setObjectAlignment(FI, Align(4));
3733       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3734                            CurDAG->getTargetConstant(0, dl, MVT::i32));
3735       return;
3736     } else {
3737       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3738                       ARM::t2ADDri : ARM::ADDri);
3739       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3740                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3741                         CurDAG->getRegister(0, MVT::i32) };
3742       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3743       return;
3744     }
3745   }
3746   case ISD::INSERT_VECTOR_ELT: {
3747     if (tryInsertVectorElt(N))
3748       return;
3749     break;
3750   }
3751   case ISD::SRL:
3752     if (tryV6T2BitfieldExtractOp(N, false))
3753       return;
3754     break;
3755   case ISD::SIGN_EXTEND_INREG:
3756   case ISD::SRA:
3757     if (tryV6T2BitfieldExtractOp(N, true))
3758       return;
3759     break;
3760   case ISD::FP_TO_UINT:
3761   case ISD::FP_TO_SINT:
3762     if (tryFP_TO_INT(N, dl))
3763       return;
3764     break;
3765   case ISD::FMUL:
3766     if (tryFMULFixed(N, dl))
3767       return;
3768     break;
3769   case ISD::MUL:
3770     if (Subtarget->isThumb1Only())
3771       break;
3772     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3773       unsigned RHSV = C->getZExtValue();
3774       if (!RHSV) break;
3775       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
3776         unsigned ShImm = Log2_32(RHSV-1);
3777         if (ShImm >= 32)
3778           break;
3779         SDValue V = N->getOperand(0);
3780         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3781         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3782         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3783         if (Subtarget->isThumb()) {
3784           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3785           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3786           return;
3787         } else {
3788           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3789                             Reg0 };
3790           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3791           return;
3792         }
3793       }
3794       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
3795         unsigned ShImm = Log2_32(RHSV+1);
3796         if (ShImm >= 32)
3797           break;
3798         SDValue V = N->getOperand(0);
3799         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3800         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3801         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3802         if (Subtarget->isThumb()) {
3803           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3804           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3805           return;
3806         } else {
3807           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3808                             Reg0 };
3809           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3810           return;
3811         }
3812       }
3813     }
3814     break;
3815   case ISD::AND: {
3816     // Check for unsigned bitfield extract
3817     if (tryV6T2BitfieldExtractOp(N, false))
3818       return;
3819 
3820     // If an immediate is used in an AND node, it is possible that the immediate
3821     // can be more optimally materialized when negated. If this is the case we
3822     // can negate the immediate and use a BIC instead.
3823     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3824     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3825       uint32_t Imm = (uint32_t) N1C->getZExtValue();
3826 
3827       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3828       // immediate can be negated and fit in the immediate operand of
3829       // a t2BIC, don't do any manual transform here as this can be
3830       // handled by the generic ISel machinery.
3831       bool PreferImmediateEncoding =
3832         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3833       if (!PreferImmediateEncoding &&
3834           ConstantMaterializationCost(Imm, Subtarget) >
3835               ConstantMaterializationCost(~Imm, Subtarget)) {
3836         // The current immediate costs more to materialize than a negated
3837         // immediate, so negate the immediate and use a BIC.
3838         SDValue NewImm =
3839           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
3840         // If the new constant didn't exist before, reposition it in the topological
3841         // ordering so it is just before N. Otherwise, don't touch its location.
3842         if (NewImm->getNodeId() == -1)
3843           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3844 
3845         if (!Subtarget->hasThumb2()) {
3846           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3847                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
3848                            CurDAG->getRegister(0, MVT::i32)};
3849           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3850           return;
3851         } else {
3852           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3853                            CurDAG->getRegister(0, MVT::i32),
3854                            CurDAG->getRegister(0, MVT::i32)};
3855           ReplaceNode(N,
3856                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3857           return;
3858         }
3859       }
3860     }
3861 
3862     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3863     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3864     // are entirely contributed by c2 and lower 16-bits are entirely contributed
3865     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
3866     // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
3867     EVT VT = N->getValueType(0);
3868     if (VT != MVT::i32)
3869       break;
3870     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3871       ? ARM::t2MOVTi16
3872       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3873     if (!Opc)
3874       break;
3875     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3876     N1C = dyn_cast<ConstantSDNode>(N1);
3877     if (!N1C)
3878       break;
3879     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3880       SDValue N2 = N0.getOperand(1);
3881       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3882       if (!N2C)
3883         break;
3884       unsigned N1CVal = N1C->getZExtValue();
3885       unsigned N2CVal = N2C->getZExtValue();
3886       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3887           (N1CVal & 0xffffU) == 0xffffU &&
3888           (N2CVal & 0xffffU) == 0x0U) {
3889         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3890                                                   dl, MVT::i32);
3891         SDValue Ops[] = { N0.getOperand(0), Imm16,
3892                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3893         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3894         return;
3895       }
3896     }
3897 
3898     break;
3899   }
3900   case ARMISD::UMAAL: {
3901     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3902     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3903                       N->getOperand(2), N->getOperand(3),
3904                       getAL(CurDAG, dl),
3905                       CurDAG->getRegister(0, MVT::i32) };
3906     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3907     return;
3908   }
3909   case ARMISD::UMLAL:{
3910     if (Subtarget->isThumb()) {
3911       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3912                         N->getOperand(3), getAL(CurDAG, dl),
3913                         CurDAG->getRegister(0, MVT::i32)};
3914       ReplaceNode(
3915           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3916       return;
3917     }else{
3918       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3919                         N->getOperand(3), getAL(CurDAG, dl),
3920                         CurDAG->getRegister(0, MVT::i32),
3921                         CurDAG->getRegister(0, MVT::i32) };
3922       ReplaceNode(N, CurDAG->getMachineNode(
3923                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3924                          MVT::i32, MVT::i32, Ops));
3925       return;
3926     }
3927   }
3928   case ARMISD::SMLAL:{
3929     if (Subtarget->isThumb()) {
3930       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3931                         N->getOperand(3), getAL(CurDAG, dl),
3932                         CurDAG->getRegister(0, MVT::i32)};
3933       ReplaceNode(
3934           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3935       return;
3936     }else{
3937       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3938                         N->getOperand(3), getAL(CurDAG, dl),
3939                         CurDAG->getRegister(0, MVT::i32),
3940                         CurDAG->getRegister(0, MVT::i32) };
3941       ReplaceNode(N, CurDAG->getMachineNode(
3942                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3943                          MVT::i32, MVT::i32, Ops));
3944       return;
3945     }
3946   }
3947   case ARMISD::SUBE: {
3948     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3949       break;
3950     // Look for a pattern to match SMMLS
3951     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3952     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3953         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3954         !SDValue(N, 1).use_empty())
3955       break;
3956 
3957     if (Subtarget->isThumb())
3958       assert(Subtarget->hasThumb2() &&
3959              "This pattern should not be generated for Thumb");
3960 
3961     SDValue SmulLoHi = N->getOperand(1);
3962     SDValue Subc = N->getOperand(2);
3963     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
3964 
3965     if (!Zero || Zero->getZExtValue() != 0 ||
3966         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3967         N->getOperand(1) != SmulLoHi.getValue(1) ||
3968         N->getOperand(2) != Subc.getValue(1))
3969       break;
3970 
3971     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3972     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3973                       N->getOperand(0), getAL(CurDAG, dl),
3974                       CurDAG->getRegister(0, MVT::i32) };
3975     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3976     return;
3977   }
3978   case ISD::LOAD: {
3979     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3980       return;
3981     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
3982       if (tryT2IndexedLoad(N))
3983         return;
3984     } else if (Subtarget->isThumb()) {
3985       if (tryT1IndexedLoad(N))
3986         return;
3987     } else if (tryARMIndexedLoad(N))
3988       return;
3989     // Other cases are autogenerated.
3990     break;
3991   }
3992   case ISD::MLOAD:
3993     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3994       return;
3995     // Other cases are autogenerated.
3996     break;
3997   case ARMISD::WLSSETUP: {
3998     SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
3999                                          N->getOperand(0));
4000     ReplaceUses(N, New);
4001     CurDAG->RemoveDeadNode(N);
4002     return;
4003   }
4004   case ARMISD::WLS: {
4005     SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4006                                          N->getOperand(1), N->getOperand(2),
4007                                          N->getOperand(0));
4008     ReplaceUses(N, New);
4009     CurDAG->RemoveDeadNode(N);
4010     return;
4011   }
4012   case ARMISD::LE: {
4013     SDValue Ops[] = { N->getOperand(1),
4014                       N->getOperand(2),
4015                       N->getOperand(0) };
4016     unsigned Opc = ARM::t2LoopEnd;
4017     SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4018     ReplaceUses(N, New);
4019     CurDAG->RemoveDeadNode(N);
4020     return;
4021   }
4022   case ARMISD::LDRD: {
4023     if (Subtarget->isThumb2())
4024       break; // TableGen handles isel in this case.
4025     SDValue Base, RegOffset, ImmOffset;
4026     const SDValue &Chain = N->getOperand(0);
4027     const SDValue &Addr = N->getOperand(1);
4028     SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4029     if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4030       // The register-offset variant of LDRD mandates that the register
4031       // allocated to RegOffset is not reused in any of the remaining operands.
4032       // This restriction is currently not enforced. Therefore emitting this
4033       // variant is explicitly avoided.
4034       Base = Addr;
4035       RegOffset = CurDAG->getRegister(0, MVT::i32);
4036     }
4037     SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4038     SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4039                                          {MVT::Untyped, MVT::Other}, Ops);
4040     SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4041                                                 SDValue(New, 0));
4042     SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4043                                                 SDValue(New, 0));
4044     transferMemOperands(N, New);
4045     ReplaceUses(SDValue(N, 0), Lo);
4046     ReplaceUses(SDValue(N, 1), Hi);
4047     ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4048     CurDAG->RemoveDeadNode(N);
4049     return;
4050   }
4051   case ARMISD::STRD: {
4052     if (Subtarget->isThumb2())
4053       break; // TableGen handles isel in this case.
4054     SDValue Base, RegOffset, ImmOffset;
4055     const SDValue &Chain = N->getOperand(0);
4056     const SDValue &Addr = N->getOperand(3);
4057     SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4058     if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4059       // The register-offset variant of STRD mandates that the register
4060       // allocated to RegOffset is not reused in any of the remaining operands.
4061       // This restriction is currently not enforced. Therefore emitting this
4062       // variant is explicitly avoided.
4063       Base = Addr;
4064       RegOffset = CurDAG->getRegister(0, MVT::i32);
4065     }
4066     SDNode *RegPair =
4067         createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4068     SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4069     SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4070     transferMemOperands(N, New);
4071     ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4072     CurDAG->RemoveDeadNode(N);
4073     return;
4074   }
4075   case ARMISD::LOOP_DEC: {
4076     SDValue Ops[] = { N->getOperand(1),
4077                       N->getOperand(2),
4078                       N->getOperand(0) };
4079     SDNode *Dec =
4080       CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4081                              CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4082     ReplaceUses(N, Dec);
4083     CurDAG->RemoveDeadNode(N);
4084     return;
4085   }
4086   case ARMISD::BRCOND: {
4087     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4088     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4089     // Pattern complexity = 6  cost = 1  size = 0
4090 
4091     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4092     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4093     // Pattern complexity = 6  cost = 1  size = 0
4094 
4095     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4096     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4097     // Pattern complexity = 6  cost = 1  size = 0
4098 
4099     unsigned Opc = Subtarget->isThumb() ?
4100       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4101     SDValue Chain = N->getOperand(0);
4102     SDValue N1 = N->getOperand(1);
4103     SDValue N2 = N->getOperand(2);
4104     SDValue N3 = N->getOperand(3);
4105     SDValue InFlag = N->getOperand(4);
4106     assert(N1.getOpcode() == ISD::BasicBlock);
4107     assert(N2.getOpcode() == ISD::Constant);
4108     assert(N3.getOpcode() == ISD::Register);
4109 
4110     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
4111 
4112     if (InFlag.getOpcode() == ARMISD::CMPZ) {
4113       if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4114         SDValue Int = InFlag.getOperand(0);
4115         uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
4116 
4117         // Handle low-overhead loops.
4118         if (ID == Intrinsic::loop_decrement_reg) {
4119           SDValue Elements = Int.getOperand(2);
4120           SDValue Size = CurDAG->getTargetConstant(
4121             cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
4122                                  MVT::i32);
4123 
4124           SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4125           SDNode *LoopDec =
4126             CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4127                                    CurDAG->getVTList(MVT::i32, MVT::Other),
4128                                    Args);
4129           ReplaceUses(Int.getNode(), LoopDec);
4130 
4131           SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4132           SDNode *LoopEnd =
4133             CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4134 
4135           ReplaceUses(N, LoopEnd);
4136           CurDAG->RemoveDeadNode(N);
4137           CurDAG->RemoveDeadNode(InFlag.getNode());
4138           CurDAG->RemoveDeadNode(Int.getNode());
4139           return;
4140         }
4141       }
4142 
4143       bool SwitchEQNEToPLMI;
4144       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
4145       InFlag = N->getOperand(4);
4146 
4147       if (SwitchEQNEToPLMI) {
4148         switch ((ARMCC::CondCodes)CC) {
4149         default: llvm_unreachable("CMPZ must be either NE or EQ!");
4150         case ARMCC::NE:
4151           CC = (unsigned)ARMCC::MI;
4152           break;
4153         case ARMCC::EQ:
4154           CC = (unsigned)ARMCC::PL;
4155           break;
4156         }
4157       }
4158     }
4159 
4160     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4161     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
4162     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
4163                                              MVT::Glue, Ops);
4164     Chain = SDValue(ResNode, 0);
4165     if (N->getNumValues() == 2) {
4166       InFlag = SDValue(ResNode, 1);
4167       ReplaceUses(SDValue(N, 1), InFlag);
4168     }
4169     ReplaceUses(SDValue(N, 0),
4170                 SDValue(Chain.getNode(), Chain.getResNo()));
4171     CurDAG->RemoveDeadNode(N);
4172     return;
4173   }
4174 
4175   case ARMISD::CMPZ: {
4176     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4177     //   This allows us to avoid materializing the expensive negative constant.
4178     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
4179     //   for its glue output.
4180     SDValue X = N->getOperand(0);
4181     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4182     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4183       int64_t Addend = -C->getSExtValue();
4184 
4185       SDNode *Add = nullptr;
4186       // ADDS can be better than CMN if the immediate fits in a
4187       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4188       // Outside that range we can just use a CMN which is 32-bit but has a
4189       // 12-bit immediate range.
4190       if (Addend < 1<<8) {
4191         if (Subtarget->isThumb2()) {
4192           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4193                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4194                             CurDAG->getRegister(0, MVT::i32) };
4195           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4196         } else {
4197           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4198           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4199                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4200                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4201           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4202         }
4203       }
4204       if (Add) {
4205         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4206         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
4207       }
4208     }
4209     // Other cases are autogenerated.
4210     break;
4211   }
4212 
4213   case ARMISD::CMOV: {
4214     SDValue InFlag = N->getOperand(4);
4215 
4216     if (InFlag.getOpcode() == ARMISD::CMPZ) {
4217       bool SwitchEQNEToPLMI;
4218       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
4219 
4220       if (SwitchEQNEToPLMI) {
4221         SDValue ARMcc = N->getOperand(2);
4222         ARMCC::CondCodes CC =
4223           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
4224 
4225         switch (CC) {
4226         default: llvm_unreachable("CMPZ must be either NE or EQ!");
4227         case ARMCC::NE:
4228           CC = ARMCC::MI;
4229           break;
4230         case ARMCC::EQ:
4231           CC = ARMCC::PL;
4232           break;
4233         }
4234         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4235         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4236                          N->getOperand(3), N->getOperand(4)};
4237         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4238       }
4239 
4240     }
4241     // Other cases are autogenerated.
4242     break;
4243   }
4244 
4245   case ARMISD::VZIP: {
4246     unsigned Opc = 0;
4247     EVT VT = N->getValueType(0);
4248     switch (VT.getSimpleVT().SimpleTy) {
4249     default: return;
4250     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
4251     case MVT::v4f16:
4252     case MVT::v4i16: Opc = ARM::VZIPd16; break;
4253     case MVT::v2f32:
4254     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4255     case MVT::v2i32: Opc = ARM::VTRNd32; break;
4256     case MVT::v16i8: Opc = ARM::VZIPq8; break;
4257     case MVT::v8f16:
4258     case MVT::v8i16: Opc = ARM::VZIPq16; break;
4259     case MVT::v4f32:
4260     case MVT::v4i32: Opc = ARM::VZIPq32; break;
4261     }
4262     SDValue Pred = getAL(CurDAG, dl);
4263     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4264     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
4265     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4266     return;
4267   }
4268   case ARMISD::VUZP: {
4269     unsigned Opc = 0;
4270     EVT VT = N->getValueType(0);
4271     switch (VT.getSimpleVT().SimpleTy) {
4272     default: return;
4273     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
4274     case MVT::v4f16:
4275     case MVT::v4i16: Opc = ARM::VUZPd16; break;
4276     case MVT::v2f32:
4277     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4278     case MVT::v2i32: Opc = ARM::VTRNd32; break;
4279     case MVT::v16i8: Opc = ARM::VUZPq8; break;
4280     case MVT::v8f16:
4281     case MVT::v8i16: Opc = ARM::VUZPq16; break;
4282     case MVT::v4f32:
4283     case MVT::v4i32: Opc = ARM::VUZPq32; break;
4284     }
4285     SDValue Pred = getAL(CurDAG, dl);
4286     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4287     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
4288     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4289     return;
4290   }
4291   case ARMISD::VTRN: {
4292     unsigned Opc = 0;
4293     EVT VT = N->getValueType(0);
4294     switch (VT.getSimpleVT().SimpleTy) {
4295     default: return;
4296     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
4297     case MVT::v4f16:
4298     case MVT::v4i16: Opc = ARM::VTRNd16; break;
4299     case MVT::v2f32:
4300     case MVT::v2i32: Opc = ARM::VTRNd32; break;
4301     case MVT::v16i8: Opc = ARM::VTRNq8; break;
4302     case MVT::v8f16:
4303     case MVT::v8i16: Opc = ARM::VTRNq16; break;
4304     case MVT::v4f32:
4305     case MVT::v4i32: Opc = ARM::VTRNq32; break;
4306     }
4307     SDValue Pred = getAL(CurDAG, dl);
4308     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4309     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
4310     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4311     return;
4312   }
4313   case ARMISD::BUILD_VECTOR: {
4314     EVT VecVT = N->getValueType(0);
4315     EVT EltVT = VecVT.getVectorElementType();
4316     unsigned NumElts = VecVT.getVectorNumElements();
4317     if (EltVT == MVT::f64) {
4318       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4319       ReplaceNode(
4320           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4321       return;
4322     }
4323     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4324     if (NumElts == 2) {
4325       ReplaceNode(
4326           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4327       return;
4328     }
4329     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4330     ReplaceNode(N,
4331                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4332                                     N->getOperand(2), N->getOperand(3)));
4333     return;
4334   }
4335 
4336   case ARMISD::VLD1DUP: {
4337     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4338                                          ARM::VLD1DUPd32 };
4339     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4340                                          ARM::VLD1DUPq32 };
4341     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4342     return;
4343   }
4344 
4345   case ARMISD::VLD2DUP: {
4346     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4347                                         ARM::VLD2DUPd32 };
4348     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4349     return;
4350   }
4351 
4352   case ARMISD::VLD3DUP: {
4353     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4354                                         ARM::VLD3DUPd16Pseudo,
4355                                         ARM::VLD3DUPd32Pseudo };
4356     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4357     return;
4358   }
4359 
4360   case ARMISD::VLD4DUP: {
4361     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4362                                         ARM::VLD4DUPd16Pseudo,
4363                                         ARM::VLD4DUPd32Pseudo };
4364     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4365     return;
4366   }
4367 
4368   case ARMISD::VLD1DUP_UPD: {
4369     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4370                                          ARM::VLD1DUPd16wb_fixed,
4371                                          ARM::VLD1DUPd32wb_fixed };
4372     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4373                                          ARM::VLD1DUPq16wb_fixed,
4374                                          ARM::VLD1DUPq32wb_fixed };
4375     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4376     return;
4377   }
4378 
4379   case ARMISD::VLD2DUP_UPD: {
4380     static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4381                                          ARM::VLD2DUPd16wb_fixed,
4382                                          ARM::VLD2DUPd32wb_fixed,
4383                                          ARM::VLD1q64wb_fixed };
4384     static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4385                                           ARM::VLD2DUPq16EvenPseudo,
4386                                           ARM::VLD2DUPq32EvenPseudo };
4387     static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4388                                           ARM::VLD2DUPq16OddPseudoWB_fixed,
4389                                           ARM::VLD2DUPq32OddPseudoWB_fixed };
4390     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4391     return;
4392   }
4393 
4394   case ARMISD::VLD3DUP_UPD: {
4395     static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4396                                          ARM::VLD3DUPd16Pseudo_UPD,
4397                                          ARM::VLD3DUPd32Pseudo_UPD,
4398                                          ARM::VLD1d64TPseudoWB_fixed };
4399     static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4400                                           ARM::VLD3DUPq16EvenPseudo,
4401                                           ARM::VLD3DUPq32EvenPseudo };
4402     static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4403                                           ARM::VLD3DUPq16OddPseudo_UPD,
4404                                           ARM::VLD3DUPq32OddPseudo_UPD };
4405     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4406     return;
4407   }
4408 
4409   case ARMISD::VLD4DUP_UPD: {
4410     static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4411                                          ARM::VLD4DUPd16Pseudo_UPD,
4412                                          ARM::VLD4DUPd32Pseudo_UPD,
4413                                          ARM::VLD1d64QPseudoWB_fixed };
4414     static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4415                                           ARM::VLD4DUPq16EvenPseudo,
4416                                           ARM::VLD4DUPq32EvenPseudo };
4417     static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4418                                           ARM::VLD4DUPq16OddPseudo_UPD,
4419                                           ARM::VLD4DUPq32OddPseudo_UPD };
4420     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4421     return;
4422   }
4423 
4424   case ARMISD::VLD1_UPD: {
4425     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4426                                          ARM::VLD1d16wb_fixed,
4427                                          ARM::VLD1d32wb_fixed,
4428                                          ARM::VLD1d64wb_fixed };
4429     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4430                                          ARM::VLD1q16wb_fixed,
4431                                          ARM::VLD1q32wb_fixed,
4432                                          ARM::VLD1q64wb_fixed };
4433     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4434     return;
4435   }
4436 
4437   case ARMISD::VLD2_UPD: {
4438     if (Subtarget->hasNEON()) {
4439       static const uint16_t DOpcodes[] = {
4440           ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4441           ARM::VLD1q64wb_fixed};
4442       static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4443                                           ARM::VLD2q16PseudoWB_fixed,
4444                                           ARM::VLD2q32PseudoWB_fixed};
4445       SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4446     } else {
4447       static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4448                                           ARM::MVE_VLD21_8_wb};
4449       static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4450                                            ARM::MVE_VLD21_16_wb};
4451       static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4452                                            ARM::MVE_VLD21_32_wb};
4453       static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4454       SelectMVE_VLD(N, 2, Opcodes, true);
4455     }
4456     return;
4457   }
4458 
4459   case ARMISD::VLD3_UPD: {
4460     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4461                                          ARM::VLD3d16Pseudo_UPD,
4462                                          ARM::VLD3d32Pseudo_UPD,
4463                                          ARM::VLD1d64TPseudoWB_fixed};
4464     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4465                                           ARM::VLD3q16Pseudo_UPD,
4466                                           ARM::VLD3q32Pseudo_UPD };
4467     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4468                                           ARM::VLD3q16oddPseudo_UPD,
4469                                           ARM::VLD3q32oddPseudo_UPD };
4470     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4471     return;
4472   }
4473 
4474   case ARMISD::VLD4_UPD: {
4475     if (Subtarget->hasNEON()) {
4476       static const uint16_t DOpcodes[] = {
4477           ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
4478           ARM::VLD1d64QPseudoWB_fixed};
4479       static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
4480                                            ARM::VLD4q16Pseudo_UPD,
4481                                            ARM::VLD4q32Pseudo_UPD};
4482       static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
4483                                            ARM::VLD4q16oddPseudo_UPD,
4484                                            ARM::VLD4q32oddPseudo_UPD};
4485       SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4486     } else {
4487       static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
4488                                           ARM::MVE_VLD42_8,
4489                                           ARM::MVE_VLD43_8_wb};
4490       static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
4491                                            ARM::MVE_VLD42_16,
4492                                            ARM::MVE_VLD43_16_wb};
4493       static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
4494                                            ARM::MVE_VLD42_32,
4495                                            ARM::MVE_VLD43_32_wb};
4496       static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4497       SelectMVE_VLD(N, 4, Opcodes, true);
4498     }
4499     return;
4500   }
4501 
4502   case ARMISD::VLD1x2_UPD: {
4503     if (Subtarget->hasNEON()) {
4504       static const uint16_t DOpcodes[] = {
4505           ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
4506           ARM::VLD1q64wb_fixed};
4507       static const uint16_t QOpcodes[] = {
4508           ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4509           ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4510       SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4511       return;
4512     }
4513     break;
4514   }
4515 
4516   case ARMISD::VLD1x3_UPD: {
4517     if (Subtarget->hasNEON()) {
4518       static const uint16_t DOpcodes[] = {
4519           ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
4520           ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
4521       static const uint16_t QOpcodes0[] = {
4522           ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
4523           ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
4524       static const uint16_t QOpcodes1[] = {
4525           ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
4526           ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
4527       SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4528       return;
4529     }
4530     break;
4531   }
4532 
4533   case ARMISD::VLD1x4_UPD: {
4534     if (Subtarget->hasNEON()) {
4535       static const uint16_t DOpcodes[] = {
4536           ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
4537           ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
4538       static const uint16_t QOpcodes0[] = {
4539           ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
4540           ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
4541       static const uint16_t QOpcodes1[] = {
4542           ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
4543           ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
4544       SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4545       return;
4546     }
4547     break;
4548   }
4549 
4550   case ARMISD::VLD2LN_UPD: {
4551     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
4552                                          ARM::VLD2LNd16Pseudo_UPD,
4553                                          ARM::VLD2LNd32Pseudo_UPD };
4554     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
4555                                          ARM::VLD2LNq32Pseudo_UPD };
4556     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
4557     return;
4558   }
4559 
4560   case ARMISD::VLD3LN_UPD: {
4561     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
4562                                          ARM::VLD3LNd16Pseudo_UPD,
4563                                          ARM::VLD3LNd32Pseudo_UPD };
4564     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
4565                                          ARM::VLD3LNq32Pseudo_UPD };
4566     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
4567     return;
4568   }
4569 
4570   case ARMISD::VLD4LN_UPD: {
4571     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
4572                                          ARM::VLD4LNd16Pseudo_UPD,
4573                                          ARM::VLD4LNd32Pseudo_UPD };
4574     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
4575                                          ARM::VLD4LNq32Pseudo_UPD };
4576     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
4577     return;
4578   }
4579 
4580   case ARMISD::VST1_UPD: {
4581     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
4582                                          ARM::VST1d16wb_fixed,
4583                                          ARM::VST1d32wb_fixed,
4584                                          ARM::VST1d64wb_fixed };
4585     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
4586                                          ARM::VST1q16wb_fixed,
4587                                          ARM::VST1q32wb_fixed,
4588                                          ARM::VST1q64wb_fixed };
4589     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
4590     return;
4591   }
4592 
4593   case ARMISD::VST2_UPD: {
4594     if (Subtarget->hasNEON()) {
4595       static const uint16_t DOpcodes[] = {
4596           ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
4597           ARM::VST1q64wb_fixed};
4598       static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
4599                                           ARM::VST2q16PseudoWB_fixed,
4600                                           ARM::VST2q32PseudoWB_fixed};
4601       SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4602       return;
4603     }
4604     break;
4605   }
4606 
4607   case ARMISD::VST3_UPD: {
4608     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
4609                                          ARM::VST3d16Pseudo_UPD,
4610                                          ARM::VST3d32Pseudo_UPD,
4611                                          ARM::VST1d64TPseudoWB_fixed};
4612     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
4613                                           ARM::VST3q16Pseudo_UPD,
4614                                           ARM::VST3q32Pseudo_UPD };
4615     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
4616                                           ARM::VST3q16oddPseudo_UPD,
4617                                           ARM::VST3q32oddPseudo_UPD };
4618     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4619     return;
4620   }
4621 
4622   case ARMISD::VST4_UPD: {
4623     if (Subtarget->hasNEON()) {
4624       static const uint16_t DOpcodes[] = {
4625           ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
4626           ARM::VST1d64QPseudoWB_fixed};
4627       static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
4628                                            ARM::VST4q16Pseudo_UPD,
4629                                            ARM::VST4q32Pseudo_UPD};
4630       static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
4631                                            ARM::VST4q16oddPseudo_UPD,
4632                                            ARM::VST4q32oddPseudo_UPD};
4633       SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4634       return;
4635     }
4636     break;
4637   }
4638 
4639   case ARMISD::VST1x2_UPD: {
4640     if (Subtarget->hasNEON()) {
4641       static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
4642                                            ARM::VST1q16wb_fixed,
4643                                            ARM::VST1q32wb_fixed,
4644                                            ARM::VST1q64wb_fixed};
4645       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4646                                            ARM::VST1d16QPseudoWB_fixed,
4647                                            ARM::VST1d32QPseudoWB_fixed,
4648                                            ARM::VST1d64QPseudoWB_fixed };
4649       SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
4650       return;
4651     }
4652     break;
4653   }
4654 
4655   case ARMISD::VST1x3_UPD: {
4656     if (Subtarget->hasNEON()) {
4657       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
4658                                            ARM::VST1d16TPseudoWB_fixed,
4659                                            ARM::VST1d32TPseudoWB_fixed,
4660                                            ARM::VST1d64TPseudoWB_fixed };
4661       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
4662                                             ARM::VST1q16LowTPseudo_UPD,
4663                                             ARM::VST1q32LowTPseudo_UPD,
4664                                             ARM::VST1q64LowTPseudo_UPD };
4665       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
4666                                             ARM::VST1q16HighTPseudo_UPD,
4667                                             ARM::VST1q32HighTPseudo_UPD,
4668                                             ARM::VST1q64HighTPseudo_UPD };
4669       SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4670       return;
4671     }
4672     break;
4673   }
4674 
4675   case ARMISD::VST1x4_UPD: {
4676     if (Subtarget->hasNEON()) {
4677       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
4678                                            ARM::VST1d16QPseudoWB_fixed,
4679                                            ARM::VST1d32QPseudoWB_fixed,
4680                                            ARM::VST1d64QPseudoWB_fixed };
4681       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
4682                                             ARM::VST1q16LowQPseudo_UPD,
4683                                             ARM::VST1q32LowQPseudo_UPD,
4684                                             ARM::VST1q64LowQPseudo_UPD };
4685       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
4686                                             ARM::VST1q16HighQPseudo_UPD,
4687                                             ARM::VST1q32HighQPseudo_UPD,
4688                                             ARM::VST1q64HighQPseudo_UPD };
4689       SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4690       return;
4691     }
4692     break;
4693   }
4694   case ARMISD::VST2LN_UPD: {
4695     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
4696                                          ARM::VST2LNd16Pseudo_UPD,
4697                                          ARM::VST2LNd32Pseudo_UPD };
4698     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
4699                                          ARM::VST2LNq32Pseudo_UPD };
4700     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
4701     return;
4702   }
4703 
4704   case ARMISD::VST3LN_UPD: {
4705     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
4706                                          ARM::VST3LNd16Pseudo_UPD,
4707                                          ARM::VST3LNd32Pseudo_UPD };
4708     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
4709                                          ARM::VST3LNq32Pseudo_UPD };
4710     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
4711     return;
4712   }
4713 
4714   case ARMISD::VST4LN_UPD: {
4715     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
4716                                          ARM::VST4LNd16Pseudo_UPD,
4717                                          ARM::VST4LNd32Pseudo_UPD };
4718     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
4719                                          ARM::VST4LNq32Pseudo_UPD };
4720     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
4721     return;
4722   }
4723 
4724   case ISD::INTRINSIC_VOID:
4725   case ISD::INTRINSIC_W_CHAIN: {
4726     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4727     switch (IntNo) {
4728     default:
4729       break;
4730 
4731     case Intrinsic::arm_mrrc:
4732     case Intrinsic::arm_mrrc2: {
4733       SDLoc dl(N);
4734       SDValue Chain = N->getOperand(0);
4735       unsigned Opc;
4736 
4737       if (Subtarget->isThumb())
4738         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
4739       else
4740         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
4741 
4742       SmallVector<SDValue, 5> Ops;
4743       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
4744       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
4745       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
4746 
4747       // The mrrc2 instruction in ARM doesn't allow predicates: the top 4 bits of the encoded
4748       // instruction are always '1111'. Assembly language does permit specifying AL as a
4749       // predicate to mrrc2, but that makes no difference to the encoded instruction.
4750       if (Opc != ARM::MRRC2) {
4751         Ops.push_back(getAL(CurDAG, dl));
4752         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4753       }
4754 
4755       Ops.push_back(Chain);
4756 
4757       // Writes to two registers.
4758       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
4759 
4760       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
4761       return;
4762     }
4763     case Intrinsic::arm_ldaexd:
4764     case Intrinsic::arm_ldrexd: {
4765       SDLoc dl(N);
4766       SDValue Chain = N->getOperand(0);
4767       SDValue MemAddr = N->getOperand(2);
4768       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
4769 
4770       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
4771       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
4772                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
4773 
4774       // arm_ldrexd returns an i64 value in {i32, i32}
4775       std::vector<EVT> ResTys;
4776       if (isThumb) {
4777         ResTys.push_back(MVT::i32);
4778         ResTys.push_back(MVT::i32);
4779       } else
4780         ResTys.push_back(MVT::Untyped);
4781       ResTys.push_back(MVT::Other);
4782 
4783       // Place arguments in the right order.
4784       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
4785                        CurDAG->getRegister(0, MVT::i32), Chain};
4786       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4787       // Transfer memoperands.
4788       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4789       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4790 
4791       // Remap uses.
4792       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
4793       if (!SDValue(N, 0).use_empty()) {
4794         SDValue Result;
4795         if (isThumb)
4796           Result = SDValue(Ld, 0);
4797         else {
4798           SDValue SubRegIdx =
4799             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
4800           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4801               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4802           Result = SDValue(ResNode,0);
4803         }
4804         ReplaceUses(SDValue(N, 0), Result);
4805       }
4806       if (!SDValue(N, 1).use_empty()) {
4807         SDValue Result;
4808         if (isThumb)
4809           Result = SDValue(Ld, 1);
4810         else {
4811           SDValue SubRegIdx =
4812             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
4813           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
4814               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
4815           Result = SDValue(ResNode,0);
4816         }
4817         ReplaceUses(SDValue(N, 1), Result);
4818       }
4819       ReplaceUses(SDValue(N, 2), OutChain);
4820       CurDAG->RemoveDeadNode(N);
4821       return;
4822     }
4823     case Intrinsic::arm_stlexd:
4824     case Intrinsic::arm_strexd: {
4825       SDLoc dl(N);
4826       SDValue Chain = N->getOperand(0);
4827       SDValue Val0 = N->getOperand(2);
4828       SDValue Val1 = N->getOperand(3);
4829       SDValue MemAddr = N->getOperand(4);
4830 
4831       // Store-exclusive double returns an i32 value, which is the return
4832       // status of the issued store.
4833       const EVT ResTys[] = {MVT::i32, MVT::Other};
4834 
4835       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
4836       // Place arguments in the right order.
4837       SmallVector<SDValue, 7> Ops;
4838       if (isThumb) {
4839         Ops.push_back(Val0);
4840         Ops.push_back(Val1);
4841       } else
4842         // arm_strexd uses GPRPair.
4843         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
4844       Ops.push_back(MemAddr);
4845       Ops.push_back(getAL(CurDAG, dl));
4846       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
4847       Ops.push_back(Chain);
4848 
4849       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
4850       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
4851                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
4852 
4853       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
4854       // Transfer memoperands.
4855       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
4856       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4857 
4858       ReplaceNode(N, St);
4859       return;
4860     }
4861 
4862     case Intrinsic::arm_neon_vld1: {
4863       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
4864                                            ARM::VLD1d32, ARM::VLD1d64 };
4865       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4866                                            ARM::VLD1q32, ARM::VLD1q64};
4867       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
4868       return;
4869     }
4870 
4871     case Intrinsic::arm_neon_vld1x2: {
4872       static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
4873                                            ARM::VLD1q32, ARM::VLD1q64 };
4874       static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
4875                                            ARM::VLD1d16QPseudo,
4876                                            ARM::VLD1d32QPseudo,
4877                                            ARM::VLD1d64QPseudo };
4878       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4879       return;
4880     }
4881 
4882     case Intrinsic::arm_neon_vld1x3: {
4883       static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
4884                                            ARM::VLD1d16TPseudo,
4885                                            ARM::VLD1d32TPseudo,
4886                                            ARM::VLD1d64TPseudo };
4887       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
4888                                             ARM::VLD1q16LowTPseudo_UPD,
4889                                             ARM::VLD1q32LowTPseudo_UPD,
4890                                             ARM::VLD1q64LowTPseudo_UPD };
4891       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
4892                                             ARM::VLD1q16HighTPseudo,
4893                                             ARM::VLD1q32HighTPseudo,
4894                                             ARM::VLD1q64HighTPseudo };
4895       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4896       return;
4897     }
4898 
4899     case Intrinsic::arm_neon_vld1x4: {
4900       static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
4901                                            ARM::VLD1d16QPseudo,
4902                                            ARM::VLD1d32QPseudo,
4903                                            ARM::VLD1d64QPseudo };
4904       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
4905                                             ARM::VLD1q16LowQPseudo_UPD,
4906                                             ARM::VLD1q32LowQPseudo_UPD,
4907                                             ARM::VLD1q64LowQPseudo_UPD };
4908       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
4909                                             ARM::VLD1q16HighQPseudo,
4910                                             ARM::VLD1q32HighQPseudo,
4911                                             ARM::VLD1q64HighQPseudo };
4912       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4913       return;
4914     }
4915 
4916     case Intrinsic::arm_neon_vld2: {
4917       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
4918                                            ARM::VLD2d32, ARM::VLD1q64 };
4919       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
4920                                            ARM::VLD2q32Pseudo };
4921       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
4922       return;
4923     }
4924 
4925     case Intrinsic::arm_neon_vld3: {
4926       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
4927                                            ARM::VLD3d16Pseudo,
4928                                            ARM::VLD3d32Pseudo,
4929                                            ARM::VLD1d64TPseudo };
4930       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4931                                             ARM::VLD3q16Pseudo_UPD,
4932                                             ARM::VLD3q32Pseudo_UPD };
4933       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
4934                                             ARM::VLD3q16oddPseudo,
4935                                             ARM::VLD3q32oddPseudo };
4936       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
4937       return;
4938     }
4939 
4940     case Intrinsic::arm_neon_vld4: {
4941       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
4942                                            ARM::VLD4d16Pseudo,
4943                                            ARM::VLD4d32Pseudo,
4944                                            ARM::VLD1d64QPseudo };
4945       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
4946                                             ARM::VLD4q16Pseudo_UPD,
4947                                             ARM::VLD4q32Pseudo_UPD };
4948       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
4949                                             ARM::VLD4q16oddPseudo,
4950                                             ARM::VLD4q32oddPseudo };
4951       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
4952       return;
4953     }
4954 
4955     case Intrinsic::arm_neon_vld2dup: {
4956       static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4957                                            ARM::VLD2DUPd32, ARM::VLD1q64 };
4958       static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4959                                             ARM::VLD2DUPq16EvenPseudo,
4960                                             ARM::VLD2DUPq32EvenPseudo };
4961       static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
4962                                             ARM::VLD2DUPq16OddPseudo,
4963                                             ARM::VLD2DUPq32OddPseudo };
4964       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
4965                    DOpcodes, QOpcodes0, QOpcodes1);
4966       return;
4967     }
4968 
4969     case Intrinsic::arm_neon_vld3dup: {
4970       static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
4971                                            ARM::VLD3DUPd16Pseudo,
4972                                            ARM::VLD3DUPd32Pseudo,
4973                                            ARM::VLD1d64TPseudo };
4974       static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4975                                             ARM::VLD3DUPq16EvenPseudo,
4976                                             ARM::VLD3DUPq32EvenPseudo };
4977       static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
4978                                             ARM::VLD3DUPq16OddPseudo,
4979                                             ARM::VLD3DUPq32OddPseudo };
4980       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
4981                    DOpcodes, QOpcodes0, QOpcodes1);
4982       return;
4983     }
4984 
4985     case Intrinsic::arm_neon_vld4dup: {
4986       static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
4987                                            ARM::VLD4DUPd16Pseudo,
4988                                            ARM::VLD4DUPd32Pseudo,
4989                                            ARM::VLD1d64QPseudo };
4990       static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4991                                             ARM::VLD4DUPq16EvenPseudo,
4992                                             ARM::VLD4DUPq32EvenPseudo };
4993       static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
4994                                             ARM::VLD4DUPq16OddPseudo,
4995                                             ARM::VLD4DUPq32OddPseudo };
4996       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
4997                    DOpcodes, QOpcodes0, QOpcodes1);
4998       return;
4999     }
5000 
5001     case Intrinsic::arm_neon_vld2lane: {
5002       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
5003                                            ARM::VLD2LNd16Pseudo,
5004                                            ARM::VLD2LNd32Pseudo };
5005       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
5006                                            ARM::VLD2LNq32Pseudo };
5007       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
5008       return;
5009     }
5010 
5011     case Intrinsic::arm_neon_vld3lane: {
5012       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
5013                                            ARM::VLD3LNd16Pseudo,
5014                                            ARM::VLD3LNd32Pseudo };
5015       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
5016                                            ARM::VLD3LNq32Pseudo };
5017       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
5018       return;
5019     }
5020 
5021     case Intrinsic::arm_neon_vld4lane: {
5022       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
5023                                            ARM::VLD4LNd16Pseudo,
5024                                            ARM::VLD4LNd32Pseudo };
5025       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
5026                                            ARM::VLD4LNq32Pseudo };
5027       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
5028       return;
5029     }
5030 
5031     case Intrinsic::arm_neon_vst1: {
5032       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
5033                                            ARM::VST1d32, ARM::VST1d64 };
5034       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5035                                            ARM::VST1q32, ARM::VST1q64 };
5036       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
5037       return;
5038     }
5039 
5040     case Intrinsic::arm_neon_vst1x2: {
5041       static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
5042                                            ARM::VST1q32, ARM::VST1q64 };
5043       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
5044                                            ARM::VST1d16QPseudo,
5045                                            ARM::VST1d32QPseudo,
5046                                            ARM::VST1d64QPseudo };
5047       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5048       return;
5049     }
5050 
5051     case Intrinsic::arm_neon_vst1x3: {
5052       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
5053                                            ARM::VST1d16TPseudo,
5054                                            ARM::VST1d32TPseudo,
5055                                            ARM::VST1d64TPseudo };
5056       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
5057                                             ARM::VST1q16LowTPseudo_UPD,
5058                                             ARM::VST1q32LowTPseudo_UPD,
5059                                             ARM::VST1q64LowTPseudo_UPD };
5060       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
5061                                             ARM::VST1q16HighTPseudo,
5062                                             ARM::VST1q32HighTPseudo,
5063                                             ARM::VST1q64HighTPseudo };
5064       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5065       return;
5066     }
5067 
5068     case Intrinsic::arm_neon_vst1x4: {
5069       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
5070                                            ARM::VST1d16QPseudo,
5071                                            ARM::VST1d32QPseudo,
5072                                            ARM::VST1d64QPseudo };
5073       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
5074                                             ARM::VST1q16LowQPseudo_UPD,
5075                                             ARM::VST1q32LowQPseudo_UPD,
5076                                             ARM::VST1q64LowQPseudo_UPD };
5077       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
5078                                             ARM::VST1q16HighQPseudo,
5079                                             ARM::VST1q32HighQPseudo,
5080                                             ARM::VST1q64HighQPseudo };
5081       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5082       return;
5083     }
5084 
5085     case Intrinsic::arm_neon_vst2: {
5086       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
5087                                            ARM::VST2d32, ARM::VST1q64 };
5088       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
5089                                            ARM::VST2q32Pseudo };
5090       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
5091       return;
5092     }
5093 
5094     case Intrinsic::arm_neon_vst3: {
5095       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
5096                                            ARM::VST3d16Pseudo,
5097                                            ARM::VST3d32Pseudo,
5098                                            ARM::VST1d64TPseudo };
5099       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
5100                                             ARM::VST3q16Pseudo_UPD,
5101                                             ARM::VST3q32Pseudo_UPD };
5102       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
5103                                             ARM::VST3q16oddPseudo,
5104                                             ARM::VST3q32oddPseudo };
5105       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
5106       return;
5107     }
5108 
5109     case Intrinsic::arm_neon_vst4: {
5110       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
5111                                            ARM::VST4d16Pseudo,
5112                                            ARM::VST4d32Pseudo,
5113                                            ARM::VST1d64QPseudo };
5114       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
5115                                             ARM::VST4q16Pseudo_UPD,
5116                                             ARM::VST4q32Pseudo_UPD };
5117       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
5118                                             ARM::VST4q16oddPseudo,
5119                                             ARM::VST4q32oddPseudo };
5120       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
5121       return;
5122     }
5123 
5124     case Intrinsic::arm_neon_vst2lane: {
5125       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
5126                                            ARM::VST2LNd16Pseudo,
5127                                            ARM::VST2LNd32Pseudo };
5128       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
5129                                            ARM::VST2LNq32Pseudo };
5130       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
5131       return;
5132     }
5133 
5134     case Intrinsic::arm_neon_vst3lane: {
5135       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
5136                                            ARM::VST3LNd16Pseudo,
5137                                            ARM::VST3LNd32Pseudo };
5138       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
5139                                            ARM::VST3LNq32Pseudo };
5140       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
5141       return;
5142     }
5143 
5144     case Intrinsic::arm_neon_vst4lane: {
5145       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
5146                                            ARM::VST4LNd16Pseudo,
5147                                            ARM::VST4LNd32Pseudo };
5148       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
5149                                            ARM::VST4LNq32Pseudo };
5150       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
5151       return;
5152     }
5153 
5154     case Intrinsic::arm_mve_vldr_gather_base_wb:
5155     case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5156       static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5157                                          ARM::MVE_VLDRDU64_qi_pre};
5158       SelectMVE_WB(N, Opcodes,
5159                    IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5160       return;
5161     }
5162 
5163     case Intrinsic::arm_mve_vld2q: {
5164       static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5165       static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5166                                            ARM::MVE_VLD21_16};
5167       static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5168                                            ARM::MVE_VLD21_32};
5169       static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5170       SelectMVE_VLD(N, 2, Opcodes, false);
5171       return;
5172     }
5173 
5174     case Intrinsic::arm_mve_vld4q: {
5175       static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5176                                           ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5177       static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5178                                            ARM::MVE_VLD42_16,
5179                                            ARM::MVE_VLD43_16};
5180       static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5181                                            ARM::MVE_VLD42_32,
5182                                            ARM::MVE_VLD43_32};
5183       static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5184       SelectMVE_VLD(N, 4, Opcodes, false);
5185       return;
5186     }
5187     }
5188     break;
5189   }
5190 
5191   case ISD::INTRINSIC_WO_CHAIN: {
5192     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
5193     switch (IntNo) {
5194     default:
5195       break;
5196 
5197     // Scalar f32 -> bf16
5198     case Intrinsic::arm_neon_vcvtbfp2bf: {
5199       SDLoc dl(N);
5200       const SDValue &Src = N->getOperand(1);
5201       llvm::EVT DestTy = N->getValueType(0);
5202       SDValue Pred = getAL(CurDAG, dl);
5203       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5204       SDValue Ops[] = { Src, Src, Pred, Reg0 };
5205       CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
5206       return;
5207     }
5208 
5209     // Vector v4f32 -> v4bf16
5210     case Intrinsic::arm_neon_vcvtfp2bf: {
5211       SDLoc dl(N);
5212       const SDValue &Src = N->getOperand(1);
5213       SDValue Pred = getAL(CurDAG, dl);
5214       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5215       SDValue Ops[] = { Src, Pred, Reg0 };
5216       CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
5217       return;
5218     }
5219 
5220     case Intrinsic::arm_mve_urshrl:
5221       SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
5222       return;
5223     case Intrinsic::arm_mve_uqshll:
5224       SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
5225       return;
5226     case Intrinsic::arm_mve_srshrl:
5227       SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
5228       return;
5229     case Intrinsic::arm_mve_sqshll:
5230       SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
5231       return;
5232     case Intrinsic::arm_mve_uqrshll:
5233       SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
5234       return;
5235     case Intrinsic::arm_mve_sqrshrl:
5236       SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
5237       return;
5238 
5239     case Intrinsic::arm_mve_vadc:
5240     case Intrinsic::arm_mve_vadc_predicated:
5241       SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
5242                         IntNo == Intrinsic::arm_mve_vadc_predicated);
5243       return;
5244     case Intrinsic::arm_mve_vsbc:
5245     case Intrinsic::arm_mve_vsbc_predicated:
5246       SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
5247                         IntNo == Intrinsic::arm_mve_vsbc_predicated);
5248       return;
5249     case Intrinsic::arm_mve_vshlc:
5250     case Intrinsic::arm_mve_vshlc_predicated:
5251       SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
5252       return;
5253 
5254     case Intrinsic::arm_mve_vmlldava:
5255     case Intrinsic::arm_mve_vmlldava_predicated: {
5256       static const uint16_t OpcodesU[] = {
5257           ARM::MVE_VMLALDAVu16,   ARM::MVE_VMLALDAVu32,
5258           ARM::MVE_VMLALDAVau16,  ARM::MVE_VMLALDAVau32,
5259       };
5260       static const uint16_t OpcodesS[] = {
5261           ARM::MVE_VMLALDAVs16,   ARM::MVE_VMLALDAVs32,
5262           ARM::MVE_VMLALDAVas16,  ARM::MVE_VMLALDAVas32,
5263           ARM::MVE_VMLALDAVxs16,  ARM::MVE_VMLALDAVxs32,
5264           ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5265           ARM::MVE_VMLSLDAVs16,   ARM::MVE_VMLSLDAVs32,
5266           ARM::MVE_VMLSLDAVas16,  ARM::MVE_VMLSLDAVas32,
5267           ARM::MVE_VMLSLDAVxs16,  ARM::MVE_VMLSLDAVxs32,
5268           ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5269       };
5270       SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5271                         OpcodesS, OpcodesU);
5272       return;
5273     }
5274 
5275     case Intrinsic::arm_mve_vrmlldavha:
5276     case Intrinsic::arm_mve_vrmlldavha_predicated: {
5277       static const uint16_t OpcodesU[] = {
5278           ARM::MVE_VRMLALDAVHu32,  ARM::MVE_VRMLALDAVHau32,
5279       };
5280       static const uint16_t OpcodesS[] = {
5281           ARM::MVE_VRMLALDAVHs32,  ARM::MVE_VRMLALDAVHas32,
5282           ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5283           ARM::MVE_VRMLSLDAVHs32,  ARM::MVE_VRMLSLDAVHas32,
5284           ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5285       };
5286       SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5287                           OpcodesS, OpcodesU);
5288       return;
5289     }
5290 
5291     case Intrinsic::arm_mve_vidup:
5292     case Intrinsic::arm_mve_vidup_predicated: {
5293       static const uint16_t Opcodes[] = {
5294           ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5295       };
5296       SelectMVE_VxDUP(N, Opcodes, false,
5297                       IntNo == Intrinsic::arm_mve_vidup_predicated);
5298       return;
5299     }
5300 
5301     case Intrinsic::arm_mve_vddup:
5302     case Intrinsic::arm_mve_vddup_predicated: {
5303       static const uint16_t Opcodes[] = {
5304           ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5305       };
5306       SelectMVE_VxDUP(N, Opcodes, false,
5307                       IntNo == Intrinsic::arm_mve_vddup_predicated);
5308       return;
5309     }
5310 
5311     case Intrinsic::arm_mve_viwdup:
5312     case Intrinsic::arm_mve_viwdup_predicated: {
5313       static const uint16_t Opcodes[] = {
5314           ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5315       };
5316       SelectMVE_VxDUP(N, Opcodes, true,
5317                       IntNo == Intrinsic::arm_mve_viwdup_predicated);
5318       return;
5319     }
5320 
5321     case Intrinsic::arm_mve_vdwdup:
5322     case Intrinsic::arm_mve_vdwdup_predicated: {
5323       static const uint16_t Opcodes[] = {
5324           ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5325       };
5326       SelectMVE_VxDUP(N, Opcodes, true,
5327                       IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5328       return;
5329     }
5330 
5331     case Intrinsic::arm_cde_cx1d:
5332     case Intrinsic::arm_cde_cx1da:
5333     case Intrinsic::arm_cde_cx2d:
5334     case Intrinsic::arm_cde_cx2da:
5335     case Intrinsic::arm_cde_cx3d:
5336     case Intrinsic::arm_cde_cx3da: {
5337       bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5338                       IntNo == Intrinsic::arm_cde_cx2da ||
5339                       IntNo == Intrinsic::arm_cde_cx3da;
5340       size_t NumExtraOps;
5341       uint16_t Opcode;
5342       switch (IntNo) {
5343       case Intrinsic::arm_cde_cx1d:
5344       case Intrinsic::arm_cde_cx1da:
5345         NumExtraOps = 0;
5346         Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5347         break;
5348       case Intrinsic::arm_cde_cx2d:
5349       case Intrinsic::arm_cde_cx2da:
5350         NumExtraOps = 1;
5351         Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5352         break;
5353       case Intrinsic::arm_cde_cx3d:
5354       case Intrinsic::arm_cde_cx3da:
5355         NumExtraOps = 2;
5356         Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5357         break;
5358       default:
5359         llvm_unreachable("Unexpected opcode");
5360       }
5361       SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5362       return;
5363     }
5364     }
5365     break;
5366   }
5367 
5368   case ISD::ATOMIC_CMP_SWAP:
5369     SelectCMP_SWAP(N);
5370     return;
5371   }
5372 
5373   SelectCode(N);
5374 }
5375 
5376 // Inspect a register string of the form
5377 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5378 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5379 // and obtain the integer operands from them, adding these operands to the
5380 // provided vector.
5381 static void getIntOperandsFromRegisterString(StringRef RegString,
5382                                              SelectionDAG *CurDAG,
5383                                              const SDLoc &DL,
5384                                              std::vector<SDValue> &Ops) {
5385   SmallVector<StringRef, 5> Fields;
5386   RegString.split(Fields, ':');
5387 
5388   if (Fields.size() > 1) {
5389     bool AllIntFields = true;
5390 
5391     for (StringRef Field : Fields) {
5392       // Need to trim out leading 'cp' characters and get the integer field.
5393       unsigned IntField;
5394       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
5395       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
5396     }
5397 
5398     assert(AllIntFields &&
5399             "Unexpected non-integer value in special register string.");
5400     (void)AllIntFields;
5401   }
5402 }
5403 
5404 // Maps a Banked Register string to its mask value. The mask value returned is
5405 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5406 // mask operand, which expresses which register is to be used, e.g. r8, and in
5407 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5408 // was invalid.
5409 static inline int getBankedRegisterMask(StringRef RegString) {
5410   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5411   if (!TheReg)
5412      return -1;
5413   return TheReg->Encoding;
5414 }
5415 
5416 // The flags here are common to those allowed for apsr in the A class cores and
5417 // those allowed for the special registers in the M class cores. Returns a
5418 // value representing which flags were present, -1 if invalid.
5419 static inline int getMClassFlagsMask(StringRef Flags) {
5420   return StringSwitch<int>(Flags)
5421           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5422                          // correct when flags are not permitted
5423           .Case("g", 0x1)
5424           .Case("nzcvq", 0x2)
5425           .Case("nzcvqg", 0x3)
5426           .Default(-1);
5427 }
5428 
5429 // Maps MClass special registers string to its value for use in the
5430 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5431 // Returns -1 to signify that the string was invalid.
5432 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5433   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
5434   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5435   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
5436     return -1;
5437   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5438 }
5439 
5440 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
5441   // The mask operand contains the special register (R Bit) in bit 4, whether
5442   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5443   // bits 3-0 contains the fields to be accessed in the special register, set by
5444   // the flags provided with the register.
5445   int Mask = 0;
5446   if (Reg == "apsr") {
5447     // The flags permitted for apsr are the same flags that are allowed in
5448     // M class registers. We get the flag value and then shift the flags into
5449     // the correct place to combine with the mask.
5450     Mask = getMClassFlagsMask(Flags);
5451     if (Mask == -1)
5452       return -1;
5453     return Mask << 2;
5454   }
5455 
5456   if (Reg != "cpsr" && Reg != "spsr") {
5457     return -1;
5458   }
5459 
5460   // This is the same as if the flags were "fc"
5461   if (Flags.empty() || Flags == "all")
5462     return Mask | 0x9;
5463 
5464   // Inspect the supplied flags string and set the bits in the mask for
5465   // the relevant and valid flags allowed for cpsr and spsr.
5466   for (char Flag : Flags) {
5467     int FlagVal;
5468     switch (Flag) {
5469       case 'c':
5470         FlagVal = 0x1;
5471         break;
5472       case 'x':
5473         FlagVal = 0x2;
5474         break;
5475       case 's':
5476         FlagVal = 0x4;
5477         break;
5478       case 'f':
5479         FlagVal = 0x8;
5480         break;
5481       default:
5482         FlagVal = 0;
5483     }
5484 
5485     // This avoids allowing strings where the same flag bit appears twice.
5486     if (!FlagVal || (Mask & FlagVal))
5487       return -1;
5488     Mask |= FlagVal;
5489   }
5490 
5491   // If the register is spsr then we need to set the R bit.
5492   if (Reg == "spsr")
5493     Mask |= 0x10;
5494 
5495   return Mask;
5496 }
5497 
5498 // Lower the read_register intrinsic to ARM specific DAG nodes
5499 // using the supplied metadata string to select the instruction node to use
5500 // and the registers/masks to construct as operands for the node.
5501 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
5502   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
5503   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
5504   bool IsThumb2 = Subtarget->isThumb2();
5505   SDLoc DL(N);
5506 
5507   std::vector<SDValue> Ops;
5508   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5509 
5510   if (!Ops.empty()) {
5511     // If the special register string was constructed of fields (as defined
5512     // in the ACLE) then need to lower to MRC node (32 bit) or
5513     // MRRC node(64 bit), we can make the distinction based on the number of
5514     // operands we have.
5515     unsigned Opcode;
5516     SmallVector<EVT, 3> ResTypes;
5517     if (Ops.size() == 5){
5518       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
5519       ResTypes.append({ MVT::i32, MVT::Other });
5520     } else {
5521       assert(Ops.size() == 3 &&
5522               "Invalid number of fields in special register string.");
5523       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
5524       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
5525     }
5526 
5527     Ops.push_back(getAL(CurDAG, DL));
5528     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5529     Ops.push_back(N->getOperand(0));
5530     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
5531     return true;
5532   }
5533 
5534   std::string SpecialReg = RegString->getString().lower();
5535 
5536   int BankedReg = getBankedRegisterMask(SpecialReg);
5537   if (BankedReg != -1) {
5538     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
5539             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5540             N->getOperand(0) };
5541     ReplaceNode(
5542         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
5543                                   DL, MVT::i32, MVT::Other, Ops));
5544     return true;
5545   }
5546 
5547   // The VFP registers are read by creating SelectionDAG nodes with opcodes
5548   // corresponding to the register that is being read from. So we switch on the
5549   // string to find which opcode we need to use.
5550   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5551                     .Case("fpscr", ARM::VMRS)
5552                     .Case("fpexc", ARM::VMRS_FPEXC)
5553                     .Case("fpsid", ARM::VMRS_FPSID)
5554                     .Case("mvfr0", ARM::VMRS_MVFR0)
5555                     .Case("mvfr1", ARM::VMRS_MVFR1)
5556                     .Case("mvfr2", ARM::VMRS_MVFR2)
5557                     .Case("fpinst", ARM::VMRS_FPINST)
5558                     .Case("fpinst2", ARM::VMRS_FPINST2)
5559                     .Default(0);
5560 
5561   // If an opcode was found then we can lower the read to a VFP instruction.
5562   if (Opcode) {
5563     if (!Subtarget->hasVFP2Base())
5564       return false;
5565     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
5566       return false;
5567 
5568     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5569             N->getOperand(0) };
5570     ReplaceNode(N,
5571                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
5572     return true;
5573   }
5574 
5575   // If the target is M Class then need to validate that the register string
5576   // is an acceptable value, so check that a mask can be constructed from the
5577   // string.
5578   if (Subtarget->isMClass()) {
5579     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5580     if (SYSmValue == -1)
5581       return false;
5582 
5583     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5584                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5585                       N->getOperand(0) };
5586     ReplaceNode(
5587         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
5588     return true;
5589   }
5590 
5591   // Here we know the target is not M Class so we need to check if it is one
5592   // of the remaining possible values which are apsr, cpsr or spsr.
5593   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
5594     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5595             N->getOperand(0) };
5596     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
5597                                           DL, MVT::i32, MVT::Other, Ops));
5598     return true;
5599   }
5600 
5601   if (SpecialReg == "spsr") {
5602     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5603             N->getOperand(0) };
5604     ReplaceNode(
5605         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
5606                                   MVT::i32, MVT::Other, Ops));
5607     return true;
5608   }
5609 
5610   return false;
5611 }
5612 
5613 // Lower the write_register intrinsic to ARM specific DAG nodes
5614 // using the supplied metadata string to select the instruction node to use
5615 // and the registers/masks to use in the nodes
5616 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
5617   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
5618   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
5619   bool IsThumb2 = Subtarget->isThumb2();
5620   SDLoc DL(N);
5621 
5622   std::vector<SDValue> Ops;
5623   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
5624 
5625   if (!Ops.empty()) {
5626     // If the special register string was constructed of fields (as defined
5627     // in the ACLE) then need to lower to MCR node (32 bit) or
5628     // MCRR node(64 bit), we can make the distinction based on the number of
5629     // operands we have.
5630     unsigned Opcode;
5631     if (Ops.size() == 5) {
5632       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
5633       Ops.insert(Ops.begin()+2, N->getOperand(2));
5634     } else {
5635       assert(Ops.size() == 3 &&
5636               "Invalid number of fields in special register string.");
5637       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
5638       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
5639       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
5640     }
5641 
5642     Ops.push_back(getAL(CurDAG, DL));
5643     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
5644     Ops.push_back(N->getOperand(0));
5645 
5646     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5647     return true;
5648   }
5649 
5650   std::string SpecialReg = RegString->getString().lower();
5651   int BankedReg = getBankedRegisterMask(SpecialReg);
5652   if (BankedReg != -1) {
5653     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
5654             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5655             N->getOperand(0) };
5656     ReplaceNode(
5657         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
5658                                   DL, MVT::Other, Ops));
5659     return true;
5660   }
5661 
5662   // The VFP registers are written to by creating SelectionDAG nodes with
5663   // opcodes corresponding to the register that is being written. So we switch
5664   // on the string to find which opcode we need to use.
5665   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
5666                     .Case("fpscr", ARM::VMSR)
5667                     .Case("fpexc", ARM::VMSR_FPEXC)
5668                     .Case("fpsid", ARM::VMSR_FPSID)
5669                     .Case("fpinst", ARM::VMSR_FPINST)
5670                     .Case("fpinst2", ARM::VMSR_FPINST2)
5671                     .Default(0);
5672 
5673   if (Opcode) {
5674     if (!Subtarget->hasVFP2Base())
5675       return false;
5676     Ops = { N->getOperand(2), getAL(CurDAG, DL),
5677             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5678     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
5679     return true;
5680   }
5681 
5682   std::pair<StringRef, StringRef> Fields;
5683   Fields = StringRef(SpecialReg).rsplit('_');
5684   std::string Reg = Fields.first.str();
5685   StringRef Flags = Fields.second;
5686 
5687   // If the target was M Class then need to validate the special register value
5688   // and retrieve the mask for use in the instruction node.
5689   if (Subtarget->isMClass()) {
5690     int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
5691     if (SYSmValue == -1)
5692       return false;
5693 
5694     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
5695                       N->getOperand(2), getAL(CurDAG, DL),
5696                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
5697     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
5698     return true;
5699   }
5700 
5701   // We then check to see if a valid mask can be constructed for one of the
5702   // register string values permitted for the A and R class cores. These values
5703   // are apsr, spsr and cpsr; these are also valid on older cores.
5704   int Mask = getARClassRegisterMask(Reg, Flags);
5705   if (Mask != -1) {
5706     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
5707             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
5708             N->getOperand(0) };
5709     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
5710                                           DL, MVT::Other, Ops));
5711     return true;
5712   }
5713 
5714   return false;
5715 }
5716 
5717 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
5718   std::vector<SDValue> AsmNodeOperands;
5719   unsigned Flag, Kind;
5720   bool Changed = false;
5721   unsigned NumOps = N->getNumOperands();
5722 
5723   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
5724   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
5725   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
5726   // respectively. Since there is no constraint to explicitly specify a
5727   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
5728   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
5729   // them into a GPRPair.
5730 
5731   SDLoc dl(N);
5732   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
5733                                    : SDValue(nullptr,0);
5734 
5735   SmallVector<bool, 8> OpChanged;
5736   // Glue node will be appended late.
5737   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
5738     SDValue op = N->getOperand(i);
5739     AsmNodeOperands.push_back(op);
5740 
5741     if (i < InlineAsm::Op_FirstOperand)
5742       continue;
5743 
5744     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
5745       Flag = C->getZExtValue();
5746       Kind = InlineAsm::getKind(Flag);
5747     }
5748     else
5749       continue;
5750 
5751     // Immediate operands to inline asm in the SelectionDAG are modeled with
5752     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
5753     // the second is a constant with the value of the immediate. If we get here
5754     // and we have a Kind_Imm, skip the next operand, and continue.
5755     if (Kind == InlineAsm::Kind_Imm) {
5756       SDValue op = N->getOperand(++i);
5757       AsmNodeOperands.push_back(op);
5758       continue;
5759     }
5760 
5761     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
5762     if (NumRegs)
5763       OpChanged.push_back(false);
5764 
5765     unsigned DefIdx = 0;
5766     bool IsTiedToChangedOp = false;
5767     // If it's a use that is tied with a previous def, it has no
5768     // reg class constraint.
5769     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
5770       IsTiedToChangedOp = OpChanged[DefIdx];
5771 
5772     // Memory operands to inline asm in the SelectionDAG are modeled with two
5773     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
5774     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
5775     // it doesn't get misinterpreted), and continue. We do this here because
5776     // it's important to update the OpChanged array correctly before moving on.
5777     if (Kind == InlineAsm::Kind_Mem) {
5778       SDValue op = N->getOperand(++i);
5779       AsmNodeOperands.push_back(op);
5780       continue;
5781     }
5782 
5783     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
5784         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
5785       continue;
5786 
5787     unsigned RC;
5788     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
5789     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
5790         || NumRegs != 2)
5791       continue;
5792 
5793     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
5794     SDValue V0 = N->getOperand(i+1);
5795     SDValue V1 = N->getOperand(i+2);
5796     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
5797     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
5798     SDValue PairedReg;
5799     MachineRegisterInfo &MRI = MF->getRegInfo();
5800 
5801     if (Kind == InlineAsm::Kind_RegDef ||
5802         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
5803       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
5804       // the original GPRs.
5805 
5806       Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5807       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5808       SDValue Chain = SDValue(N,0);
5809 
5810       SDNode *GU = N->getGluedUser();
5811       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
5812                                                Chain.getValue(1));
5813 
5814       // Extract values from a GPRPair reg and copy to the original GPR reg.
5815       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
5816                                                     RegCopy);
5817       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
5818                                                     RegCopy);
5819       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
5820                                         RegCopy.getValue(1));
5821       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
5822 
5823       // Update the original glue user.
5824       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
5825       Ops.push_back(T1.getValue(1));
5826       CurDAG->UpdateNodeOperands(GU, Ops);
5827     }
5828     else {
5829       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
5830       // GPRPair and then pass the GPRPair to the inline asm.
5831       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
5832 
5833       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
5834       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
5835                                           Chain.getValue(1));
5836       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
5837                                           T0.getValue(1));
5838       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
5839 
5840       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
5841       // i32 VRs of inline asm with it.
5842       Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
5843       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
5844       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
5845 
5846       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5847       Glue = Chain.getValue(1);
5848     }
5849 
5850     Changed = true;
5851 
5852     if(PairedReg.getNode()) {
5853       OpChanged[OpChanged.size() -1 ] = true;
5854       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
5855       if (IsTiedToChangedOp)
5856         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
5857       else
5858         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
5859       // Replace the current flag.
5860       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
5861           Flag, dl, MVT::i32);
5862       // Add the new register node and skip the original two GPRs.
5863       AsmNodeOperands.push_back(PairedReg);
5864       // Skip the next two GPRs.
5865       i += 2;
5866     }
5867   }
5868 
5869   if (Glue.getNode())
5870     AsmNodeOperands.push_back(Glue);
5871   if (!Changed)
5872     return false;
5873 
5874   SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
5875       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
5876   New->setNodeId(-1);
5877   ReplaceNode(N, New.getNode());
5878   return true;
5879 }
5880 
5881 
5882 bool ARMDAGToDAGISel::
5883 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
5884                              std::vector<SDValue> &OutOps) {
5885   switch(ConstraintID) {
5886   default:
5887     llvm_unreachable("Unexpected asm memory constraint");
5888   case InlineAsm::Constraint_m:
5889   case InlineAsm::Constraint_o:
5890   case InlineAsm::Constraint_Q:
5891   case InlineAsm::Constraint_Um:
5892   case InlineAsm::Constraint_Un:
5893   case InlineAsm::Constraint_Uq:
5894   case InlineAsm::Constraint_Us:
5895   case InlineAsm::Constraint_Ut:
5896   case InlineAsm::Constraint_Uv:
5897   case InlineAsm::Constraint_Uy:
5898     // Require the address to be in a register.  That is safe for all ARM
5899     // variants and it is hard to do anything much smarter without knowing
5900     // how the operand is used.
5901     OutOps.push_back(Op);
5902     return false;
5903   }
5904   return true;
5905 }
5906 
5907 /// createARMISelDag - This pass converts a legalized DAG into a
5908 /// ARM-specific DAG, ready for instruction scheduling.
5909 ///
5910 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5911                                      CodeGenOpt::Level OptLevel) {
5912   return new ARMDAGToDAGISel(TM, OptLevel);
5913 }
5914