xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp (revision 7029da5c36f2d3cf6bb6c81bf551229f416399e8)
1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the ARM target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARM.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/StringSwitch.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineFunction.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/CodeGen/TargetLowering.h"
26 #include "llvm/IR/CallingConv.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetOptions.h"
36 
using namespace llvm;

#define DEBUG_TYPE "arm-isel"

// Debugging escape hatch: when set, SelectImmShifterOperand /
// SelectRegShifterOperand below refuse to match, so no shift is ever folded
// into a shifter operand.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));
45 
46 //===--------------------------------------------------------------------===//
47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
48 /// instructions for SelectionDAG operations.
49 ///
50 namespace {
51 
52 class ARMDAGToDAGISel : public SelectionDAGISel {
53   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
54   /// make the right decision when generating code for different targets.
55   const ARMSubtarget *Subtarget;
56 
57 public:
58   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
59       : SelectionDAGISel(tm, OptLevel) {}
60 
61   bool runOnMachineFunction(MachineFunction &MF) override {
62     // Reset the subtarget each time through.
63     Subtarget = &MF.getSubtarget<ARMSubtarget>();
64     SelectionDAGISel::runOnMachineFunction(MF);
65     return true;
66   }
67 
68   StringRef getPassName() const override { return "ARM Instruction Selection"; }
69 
70   void PreprocessISelDAG() override;
71 
72   /// getI32Imm - Return a target constant of type i32 with the specified
73   /// value.
74   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
75     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
76   }
77 
78   void Select(SDNode *N) override;
79 
80   bool hasNoVMLxHazardUse(SDNode *N) const;
81   bool isShifterOpProfitable(const SDValue &Shift,
82                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
83   bool SelectRegShifterOperand(SDValue N, SDValue &A,
84                                SDValue &B, SDValue &C,
85                                bool CheckProfitability = true);
86   bool SelectImmShifterOperand(SDValue N, SDValue &A,
87                                SDValue &B, bool CheckProfitability = true);
88   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
89                                     SDValue &B, SDValue &C) {
90     // Don't apply the profitability check
91     return SelectRegShifterOperand(N, A, B, C, false);
92   }
93   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
94                                     SDValue &B) {
95     // Don't apply the profitability check
96     return SelectImmShifterOperand(N, A, B, false);
97   }
98 
99   bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
100 
101   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
102   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
103 
104   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
105     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
106     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
107     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
108     return true;
109   }
110 
111   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
112                              SDValue &Offset, SDValue &Opc);
113   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
114                              SDValue &Offset, SDValue &Opc);
115   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
116                              SDValue &Offset, SDValue &Opc);
117   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
118   bool SelectAddrMode3(SDValue N, SDValue &Base,
119                        SDValue &Offset, SDValue &Opc);
120   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
121                              SDValue &Offset, SDValue &Opc);
122   bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
123   bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
124   bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
125   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
126   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
127 
128   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
129 
130   // Thumb Addressing Modes:
131   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
132   bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
133   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
134                                 SDValue &OffImm);
135   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
136                                  SDValue &OffImm);
137   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
138                                  SDValue &OffImm);
139   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
140                                  SDValue &OffImm);
141   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
142 
143   // Thumb 2 Addressing Modes:
144   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
145   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
146                             SDValue &OffImm);
147   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
148                                  SDValue &OffImm);
149   template<unsigned Shift>
150   bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
151                             SDValue &OffImm);
152   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
153                              SDValue &OffReg, SDValue &ShImm);
154   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
155 
156   inline bool is_so_imm(unsigned Imm) const {
157     return ARM_AM::getSOImmVal(Imm) != -1;
158   }
159 
160   inline bool is_so_imm_not(unsigned Imm) const {
161     return ARM_AM::getSOImmVal(~Imm) != -1;
162   }
163 
164   inline bool is_t2_so_imm(unsigned Imm) const {
165     return ARM_AM::getT2SOImmVal(Imm) != -1;
166   }
167 
168   inline bool is_t2_so_imm_not(unsigned Imm) const {
169     return ARM_AM::getT2SOImmVal(~Imm) != -1;
170   }
171 
172   // Include the pieces autogenerated from the target description.
173 #include "ARMGenDAGISel.inc"
174 
175 private:
176   void transferMemOperands(SDNode *Src, SDNode *Dst);
177 
178   /// Indexed (pre/post inc/dec) load matching code for ARM.
179   bool tryARMIndexedLoad(SDNode *N);
180   bool tryT1IndexedLoad(SDNode *N);
181   bool tryT2IndexedLoad(SDNode *N);
182 
183   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
184   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
185   /// loads of D registers and even subregs and odd subregs of Q registers.
186   /// For NumVecs <= 2, QOpcodes1 is not used.
187   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
188                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
189                  const uint16_t *QOpcodes1);
190 
191   /// SelectVST - Select NEON store intrinsics.  NumVecs should
192   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
193   /// stores of D registers and even subregs and odd subregs of Q registers.
194   /// For NumVecs <= 2, QOpcodes1 is not used.
195   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
196                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
197                  const uint16_t *QOpcodes1);
198 
199   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
200   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
201   /// load/store of D registers and Q registers.
202   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
203                        unsigned NumVecs, const uint16_t *DOpcodes,
204                        const uint16_t *QOpcodes);
205 
206   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
207   /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
208   /// for loading D registers.
209   void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
210                     unsigned NumVecs, const uint16_t *DOpcodes,
211                     const uint16_t *QOpcodes0 = nullptr,
212                     const uint16_t *QOpcodes1 = nullptr);
213 
214   /// Try to select SBFX/UBFX instructions for ARM.
215   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
216 
217   // Select special operations if node forms integer ABS pattern
218   bool tryABSOp(SDNode *N);
219 
220   bool tryReadRegister(SDNode *N);
221   bool tryWriteRegister(SDNode *N);
222 
223   bool tryInlineAsm(SDNode *N);
224 
225   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
226 
227   void SelectCMP_SWAP(SDNode *N);
228 
229   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
230   /// inline asm expressions.
231   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
232                                     std::vector<SDValue> &OutOps) override;
233 
234   // Form pairs of consecutive R, S, D, or Q registers.
235   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
236   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
237   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
238   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
239 
240   // Form sequences of 4 consecutive S, D, or Q registers.
241   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
242   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
243   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
244 
245   // Get the alignment operand for a NEON VLD or VST instruction.
246   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
247                         bool is64BitVector);
248 
249   /// Returns the number of instructions required to materialize the given
250   /// constant in a register, or 3 if a literal pool load is needed.
251   unsigned ConstantMaterializationCost(unsigned Val) const;
252 
253   /// Checks if N is a multiplication by a constant where we can extract out a
254   /// power of two from the constant so that it can be used in a shift, but only
255   /// if it simplifies the materialization of the constant. Returns true if it
256   /// is, and assigns to PowerOfTwo the power of two that should be extracted
257   /// out and to NewMulConst the new constant to be multiplied by.
258   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
259                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
260 
261   /// Replace N with M in CurDAG, in a way that also ensures that M gets
262   /// selected when N would have been selected.
263   void replaceDAGValue(const SDValue &N, SDValue M);
264 };
265 }
266 
267 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
268 /// operand. If so Imm will receive the 32-bit value.
269 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
270   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
271     Imm = cast<ConstantSDNode>(N)->getZExtValue();
272     return true;
273   }
274   return false;
275 }
276 
// isInt32Immediate - SDValue overload: tests whether the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
282 
// isOpcWithIntImmediate - This method tests to see if the node has the
// specified opcode and that its second operand is an immediate integer.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
290 
291 /// Check whether a particular node is a constant value representable as
292 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
293 ///
294 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
295 static bool isScaledConstantInRange(SDValue Node, int Scale,
296                                     int RangeMin, int RangeMax,
297                                     int &ScaledConstant) {
298   assert(Scale > 0 && "Invalid scale!");
299 
300   // Check that this is a constant.
301   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
302   if (!C)
303     return false;
304 
305   ScaledConstant = (int) C->getZExtValue();
306   if ((ScaledConstant % Scale) != 0)
307     return false;
308 
309   ScaledConstant /= Scale;
310   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
311 }
312 
void ARMDAGToDAGISel::PreprocessISelDAG() {
  // The rewrite below is only worthwhile when the (and (srl ...)) can become
  // a UBFX, which requires the v6T2 bitfield instructions.
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (N->getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    unsigned And_imm = 0;
    // Canonicalize so that the (and X, imm) operand, if present, is N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    // No AND-with-immediate operand (or a zero mask): nothing to do.
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After stripping the trailing zeros, the mask must be contiguous ones
    // (i.e. mask+1 is a power of two).
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(N, N0, N1);
  }
}
395 
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0, don't bother modelling the hazard.
  if (OptLevel == CodeGenOpt::None)
    return true;

  // Subtarget has no VMLx hazard to worry about.
  if (!Subtarget->hasVMLxHazards())
    return true;

  // With multiple uses we cannot reason about the single consumer below.
  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  // Feeding a copy-to-register does not create the RAW hazard.
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    // Stores and FP->GPR moves are safe consumers.
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  // Unknown (non-machine) consumer: conservatively assume a hazard.
  return false;
}
440 
441 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
442                                             ARM_AM::ShiftOpc ShOpcVal,
443                                             unsigned ShAmt) {
444   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
445     return true;
446   if (Shift.hasOneUse())
447     return true;
448   // R << 2 is free.
449   return ShOpcVal == ARM_AM::lsl &&
450          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
451 }
452 
/// Return the number of instructions (1..3) required to materialize Val in a
/// register; 3 means a literal-pool load is needed. The checks are ordered
/// from cheapest to most expensive, so the first match wins.
unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
  if (Subtarget->isThumb()) {
    if (Val <= 255) return 1;                               // MOV
    if (Subtarget->hasV6T2Ops() &&
        (Val <= 0xffff ||                                   // MOV
         ARM_AM::getT2SOImmVal(Val) != -1 ||                // MOVW
         ARM_AM::getT2SOImmVal(~Val) != -1))                // MVN
      return 1;
    if (Val <= 510) return 2;                               // MOV + ADDi8
    if (~Val <= 255) return 2;                              // MOV + MVN
    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
  }
  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
  return 3; // Literal pool load
}
473 
474 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
475                                              unsigned MaxShift,
476                                              unsigned &PowerOfTwo,
477                                              SDValue &NewMulConst) const {
478   assert(N.getOpcode() == ISD::MUL);
479   assert(MaxShift > 0);
480 
481   // If the multiply is used in more than one place then changing the constant
482   // will make other uses incorrect, so don't.
483   if (!N.hasOneUse()) return false;
484   // Check if the multiply is by a constant
485   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
486   if (!MulConst) return false;
487   // If the constant is used in more than one place then modifying it will mean
488   // we need to materialize two constants instead of one, which is a bad idea.
489   if (!MulConst->hasOneUse()) return false;
490   unsigned MulConstVal = MulConst->getZExtValue();
491   if (MulConstVal == 0) return false;
492 
493   // Find the largest power of 2 that MulConstVal is a multiple of
494   PowerOfTwo = MaxShift;
495   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
496     --PowerOfTwo;
497     if (PowerOfTwo == 0) return false;
498   }
499 
500   // Only optimise if the new cost is better
501   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
502   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
503   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
504   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
505   return NewCost < OldCost;
506 }
507 
/// Replace N with M in CurDAG, in a way that also ensures that M gets
/// selected when N would have been selected: M is repositioned to N's slot
/// in the DAG's node order before all of N's uses are rewritten to M.
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
512 
/// Match a register shifted by a constant amount (BaseReg shOp #imm) as a
/// shifter operand. Also rewrites profitable multiply-by-constant nodes into
/// a shift form.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // Hold on to N across the constant replacement: rewriting its operand
      // may CSE/replace the node itself.
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // Only constant shift amounts belong here; variable amounts are handled by
  // SelectRegShifterOperand.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
551 
552 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
553                                               SDValue &BaseReg,
554                                               SDValue &ShReg,
555                                               SDValue &Opc,
556                                               bool CheckProfitability) {
557   if (DisableShifterOp)
558     return false;
559 
560   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
561 
562   // Don't match base register only case. That is matched to a separate
563   // lower complexity pattern with explicit register operand.
564   if (ShOpcVal == ARM_AM::no_shift) return false;
565 
566   BaseReg = N.getOperand(0);
567   unsigned ShImmVal = 0;
568   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
569   if (RHS) return false;
570 
571   ShReg = N.getOperand(1);
572   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
573     return false;
574   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
575                                   SDLoc(N), MVT::i32);
576   return true;
577 }
578 
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  // OR is equivalent to ADD exactly when the two operands share no set bits.
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
586 
587 
588 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
589                                           SDValue &Base,
590                                           SDValue &OffImm) {
591   // Match simple R + imm12 operands.
592 
593   // Base only.
594   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
595       !CurDAG->isBaseWithConstantOffset(N)) {
596     if (N.getOpcode() == ISD::FrameIndex) {
597       // Match frame index.
598       int FI = cast<FrameIndexSDNode>(N)->getIndex();
599       Base = CurDAG->getTargetFrameIndex(
600           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
601       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
602       return true;
603     }
604 
605     if (N.getOpcode() == ARMISD::Wrapper &&
606         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
607         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
608         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
609       Base = N.getOperand(0);
610     } else
611       Base = N;
612     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
613     return true;
614   }
615 
616   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
617     int RHSC = (int)RHS->getSExtValue();
618     if (N.getOpcode() == ISD::SUB)
619       RHSC = -RHSC;
620 
621     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
622       Base   = N.getOperand(0);
623       if (Base.getOpcode() == ISD::FrameIndex) {
624         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
625         Base = CurDAG->getTargetFrameIndex(
626             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
627       }
628       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
629       return true;
630     }
631   }
632 
633   // Base only.
634   Base = N;
635   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
636   return true;
637 }
638 
639 
640 
641 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
642                                       SDValue &Opc) {
643   if (N.getOpcode() == ISD::MUL &&
644       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
645     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
646       // X * [3,5,9] -> X + X * [2,4,8] etc.
647       int RHSC = (int)RHS->getZExtValue();
648       if (RHSC & 1) {
649         RHSC = RHSC & ~1;
650         ARM_AM::AddrOpc AddSub = ARM_AM::add;
651         if (RHSC < 0) {
652           AddSub = ARM_AM::sub;
653           RHSC = - RHSC;
654         }
655         if (isPowerOf2_32(RHSC)) {
656           unsigned ShAmt = Log2_32(RHSC);
657           Base = Offset = N.getOperand(0);
658           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
659                                                             ARM_AM::lsl),
660                                           SDLoc(N), MVT::i32);
661           return true;
662         }
663       }
664     }
665   }
666 
667   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
668       // ISD::OR that is equivalent to an ISD::ADD.
669       !CurDAG->isBaseWithConstantOffset(N))
670     return false;
671 
672   // Leave simple R +/- imm12 operands for LDRi12
673   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
674     int RHSC;
675     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
676                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
677       return false;
678   }
679 
680   // Otherwise this is R +/- [possibly shifted] R.
681   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
682   ARM_AM::ShiftOpc ShOpcVal =
683     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
684   unsigned ShAmt = 0;
685 
686   Base   = N.getOperand(0);
687   Offset = N.getOperand(1);
688 
689   if (ShOpcVal != ARM_AM::no_shift) {
690     // Check to see if the RHS of the shift is a constant, if not, we can't fold
691     // it.
692     if (ConstantSDNode *Sh =
693            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
694       ShAmt = Sh->getZExtValue();
695       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
696         Offset = N.getOperand(1).getOperand(0);
697       else {
698         ShAmt = 0;
699         ShOpcVal = ARM_AM::no_shift;
700       }
701     } else {
702       ShOpcVal = ARM_AM::no_shift;
703     }
704   }
705 
706   // Try matching (R shl C) + (R).
707   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
708       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
709         N.getOperand(0).hasOneUse())) {
710     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
711     if (ShOpcVal != ARM_AM::no_shift) {
712       // Check to see if the RHS of the shift is a constant, if not, we can't
713       // fold it.
714       if (ConstantSDNode *Sh =
715           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
716         ShAmt = Sh->getZExtValue();
717         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
718           Offset = N.getOperand(0).getOperand(0);
719           Base = N.getOperand(1);
720         } else {
721           ShAmt = 0;
722           ShOpcVal = ARM_AM::no_shift;
723         }
724       } else {
725         ShOpcVal = ARM_AM::no_shift;
726       }
727     }
728   }
729 
730   // If Offset is a multiply-by-constant and it's profitable to extract a shift
731   // and use it in a shifted operand do so.
732   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
733     unsigned PowerOfTwo = 0;
734     SDValue NewMulConst;
735     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
736       HandleSDNode Handle(Offset);
737       replaceDAGValue(Offset.getOperand(1), NewMulConst);
738       Offset = Handle.getValue();
739       ShAmt = PowerOfTwo;
740       ShOpcVal = ARM_AM::lsl;
741     }
742   }
743 
744   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
745                                   SDLoc(N), MVT::i32);
746   return true;
747 }
748 
/// Match the register form of a pre/post-indexed addrmode2 offset for the
/// indexed load/store Op: a (possibly shifted) register, but not a plain
/// 12-bit constant (that form is handled by the immediate variants below).
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  // A 12-bit immediate offset should use the immediate forms instead.
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        // Unprofitable: leave the shift as a separate instruction.
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
784 
785 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
786                                             SDValue &Offset, SDValue &Opc) {
787   unsigned Opcode = Op->getOpcode();
788   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
789     ? cast<LoadSDNode>(Op)->getAddressingMode()
790     : cast<StoreSDNode>(Op)->getAddressingMode();
791   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
792     ? ARM_AM::add : ARM_AM::sub;
793   int Val;
794   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
795     if (AddSub == ARM_AM::sub) Val *= -1;
796     Offset = CurDAG->getRegister(0, MVT::i32);
797     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
798     return true;
799   }
800 
801   return false;
802 }
803 
804 
805 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
806                                             SDValue &Offset, SDValue &Opc) {
807   unsigned Opcode = Op->getOpcode();
808   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
809     ? cast<LoadSDNode>(Op)->getAddressingMode()
810     : cast<StoreSDNode>(Op)->getAddressingMode();
811   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
812     ? ARM_AM::add : ARM_AM::sub;
813   int Val;
814   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
815     Offset = CurDAG->getRegister(0, MVT::i32);
816     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
817                                                       ARM_AM::no_shift),
818                                     SDLoc(Op), MVT::i32);
819     return true;
820   }
821 
822   return false;
823 }
824 
/// SelectAddrOffsetNone - Match an address with no offset: the operand is
/// used directly as the base register. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
829 
/// SelectAddrMode3 - Match an addrmode3 address: base register plus either a
/// register offset or an 8-bit immediate. Always succeeds, falling back to
/// the base + register form when the constant does not fit.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C  is canonicalize to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // No offset to fold: use N itself as the base with a zero immediate.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // The AM3 immediate field is unsigned; the sign goes into the opcode.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant out of range for the immediate field: use the register form.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
883 
/// SelectAddrMode3Offset - Select the offset operand for a pre/post-indexed
/// addrmode3 load/store, preferring the 8-bit immediate form over a register
/// offset. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // Incrementing modes add the offset; decrementing modes subtract it.
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  // Fall back to a register offset.
  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
905 
/// IsAddressingMode5 - Shared matcher for addrmode5 and its FP16 variant:
/// base register plus a scaled 8-bit immediate. The scale is 4 bytes for
/// addrmode5 and 2 bytes when FP16 is true. Always succeeds, falling back to
/// a zero offset when the constant cannot be folded.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper, except around target addresses that must be
      // materialized separately.
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // The immediate field is unsigned; the sign goes into the opcode.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant out of range: use the whole expression as the base with a zero
  // offset.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
964 
/// SelectAddrMode5 - Match an addrmode5 address: base + imm8 scaled by 4.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}
969 
/// SelectAddrMode5FP16 - Match the FP16 variant of addrmode5: base + imm8
/// scaled by 2.
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
974 
/// SelectAddrMode6 - Match an addrmode6 address and compute the alignment
/// operand from the parent memory operation. Always succeeds; the address is
/// used as-is.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  // 0 means "no alignment guarantee beyond the element size".
  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1003 
1004 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1005                                             SDValue &Offset) {
1006   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1007   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1008   if (AM != ISD::POST_INC)
1009     return false;
1010   Offset = N;
1011   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1012     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1013       Offset = CurDAG->getRegister(0, MVT::i32);
1014   }
1015   return true;
1016 }
1017 
1018 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1019                                        SDValue &Offset, SDValue &Label) {
1020   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1021     Offset = N.getOperand(0);
1022     SDValue N1 = N.getOperand(1);
1023     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
1024                                       SDLoc(N), MVT::i32);
1025     return true;
1026   }
1027 
1028   return false;
1029 }
1030 
1031 
1032 //===----------------------------------------------------------------------===//
1033 //                         Thumb Addressing Modes
1034 //===----------------------------------------------------------------------===//
1035 
1036 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1037   // Negative numbers are difficult to materialise in thumb1. If we are
1038   // selecting the add of a negative, instead try to select ri with a zero
1039   // offset, so create the add node directly which will become a sub.
1040   if (N.getOpcode() != ISD::ADD)
1041     return false;
1042 
1043   // Look for an imm which is not legal for ld/st, but is legal for sub.
1044   if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1045     return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1046 
1047   return false;
1048 }
1049 
1050 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1051                                                 SDValue &Offset) {
1052   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1053     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
1054     if (!NC || !NC->isNullValue())
1055       return false;
1056 
1057     Base = Offset = N;
1058     return true;
1059   }
1060 
1061   Base = N.getOperand(0);
1062   Offset = N.getOperand(1);
1063   return true;
1064 }
1065 
1066 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
1067                                             SDValue &Offset) {
1068   if (shouldUseZeroOffsetLdSt(N))
1069     return false; // Select ri instead
1070   return SelectThumbAddrModeRRSext(N, Base, Offset);
1071 }
1072 
/// SelectThumbAddrModeImm5S - Match a Thumb1 base + (imm5 * Scale) address.
/// Returns false when a register-offset form should be preferred instead.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // Let the add of a negative constant be selected (as a sub) and address
    // it with a zero offset.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Strip the wrapper, except around target addresses that must be
      // materialized separately.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1110 
/// SelectThumbAddrModeImm5S4 - Thumb1 base + imm5 scaled by 4 (word
/// accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1116 
/// SelectThumbAddrModeImm5S2 - Thumb1 base + imm5 scaled by 2 (halfword
/// accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1122 
/// SelectThumbAddrModeImm5S1 - Thumb1 base + imm5, unscaled (byte accesses).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1128 
/// SelectThumbAddrModeSP - Match a Thumb1 frame-index-relative address
/// (FI + imm8 * 4). As a side effect, may raise the frame object's alignment
/// to 4 so the resulting offset stays a multiple of 4.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlignment(FI) < 4)
      MFI.setObjectAlignment(FI, 4);
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlignment(FI) < 4)
          MFI.setObjectAlignment(FI, 4);
        if (MFI.getObjectAlignment(FI) >= 4) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1174 
1175 
1176 //===----------------------------------------------------------------------===//
1177 //                        Thumb 2 Addressing Modes
1178 //===----------------------------------------------------------------------===//
1179 
1180 
/// SelectT2AddrModeImm12 - Match a Thumb2 base + imm12 (unsigned) address.
/// Returns false when the (R - imm8) form should be left to t2LDRi8, or when
/// a literal-pool load (t2LDRpci) is preferable.
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1236 
/// SelectT2AddrModeImm8 - Match a Thumb2 base + negative imm8 address.
/// Non-negative offsets are left for the imm12 form.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // A subtracted constant is the same as adding its negation.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1263 
1264 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1265                                                  SDValue &OffImm){
1266   unsigned Opcode = Op->getOpcode();
1267   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1268     ? cast<LoadSDNode>(Op)->getAddressingMode()
1269     : cast<StoreSDNode>(Op)->getAddressingMode();
1270   int RHSC;
1271   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1272     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1273       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
1274       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1275     return true;
1276   }
1277 
1278   return false;
1279 }
1280 
/// SelectT2AddrModeImm7 - Match base plus a signed 7-bit immediate shifted
/// left by Shift. Always succeeds, falling back to base-only with a zero
/// offset.
template<unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB ||
      CurDAG->isBaseWithConstantOffset(N)) {
    if (auto RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int RHSC = (int)RHS->getZExtValue();
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;

      // The offset must be a multiple of (1 << Shift) whose scaled value
      // fits in 7 signed bits.
      if (isShiftedInt<7, Shift>(RHSC)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
        }
        OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
        return true;
      }
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1309 
/// SelectT2AddrModeSoReg - Match a Thumb2 (R + R) or (R + (R << [1,2,3]))
/// address. Constant offsets that fit the imm12/imm8 forms are rejected so
/// those patterns can match instead.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        // Shift amount not encodable (or not profitable): keep the shift node
        // itself as the offset register.
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1369 
/// SelectT2AddrModeExclusive - Match the base + imm8*4 address form used by
/// ldrex/strex. Must always succeed, so an unsuitable offset simply degrades
/// to a base-only match with a zero immediate.
bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  // The encoded immediate is the byte offset divided by 4, so the offset
  // must be a multiple of 4 no larger than 1020.
  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}
1398 
1399 //===--------------------------------------------------------------------===//
1400 
/// getAL - Returns an ARMCC::AL (always) condition-code immediate node, used
/// as the predicate operand when building predicable machine instructions.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
1405 
/// transferMemOperands - Copy the MachineMemOperand of memory node N onto the
/// newly created machine node Result.
void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
1410 
1411 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1412   LoadSDNode *LD = cast<LoadSDNode>(N);
1413   ISD::MemIndexedMode AM = LD->getAddressingMode();
1414   if (AM == ISD::UNINDEXED)
1415     return false;
1416 
1417   EVT LoadedVT = LD->getMemoryVT();
1418   SDValue Offset, AMOpc;
1419   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1420   unsigned Opcode = 0;
1421   bool Match = false;
1422   if (LoadedVT == MVT::i32 && isPre &&
1423       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1424     Opcode = ARM::LDR_PRE_IMM;
1425     Match = true;
1426   } else if (LoadedVT == MVT::i32 && !isPre &&
1427       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1428     Opcode = ARM::LDR_POST_IMM;
1429     Match = true;
1430   } else if (LoadedVT == MVT::i32 &&
1431       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1432     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1433     Match = true;
1434 
1435   } else if (LoadedVT == MVT::i16 &&
1436              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1437     Match = true;
1438     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1439       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1440       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1441   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1442     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1443       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1444         Match = true;
1445         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1446       }
1447     } else {
1448       if (isPre &&
1449           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1450         Match = true;
1451         Opcode = ARM::LDRB_PRE_IMM;
1452       } else if (!isPre &&
1453                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1454         Match = true;
1455         Opcode = ARM::LDRB_POST_IMM;
1456       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1457         Match = true;
1458         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1459       }
1460     }
1461   }
1462 
1463   if (Match) {
1464     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1465       SDValue Chain = LD->getChain();
1466       SDValue Base = LD->getBasePtr();
1467       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1468                        CurDAG->getRegister(0, MVT::i32), Chain };
1469       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1470                                            MVT::Other, Ops);
1471       transferMemOperands(N, New);
1472       ReplaceNode(N, New);
1473       return true;
1474     } else {
1475       SDValue Chain = LD->getChain();
1476       SDValue Base = LD->getBasePtr();
1477       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1478                        CurDAG->getRegister(0, MVT::i32), Chain };
1479       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1480                                            MVT::Other, Ops);
1481       transferMemOperands(N, New);
1482       ReplaceNode(N, New);
1483       return true;
1484     }
1485   }
1486 
1487   return false;
1488 }
1489 
/// tryT1IndexedLoad - Try to select a Thumb1 post-incremented i32 load. Only
/// a non-extending load whose increment is exactly 4 can be matched.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
1516 
/// tryT2IndexedLoad - Try to select a Thumb2 pre/post-indexed load. The
/// offset must fit the t2 imm8 offset form; the opcode is chosen from the
/// loaded type and extension kind.
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    // The machine node produces two i32 results plus a chain.
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
1567 
1568 /// Form a GPRPair pseudo register from a pair of GPR regs.
1569 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1570   SDLoc dl(V0.getNode());
1571   SDValue RegClass =
1572     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1573   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1574   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1575   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1576   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1577 }
1578 
1579 /// Form a D register from a pair of S registers.
1580 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1581   SDLoc dl(V0.getNode());
1582   SDValue RegClass =
1583     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1584   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1585   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1586   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1587   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1588 }
1589 
1590 /// Form a quad register from a pair of D registers.
1591 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1592   SDLoc dl(V0.getNode());
1593   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1594                                                MVT::i32);
1595   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1596   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1597   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1598   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1599 }
1600 
1601 /// Form 4 consecutive D registers from a pair of Q registers.
1602 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1603   SDLoc dl(V0.getNode());
1604   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1605                                                MVT::i32);
1606   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1607   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1608   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1609   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1610 }
1611 
1612 /// Form 4 consecutive S registers.
1613 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1614                                    SDValue V2, SDValue V3) {
1615   SDLoc dl(V0.getNode());
1616   SDValue RegClass =
1617     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1618   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1619   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1620   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1621   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1622   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1623                                     V2, SubReg2, V3, SubReg3 };
1624   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1625 }
1626 
1627 /// Form 4 consecutive D registers.
1628 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1629                                    SDValue V2, SDValue V3) {
1630   SDLoc dl(V0.getNode());
1631   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1632                                                MVT::i32);
1633   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1634   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1635   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1636   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1637   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1638                                     V2, SubReg2, V3, SubReg3 };
1639   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1640 }
1641 
1642 /// Form 4 consecutive Q registers.
1643 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1644                                    SDValue V2, SDValue V3) {
1645   SDLoc dl(V0.getNode());
1646   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1647                                                MVT::i32);
1648   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1649   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1650   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1651   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1652   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1653                                     V2, SubReg2, V3, SubReg3 };
1654   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1655 }
1656 
1657 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1658 /// of a NEON VLD or VST instruction.  The supported values depend on the
1659 /// number of registers being loaded.
1660 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1661                                        unsigned NumVecs, bool is64BitVector) {
1662   unsigned NumRegs = NumVecs;
1663   if (!is64BitVector && NumVecs < 3)
1664     NumRegs *= 2;
1665 
1666   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
1667   if (Alignment >= 32 && NumRegs == 4)
1668     Alignment = 32;
1669   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1670     Alignment = 16;
1671   else if (Alignment >= 8)
1672     Alignment = 8;
1673   else
1674     Alignment = 0;
1675 
1676   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1677 }
1678 
1679 static bool isVLDfixed(unsigned Opc)
1680 {
1681   switch (Opc) {
1682   default: return false;
1683   case ARM::VLD1d8wb_fixed : return true;
1684   case ARM::VLD1d16wb_fixed : return true;
1685   case ARM::VLD1d64Qwb_fixed : return true;
1686   case ARM::VLD1d32wb_fixed : return true;
1687   case ARM::VLD1d64wb_fixed : return true;
1688   case ARM::VLD1d64TPseudoWB_fixed : return true;
1689   case ARM::VLD1d64QPseudoWB_fixed : return true;
1690   case ARM::VLD1q8wb_fixed : return true;
1691   case ARM::VLD1q16wb_fixed : return true;
1692   case ARM::VLD1q32wb_fixed : return true;
1693   case ARM::VLD1q64wb_fixed : return true;
1694   case ARM::VLD1DUPd8wb_fixed : return true;
1695   case ARM::VLD1DUPd16wb_fixed : return true;
1696   case ARM::VLD1DUPd32wb_fixed : return true;
1697   case ARM::VLD1DUPq8wb_fixed : return true;
1698   case ARM::VLD1DUPq16wb_fixed : return true;
1699   case ARM::VLD1DUPq32wb_fixed : return true;
1700   case ARM::VLD2d8wb_fixed : return true;
1701   case ARM::VLD2d16wb_fixed : return true;
1702   case ARM::VLD2d32wb_fixed : return true;
1703   case ARM::VLD2q8PseudoWB_fixed : return true;
1704   case ARM::VLD2q16PseudoWB_fixed : return true;
1705   case ARM::VLD2q32PseudoWB_fixed : return true;
1706   case ARM::VLD2DUPd8wb_fixed : return true;
1707   case ARM::VLD2DUPd16wb_fixed : return true;
1708   case ARM::VLD2DUPd32wb_fixed : return true;
1709   }
1710 }
1711 
1712 static bool isVSTfixed(unsigned Opc)
1713 {
1714   switch (Opc) {
1715   default: return false;
1716   case ARM::VST1d8wb_fixed : return true;
1717   case ARM::VST1d16wb_fixed : return true;
1718   case ARM::VST1d32wb_fixed : return true;
1719   case ARM::VST1d64wb_fixed : return true;
1720   case ARM::VST1q8wb_fixed : return true;
1721   case ARM::VST1q16wb_fixed : return true;
1722   case ARM::VST1q32wb_fixed : return true;
1723   case ARM::VST1q64wb_fixed : return true;
1724   case ARM::VST1d64TPseudoWB_fixed : return true;
1725   case ARM::VST1d64QPseudoWB_fixed : return true;
1726   case ARM::VST2d8wb_fixed : return true;
1727   case ARM::VST2d16wb_fixed : return true;
1728   case ARM::VST2d32wb_fixed : return true;
1729   case ARM::VST2q8PseudoWB_fixed : return true;
1730   case ARM::VST2q16PseudoWB_fixed : return true;
1731   case ARM::VST2q32PseudoWB_fixed : return true;
1732   }
1733 }
1734 
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// The input must satisfy isVLDfixed() or isVSTfixed(); opcodes not listed
// below (e.g. some DUP/pseudo forms with no "_register" variant) are
// returned unchanged.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
    && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  // VLD1 double/quad-register forms.
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;

  // VST1 forms.
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2 forms.
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2 forms.
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP forms.
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
1792 
1793 /// Returns true if the given increment is a Constant known to be equal to the
1794 /// access size performed by a NEON load/store. This means the "[rN]!" form can
1795 /// be used.
1796 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
1797   auto C = dyn_cast<ConstantSDNode>(Inc);
1798   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
1799 }
1800 
/// Select a NEON VLD1/VLD2/VLD3/VLD4 node (plain or updating).
/// \param N          node to replace; either an intrinsic or an updating load.
/// \param isUpdating true if N also produces an updated base address.
/// \param NumVecs    number of vectors loaded (1-4).
/// \param DOpcodes   opcode table (indexed by element size) for 64-bit types.
/// \param QOpcodes0  opcode table for 128-bit types; for VLD3/VLD4 of quad
///                   registers this is the first ("even subregs") instruction.
/// \param QOpcodes1  second ("odd subregs") instruction for quad VLD3/VLD4.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  // Clamp the alignment operand to what the instruction encoding supports.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size of the vector type to an index into the opcode
  // tables (0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit elements).
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // The machine instruction returns all loaded registers as one untyped
  // i64-element super-register value; a VLD3 result is padded out to the
  // size of a 4-register sequence.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs, using the address (result 1) and the partially
    // filled super-register (result 0) produced by the first instruction.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters: each of N's per-vector results becomes an
  // EXTRACT_SUBREG of the loaded super-register.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
1939 
/// Select a NEON VST1/VST2/VST3/VST4 node (plain or updating).
/// \param N          node to replace; either an intrinsic or an updating store.
/// \param isUpdating true if N also produces an updated base address.
/// \param NumVecs    number of vectors stored (1-4).
/// \param DOpcodes   opcode table (indexed by element size) for 64-bit types.
/// \param QOpcodes0  opcode table for 128-bit types; for VST3/VST4 of quad
///                   registers this is the first ("even subregs") instruction.
/// \param QOpcodes1  second ("odd subregs") instruction for quad VST3/VST4.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  // Clamp the alignment operand to what the instruction encoding supports.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the element size of the vector type to an index into the opcode
  // tables (0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit elements).
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    // Gather the vectors to store into one super-register source operand.
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers, using the address produced by the first store.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2090 
/// Select a NEON VLD2-4/VST2-4 single-lane node (load/store of one element
/// lane across 2-4 vectors).
/// \param N          node to replace; either an intrinsic or an updating op.
/// \param IsLoad     true for VLDxLN, false for VSTxLN.
/// \param isUpdating true if N also produces an updated base address.
/// \param NumVecs    number of vectors accessed (2-4).
/// \param DOpcodes   opcode table (indexed by element size) for 64-bit types.
/// \param QOpcodes   opcode table for 128-bit types.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the constant operand following the vector operands.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Compute the alignment operand: clamp to the total access size, require
  // at least 8, and round down to a power of two (0 means "no alignment").
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the element size of the vector type to an index into the opcode
  // tables. (64-bit element lanes do not exist for these instructions.)
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Loads additionally produce a super-register result (VLD3 padded to the
  // size of a 4-register sequence).
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    // A register-0 writeback operand selects the fixed (access-size) stride.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Gather the incoming vectors into one super-register operand; a VLD3/VST3
  // fills the fourth slot with an undef.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters: each of N's per-vector results becomes an
  // EXTRACT_SUBREG of the loaded super-register.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2217 
2218 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2219                                    bool isUpdating, unsigned NumVecs,
2220                                    const uint16_t *DOpcodes,
2221                                    const uint16_t *QOpcodes0,
2222                                    const uint16_t *QOpcodes1) {
2223   assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2224   SDLoc dl(N);
2225 
2226   SDValue MemAddr, Align;
2227   unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2228   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2229     return;
2230 
2231   SDValue Chain = N->getOperand(0);
2232   EVT VT = N->getValueType(0);
2233   bool is64BitVector = VT.is64BitVector();
2234 
2235   unsigned Alignment = 0;
2236   if (NumVecs != 3) {
2237     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
2238     unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
2239     if (Alignment > NumBytes)
2240       Alignment = NumBytes;
2241     if (Alignment < 8 && Alignment < NumBytes)
2242       Alignment = 0;
2243     // Alignment must be a power of two; make sure of that.
2244     Alignment = (Alignment & -Alignment);
2245     if (Alignment == 1)
2246       Alignment = 0;
2247   }
2248   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
2249 
2250   unsigned OpcodeIndex;
2251   switch (VT.getSimpleVT().SimpleTy) {
2252   default: llvm_unreachable("unhandled vld-dup type");
2253   case MVT::v8i8:
2254   case MVT::v16i8: OpcodeIndex = 0; break;
2255   case MVT::v4i16:
2256   case MVT::v8i16:
2257   case MVT::v4f16:
2258   case MVT::v8f16:
2259                   OpcodeIndex = 1; break;
2260   case MVT::v2f32:
2261   case MVT::v2i32:
2262   case MVT::v4f32:
2263   case MVT::v4i32: OpcodeIndex = 2; break;
2264   case MVT::v1f64:
2265   case MVT::v1i64: OpcodeIndex = 3; break;
2266   }
2267 
2268   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
2269   if (!is64BitVector)
2270     ResTyElts *= 2;
2271   EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
2272 
2273   std::vector<EVT> ResTys;
2274   ResTys.push_back(ResTy);
2275   if (isUpdating)
2276     ResTys.push_back(MVT::i32);
2277   ResTys.push_back(MVT::Other);
2278 
2279   SDValue Pred = getAL(CurDAG, dl);
2280   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2281 
2282   SDNode *VLdDup;
2283   if (is64BitVector || NumVecs == 1) {
2284     SmallVector<SDValue, 6> Ops;
2285     Ops.push_back(MemAddr);
2286     Ops.push_back(Align);
2287     unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
2288                                    QOpcodes0[OpcodeIndex];
2289     if (isUpdating) {
2290       // fixed-stride update instructions don't have an explicit writeback
2291       // operand. It's implicit in the opcode itself.
2292       SDValue Inc = N->getOperand(2);
2293       bool IsImmUpdate =
2294           isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
2295       if (NumVecs <= 2 && !IsImmUpdate)
2296         Opc = getVLDSTRegisterUpdateOpcode(Opc);
2297       if (!IsImmUpdate)
2298         Ops.push_back(Inc);
2299       // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
2300       else if (NumVecs > 2)
2301         Ops.push_back(Reg0);
2302     }
2303     Ops.push_back(Pred);
2304     Ops.push_back(Reg0);
2305     Ops.push_back(Chain);
2306     VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2307   } else if (NumVecs == 2) {
2308     const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
2309     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2310                                           dl, ResTys, OpsA);
2311 
2312     Chain = SDValue(VLdA, 1);
2313     const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
2314     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2315   } else {
2316     SDValue ImplDef =
2317       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2318     const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
2319     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
2320                                           dl, ResTys, OpsA);
2321 
2322     SDValue SuperReg = SDValue(VLdA, 0);
2323     Chain = SDValue(VLdA, 1);
2324     const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
2325     VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
2326   }
2327 
2328   // Transfer memoperands.
2329   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2330   CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});
2331 
2332   // Extract the subregisters.
2333   if (NumVecs == 1) {
2334     ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
2335   } else {
2336     SDValue SuperReg = SDValue(VLdDup, 0);
2337     static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
2338     unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
2339     for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
2340       ReplaceUses(SDValue(N, Vec),
2341                   CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
2342     }
2343   }
2344   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
2345   if (isUpdating)
2346     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
2347   CurDAG->RemoveDeadNode(N);
2348 }
2349 
/// Try to select \p N as a single bitfield-extract instruction:
/// UBFX/SBFX (ARM) or t2UBFX/t2SBFX (Thumb2), available from ARMv6T2.
/// \p N is an AND, SRL, SRA or SIGN_EXTEND_INREG node; \p isSigned picks the
/// signed (SBFX) vs unsigned (UBFX) opcode. Returns true if \p N was replaced.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  // UBFX/SBFX only exist from ARMv6T2 onwards.
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      // Match (and (srl x, lsb), mask) -> UBFX x, lsb, width.
      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        // If the field reaches the top bit, a plain right shift extracts it.
        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    // AND nodes never fall through to the shift-based patterns below.
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  // Match (srl (shl x, shl_imm), srl_imm) -> UBFX x, srl_imm-shl_imm, width.
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A negative LSB would mean the field starts below bit 0; give up.
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  // Match (srl (and x, shifted_mask), lsb) where the shift amount equals the
  // mask's low bit -> UBFX x, lsb, popcount(mask).
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Match (sext_inreg (srl/sra x, lsb), vt) -> SBFX x, lsb, size(vt).
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    // The field must fit within the 32-bit register.
    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
2485 
2486 /// Target-specific DAG combining for ISD::XOR.
2487 /// Target-independent combining lowers SELECT_CC nodes of the form
2488 /// select_cc setg[ge] X,  0,  X, -X
2489 /// select_cc setgt    X, -1,  X, -X
2490 /// select_cc setl[te] X,  0, -X,  X
2491 /// select_cc setlt    X,  1, -X,  X
2492 /// which represent Integer ABS into:
2493 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
2494 /// ARM instruction selection detects the latter and matches it to
2495 /// ARM::ABS or ARM::t2ABS machine node.
2496 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
2497   SDValue XORSrc0 = N->getOperand(0);
2498   SDValue XORSrc1 = N->getOperand(1);
2499   EVT VT = N->getValueType(0);
2500 
2501   if (Subtarget->isThumb1Only())
2502     return false;
2503 
2504   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
2505     return false;
2506 
2507   SDValue ADDSrc0 = XORSrc0.getOperand(0);
2508   SDValue ADDSrc1 = XORSrc0.getOperand(1);
2509   SDValue SRASrc0 = XORSrc1.getOperand(0);
2510   SDValue SRASrc1 = XORSrc1.getOperand(1);
2511   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
2512   EVT XType = SRASrc0.getValueType();
2513   unsigned Size = XType.getSizeInBits() - 1;
2514 
2515   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
2516       XType.isInteger() && SRAConstant != nullptr &&
2517       Size == SRAConstant->getZExtValue()) {
2518     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
2519     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
2520     return true;
2521   }
2522 
2523   return false;
2524 }
2525 
2526 /// We've got special pseudo-instructions for these
2527 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2528   unsigned Opcode;
2529   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2530   if (MemTy == MVT::i8)
2531     Opcode = ARM::CMP_SWAP_8;
2532   else if (MemTy == MVT::i16)
2533     Opcode = ARM::CMP_SWAP_16;
2534   else if (MemTy == MVT::i32)
2535     Opcode = ARM::CMP_SWAP_32;
2536   else
2537     llvm_unreachable("Unknown AtomicCmpSwap type");
2538 
2539   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2540                    N->getOperand(0)};
2541   SDNode *CmpSwap = CurDAG->getMachineNode(
2542       Opcode, SDLoc(N),
2543       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2544 
2545   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2546   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2547 
2548   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2549   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
2550   CurDAG->RemoveDeadNode(N);
2551 }
2552 
2553 static Optional<std::pair<unsigned, unsigned>>
2554 getContiguousRangeOfSetBits(const APInt &A) {
2555   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
2556   unsigned LastOne = A.countTrailingZeros();
2557   if (A.countPopulation() != (FirstOne - LastOne + 1))
2558     return Optional<std::pair<unsigned,unsigned>>();
2559   return std::make_pair(FirstOne, LastOne);
2560 }
2561 
/// Try to simplify (CMPZ (and X, C), #0) when C is a contiguous bit mask by
/// replacing the AND with one or two flag-friendly Thumb shifts, so the
/// compare tests the shifted value instead. When the mask is a single bit the
/// bit is moved into the sign position and \p SwitchEQNEToPLMI is set so the
/// caller rewrites an EQ/NE condition to PL/MI. \p N itself is not replaced;
/// only its AND operand may be.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  // Only fold if the AND has no other users; otherwise it must stay anyway.
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  // Require (and X, constC) compared against literal zero.
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // The transform only works when the mask bits are contiguous.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit an immediate shift (Opc is tLSLri or tLSRri; mapped to the t2 forms
  // in Thumb2 mode, where the operand layout differs).
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      // Thumb1 shifts always define CPSR, passed as the first operand here.
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  // Range is (highest set bit, lowest set bit) of the mask.
  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }

}
2633 
2634 void ARMDAGToDAGISel::Select(SDNode *N) {
2635   SDLoc dl(N);
2636 
2637   if (N->isMachineOpcode()) {
2638     N->setNodeId(-1);
2639     return;   // Already selected.
2640   }
2641 
2642   switch (N->getOpcode()) {
2643   default: break;
2644   case ISD::STORE: {
2645     // For Thumb1, match an sp-relative store in C++. This is a little
2646     // unfortunate, but I don't think I can make the chain check work
2647     // otherwise.  (The chain of the store has to be the same as the chain
2648     // of the CopyFromReg, or else we can't replace the CopyFromReg with
2649     // a direct reference to "SP".)
2650     //
2651     // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
2652     // a different addressing mode from other four-byte stores.
2653     //
2654     // This pattern usually comes up with call arguments.
2655     StoreSDNode *ST = cast<StoreSDNode>(N);
2656     SDValue Ptr = ST->getBasePtr();
2657     if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
2658       int RHSC = 0;
2659       if (Ptr.getOpcode() == ISD::ADD &&
2660           isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
2661         Ptr = Ptr.getOperand(0);
2662 
2663       if (Ptr.getOpcode() == ISD::CopyFromReg &&
2664           cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
2665           Ptr.getOperand(0) == ST->getChain()) {
2666         SDValue Ops[] = {ST->getValue(),
2667                          CurDAG->getRegister(ARM::SP, MVT::i32),
2668                          CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
2669                          getAL(CurDAG, dl),
2670                          CurDAG->getRegister(0, MVT::i32),
2671                          ST->getChain()};
2672         MachineSDNode *ResNode =
2673             CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
2674         MachineMemOperand *MemOp = ST->getMemOperand();
2675         CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2676         ReplaceNode(N, ResNode);
2677         return;
2678       }
2679     }
2680     break;
2681   }
2682   case ISD::WRITE_REGISTER:
2683     if (tryWriteRegister(N))
2684       return;
2685     break;
2686   case ISD::READ_REGISTER:
2687     if (tryReadRegister(N))
2688       return;
2689     break;
2690   case ISD::INLINEASM:
2691   case ISD::INLINEASM_BR:
2692     if (tryInlineAsm(N))
2693       return;
2694     break;
2695   case ISD::XOR:
2696     // Select special operations if XOR node forms integer ABS pattern
2697     if (tryABSOp(N))
2698       return;
2699     // Other cases are autogenerated.
2700     break;
2701   case ISD::Constant: {
2702     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
2703     // If we can't materialize the constant we need to use a literal pool
2704     if (ConstantMaterializationCost(Val) > 2) {
2705       SDValue CPIdx = CurDAG->getTargetConstantPool(
2706           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
2707           TLI->getPointerTy(CurDAG->getDataLayout()));
2708 
2709       SDNode *ResNode;
2710       if (Subtarget->isThumb()) {
2711         SDValue Ops[] = {
2712           CPIdx,
2713           getAL(CurDAG, dl),
2714           CurDAG->getRegister(0, MVT::i32),
2715           CurDAG->getEntryNode()
2716         };
2717         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
2718                                          Ops);
2719       } else {
2720         SDValue Ops[] = {
2721           CPIdx,
2722           CurDAG->getTargetConstant(0, dl, MVT::i32),
2723           getAL(CurDAG, dl),
2724           CurDAG->getRegister(0, MVT::i32),
2725           CurDAG->getEntryNode()
2726         };
2727         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
2728                                          Ops);
2729       }
2730       // Annotate the Node with memory operand information so that MachineInstr
2731       // queries work properly. This e.g. gives the register allocation the
2732       // required information for rematerialization.
2733       MachineFunction& MF = CurDAG->getMachineFunction();
2734       MachineMemOperand *MemOp =
2735           MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
2736                                   MachineMemOperand::MOLoad, 4, 4);
2737 
2738       CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
2739 
2740       ReplaceNode(N, ResNode);
2741       return;
2742     }
2743 
2744     // Other cases are autogenerated.
2745     break;
2746   }
2747   case ISD::FrameIndex: {
2748     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
2749     int FI = cast<FrameIndexSDNode>(N)->getIndex();
2750     SDValue TFI = CurDAG->getTargetFrameIndex(
2751         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
2752     if (Subtarget->isThumb1Only()) {
2753       // Set the alignment of the frame object to 4, to avoid having to generate
2754       // more than one ADD
2755       MachineFrameInfo &MFI = MF->getFrameInfo();
2756       if (MFI.getObjectAlignment(FI) < 4)
2757         MFI.setObjectAlignment(FI, 4);
2758       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
2759                            CurDAG->getTargetConstant(0, dl, MVT::i32));
2760       return;
2761     } else {
2762       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
2763                       ARM::t2ADDri : ARM::ADDri);
2764       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
2765                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
2766                         CurDAG->getRegister(0, MVT::i32) };
2767       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
2768       return;
2769     }
2770   }
2771   case ISD::SRL:
2772     if (tryV6T2BitfieldExtractOp(N, false))
2773       return;
2774     break;
2775   case ISD::SIGN_EXTEND_INREG:
2776   case ISD::SRA:
2777     if (tryV6T2BitfieldExtractOp(N, true))
2778       return;
2779     break;
2780   case ISD::MUL:
2781     if (Subtarget->isThumb1Only())
2782       break;
2783     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
2784       unsigned RHSV = C->getZExtValue();
2785       if (!RHSV) break;
2786       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
2787         unsigned ShImm = Log2_32(RHSV-1);
2788         if (ShImm >= 32)
2789           break;
2790         SDValue V = N->getOperand(0);
2791         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2792         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2793         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2794         if (Subtarget->isThumb()) {
2795           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2796           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
2797           return;
2798         } else {
2799           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2800                             Reg0 };
2801           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
2802           return;
2803         }
2804       }
2805       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
2806         unsigned ShImm = Log2_32(RHSV+1);
2807         if (ShImm >= 32)
2808           break;
2809         SDValue V = N->getOperand(0);
2810         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
2811         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
2812         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
2813         if (Subtarget->isThumb()) {
2814           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
2815           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
2816           return;
2817         } else {
2818           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
2819                             Reg0 };
2820           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
2821           return;
2822         }
2823       }
2824     }
2825     break;
2826   case ISD::AND: {
2827     // Check for unsigned bitfield extract
2828     if (tryV6T2BitfieldExtractOp(N, false))
2829       return;
2830 
2831     // If an immediate is used in an AND node, it is possible that the immediate
2832     // can be more optimally materialized when negated. If this is the case we
2833     // can negate the immediate and use a BIC instead.
2834     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2835     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2836       uint32_t Imm = (uint32_t) N1C->getZExtValue();
2837 
2838       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2839       // immediate can be negated and fit in the immediate operand of
2840       // a t2BIC, don't do any manual transform here as this can be
2841       // handled by the generic ISel machinery.
2842       bool PreferImmediateEncoding =
2843         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2844       if (!PreferImmediateEncoding &&
2845           ConstantMaterializationCost(Imm) >
2846               ConstantMaterializationCost(~Imm)) {
2847         // The current immediate costs more to materialize than a negated
2848         // immediate, so negate the immediate and use a BIC.
2849         SDValue NewImm =
2850           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2851         // If the new constant didn't exist before, reposition it in the topological
2852         // ordering so it is just before N. Otherwise, don't touch its location.
2853         if (NewImm->getNodeId() == -1)
2854           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2855 
2856         if (!Subtarget->hasThumb2()) {
2857           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2858                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
2859                            CurDAG->getRegister(0, MVT::i32)};
2860           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2861           return;
2862         } else {
2863           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2864                            CurDAG->getRegister(0, MVT::i32),
2865                            CurDAG->getRegister(0, MVT::i32)};
2866           ReplaceNode(N,
2867                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2868           return;
2869         }
2870       }
2871     }
2872 
2873     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
2874     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
2875     // are entirely contributed by c2 and lower 16-bits are entirely contributed
2876     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
2877     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
2878     EVT VT = N->getValueType(0);
2879     if (VT != MVT::i32)
2880       break;
2881     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
2882       ? ARM::t2MOVTi16
2883       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
2884     if (!Opc)
2885       break;
2886     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2887     N1C = dyn_cast<ConstantSDNode>(N1);
2888     if (!N1C)
2889       break;
2890     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
2891       SDValue N2 = N0.getOperand(1);
2892       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
2893       if (!N2C)
2894         break;
2895       unsigned N1CVal = N1C->getZExtValue();
2896       unsigned N2CVal = N2C->getZExtValue();
2897       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
2898           (N1CVal & 0xffffU) == 0xffffU &&
2899           (N2CVal & 0xffffU) == 0x0U) {
2900         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
2901                                                   dl, MVT::i32);
2902         SDValue Ops[] = { N0.getOperand(0), Imm16,
2903                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
2904         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
2905         return;
2906       }
2907     }
2908 
2909     break;
2910   }
2911   case ARMISD::UMAAL: {
2912     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
2913     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
2914                       N->getOperand(2), N->getOperand(3),
2915                       getAL(CurDAG, dl),
2916                       CurDAG->getRegister(0, MVT::i32) };
2917     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
2918     return;
2919   }
2920   case ARMISD::UMLAL:{
2921     if (Subtarget->isThumb()) {
2922       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2923                         N->getOperand(3), getAL(CurDAG, dl),
2924                         CurDAG->getRegister(0, MVT::i32)};
2925       ReplaceNode(
2926           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
2927       return;
2928     }else{
2929       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2930                         N->getOperand(3), getAL(CurDAG, dl),
2931                         CurDAG->getRegister(0, MVT::i32),
2932                         CurDAG->getRegister(0, MVT::i32) };
2933       ReplaceNode(N, CurDAG->getMachineNode(
2934                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
2935                          MVT::i32, MVT::i32, Ops));
2936       return;
2937     }
2938   }
2939   case ARMISD::SMLAL:{
2940     if (Subtarget->isThumb()) {
2941       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2942                         N->getOperand(3), getAL(CurDAG, dl),
2943                         CurDAG->getRegister(0, MVT::i32)};
2944       ReplaceNode(
2945           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
2946       return;
2947     }else{
2948       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
2949                         N->getOperand(3), getAL(CurDAG, dl),
2950                         CurDAG->getRegister(0, MVT::i32),
2951                         CurDAG->getRegister(0, MVT::i32) };
2952       ReplaceNode(N, CurDAG->getMachineNode(
2953                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
2954                          MVT::i32, MVT::i32, Ops));
2955       return;
2956     }
2957   }
2958   case ARMISD::SUBE: {
2959     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
2960       break;
2961     // Look for a pattern to match SMMLS
2962     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
2963     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
2964         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
2965         !SDValue(N, 1).use_empty())
2966       break;
2967 
2968     if (Subtarget->isThumb())
2969       assert(Subtarget->hasThumb2() &&
2970              "This pattern should not be generated for Thumb");
2971 
2972     SDValue SmulLoHi = N->getOperand(1);
2973     SDValue Subc = N->getOperand(2);
2974     auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
2975 
2976     if (!Zero || Zero->getZExtValue() != 0 ||
2977         Subc.getOperand(1) != SmulLoHi.getValue(0) ||
2978         N->getOperand(1) != SmulLoHi.getValue(1) ||
2979         N->getOperand(2) != Subc.getValue(1))
2980       break;
2981 
2982     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
2983     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
2984                       N->getOperand(0), getAL(CurDAG, dl),
2985                       CurDAG->getRegister(0, MVT::i32) };
2986     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
2987     return;
2988   }
2989   case ISD::LOAD: {
2990     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
2991       if (tryT2IndexedLoad(N))
2992         return;
2993     } else if (Subtarget->isThumb()) {
2994       if (tryT1IndexedLoad(N))
2995         return;
2996     } else if (tryARMIndexedLoad(N))
2997       return;
2998     // Other cases are autogenerated.
2999     break;
3000   }
3001   case ARMISD::WLS: {
3002     SDValue Ops[] = { N->getOperand(1),   // Loop count
3003                       N->getOperand(2),   // Exit target
3004                       N->getOperand(0) };
3005     SDNode *LoopStart =
3006       CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops);
3007     ReplaceUses(N, LoopStart);
3008     CurDAG->RemoveDeadNode(N);
3009     return;
3010   }
3011   case ARMISD::BRCOND: {
3012     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3013     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3014     // Pattern complexity = 6  cost = 1  size = 0
3015 
3016     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3017     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
3018     // Pattern complexity = 6  cost = 1  size = 0
3019 
3020     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
3021     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
3022     // Pattern complexity = 6  cost = 1  size = 0
3023 
3024     unsigned Opc = Subtarget->isThumb() ?
3025       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
3026     SDValue Chain = N->getOperand(0);
3027     SDValue N1 = N->getOperand(1);
3028     SDValue N2 = N->getOperand(2);
3029     SDValue N3 = N->getOperand(3);
3030     SDValue InFlag = N->getOperand(4);
3031     assert(N1.getOpcode() == ISD::BasicBlock);
3032     assert(N2.getOpcode() == ISD::Constant);
3033     assert(N3.getOpcode() == ISD::Register);
3034 
3035     unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
3036 
3037     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3038       if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
3039         SDValue Int = InFlag.getOperand(0);
3040         uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
3041 
3042         // Handle low-overhead loops.
3043         if (ID == Intrinsic::loop_decrement_reg) {
3044           SDValue Elements = Int.getOperand(2);
3045           SDValue Size = CurDAG->getTargetConstant(
3046             cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
3047                                  MVT::i32);
3048 
3049           SDValue Args[] = { Elements, Size, Int.getOperand(0) };
3050           SDNode *LoopDec =
3051             CurDAG->getMachineNode(ARM::t2LoopDec, dl,
3052                                    CurDAG->getVTList(MVT::i32, MVT::Other),
3053                                    Args);
3054           ReplaceUses(Int.getNode(), LoopDec);
3055 
3056           SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
3057           SDNode *LoopEnd =
3058             CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
3059 
3060           ReplaceUses(N, LoopEnd);
3061           CurDAG->RemoveDeadNode(N);
3062           CurDAG->RemoveDeadNode(InFlag.getNode());
3063           CurDAG->RemoveDeadNode(Int.getNode());
3064           return;
3065         }
3066       }
3067 
3068       bool SwitchEQNEToPLMI;
3069       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3070       InFlag = N->getOperand(4);
3071 
3072       if (SwitchEQNEToPLMI) {
3073         switch ((ARMCC::CondCodes)CC) {
3074         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3075         case ARMCC::NE:
3076           CC = (unsigned)ARMCC::MI;
3077           break;
3078         case ARMCC::EQ:
3079           CC = (unsigned)ARMCC::PL;
3080           break;
3081         }
3082       }
3083     }
3084 
3085     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
3086     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
3087     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
3088                                              MVT::Glue, Ops);
3089     Chain = SDValue(ResNode, 0);
3090     if (N->getNumValues() == 2) {
3091       InFlag = SDValue(ResNode, 1);
3092       ReplaceUses(SDValue(N, 1), InFlag);
3093     }
3094     ReplaceUses(SDValue(N, 0),
3095                 SDValue(Chain.getNode(), Chain.getResNo()));
3096     CurDAG->RemoveDeadNode(N);
3097     return;
3098   }
3099 
3100   case ARMISD::CMPZ: {
3101     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
3102     //   This allows us to avoid materializing the expensive negative constant.
3103     //   The CMPZ #0 is useless and will be peepholed away but we need to keep it
3104     //   for its glue output.
3105     SDValue X = N->getOperand(0);
3106     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
3107     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
3108       int64_t Addend = -C->getSExtValue();
3109 
3110       SDNode *Add = nullptr;
3111       // ADDS can be better than CMN if the immediate fits in a
3112       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
3113       // Outside that range we can just use a CMN which is 32-bit but has a
3114       // 12-bit immediate range.
3115       if (Addend < 1<<8) {
3116         if (Subtarget->isThumb2()) {
3117           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3118                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3119                             CurDAG->getRegister(0, MVT::i32) };
3120           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
3121         } else {
3122           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
3123           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
3124                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
3125                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
3126           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
3127         }
3128       }
3129       if (Add) {
3130         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
3131         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
3132       }
3133     }
3134     // Other cases are autogenerated.
3135     break;
3136   }
3137 
3138   case ARMISD::CMOV: {
3139     SDValue InFlag = N->getOperand(4);
3140 
3141     if (InFlag.getOpcode() == ARMISD::CMPZ) {
3142       bool SwitchEQNEToPLMI;
3143       SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
3144 
3145       if (SwitchEQNEToPLMI) {
3146         SDValue ARMcc = N->getOperand(2);
3147         ARMCC::CondCodes CC =
3148           (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
3149 
3150         switch (CC) {
3151         default: llvm_unreachable("CMPZ must be either NE or EQ!");
3152         case ARMCC::NE:
3153           CC = ARMCC::MI;
3154           break;
3155         case ARMCC::EQ:
3156           CC = ARMCC::PL;
3157           break;
3158         }
3159         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
3160         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
3161                          N->getOperand(3), N->getOperand(4)};
3162         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
3163       }
3164 
3165     }
3166     // Other cases are autogenerated.
3167     break;
3168   }
3169 
3170   case ARMISD::VZIP: {
3171     unsigned Opc = 0;
3172     EVT VT = N->getValueType(0);
3173     switch (VT.getSimpleVT().SimpleTy) {
3174     default: return;
3175     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
3176     case MVT::v4f16:
3177     case MVT::v4i16: Opc = ARM::VZIPd16; break;
3178     case MVT::v2f32:
3179     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3180     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3181     case MVT::v16i8: Opc = ARM::VZIPq8; break;
3182     case MVT::v8f16:
3183     case MVT::v8i16: Opc = ARM::VZIPq16; break;
3184     case MVT::v4f32:
3185     case MVT::v4i32: Opc = ARM::VZIPq32; break;
3186     }
3187     SDValue Pred = getAL(CurDAG, dl);
3188     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3189     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3190     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3191     return;
3192   }
3193   case ARMISD::VUZP: {
3194     unsigned Opc = 0;
3195     EVT VT = N->getValueType(0);
3196     switch (VT.getSimpleVT().SimpleTy) {
3197     default: return;
3198     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
3199     case MVT::v4f16:
3200     case MVT::v4i16: Opc = ARM::VUZPd16; break;
3201     case MVT::v2f32:
3202     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
3203     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3204     case MVT::v16i8: Opc = ARM::VUZPq8; break;
3205     case MVT::v8f16:
3206     case MVT::v8i16: Opc = ARM::VUZPq16; break;
3207     case MVT::v4f32:
3208     case MVT::v4i32: Opc = ARM::VUZPq32; break;
3209     }
3210     SDValue Pred = getAL(CurDAG, dl);
3211     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3212     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3213     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3214     return;
3215   }
3216   case ARMISD::VTRN: {
3217     unsigned Opc = 0;
3218     EVT VT = N->getValueType(0);
3219     switch (VT.getSimpleVT().SimpleTy) {
3220     default: return;
3221     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
3222     case MVT::v4f16:
3223     case MVT::v4i16: Opc = ARM::VTRNd16; break;
3224     case MVT::v2f32:
3225     case MVT::v2i32: Opc = ARM::VTRNd32; break;
3226     case MVT::v16i8: Opc = ARM::VTRNq8; break;
3227     case MVT::v8f16:
3228     case MVT::v8i16: Opc = ARM::VTRNq16; break;
3229     case MVT::v4f32:
3230     case MVT::v4i32: Opc = ARM::VTRNq32; break;
3231     }
3232     SDValue Pred = getAL(CurDAG, dl);
3233     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
3234     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
3235     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
3236     return;
3237   }
3238   case ARMISD::BUILD_VECTOR: {
3239     EVT VecVT = N->getValueType(0);
3240     EVT EltVT = VecVT.getVectorElementType();
3241     unsigned NumElts = VecVT.getVectorNumElements();
3242     if (EltVT == MVT::f64) {
3243       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
3244       ReplaceNode(
3245           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3246       return;
3247     }
3248     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
3249     if (NumElts == 2) {
3250       ReplaceNode(
3251           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
3252       return;
3253     }
3254     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
3255     ReplaceNode(N,
3256                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
3257                                     N->getOperand(2), N->getOperand(3)));
3258     return;
3259   }
3260 
3261   case ARMISD::VLD1DUP: {
3262     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
3263                                          ARM::VLD1DUPd32 };
3264     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
3265                                          ARM::VLD1DUPq32 };
3266     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
3267     return;
3268   }
3269 
3270   case ARMISD::VLD2DUP: {
3271     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3272                                         ARM::VLD2DUPd32 };
3273     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
3274     return;
3275   }
3276 
3277   case ARMISD::VLD3DUP: {
3278     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
3279                                         ARM::VLD3DUPd16Pseudo,
3280                                         ARM::VLD3DUPd32Pseudo };
3281     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
3282     return;
3283   }
3284 
3285   case ARMISD::VLD4DUP: {
3286     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
3287                                         ARM::VLD4DUPd16Pseudo,
3288                                         ARM::VLD4DUPd32Pseudo };
3289     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
3290     return;
3291   }
3292 
3293   case ARMISD::VLD1DUP_UPD: {
3294     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
3295                                          ARM::VLD1DUPd16wb_fixed,
3296                                          ARM::VLD1DUPd32wb_fixed };
3297     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
3298                                          ARM::VLD1DUPq16wb_fixed,
3299                                          ARM::VLD1DUPq32wb_fixed };
3300     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
3301     return;
3302   }
3303 
3304   case ARMISD::VLD2DUP_UPD: {
3305     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
3306                                         ARM::VLD2DUPd16wb_fixed,
3307                                         ARM::VLD2DUPd32wb_fixed };
3308     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
3309     return;
3310   }
3311 
3312   case ARMISD::VLD3DUP_UPD: {
3313     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
3314                                         ARM::VLD3DUPd16Pseudo_UPD,
3315                                         ARM::VLD3DUPd32Pseudo_UPD };
3316     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
3317     return;
3318   }
3319 
3320   case ARMISD::VLD4DUP_UPD: {
3321     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
3322                                         ARM::VLD4DUPd16Pseudo_UPD,
3323                                         ARM::VLD4DUPd32Pseudo_UPD };
3324     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
3325     return;
3326   }
3327 
3328   case ARMISD::VLD1_UPD: {
3329     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
3330                                          ARM::VLD1d16wb_fixed,
3331                                          ARM::VLD1d32wb_fixed,
3332                                          ARM::VLD1d64wb_fixed };
3333     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
3334                                          ARM::VLD1q16wb_fixed,
3335                                          ARM::VLD1q32wb_fixed,
3336                                          ARM::VLD1q64wb_fixed };
3337     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
3338     return;
3339   }
3340 
3341   case ARMISD::VLD2_UPD: {
3342     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
3343                                          ARM::VLD2d16wb_fixed,
3344                                          ARM::VLD2d32wb_fixed,
3345                                          ARM::VLD1q64wb_fixed};
3346     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
3347                                          ARM::VLD2q16PseudoWB_fixed,
3348                                          ARM::VLD2q32PseudoWB_fixed };
3349     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
3350     return;
3351   }
3352 
3353   case ARMISD::VLD3_UPD: {
3354     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
3355                                          ARM::VLD3d16Pseudo_UPD,
3356                                          ARM::VLD3d32Pseudo_UPD,
3357                                          ARM::VLD1d64TPseudoWB_fixed};
3358     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3359                                           ARM::VLD3q16Pseudo_UPD,
3360                                           ARM::VLD3q32Pseudo_UPD };
3361     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
3362                                           ARM::VLD3q16oddPseudo_UPD,
3363                                           ARM::VLD3q32oddPseudo_UPD };
3364     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3365     return;
3366   }
3367 
3368   case ARMISD::VLD4_UPD: {
3369     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
3370                                          ARM::VLD4d16Pseudo_UPD,
3371                                          ARM::VLD4d32Pseudo_UPD,
3372                                          ARM::VLD1d64QPseudoWB_fixed};
3373     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3374                                           ARM::VLD4q16Pseudo_UPD,
3375                                           ARM::VLD4q32Pseudo_UPD };
3376     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
3377                                           ARM::VLD4q16oddPseudo_UPD,
3378                                           ARM::VLD4q32oddPseudo_UPD };
3379     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3380     return;
3381   }
3382 
3383   case ARMISD::VLD2LN_UPD: {
3384     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
3385                                          ARM::VLD2LNd16Pseudo_UPD,
3386                                          ARM::VLD2LNd32Pseudo_UPD };
3387     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
3388                                          ARM::VLD2LNq32Pseudo_UPD };
3389     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
3390     return;
3391   }
3392 
3393   case ARMISD::VLD3LN_UPD: {
3394     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
3395                                          ARM::VLD3LNd16Pseudo_UPD,
3396                                          ARM::VLD3LNd32Pseudo_UPD };
3397     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
3398                                          ARM::VLD3LNq32Pseudo_UPD };
3399     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
3400     return;
3401   }
3402 
3403   case ARMISD::VLD4LN_UPD: {
3404     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
3405                                          ARM::VLD4LNd16Pseudo_UPD,
3406                                          ARM::VLD4LNd32Pseudo_UPD };
3407     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
3408                                          ARM::VLD4LNq32Pseudo_UPD };
3409     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
3410     return;
3411   }
3412 
3413   case ARMISD::VST1_UPD: {
3414     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
3415                                          ARM::VST1d16wb_fixed,
3416                                          ARM::VST1d32wb_fixed,
3417                                          ARM::VST1d64wb_fixed };
3418     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
3419                                          ARM::VST1q16wb_fixed,
3420                                          ARM::VST1q32wb_fixed,
3421                                          ARM::VST1q64wb_fixed };
3422     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
3423     return;
3424   }
3425 
3426   case ARMISD::VST2_UPD: {
3427     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
3428                                          ARM::VST2d16wb_fixed,
3429                                          ARM::VST2d32wb_fixed,
3430                                          ARM::VST1q64wb_fixed};
3431     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
3432                                          ARM::VST2q16PseudoWB_fixed,
3433                                          ARM::VST2q32PseudoWB_fixed };
3434     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
3435     return;
3436   }
3437 
3438   case ARMISD::VST3_UPD: {
3439     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
3440                                          ARM::VST3d16Pseudo_UPD,
3441                                          ARM::VST3d32Pseudo_UPD,
3442                                          ARM::VST1d64TPseudoWB_fixed};
3443     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3444                                           ARM::VST3q16Pseudo_UPD,
3445                                           ARM::VST3q32Pseudo_UPD };
3446     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
3447                                           ARM::VST3q16oddPseudo_UPD,
3448                                           ARM::VST3q32oddPseudo_UPD };
3449     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
3450     return;
3451   }
3452 
3453   case ARMISD::VST4_UPD: {
3454     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
3455                                          ARM::VST4d16Pseudo_UPD,
3456                                          ARM::VST4d32Pseudo_UPD,
3457                                          ARM::VST1d64QPseudoWB_fixed};
3458     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3459                                           ARM::VST4q16Pseudo_UPD,
3460                                           ARM::VST4q32Pseudo_UPD };
3461     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
3462                                           ARM::VST4q16oddPseudo_UPD,
3463                                           ARM::VST4q32oddPseudo_UPD };
3464     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
3465     return;
3466   }
3467 
3468   case ARMISD::VST2LN_UPD: {
3469     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
3470                                          ARM::VST2LNd16Pseudo_UPD,
3471                                          ARM::VST2LNd32Pseudo_UPD };
3472     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
3473                                          ARM::VST2LNq32Pseudo_UPD };
3474     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
3475     return;
3476   }
3477 
3478   case ARMISD::VST3LN_UPD: {
3479     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
3480                                          ARM::VST3LNd16Pseudo_UPD,
3481                                          ARM::VST3LNd32Pseudo_UPD };
3482     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
3483                                          ARM::VST3LNq32Pseudo_UPD };
3484     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
3485     return;
3486   }
3487 
3488   case ARMISD::VST4LN_UPD: {
3489     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
3490                                          ARM::VST4LNd16Pseudo_UPD,
3491                                          ARM::VST4LNd32Pseudo_UPD };
3492     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
3493                                          ARM::VST4LNq32Pseudo_UPD };
3494     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
3495     return;
3496   }
3497 
3498   case ISD::INTRINSIC_VOID:
3499   case ISD::INTRINSIC_W_CHAIN: {
3500     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
3501     switch (IntNo) {
3502     default:
3503       break;
3504 
3505     case Intrinsic::arm_mrrc:
3506     case Intrinsic::arm_mrrc2: {
3507       SDLoc dl(N);
3508       SDValue Chain = N->getOperand(0);
3509       unsigned Opc;
3510 
3511       if (Subtarget->isThumb())
3512         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
3513       else
3514         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
3515 
3516       SmallVector<SDValue, 5> Ops;
3517       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
3518       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
3519       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
3520 
3521       // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded
3522       // instruction will always be '1111' but it is possible in assembly language to specify
3523       // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.
3524       if (Opc != ARM::MRRC2) {
3525         Ops.push_back(getAL(CurDAG, dl));
3526         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3527       }
3528 
3529       Ops.push_back(Chain);
3530 
3531       // Writes to two registers.
3532       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
3533 
3534       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
3535       return;
3536     }
3537     case Intrinsic::arm_ldaexd:
3538     case Intrinsic::arm_ldrexd: {
3539       SDLoc dl(N);
3540       SDValue Chain = N->getOperand(0);
3541       SDValue MemAddr = N->getOperand(2);
3542       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
3543 
3544       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
3545       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
3546                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
3547 
3548       // arm_ldrexd returns a i64 value in {i32, i32}
3549       std::vector<EVT> ResTys;
3550       if (isThumb) {
3551         ResTys.push_back(MVT::i32);
3552         ResTys.push_back(MVT::i32);
3553       } else
3554         ResTys.push_back(MVT::Untyped);
3555       ResTys.push_back(MVT::Other);
3556 
3557       // Place arguments in the right order.
3558       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
3559                        CurDAG->getRegister(0, MVT::i32), Chain};
3560       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3561       // Transfer memoperands.
3562       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3563       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
3564 
3565       // Remap uses.
3566       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
3567       if (!SDValue(N, 0).use_empty()) {
3568         SDValue Result;
3569         if (isThumb)
3570           Result = SDValue(Ld, 0);
3571         else {
3572           SDValue SubRegIdx =
3573             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
3574           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3575               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3576           Result = SDValue(ResNode,0);
3577         }
3578         ReplaceUses(SDValue(N, 0), Result);
3579       }
3580       if (!SDValue(N, 1).use_empty()) {
3581         SDValue Result;
3582         if (isThumb)
3583           Result = SDValue(Ld, 1);
3584         else {
3585           SDValue SubRegIdx =
3586             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
3587           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
3588               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
3589           Result = SDValue(ResNode,0);
3590         }
3591         ReplaceUses(SDValue(N, 1), Result);
3592       }
3593       ReplaceUses(SDValue(N, 2), OutChain);
3594       CurDAG->RemoveDeadNode(N);
3595       return;
3596     }
3597     case Intrinsic::arm_stlexd:
3598     case Intrinsic::arm_strexd: {
3599       SDLoc dl(N);
3600       SDValue Chain = N->getOperand(0);
3601       SDValue Val0 = N->getOperand(2);
3602       SDValue Val1 = N->getOperand(3);
3603       SDValue MemAddr = N->getOperand(4);
3604 
3605       // Store exclusive double return a i32 value which is the return status
3606       // of the issued store.
3607       const EVT ResTys[] = {MVT::i32, MVT::Other};
3608 
3609       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
3610       // Place arguments in the right order.
3611       SmallVector<SDValue, 7> Ops;
3612       if (isThumb) {
3613         Ops.push_back(Val0);
3614         Ops.push_back(Val1);
3615       } else
3616         // arm_strexd uses GPRPair.
3617         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
3618       Ops.push_back(MemAddr);
3619       Ops.push_back(getAL(CurDAG, dl));
3620       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
3621       Ops.push_back(Chain);
3622 
3623       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
3624       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
3625                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
3626 
3627       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
3628       // Transfer memoperands.
3629       MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
3630       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
3631 
3632       ReplaceNode(N, St);
3633       return;
3634     }
3635 
3636     case Intrinsic::arm_neon_vld1: {
3637       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
3638                                            ARM::VLD1d32, ARM::VLD1d64 };
3639       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3640                                            ARM::VLD1q32, ARM::VLD1q64};
3641       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
3642       return;
3643     }
3644 
3645     case Intrinsic::arm_neon_vld1x2: {
3646       static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
3647                                            ARM::VLD1q32, ARM::VLD1q64 };
3648       static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
3649                                            ARM::VLD1d16QPseudo,
3650                                            ARM::VLD1d32QPseudo,
3651                                            ARM::VLD1d64QPseudo };
3652       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3653       return;
3654     }
3655 
3656     case Intrinsic::arm_neon_vld1x3: {
3657       static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
3658                                            ARM::VLD1d16TPseudo,
3659                                            ARM::VLD1d32TPseudo,
3660                                            ARM::VLD1d64TPseudo };
3661       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
3662                                             ARM::VLD1q16LowTPseudo_UPD,
3663                                             ARM::VLD1q32LowTPseudo_UPD,
3664                                             ARM::VLD1q64LowTPseudo_UPD };
3665       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
3666                                             ARM::VLD1q16HighTPseudo,
3667                                             ARM::VLD1q32HighTPseudo,
3668                                             ARM::VLD1q64HighTPseudo };
3669       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3670       return;
3671     }
3672 
3673     case Intrinsic::arm_neon_vld1x4: {
3674       static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
3675                                            ARM::VLD1d16QPseudo,
3676                                            ARM::VLD1d32QPseudo,
3677                                            ARM::VLD1d64QPseudo };
3678       static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
3679                                             ARM::VLD1q16LowQPseudo_UPD,
3680                                             ARM::VLD1q32LowQPseudo_UPD,
3681                                             ARM::VLD1q64LowQPseudo_UPD };
3682       static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
3683                                             ARM::VLD1q16HighQPseudo,
3684                                             ARM::VLD1q32HighQPseudo,
3685                                             ARM::VLD1q64HighQPseudo };
3686       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3687       return;
3688     }
3689 
3690     case Intrinsic::arm_neon_vld2: {
3691       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
3692                                            ARM::VLD2d32, ARM::VLD1q64 };
3693       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
3694                                            ARM::VLD2q32Pseudo };
3695       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
3696       return;
3697     }
3698 
3699     case Intrinsic::arm_neon_vld3: {
3700       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
3701                                            ARM::VLD3d16Pseudo,
3702                                            ARM::VLD3d32Pseudo,
3703                                            ARM::VLD1d64TPseudo };
3704       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
3705                                             ARM::VLD3q16Pseudo_UPD,
3706                                             ARM::VLD3q32Pseudo_UPD };
3707       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
3708                                             ARM::VLD3q16oddPseudo,
3709                                             ARM::VLD3q32oddPseudo };
3710       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3711       return;
3712     }
3713 
3714     case Intrinsic::arm_neon_vld4: {
3715       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
3716                                            ARM::VLD4d16Pseudo,
3717                                            ARM::VLD4d32Pseudo,
3718                                            ARM::VLD1d64QPseudo };
3719       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
3720                                             ARM::VLD4q16Pseudo_UPD,
3721                                             ARM::VLD4q32Pseudo_UPD };
3722       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
3723                                             ARM::VLD4q16oddPseudo,
3724                                             ARM::VLD4q32oddPseudo };
3725       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3726       return;
3727     }
3728 
3729     case Intrinsic::arm_neon_vld2dup: {
3730       static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
3731                                            ARM::VLD2DUPd32, ARM::VLD1q64 };
3732       static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
3733                                             ARM::VLD2DUPq16EvenPseudo,
3734                                             ARM::VLD2DUPq32EvenPseudo };
3735       static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
3736                                             ARM::VLD2DUPq16OddPseudo,
3737                                             ARM::VLD2DUPq32OddPseudo };
3738       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
3739                    DOpcodes, QOpcodes0, QOpcodes1);
3740       return;
3741     }
3742 
3743     case Intrinsic::arm_neon_vld3dup: {
3744       static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
3745                                            ARM::VLD3DUPd16Pseudo,
3746                                            ARM::VLD3DUPd32Pseudo,
3747                                            ARM::VLD1d64TPseudo };
3748       static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
3749                                             ARM::VLD3DUPq16EvenPseudo,
3750                                             ARM::VLD3DUPq32EvenPseudo };
3751       static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
3752                                             ARM::VLD3DUPq16OddPseudo,
3753                                             ARM::VLD3DUPq32OddPseudo };
3754       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
3755                    DOpcodes, QOpcodes0, QOpcodes1);
3756       return;
3757     }
3758 
3759     case Intrinsic::arm_neon_vld4dup: {
3760       static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
3761                                            ARM::VLD4DUPd16Pseudo,
3762                                            ARM::VLD4DUPd32Pseudo,
3763                                            ARM::VLD1d64QPseudo };
3764       static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
3765                                             ARM::VLD4DUPq16EvenPseudo,
3766                                             ARM::VLD4DUPq32EvenPseudo };
3767       static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
3768                                             ARM::VLD4DUPq16OddPseudo,
3769                                             ARM::VLD4DUPq32OddPseudo };
3770       SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
3771                    DOpcodes, QOpcodes0, QOpcodes1);
3772       return;
3773     }
3774 
3775     case Intrinsic::arm_neon_vld2lane: {
3776       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
3777                                            ARM::VLD2LNd16Pseudo,
3778                                            ARM::VLD2LNd32Pseudo };
3779       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
3780                                            ARM::VLD2LNq32Pseudo };
3781       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
3782       return;
3783     }
3784 
3785     case Intrinsic::arm_neon_vld3lane: {
3786       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
3787                                            ARM::VLD3LNd16Pseudo,
3788                                            ARM::VLD3LNd32Pseudo };
3789       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
3790                                            ARM::VLD3LNq32Pseudo };
3791       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
3792       return;
3793     }
3794 
3795     case Intrinsic::arm_neon_vld4lane: {
3796       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
3797                                            ARM::VLD4LNd16Pseudo,
3798                                            ARM::VLD4LNd32Pseudo };
3799       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
3800                                            ARM::VLD4LNq32Pseudo };
3801       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
3802       return;
3803     }
3804 
3805     case Intrinsic::arm_neon_vst1: {
3806       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
3807                                            ARM::VST1d32, ARM::VST1d64 };
3808       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3809                                            ARM::VST1q32, ARM::VST1q64 };
3810       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
3811       return;
3812     }
3813 
3814     case Intrinsic::arm_neon_vst1x2: {
3815       static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
3816                                            ARM::VST1q32, ARM::VST1q64 };
3817       static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
3818                                            ARM::VST1d16QPseudo,
3819                                            ARM::VST1d32QPseudo,
3820                                            ARM::VST1d64QPseudo };
3821       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3822       return;
3823     }
3824 
3825     case Intrinsic::arm_neon_vst1x3: {
3826       static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
3827                                            ARM::VST1d16TPseudo,
3828                                            ARM::VST1d32TPseudo,
3829                                            ARM::VST1d64TPseudo };
3830       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
3831                                             ARM::VST1q16LowTPseudo_UPD,
3832                                             ARM::VST1q32LowTPseudo_UPD,
3833                                             ARM::VST1q64LowTPseudo_UPD };
3834       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
3835                                             ARM::VST1q16HighTPseudo,
3836                                             ARM::VST1q32HighTPseudo,
3837                                             ARM::VST1q64HighTPseudo };
3838       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3839       return;
3840     }
3841 
3842     case Intrinsic::arm_neon_vst1x4: {
3843       static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
3844                                            ARM::VST1d16QPseudo,
3845                                            ARM::VST1d32QPseudo,
3846                                            ARM::VST1d64QPseudo };
3847       static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
3848                                             ARM::VST1q16LowQPseudo_UPD,
3849                                             ARM::VST1q32LowQPseudo_UPD,
3850                                             ARM::VST1q64LowQPseudo_UPD };
3851       static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
3852                                             ARM::VST1q16HighQPseudo,
3853                                             ARM::VST1q32HighQPseudo,
3854                                             ARM::VST1q64HighQPseudo };
3855       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3856       return;
3857     }
3858 
3859     case Intrinsic::arm_neon_vst2: {
3860       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
3861                                            ARM::VST2d32, ARM::VST1q64 };
3862       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
3863                                            ARM::VST2q32Pseudo };
3864       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
3865       return;
3866     }
3867 
3868     case Intrinsic::arm_neon_vst3: {
3869       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
3870                                            ARM::VST3d16Pseudo,
3871                                            ARM::VST3d32Pseudo,
3872                                            ARM::VST1d64TPseudo };
3873       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
3874                                             ARM::VST3q16Pseudo_UPD,
3875                                             ARM::VST3q32Pseudo_UPD };
3876       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
3877                                             ARM::VST3q16oddPseudo,
3878                                             ARM::VST3q32oddPseudo };
3879       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
3880       return;
3881     }
3882 
3883     case Intrinsic::arm_neon_vst4: {
3884       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
3885                                            ARM::VST4d16Pseudo,
3886                                            ARM::VST4d32Pseudo,
3887                                            ARM::VST1d64QPseudo };
3888       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
3889                                             ARM::VST4q16Pseudo_UPD,
3890                                             ARM::VST4q32Pseudo_UPD };
3891       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
3892                                             ARM::VST4q16oddPseudo,
3893                                             ARM::VST4q32oddPseudo };
3894       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
3895       return;
3896     }
3897 
3898     case Intrinsic::arm_neon_vst2lane: {
3899       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
3900                                            ARM::VST2LNd16Pseudo,
3901                                            ARM::VST2LNd32Pseudo };
3902       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
3903                                            ARM::VST2LNq32Pseudo };
3904       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
3905       return;
3906     }
3907 
3908     case Intrinsic::arm_neon_vst3lane: {
3909       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
3910                                            ARM::VST3LNd16Pseudo,
3911                                            ARM::VST3LNd32Pseudo };
3912       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
3913                                            ARM::VST3LNq32Pseudo };
3914       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
3915       return;
3916     }
3917 
3918     case Intrinsic::arm_neon_vst4lane: {
3919       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
3920                                            ARM::VST4LNd16Pseudo,
3921                                            ARM::VST4LNd32Pseudo };
3922       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
3923                                            ARM::VST4LNq32Pseudo };
3924       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
3925       return;
3926     }
3927     }
3928     break;
3929   }
3930 
3931   case ISD::ATOMIC_CMP_SWAP:
3932     SelectCMP_SWAP(N);
3933     return;
3934   }
3935 
3936   SelectCode(N);
3937 }
3938 
3939 // Inspect a register string of the form
3940 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
3941 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
3942 // and obtain the integer operands from them, adding these operands to the
3943 // provided vector.
3944 static void getIntOperandsFromRegisterString(StringRef RegString,
3945                                              SelectionDAG *CurDAG,
3946                                              const SDLoc &DL,
3947                                              std::vector<SDValue> &Ops) {
3948   SmallVector<StringRef, 5> Fields;
3949   RegString.split(Fields, ':');
3950 
3951   if (Fields.size() > 1) {
3952     bool AllIntFields = true;
3953 
3954     for (StringRef Field : Fields) {
3955       // Need to trim out leading 'cp' characters and get the integer field.
3956       unsigned IntField;
3957       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
3958       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
3959     }
3960 
3961     assert(AllIntFields &&
3962             "Unexpected non-integer value in special register string.");
3963   }
3964 }
3965 
3966 // Maps a Banked Register string to its mask value. The mask value returned is
3967 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
3968 // mask operand, which expresses which register is to be used, e.g. r8, and in
3969 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
3970 // was invalid.
3971 static inline int getBankedRegisterMask(StringRef RegString) {
3972   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
3973   if (!TheReg)
3974      return -1;
3975   return TheReg->Encoding;
3976 }
3977 
3978 // The flags here are common to those allowed for apsr in the A class cores and
3979 // those allowed for the special registers in the M class cores. Returns a
3980 // value representing which flags were present, -1 if invalid.
3981 static inline int getMClassFlagsMask(StringRef Flags) {
3982   return StringSwitch<int>(Flags)
3983           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
3984                          // correct when flags are not permitted
3985           .Case("g", 0x1)
3986           .Case("nzcvq", 0x2)
3987           .Case("nzcvqg", 0x3)
3988           .Default(-1);
3989 }
3990 
3991 // Maps MClass special registers string to its value for use in the
3992 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
3993 // Returns -1 to signify that the string was invalid.
3994 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
3995   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
3996   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
3997   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
3998     return -1;
3999   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
4000 }
4001 
4002 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
4003   // The mask operand contains the special register (R Bit) in bit 4, whether
4004   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
4005   // bits 3-0 contains the fields to be accessed in the special register, set by
4006   // the flags provided with the register.
4007   int Mask = 0;
4008   if (Reg == "apsr") {
4009     // The flags permitted for apsr are the same flags that are allowed in
4010     // M class registers. We get the flag value and then shift the flags into
4011     // the correct place to combine with the mask.
4012     Mask = getMClassFlagsMask(Flags);
4013     if (Mask == -1)
4014       return -1;
4015     return Mask << 2;
4016   }
4017 
4018   if (Reg != "cpsr" && Reg != "spsr") {
4019     return -1;
4020   }
4021 
4022   // This is the same as if the flags were "fc"
4023   if (Flags.empty() || Flags == "all")
4024     return Mask | 0x9;
4025 
4026   // Inspect the supplied flags string and set the bits in the mask for
4027   // the relevant and valid flags allowed for cpsr and spsr.
4028   for (char Flag : Flags) {
4029     int FlagVal;
4030     switch (Flag) {
4031       case 'c':
4032         FlagVal = 0x1;
4033         break;
4034       case 'x':
4035         FlagVal = 0x2;
4036         break;
4037       case 's':
4038         FlagVal = 0x4;
4039         break;
4040       case 'f':
4041         FlagVal = 0x8;
4042         break;
4043       default:
4044         FlagVal = 0;
4045     }
4046 
4047     // This avoids allowing strings where the same flag bit appears twice.
4048     if (!FlagVal || (Mask & FlagVal))
4049       return -1;
4050     Mask |= FlagVal;
4051   }
4052 
4053   // If the register is spsr then we need to set the R bit.
4054   if (Reg == "spsr")
4055     Mask |= 0x10;
4056 
4057   return Mask;
4058 }
4059 
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns true if N was replaced, false to fall back to normal selection.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 1 is the metadata node carrying the register string as its first
  // operand. NOTE(review): the dyn_cast results are dereferenced without null
  // checks, so this assumes a well-formed READ_REGISTER node — confirm.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // Ops is filled only if the string is an ACLE coprocessor description of
  // the form cp<n>:<opc1>:c<CRn>:c<CRm>:<opc2> (or the 3-field 64-bit form).
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the ARM predicate operands (AL condition, no CC register) and
    // the incoming chain (operand 0 of N).
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // Otherwise the string names a register directly; comparisons below are
  // against the lower-cased name.
  std::string SpecialReg = RegString->getString().lower();

  // Banked registers (e.g. "r8_usr") lower to MRSbanked with the encoded
  // register/mode mask as the first operand.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMRS)
                    .Case("fpexc", ARM::VMRS_FPEXC)
                    .Case("fpsid", ARM::VMRS_FPSID)
                    .Case("mvfr0", ARM::VMRS_MVFR0)
                    .Case("mvfr1", ARM::VMRS_MVFR1)
                    .Case("mvfr2", ARM::VMRS_MVFR2)
                    .Case("fpinst", ARM::VMRS_FPINST)
                    .Case("fpinst2", ARM::VMRS_FPINST2)
                    .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // VFP system registers require at least VFP2; mvfr2 additionally
    // requires FP-ARMv8.
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // spsr uses the "system" MRS variant.
  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string; let default selection report the failure.
  return false;
}
4174 
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
// Returns true if N was replaced, false to fall back to normal selection.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 1 is the metadata node carrying the register string as its first
  // operand; operand 2 (and operand 3 for 64-bit writes) is the value being
  // written. NOTE(review): the dyn_cast results are dereferenced without null
  // checks, so this assumes a well-formed WRITE_REGISTER node — confirm.
  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // Ops is filled only if the string is an ACLE coprocessor description.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // The value to write goes after cp<n> and opc1 (positions 0 and 1).
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit writes carry the value in two i32 operands (2 and 3).
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the ARM predicate operands (AL condition, no CC register) and
    // the incoming chain (operand 0 of N).
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Otherwise the string names a register directly; comparisons below are
  // against the lower-cased name.
  std::string SpecialReg = RegString->getString().lower();
  // Banked registers (e.g. "r8_usr") lower to MSRbanked with the encoded
  // register/mode mask as the first operand.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMSR)
                    .Case("fpexc", ARM::VMSR_FPEXC)
                    .Case("fpsid", ARM::VMSR_FPSID)
                    .Case("fpinst", ARM::VMSR_FPINST)
                    .Case("fpinst2", ARM::VMSR_FPINST2)
                    .Default(0);

  if (Opcode) {
    // VFP system registers require at least VFP2.
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split a name like "cpsr_fc" into the register ("cpsr") and its
  // flags suffix ("fc") at the last underscore.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    // Note: the full (unsplit) name is used for M class lookup.
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string; let default selection report the failure.
  return false;
}
4278 
4279 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
4280   std::vector<SDValue> AsmNodeOperands;
4281   unsigned Flag, Kind;
4282   bool Changed = false;
4283   unsigned NumOps = N->getNumOperands();
4284 
4285   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
4286   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
4287   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
4288   // respectively. Since there is no constraint to explicitly specify a
4289   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
4290   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
4291   // them into a GPRPair.
4292 
4293   SDLoc dl(N);
4294   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
4295                                    : SDValue(nullptr,0);
4296 
4297   SmallVector<bool, 8> OpChanged;
4298   // Glue node will be appended late.
4299   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
4300     SDValue op = N->getOperand(i);
4301     AsmNodeOperands.push_back(op);
4302 
4303     if (i < InlineAsm::Op_FirstOperand)
4304       continue;
4305 
4306     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
4307       Flag = C->getZExtValue();
4308       Kind = InlineAsm::getKind(Flag);
4309     }
4310     else
4311       continue;
4312 
4313     // Immediate operands to inline asm in the SelectionDAG are modeled with
4314     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
4315     // the second is a constant with the value of the immediate. If we get here
4316     // and we have a Kind_Imm, skip the next operand, and continue.
4317     if (Kind == InlineAsm::Kind_Imm) {
4318       SDValue op = N->getOperand(++i);
4319       AsmNodeOperands.push_back(op);
4320       continue;
4321     }
4322 
4323     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
4324     if (NumRegs)
4325       OpChanged.push_back(false);
4326 
4327     unsigned DefIdx = 0;
4328     bool IsTiedToChangedOp = false;
4329     // If it's a use that is tied with a previous def, it has no
4330     // reg class constraint.
4331     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
4332       IsTiedToChangedOp = OpChanged[DefIdx];
4333 
4334     // Memory operands to inline asm in the SelectionDAG are modeled with two
4335     // operands: a constant of value InlineAsm::Kind_Mem followed by the input
4336     // operand. If we get here and we have a Kind_Mem, skip the next operand (so
4337     // it doesn't get misinterpreted), and continue. We do this here because
4338     // it's important to update the OpChanged array correctly before moving on.
4339     if (Kind == InlineAsm::Kind_Mem) {
4340       SDValue op = N->getOperand(++i);
4341       AsmNodeOperands.push_back(op);
4342       continue;
4343     }
4344 
4345     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
4346         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
4347       continue;
4348 
4349     unsigned RC;
4350     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
4351     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
4352         || NumRegs != 2)
4353       continue;
4354 
4355     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
4356     SDValue V0 = N->getOperand(i+1);
4357     SDValue V1 = N->getOperand(i+2);
4358     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
4359     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
4360     SDValue PairedReg;
4361     MachineRegisterInfo &MRI = MF->getRegInfo();
4362 
4363     if (Kind == InlineAsm::Kind_RegDef ||
4364         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
4365       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
4366       // the original GPRs.
4367 
4368       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4369       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4370       SDValue Chain = SDValue(N,0);
4371 
4372       SDNode *GU = N->getGluedUser();
4373       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
4374                                                Chain.getValue(1));
4375 
4376       // Extract values from a GPRPair reg and copy to the original GPR reg.
4377       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4378                                                     RegCopy);
4379       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4380                                                     RegCopy);
4381       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
4382                                         RegCopy.getValue(1));
4383       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
4384 
4385       // Update the original glue user.
4386       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
4387       Ops.push_back(T1.getValue(1));
4388       CurDAG->UpdateNodeOperands(GU, Ops);
4389     }
4390     else {
4391       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
4392       // GPRPair and then pass the GPRPair to the inline asm.
4393       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
4394 
4395       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
4396       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
4397                                           Chain.getValue(1));
4398       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
4399                                           T0.getValue(1));
4400       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
4401 
4402       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
4403       // i32 VRs of inline asm with it.
4404       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
4405       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
4406       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
4407 
4408       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
4409       Glue = Chain.getValue(1);
4410     }
4411 
4412     Changed = true;
4413 
4414     if(PairedReg.getNode()) {
4415       OpChanged[OpChanged.size() -1 ] = true;
4416       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
4417       if (IsTiedToChangedOp)
4418         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
4419       else
4420         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
4421       // Replace the current flag.
4422       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
4423           Flag, dl, MVT::i32);
4424       // Add the new register node and skip the original two GPRs.
4425       AsmNodeOperands.push_back(PairedReg);
4426       // Skip the next two GPRs.
4427       i += 2;
4428     }
4429   }
4430 
4431   if (Glue.getNode())
4432     AsmNodeOperands.push_back(Glue);
4433   if (!Changed)
4434     return false;
4435 
4436   SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
4437       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
4438   New->setNodeId(-1);
4439   ReplaceNode(N, New.getNode());
4440   return true;
4441 }
4442 
4443 
4444 bool ARMDAGToDAGISel::
4445 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
4446                              std::vector<SDValue> &OutOps) {
4447   switch(ConstraintID) {
4448   default:
4449     llvm_unreachable("Unexpected asm memory constraint");
4450   case InlineAsm::Constraint_i:
4451     // FIXME: It seems strange that 'i' is needed here since it's supposed to
4452     //        be an immediate and not a memory constraint.
4453     LLVM_FALLTHROUGH;
4454   case InlineAsm::Constraint_m:
4455   case InlineAsm::Constraint_o:
4456   case InlineAsm::Constraint_Q:
4457   case InlineAsm::Constraint_Um:
4458   case InlineAsm::Constraint_Un:
4459   case InlineAsm::Constraint_Uq:
4460   case InlineAsm::Constraint_Us:
4461   case InlineAsm::Constraint_Ut:
4462   case InlineAsm::Constraint_Uv:
4463   case InlineAsm::Constraint_Uy:
4464     // Require the address to be in a register.  That is safe for all ARM
4465     // variants and it is hard to do anything much smarter without knowing
4466     // how the operand is used.
4467     OutOps.push_back(Op);
4468     return false;
4469   }
4470   return true;
4471 }
4472 
/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
4476 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
4477                                      CodeGenOpt::Level OptLevel) {
4478   return new ARMDAGToDAGISel(TM, OptLevel);
4479 }
4480