xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (revision 66fd12cf4896eb08ad8e7a2627537f84ead84dd3)
1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64MachineFunctionInfo.h"
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/ISDOpcodes.h"
18 #include "llvm/CodeGen/SelectionDAGISel.h"
19 #include "llvm/IR/Function.h" // To access function attributes.
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/IntrinsicsAArch64.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/KnownBits.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "aarch64-isel"
32 #define PASS_NAME "AArch64 Instruction Selection"
33 
34 //===--------------------------------------------------------------------===//
35 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36 /// instructions for SelectionDAG operations.
37 ///
38 namespace {
39 
40 class AArch64DAGToDAGISel : public SelectionDAGISel {
41 
42   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43   /// make the right decision when generating code for different targets.
44   const AArch64Subtarget *Subtarget;
45 
46 public:
47   static char ID;
48 
49   AArch64DAGToDAGISel() = delete;
50 
51   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
52                                CodeGenOpt::Level OptLevel)
53       : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}
54 
55   bool runOnMachineFunction(MachineFunction &MF) override {
56     Subtarget = &MF.getSubtarget<AArch64Subtarget>();
57     return SelectionDAGISel::runOnMachineFunction(MF);
58   }
59 
60   void Select(SDNode *Node) override;
61 
62   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
63   /// inline asm expressions.
64   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
65                                     unsigned ConstraintID,
66                                     std::vector<SDValue> &OutOps) override;
67 
68   template <signed Low, signed High, signed Scale>
69   bool SelectRDVLImm(SDValue N, SDValue &Imm);
70 
71   bool tryMLAV64LaneV128(SDNode *N);
72   bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
73   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
74   bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
75   bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
76   bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77   bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78     return SelectShiftedRegister(N, false, Reg, Shift);
79   }
80   bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
81     return SelectShiftedRegister(N, true, Reg, Shift);
82   }
83   bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
84     return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
85   }
86   bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
87     return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
88   }
89   bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
90     return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
91   }
92   bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
93     return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
94   }
95   bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
96     return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
97   }
98   bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
99     return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
100   }
101   bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
102     return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
103   }
104   bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
105     return SelectAddrModeIndexed(N, 1, Base, OffImm);
106   }
107   bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
108     return SelectAddrModeIndexed(N, 2, Base, OffImm);
109   }
110   bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
111     return SelectAddrModeIndexed(N, 4, Base, OffImm);
112   }
113   bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
114     return SelectAddrModeIndexed(N, 8, Base, OffImm);
115   }
116   bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
117     return SelectAddrModeIndexed(N, 16, Base, OffImm);
118   }
119   bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
120     return SelectAddrModeUnscaled(N, 1, Base, OffImm);
121   }
122   bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
123     return SelectAddrModeUnscaled(N, 2, Base, OffImm);
124   }
125   bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
126     return SelectAddrModeUnscaled(N, 4, Base, OffImm);
127   }
128   bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
129     return SelectAddrModeUnscaled(N, 8, Base, OffImm);
130   }
131   bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
132     return SelectAddrModeUnscaled(N, 16, Base, OffImm);
133   }
134   template <unsigned Size, unsigned Max>
135   bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
136     // Test if there is an appropriate addressing mode and check if the
137     // immediate fits.
138     bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
139     if (Found) {
140       if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
141         int64_t C = CI->getSExtValue();
142         if (C <= Max)
143           return true;
144       }
145     }
146 
147     // Otherwise, base only, materialize address in register.
148     Base = N;
149     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
150     return true;
151   }
152 
153   template<int Width>
154   bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
155                          SDValue &SignExtend, SDValue &DoShift) {
156     return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
157   }
158 
159   template<int Width>
160   bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
161                          SDValue &SignExtend, SDValue &DoShift) {
162     return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
163   }
164 
165   bool SelectExtractHigh(SDValue N, SDValue &Res) {
166     if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
167       N = N->getOperand(0);
168     if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
169         !isa<ConstantSDNode>(N->getOperand(1)))
170       return false;
171     EVT VT = N->getValueType(0);
172     EVT LVT = N->getOperand(0).getValueType();
173     unsigned Index = N->getConstantOperandVal(1);
174     if (!VT.is64BitVector() || !LVT.is128BitVector() ||
175         Index != VT.getVectorNumElements())
176       return false;
177     Res = N->getOperand(0);
178     return true;
179   }
180 
181   bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
182     if (N.getOpcode() != AArch64ISD::VLSHR)
183       return false;
184     SDValue Op = N->getOperand(0);
185     EVT VT = Op.getValueType();
186     unsigned ShtAmt = N->getConstantOperandVal(1);
187     if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
188       return false;
189 
190     APInt Imm;
191     if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
192       Imm = APInt(VT.getScalarSizeInBits(),
193                   Op.getOperand(1).getConstantOperandVal(0)
194                       << Op.getOperand(1).getConstantOperandVal(1));
195     else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
196              isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
197       Imm = APInt(VT.getScalarSizeInBits(),
198                   Op.getOperand(1).getConstantOperandVal(0));
199     else
200       return false;
201 
202     if (Imm != 1ULL << (ShtAmt - 1))
203       return false;
204 
205     Res1 = Op.getOperand(0);
206     Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
207     return true;
208   }
209 
210   bool SelectDupZeroOrUndef(SDValue N) {
211     switch(N->getOpcode()) {
212     case ISD::UNDEF:
213       return true;
214     case AArch64ISD::DUP:
215     case ISD::SPLAT_VECTOR: {
216       auto Opnd0 = N->getOperand(0);
217       if (isNullConstant(Opnd0))
218         return true;
219       if (isNullFPConstant(Opnd0))
220         return true;
221       break;
222     }
223     default:
224       break;
225     }
226 
227     return false;
228   }
229 
230   bool SelectDupZero(SDValue N) {
231     switch(N->getOpcode()) {
232     case AArch64ISD::DUP:
233     case ISD::SPLAT_VECTOR: {
234       auto Opnd0 = N->getOperand(0);
235       if (isNullConstant(Opnd0))
236         return true;
237       if (isNullFPConstant(Opnd0))
238         return true;
239       break;
240     }
241     }
242 
243     return false;
244   }
245 
246   template<MVT::SimpleValueType VT>
247   bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
248     return SelectSVEAddSubImm(N, VT, Imm, Shift);
249   }
250 
251   template <MVT::SimpleValueType VT>
252   bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
253     return SelectSVECpyDupImm(N, VT, Imm, Shift);
254   }
255 
256   template <MVT::SimpleValueType VT, bool Invert = false>
257   bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
258     return SelectSVELogicalImm(N, VT, Imm, Invert);
259   }
260 
261   template <MVT::SimpleValueType VT>
262   bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
263     return SelectSVEArithImm(N, VT, Imm);
264   }
265 
266   template <unsigned Low, unsigned High, bool AllowSaturation = false>
267   bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
268     return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
269   }
270 
271   bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
272     if (N->getOpcode() != ISD::SPLAT_VECTOR)
273       return false;
274 
275     EVT EltVT = N->getValueType(0).getVectorElementType();
276     return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
277                              /* High */ EltVT.getFixedSizeInBits(),
278                              /* AllowSaturation */ true, Imm);
279   }
280 
281   // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
282   template<signed Min, signed Max, signed Scale, bool Shift>
283   bool SelectCntImm(SDValue N, SDValue &Imm) {
284     if (!isa<ConstantSDNode>(N))
285       return false;
286 
287     int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
288     if (Shift)
289       MulImm = 1LL << MulImm;
290 
291     if ((MulImm % std::abs(Scale)) != 0)
292       return false;
293 
294     MulImm /= Scale;
295     if ((MulImm >= Min) && (MulImm <= Max)) {
296       Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
297       return true;
298     }
299 
300     return false;
301   }
302 
303   template <signed Max, signed Scale>
304   bool SelectEXTImm(SDValue N, SDValue &Imm) {
305     if (!isa<ConstantSDNode>(N))
306       return false;
307 
308     int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
309 
310     if (MulImm >= 0 && MulImm <= Max) {
311       MulImm *= Scale;
312       Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
313       return true;
314     }
315 
316     return false;
317   }
318 
319   template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
320     if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
321       uint64_t C = CI->getZExtValue();
322       Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
323       return true;
324     }
325     return false;
326   }
327 
328   /// Form sequences of consecutive 64/128-bit registers for use in NEON
329   /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
330   /// between 1 and 4 elements. If it contains a single element, that element
331   /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
332   SDValue createDTuple(ArrayRef<SDValue> Vecs);
333   SDValue createQTuple(ArrayRef<SDValue> Vecs);
334   // Form a sequence of SVE registers for instructions using a list of vectors,
335   // e.g. structured loads and stores (ldN, stN).
336   SDValue createZTuple(ArrayRef<SDValue> Vecs);
337 
338   /// Generic helper for the createDTuple/createQTuple
339   /// functions. Those should almost always be called instead.
340   SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
341                       const unsigned SubRegs[]);
342 
343   void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
344 
345   bool tryIndexedLoad(SDNode *N);
346 
347   bool trySelectStackSlotTagP(SDNode *N);
348   void SelectTagP(SDNode *N);
349 
350   void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
351                      unsigned SubRegIdx);
352   void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
353                          unsigned SubRegIdx);
354   void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
355   void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
356   void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
357                             unsigned Opc_rr, unsigned Opc_ri,
358                             bool IsIntr = false);
359   void SelectWhilePair(SDNode *N, unsigned Opc);
360   void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
361 
362   bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
363   /// SVE Reg+Imm addressing mode.
364   template <int64_t Min, int64_t Max>
365   bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
366                                 SDValue &OffImm);
367   /// SVE Reg+Reg address mode.
368   template <unsigned Scale>
369   bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
370     return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
371   }
372 
373   template <unsigned MaxIdx, unsigned Scale>
374   bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
375     return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
376   }
377 
378   void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
379   void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
380   void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
381   void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
382   void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
383                              unsigned Opc_rr, unsigned Opc_ri);
384   std::tuple<unsigned, SDValue, SDValue>
385   findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
386                            const SDValue &OldBase, const SDValue &OldOffset,
387                            unsigned Scale);
388 
389   bool tryBitfieldExtractOp(SDNode *N);
390   bool tryBitfieldExtractOpFromSExt(SDNode *N);
391   bool tryBitfieldInsertOp(SDNode *N);
392   bool tryBitfieldInsertInZeroOp(SDNode *N);
393   bool tryShiftAmountMod(SDNode *N);
394   bool tryHighFPExt(SDNode *N);
395 
396   bool tryReadRegister(SDNode *N);
397   bool tryWriteRegister(SDNode *N);
398 
399 // Include the pieces autogenerated from the target description.
400 #include "AArch64GenDAGISel.inc"
401 
402 private:
403   bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
404                              SDValue &Shift);
405   bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
406   bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
407                                SDValue &OffImm) {
408     return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
409   }
410   bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
411                                      unsigned Size, SDValue &Base,
412                                      SDValue &OffImm);
413   bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
414                              SDValue &OffImm);
415   bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
416                               SDValue &OffImm);
417   bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
418                          SDValue &Offset, SDValue &SignExtend,
419                          SDValue &DoShift);
420   bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
421                          SDValue &Offset, SDValue &SignExtend,
422                          SDValue &DoShift);
423   bool isWorthFolding(SDValue V) const;
424   bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
425                          SDValue &Offset, SDValue &SignExtend);
426 
427   template<unsigned RegWidth>
428   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
429     return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
430   }
431 
432   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
433 
434   bool SelectCMP_SWAP(SDNode *N);
435 
436   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
437   bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
438   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
439 
440   bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
441   bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
442                          bool AllowSaturation, SDValue &Imm);
443 
444   bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
445   bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
446                                SDValue &Offset);
447   bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
448                           SDValue &Offset, unsigned Scale = 1);
449 
450   bool SelectAllActivePredicate(SDValue N);
451 };
452 } // end anonymous namespace
453 
454 char AArch64DAGToDAGISel::ID = 0;
455 
456 INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
457 
458 /// isIntImmediate - This method tests to see if the node is a constant
459 /// operand. If so, Imm will receive the value.
460 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
461   if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
462     Imm = C->getZExtValue();
463     return true;
464   }
465   return false;
466 }
467 
468 // isIntImmediate - This method tests to see if the node is a constant
469 // operand. If so, Imm will receive the value.
470 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
471   return isIntImmediate(N.getNode(), Imm);
472 }
473 
474 // isOpcWithIntImmediate - This method tests to see if the node has the
475 // specified opcode and an immediate integer as its right operand. If so,
476 // Imm will receive the value.
477 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
478                                   uint64_t &Imm) {
479   return N->getOpcode() == Opc &&
480          isIntImmediate(N->getOperand(1).getNode(), Imm);
481 }
482 
483 // isIntImmediateEq - This method tests to see if N is a constant operand that
484 // is equivalent to 'ImmExpected'.
485 #ifndef NDEBUG
486 static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
487   uint64_t Imm;
488   if (!isIntImmediate(N.getNode(), Imm))
489     return false;
490   return Imm == ImmExpected;
491 }
492 #endif
493 
494 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
495     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
496   switch(ConstraintID) {
497   default:
498     llvm_unreachable("Unexpected asm memory constraint");
499   case InlineAsm::Constraint_m:
500   case InlineAsm::Constraint_o:
501   case InlineAsm::Constraint_Q:
502     // We need to make sure that this one operand does not end up in XZR, thus
503     // require the address to be in a PointerRegClass register.
504     const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
505     const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
506     SDLoc dl(Op);
507     SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
508     SDValue NewOp =
509         SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
510                                        dl, Op.getValueType(),
511                                        Op, RC), 0);
512     OutOps.push_back(NewOp);
513     return false;
514   }
515   return true;
516 }
517 
518 /// SelectArithImmed - Select an immediate value that can be represented as
519 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
520 /// Val set to the 12-bit value and Shift set to the shifter operand.
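/// For example, 0x123 is selected as Val=0x123 with "LSL #0", and 0x456000 as
/// Val=0x456 with "LSL #12", while 0x1001 cannot be represented this way and
/// is rejected.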
521 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
522                                            SDValue &Shift) {
523   // This function is called from the addsub_shifted_imm ComplexPattern,
524   // which lists [imm] as the list of opcodes it's interested in; however,
525   // we still need to check whether the operand is actually an immediate
526   // here because the ComplexPattern opcode list is only used in
527   // root-level opcode matching.
528   if (!isa<ConstantSDNode>(N.getNode()))
529     return false;
530 
531   uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
532   unsigned ShiftAmt;
533 
534   if (Immed >> 12 == 0) {
535     ShiftAmt = 0;
536   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
537     ShiftAmt = 12;
538     Immed = Immed >> 12;
539   } else
540     return false;
541 
542   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
543   SDLoc dl(N);
544   Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
545   Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
546   return true;
547 }
548 
549 /// SelectNegArithImmed - As above, but negates the value before trying to
550 /// select it.
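/// For example, an i32 immediate of -5 (0xfffffffb) is negated to 5 and then
/// selected as Val=5 with "LSL #0", so patterns can, for example, turn an add
/// of -5 into a subtract of #5.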
551 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
552                                               SDValue &Shift) {
553   // This function is called from the addsub_shifted_imm ComplexPattern,
554   // which lists [imm] as the list of opcodes it's interested in; however,
555   // we still need to check whether the operand is actually an immediate
556   // here because the ComplexPattern opcode list is only used in
557   // root-level opcode matching.
558   if (!isa<ConstantSDNode>(N.getNode()))
559     return false;
560 
561   // The immediate operand must be a 24-bit zero-extended immediate.
562   uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
563 
564   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
565   // have the opposite effect on the C flag, so this pattern mustn't match under
566   // those circumstances.
567   if (Immed == 0)
568     return false;
569 
570   if (N.getValueType() == MVT::i32)
571     Immed = ~((uint32_t)Immed) + 1;
572   else
573     Immed = ~Immed + 1ULL;
574   if (Immed & 0xFFFFFFFFFF000000ULL)
575     return false;
576 
577   Immed &= 0xFFFFFFULL;
578   return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
579                           Shift);
580 }
581 
582 /// getShiftTypeForNode - Translate a shift node to the corresponding
583 /// ShiftType value.
584 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
585   switch (N.getOpcode()) {
586   default:
587     return AArch64_AM::InvalidShiftExtend;
588   case ISD::SHL:
589     return AArch64_AM::LSL;
590   case ISD::SRL:
591     return AArch64_AM::LSR;
592   case ISD::SRA:
593     return AArch64_AM::ASR;
594   case ISD::ROTR:
595     return AArch64_AM::ROR;
596   }
597 }
598 
599 /// Determine whether it is worth it to fold SHL into the addressing
600 /// mode.
601 static bool isWorthFoldingSHL(SDValue V) {
602   assert(V.getOpcode() == ISD::SHL && "invalid opcode");
603   // It is worth folding a logical shift of up to three places.
604   auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
605   if (!CSD)
606     return false;
607   unsigned ShiftVal = CSD->getZExtValue();
608   if (ShiftVal > 3)
609     return false;
610 
611   // Check if this particular node is reused in any non-memory related
612   // operation.  If yes, do not try to fold this node into the address
613   // computation, since the computation will be kept.
614   const SDNode *Node = V.getNode();
615   for (SDNode *UI : Node->uses())
616     if (!isa<MemSDNode>(*UI))
617       for (SDNode *UII : UI->uses())
618         if (!isa<MemSDNode>(*UII))
619           return false;
620   return true;
621 }
622 
623 /// Determine whether it is worth it to fold V into an extended register.
624 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
625   // Trivial if we are optimizing for code size or if there is only
626   // one use of the value.
627   if (CurDAG->shouldOptForSize() || V.hasOneUse())
628     return true;
629   // If a subtarget has a fastpath LSL we can fold a logical shift into
630   // the addressing mode and save a cycle.
631   if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
632       isWorthFoldingSHL(V))
633     return true;
634   if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
635     const SDValue LHS = V.getOperand(0);
636     const SDValue RHS = V.getOperand(1);
637     if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
638       return true;
639     if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
640       return true;
641   }
642 
643   // It hurts otherwise, since the value will be reused.
644   return false;
645 }
646 
647 /// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
648 /// to select more shifted-register operands.
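/// For example, for i32 the node (and (srl x, 4), 0xfffffff0) is selected as
/// Reg = (x lsr #8) with Shift = "lsl #4", which computes the same value.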
649 bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
650                                                        SDValue &Shift) {
651   EVT VT = N.getValueType();
652   if (VT != MVT::i32 && VT != MVT::i64)
653     return false;
654 
655   if (N->getOpcode() != ISD::AND || !N->hasOneUse())
656     return false;
657   SDValue LHS = N.getOperand(0);
658   if (!LHS->hasOneUse())
659     return false;
660 
661   unsigned LHSOpcode = LHS->getOpcode();
662   if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
663     return false;
664 
665   ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
666   if (!ShiftAmtNode)
667     return false;
668 
669   uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
670   ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
671   if (!RHSC)
672     return false;
673 
674   APInt AndMask = RHSC->getAPIntValue();
675   unsigned LowZBits, MaskLen;
676   if (!AndMask.isShiftedMask(LowZBits, MaskLen))
677     return false;
678 
679   unsigned BitWidth = N.getValueSizeInBits();
680   SDLoc DL(LHS);
681   uint64_t NewShiftC;
682   unsigned NewShiftOp;
683   if (LHSOpcode == ISD::SHL) {
684     // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
685     // BitWidth != LowZBits + MaskLen doesn't match the pattern
686     if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
687       return false;
688 
689     NewShiftC = LowZBits - ShiftAmtC;
690     NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
691   } else {
692     if (LowZBits == 0)
693       return false;
694 
695     // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
696     NewShiftC = LowZBits + ShiftAmtC;
697     if (NewShiftC >= BitWidth)
698       return false;
699 
700     // SRA needs all high bits
701     if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
702       return false;
703 
704     // SRL high bits can be 0 or 1
705     if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
706       return false;
707 
708     if (LHSOpcode == ISD::SRL)
709       NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
710     else
711       NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
712   }
713 
714   assert(NewShiftC < BitWidth && "Invalid shift amount");
715   SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
716   SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
717   Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
718                                        NewShiftAmt, BitWidthMinus1),
719                 0);
720   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
721   Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
722   return true;
723 }
724 
725 /// SelectShiftedRegister - Select a "shifted register" operand.  If the value
726 /// is not shifted, set the Shift operand to the default of "LSL 0".  The logical
727 /// instructions allow the shifted register to be rotated, but the arithmetic
728 /// instructions do not.  The AllowROR parameter specifies whether ROR is
729 /// supported.
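/// For example, if N is (shl x1, #4), Reg becomes x1 and Shift encodes
/// "lsl #4", so a user such as ADD can consume "x1, lsl #4" directly
/// (provided folding is deemed worthwhile).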
730 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
731                                                 SDValue &Reg, SDValue &Shift) {
732   if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
733     return true;
734 
735   AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
736   if (ShType == AArch64_AM::InvalidShiftExtend)
737     return false;
738   if (!AllowROR && ShType == AArch64_AM::ROR)
739     return false;
740 
741   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
742     unsigned BitSize = N.getValueSizeInBits();
743     unsigned Val = RHS->getZExtValue() & (BitSize - 1);
744     unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
745 
746     Reg = N.getOperand(0);
747     Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
748     return isWorthFolding(N);
749   }
750 
751   return false;
752 }
753 
754 /// getExtendTypeForNode - Translate an extend node to the corresponding
755 /// ExtendType value.
756 static AArch64_AM::ShiftExtendType
757 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
758   if (N.getOpcode() == ISD::SIGN_EXTEND ||
759       N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
760     EVT SrcVT;
761     if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
762       SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
763     else
764       SrcVT = N.getOperand(0).getValueType();
765 
766     if (!IsLoadStore && SrcVT == MVT::i8)
767       return AArch64_AM::SXTB;
768     else if (!IsLoadStore && SrcVT == MVT::i16)
769       return AArch64_AM::SXTH;
770     else if (SrcVT == MVT::i32)
771       return AArch64_AM::SXTW;
772     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
773 
774     return AArch64_AM::InvalidShiftExtend;
775   } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
776              N.getOpcode() == ISD::ANY_EXTEND) {
777     EVT SrcVT = N.getOperand(0).getValueType();
778     if (!IsLoadStore && SrcVT == MVT::i8)
779       return AArch64_AM::UXTB;
780     else if (!IsLoadStore && SrcVT == MVT::i16)
781       return AArch64_AM::UXTH;
782     else if (SrcVT == MVT::i32)
783       return AArch64_AM::UXTW;
784     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
785 
786     return AArch64_AM::InvalidShiftExtend;
787   } else if (N.getOpcode() == ISD::AND) {
788     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
789     if (!CSD)
790       return AArch64_AM::InvalidShiftExtend;
791     uint64_t AndMask = CSD->getZExtValue();
792 
793     switch (AndMask) {
794     default:
795       return AArch64_AM::InvalidShiftExtend;
796     case 0xFF:
797       return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
798     case 0xFFFF:
799       return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
800     case 0xFFFFFFFF:
801       return AArch64_AM::UXTW;
802     }
803   }
804 
805   return AArch64_AM::InvalidShiftExtend;
806 }
807 
808 // Helper for tryMLAV64LaneV128 - Recognize high lane extracts.
809 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
810   if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
811       DL->getOpcode() != AArch64ISD::DUPLANE32)
812     return false;
813 
814   SDValue SV = DL->getOperand(0);
815   if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
816     return false;
817 
818   SDValue EV = SV.getOperand(1);
819   if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
820     return false;
821 
822   ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
823   ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
824   LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
825   LaneOp = EV.getOperand(0);
826 
827   return true;
828 }
829 
830 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
831 // high lane extract.
832 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
833                              SDValue &LaneOp, int &LaneIdx) {
834 
835   if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
836     std::swap(Op0, Op1);
837     if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
838       return false;
839   }
840   StdOp = Op1;
841   return true;
842 }
843 
844 /// tryMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
845 /// is a lane in the upper half of a 128-bit vector.  Recognize and select this
846 /// so that we don't emit unnecessary lane extracts.
847 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
848   SDLoc dl(N);
849   SDValue Op0 = N->getOperand(0);
850   SDValue Op1 = N->getOperand(1);
851   SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
852   SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
853   int LaneIdx = -1; // Will hold the lane index.
854 
855   if (Op1.getOpcode() != ISD::MUL ||
856       !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
857                         LaneIdx)) {
858     std::swap(Op0, Op1);
859     if (Op1.getOpcode() != ISD::MUL ||
860         !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
861                           LaneIdx))
862       return false;
863   }
864 
865   SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
866 
867   SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
868 
869   unsigned MLAOpc = ~0U;
870 
871   switch (N->getSimpleValueType(0).SimpleTy) {
872   default:
873     llvm_unreachable("Unrecognized MLA.");
874   case MVT::v4i16:
875     MLAOpc = AArch64::MLAv4i16_indexed;
876     break;
877   case MVT::v8i16:
878     MLAOpc = AArch64::MLAv8i16_indexed;
879     break;
880   case MVT::v2i32:
881     MLAOpc = AArch64::MLAv2i32_indexed;
882     break;
883   case MVT::v4i32:
884     MLAOpc = AArch64::MLAv4i32_indexed;
885     break;
886   }
887 
888   ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
889   return true;
890 }
891 
892 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
893   SDLoc dl(N);
894   SDValue SMULLOp0;
895   SDValue SMULLOp1;
896   int LaneIdx;
897 
898   if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
899                         LaneIdx))
900     return false;
901 
902   SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
903 
904   SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
905 
906   unsigned SMULLOpc = ~0U;
907 
908   if (IntNo == Intrinsic::aarch64_neon_smull) {
909     switch (N->getSimpleValueType(0).SimpleTy) {
910     default:
911       llvm_unreachable("Unrecognized SMULL.");
912     case MVT::v4i32:
913       SMULLOpc = AArch64::SMULLv4i16_indexed;
914       break;
915     case MVT::v2i64:
916       SMULLOpc = AArch64::SMULLv2i32_indexed;
917       break;
918     }
919   } else if (IntNo == Intrinsic::aarch64_neon_umull) {
920     switch (N->getSimpleValueType(0).SimpleTy) {
921     default:
922       llvm_unreachable("Unrecognized UMULL.");
923     case MVT::v4i32:
924       SMULLOpc = AArch64::UMULLv4i16_indexed;
925       break;
926     case MVT::v2i64:
927       SMULLOpc = AArch64::UMULLv2i32_indexed;
928       break;
929     }
930   } else
931     llvm_unreachable("Unrecognized intrinsic.");
932 
933   ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
934   return true;
935 }
936 
937 /// Instructions that accept extend modifiers like UXTW expect the register
938 /// being extended to be a GPR32, but the incoming DAG might be acting on a
939 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
940 /// this is the case.
941 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
942   if (N.getValueType() == MVT::i32)
943     return N;
944 
945   SDLoc dl(N);
946   SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
947   MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
948                                                dl, MVT::i32, N, SubReg);
949   return SDValue(Node, 0);
950 }
951 
952 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
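// For example, RDVL #imm computes imm * 16 * vscale bytes, so with Scale=16 a
// constant multiplier of 64 is selected as Imm=4 (i.e. RDVL #4).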
953 template<signed Low, signed High, signed Scale>
954 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
955   if (!isa<ConstantSDNode>(N))
956     return false;
957 
958   int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
959   if ((MulImm % std::abs(Scale)) == 0) {
960     int64_t RDVLImm = MulImm / Scale;
961     if ((RDVLImm >= Low) && (RDVLImm <= High)) {
962       Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
963       return true;
964     }
965   }
966 
967   return false;
968 }
969 
970 /// SelectArithExtendedRegister - Select an "extended register" operand.  This
971 /// operand folds in an extend followed by an optional left shift.
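/// For example, (shl (and x1, 0xff), #3) folds to the operand "w1, uxtb #3":
/// the AND supplies the zero-extend and the SHL supplies the left shift.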
972 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
973                                                       SDValue &Shift) {
974   unsigned ShiftVal = 0;
975   AArch64_AM::ShiftExtendType Ext;
976 
977   if (N.getOpcode() == ISD::SHL) {
978     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
979     if (!CSD)
980       return false;
981     ShiftVal = CSD->getZExtValue();
982     if (ShiftVal > 4)
983       return false;
984 
985     Ext = getExtendTypeForNode(N.getOperand(0));
986     if (Ext == AArch64_AM::InvalidShiftExtend)
987       return false;
988 
989     Reg = N.getOperand(0).getOperand(0);
990   } else {
991     Ext = getExtendTypeForNode(N);
992     if (Ext == AArch64_AM::InvalidShiftExtend)
993       return false;
994 
995     Reg = N.getOperand(0);
996 
997     // Don't match if a free 32-bit -> 64-bit zext can be used instead. Use
998     // isDef32 as a heuristic for when the operand is likely to be a 32-bit def.
999     auto isDef32 = [](SDValue N) {
1000       unsigned Opc = N.getOpcode();
1001       return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1002              Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
1003              Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
1004              Opc != ISD::FREEZE;
1005     };
1006     if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1007         isDef32(Reg))
1008       return false;
1009   }
1010 
1011   // AArch64 mandates that the RHS of the operation must use the smallest
1012   // register class that could contain the size being extended from.  Thus,
1013   // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1014   // there might not be an actual 32-bit value in the program.  We can
1015   // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1016   assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1017   Reg = narrowIfNeeded(CurDAG, Reg);
1018   Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1019                                     MVT::i32);
1020   return isWorthFolding(N);
1021 }
1022 
1023 /// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1024 /// operand is referred to by instructions that have an SP operand.
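/// For example, (shl x1, #2) is selected as the operand "x1, uxtx #2", the
/// extended-register form accepted by ADD/SUB when the other operand is SP.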
1025 bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1026                                                   SDValue &Shift) {
1027   unsigned ShiftVal = 0;
1028   AArch64_AM::ShiftExtendType Ext;
1029 
1030   if (N.getOpcode() != ISD::SHL)
1031     return false;
1032 
1033   ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1034   if (!CSD)
1035     return false;
1036   ShiftVal = CSD->getZExtValue();
1037   if (ShiftVal > 4)
1038     return false;
1039 
1040   Ext = AArch64_AM::UXTX;
1041   Reg = N.getOperand(0);
1042   Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1043                                     MVT::i32);
1044   return isWorthFolding(N);
1045 }
1046 
1047 /// If there's a use of this ADDlow that's not itself a load/store then we'll
1048 /// need to create a real ADD instruction from it anyway and there's no point in
1049 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1050 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1051 /// leads to duplicated ADRP instructions.
1052 static bool isWorthFoldingADDlow(SDValue N) {
1053   for (auto *Use : N->uses()) {
1054     if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1055         Use->getOpcode() != ISD::ATOMIC_LOAD &&
1056         Use->getOpcode() != ISD::ATOMIC_STORE)
1057       return false;
1058 
1059     // ldar and stlr have much more restrictive addressing modes (just a
1060     // register).
1061     if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1062       return false;
1063   }
1064 
1065   return true;
1066 }
1067 
1068 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1069 /// immediate" address.  The "Size" argument is the size in bytes of the memory
1070 /// reference, which determines the scale.
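/// For example, with IsSignedImm=true, BW=7 and Size=8, byte offsets that are
/// multiples of 8 in the range [-512, 504] are accepted, and OffImm holds the
/// offset divided by 8.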
1071 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1072                                                         unsigned BW, unsigned Size,
1073                                                         SDValue &Base,
1074                                                         SDValue &OffImm) {
1075   SDLoc dl(N);
1076   const DataLayout &DL = CurDAG->getDataLayout();
1077   const TargetLowering *TLI = getTargetLowering();
1078   if (N.getOpcode() == ISD::FrameIndex) {
1079     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1080     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1081     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1082     return true;
1083   }
1084 
1085   // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1086   // mode selected here doesn't support labels/immediates, only base+offset.
1087   if (CurDAG->isBaseWithConstantOffset(N)) {
1088     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1089       if (IsSignedImm) {
1090         int64_t RHSC = RHS->getSExtValue();
1091         unsigned Scale = Log2_32(Size);
1092         int64_t Range = 0x1LL << (BW - 1);
1093 
1094         if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1095             RHSC < (Range << Scale)) {
1096           Base = N.getOperand(0);
1097           if (Base.getOpcode() == ISD::FrameIndex) {
1098             int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1099             Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1100           }
1101           OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1102           return true;
1103         }
1104       } else {
1105         // unsigned Immediate
1106         uint64_t RHSC = RHS->getZExtValue();
1107         unsigned Scale = Log2_32(Size);
1108         uint64_t Range = 0x1ULL << BW;
1109 
1110         if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1111           Base = N.getOperand(0);
1112           if (Base.getOpcode() == ISD::FrameIndex) {
1113             int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1114             Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1115           }
1116           OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1117           return true;
1118         }
1119       }
1120     }
1121   }
1122   // Base only. The address will be materialized into a register before
1123   // the memory is accessed.
1124   //    add x0, Xbase, #offset
1125   //    stp x1, x2, [x0]
1126   Base = N;
1127   OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1128   return true;
1129 }
1130 
1131 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1132 /// immediate" address.  The "Size" argument is the size in bytes of the memory
1133 /// reference, which determines the scale.
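/// For example, with Size=8 a non-negative constant byte offset that is a
/// multiple of 8 and below 32768 folds into the [Xn, #imm] form, with OffImm
/// holding the offset divided by 8.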
1134 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1135                                               SDValue &Base, SDValue &OffImm) {
1136   SDLoc dl(N);
1137   const DataLayout &DL = CurDAG->getDataLayout();
1138   const TargetLowering *TLI = getTargetLowering();
1139   if (N.getOpcode() == ISD::FrameIndex) {
1140     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1141     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1142     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1143     return true;
1144   }
1145 
1146   if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1147     GlobalAddressSDNode *GAN =
1148         dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1149     Base = N.getOperand(0);
1150     OffImm = N.getOperand(1);
1151     if (!GAN)
1152       return true;
1153 
1154     if (GAN->getOffset() % Size == 0 &&
1155         GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1156       return true;
1157   }
1158 
1159   if (CurDAG->isBaseWithConstantOffset(N)) {
1160     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1161       int64_t RHSC = (int64_t)RHS->getZExtValue();
1162       unsigned Scale = Log2_32(Size);
1163       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
1164         Base = N.getOperand(0);
1165         if (Base.getOpcode() == ISD::FrameIndex) {
1166           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1167           Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1168         }
1169         OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1170         return true;
1171       }
1172     }
1173   }
1174 
1175   // Before falling back to our general case, check if the unscaled
1176   // instructions can handle this. If so, that's preferable.
1177   if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1178     return false;
1179 
1180   // Base only. The address will be materialized into a register before
1181   // the memory is accessed.
1182   //    add x0, Xbase, #offset
1183   //    ldr x0, [x0]
1184   Base = N;
1185   OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1186   return true;
1187 }
1188 
1189 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1190 /// immediate" address.  This should only match when there is an offset that
1191 /// is not valid for a scaled immediate addressing mode.  The "Size" argument
1192 /// is the size in bytes of the memory reference, which is needed here to know
1193 /// what is valid for a scaled immediate.
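/// For example, with Size=8 an offset of 17 is not a multiple of 8, so it is
/// selected here unscaled (OffImm=17), whereas an offset of 16 is rejected
/// here because the scaled mode above can handle it.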
1194 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1195                                                  SDValue &Base,
1196                                                  SDValue &OffImm) {
1197   if (!CurDAG->isBaseWithConstantOffset(N))
1198     return false;
1199   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1200     int64_t RHSC = RHS->getSExtValue();
1201     // If the offset is valid as a scaled immediate, don't match here.
1202     if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
1203         RHSC < (0x1000 << Log2_32(Size)))
1204       return false;
1205     if (RHSC >= -256 && RHSC < 256) {
1206       Base = N.getOperand(0);
1207       if (Base.getOpcode() == ISD::FrameIndex) {
1208         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1209         const TargetLowering *TLI = getTargetLowering();
1210         Base = CurDAG->getTargetFrameIndex(
1211             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1212       }
1213       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1214       return true;
1215     }
1216   }
1217   return false;
1218 }
1219 
1220 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1221   SDLoc dl(N);
1222   SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1223   SDValue ImpDef = SDValue(
1224       CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1225   MachineSDNode *Node = CurDAG->getMachineNode(
1226       TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
1227   return SDValue(Node, 0);
1228 }
1229 
1230 /// Check if the given SHL node (\p N) can be used to form an
1231 /// extended register for an addressing mode.
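/// For example, with Size=8 and WantExtend=true, (shl (and x1, 0xffffffff), #3)
/// yields Offset=w1 and SignExtend=0 (an unsigned UXTW extend), matching the
/// [Xn, Wm, uxtw #3] addressing form.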
1232 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1233                                             bool WantExtend, SDValue &Offset,
1234                                             SDValue &SignExtend) {
1235   assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1236   ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1237   if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1238     return false;
1239 
1240   SDLoc dl(N);
1241   if (WantExtend) {
1242     AArch64_AM::ShiftExtendType Ext =
1243         getExtendTypeForNode(N.getOperand(0), true);
1244     if (Ext == AArch64_AM::InvalidShiftExtend)
1245       return false;
1246 
1247     Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1248     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1249                                            MVT::i32);
1250   } else {
1251     Offset = N.getOperand(0);
1252     SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1253   }
1254 
1255   unsigned LegalShiftVal = Log2_32(Size);
1256   unsigned ShiftVal = CSD->getZExtValue();
1257 
1258   if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1259     return false;
1260 
1261   return isWorthFolding(N);
1262 }
1263 
1264 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1265                                             SDValue &Base, SDValue &Offset,
1266                                             SDValue &SignExtend,
1267                                             SDValue &DoShift) {
1268   if (N.getOpcode() != ISD::ADD)
1269     return false;
1270   SDValue LHS = N.getOperand(0);
1271   SDValue RHS = N.getOperand(1);
1272   SDLoc dl(N);
1273 
1274   // We don't want to match immediate adds here, because they are better lowered
1275   // to the register-immediate addressing modes.
1276   if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1277     return false;
1278 
1279   // Check if this particular node is reused in any non-memory related
1280   // operation.  If yes, do not try to fold this node into the address
1281   // computation, since the computation will be kept.
1282   const SDNode *Node = N.getNode();
1283   for (SDNode *UI : Node->uses()) {
1284     if (!isa<MemSDNode>(*UI))
1285       return false;
1286   }
1287 
1288   // Remember if it is worth folding N when it produces extended register.
1289   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1290 
1291   // Try to match a shifted extend on the RHS.
1292   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1293       SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1294     Base = LHS;
1295     DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1296     return true;
1297   }
1298 
1299   // Try to match a shifted extend on the LHS.
1300   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1301       SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1302     Base = RHS;
1303     DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1304     return true;
1305   }
1306 
1307   // There was no shift, whatever else we find.
1308   DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1309 
1310   AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1311   // Try to match an unshifted extend on the LHS.
1312   if (IsExtendedRegisterWorthFolding &&
1313       (Ext = getExtendTypeForNode(LHS, true)) !=
1314           AArch64_AM::InvalidShiftExtend) {
1315     Base = RHS;
1316     Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1317     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1318                                            MVT::i32);
1319     if (isWorthFolding(LHS))
1320       return true;
1321   }
1322 
1323   // Try to match an unshifted extend on the RHS.
1324   if (IsExtendedRegisterWorthFolding &&
1325       (Ext = getExtendTypeForNode(RHS, true)) !=
1326           AArch64_AM::InvalidShiftExtend) {
1327     Base = LHS;
1328     Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1329     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1330                                            MVT::i32);
1331     if (isWorthFolding(RHS))
1332       return true;
1333   }
1334 
1335   return false;
1336 }
1337 
1338 // Check if the given immediate is preferred by ADD. If an immediate can be
1339 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
1340 // encoded by a single MOVZ, return true.
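// For example, 0x123000 is preferred (it needs "add ..., #0x123, lsl #12" and
// cannot be built with one MOVZ), while 0x230000 is not, since a single
// "movz ..., #0x23, lsl #16" materializes it.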
1341 static bool isPreferredADD(int64_t ImmOff) {
1342   // Constant in [0x0, 0xfff] can be encoded in ADD.
1343   if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1344     return true;
1345   // Check if it can be encoded in an "ADD LSL #12".
1346   if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1347     // As a single MOVZ is faster than an "ADD ... LSL #12", ignore such constants.
1348     return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1349            (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1350   return false;
1351 }
1352 
1353 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1354                                             SDValue &Base, SDValue &Offset,
1355                                             SDValue &SignExtend,
1356                                             SDValue &DoShift) {
1357   if (N.getOpcode() != ISD::ADD)
1358     return false;
1359   SDValue LHS = N.getOperand(0);
1360   SDValue RHS = N.getOperand(1);
1361   SDLoc DL(N);
1362 
1363   // Check if this particular node is reused in any non-memory related
1364   // operation.  If yes, do not try to fold this node into the address
1365   // computation, since the computation will be kept.
1366   const SDNode *Node = N.getNode();
1367   for (SDNode *UI : Node->uses()) {
1368     if (!isa<MemSDNode>(*UI))
1369       return false;
1370   }
1371 
1372   // Watch out if RHS is a wide immediate: it cannot be selected into the
1373   // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
1374   // either. Instead it will use the [BaseReg + 0] address mode and generate
1375   // instructions like:
1376   //     MOV  X0, WideImmediate
1377   //     ADD  X1, BaseReg, X0
1378   //     LDR  X2, [X1, 0]
1379   // For such situation, using [BaseReg, XReg] addressing mode can save one
1380   // ADD/SUB:
1381   //     MOV  X0, WideImmediate
1382   //     LDR  X2, [BaseReg, X0]
1383   if (isa<ConstantSDNode>(RHS)) {
1384     int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1385     unsigned Scale = Log2_32(Size);
1386     // Skip immediates that can be selected by the load/store addressing mode.
1387     // Also skip immediates that can be encoded by a single ADD (SUB is also
1388     // checked by using -ImmOff).
1389     if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1390         isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1391       return false;
1392 
1393     SDValue Ops[] = { RHS };
1394     SDNode *MOVI =
1395         CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1396     SDValue MOVIV = SDValue(MOVI, 0);
1397     // This ADD of two X registers will be selected into [Reg+Reg] mode.
1398     N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1399   }
1400 
1401   // Remember if it is worth folding N when it produces extended register.
1402   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1403 
1404   // Try to match a shifted extend on the RHS.
1405   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1406       SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1407     Base = LHS;
1408     DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1409     return true;
1410   }
1411 
1412   // Try to match a shifted extend on the LHS.
1413   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1414       SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1415     Base = RHS;
1416     DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1417     return true;
1418   }
1419 
1420   // Match any non-shifted, non-extend, non-immediate add expression.
1421   Base = LHS;
1422   Offset = RHS;
1423   SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1424   DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1425   // Reg1 + Reg2 is free: no check needed.
1426   return true;
1427 }
1428 
1429 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1430   static const unsigned RegClassIDs[] = {
1431       AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1432   static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1433                                      AArch64::dsub2, AArch64::dsub3};
1434 
1435   return createTuple(Regs, RegClassIDs, SubRegs);
1436 }
1437 
1438 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1439   static const unsigned RegClassIDs[] = {
1440       AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1441   static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1442                                      AArch64::qsub2, AArch64::qsub3};
1443 
1444   return createTuple(Regs, RegClassIDs, SubRegs);
1445 }
1446 
1447 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1448   static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1449                                          AArch64::ZPR3RegClassID,
1450                                          AArch64::ZPR4RegClassID};
1451   static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1452                                      AArch64::zsub2, AArch64::zsub3};
1453 
1454   return createTuple(Regs, RegClassIDs, SubRegs);
1455 }
1456 
1457 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1458                                          const unsigned RegClassIDs[],
1459                                          const unsigned SubRegs[]) {
1460   // There's no special register-class for a vector-list of 1 element: it's just
1461   // a vector.
1462   if (Regs.size() == 1)
1463     return Regs[0];
1464 
1465   assert(Regs.size() >= 2 && Regs.size() <= 4);
1466 
1467   SDLoc DL(Regs[0]);
1468 
1469   SmallVector<SDValue, 4> Ops;
1470 
1471   // First operand of REG_SEQUENCE is the desired RegClass.
1472   Ops.push_back(
1473       CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1474 
1475   // Then we get pairs of source & subregister-position for the components.
1476   for (unsigned i = 0; i < Regs.size(); ++i) {
1477     Ops.push_back(Regs[i]);
1478     Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1479   }
1480 
1481   SDNode *N =
1482       CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1483   return SDValue(N, 0);
1484 }
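// As an illustrative sketch (register names are placeholders): for three Q
// registers {A, B, C}, the operand list built above is
//   { QQQRegClassID, A, qsub0, B, qsub1, C, qsub2 }
// and the REG_SEQUENCE yields a single Untyped triple-register value.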
1485 
1486 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1487                                       bool isExt) {
1488   SDLoc dl(N);
1489   EVT VT = N->getValueType(0);
1490 
1491   unsigned ExtOff = isExt;
1492 
1493   // Form a REG_SEQUENCE to force register allocation.
1494   unsigned Vec0Off = ExtOff + 1;
1495   SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1496                                N->op_begin() + Vec0Off + NumVecs);
1497   SDValue RegSeq = createQTuple(Regs);
1498 
1499   SmallVector<SDValue, 6> Ops;
1500   if (isExt)
1501     Ops.push_back(N->getOperand(1));
1502   Ops.push_back(RegSeq);
1503   Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1504   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1505 }
1506 
1507 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1508   LoadSDNode *LD = cast<LoadSDNode>(N);
1509   if (LD->isUnindexed())
1510     return false;
1511   EVT VT = LD->getMemoryVT();
1512   EVT DstVT = N->getValueType(0);
1513   ISD::MemIndexedMode AM = LD->getAddressingMode();
1514   bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1515 
1516   // We're not doing validity checking here. That was done when checking
1517   // if we should mark the load as indexed or not. We're just selecting
1518   // the right instruction.
1519   unsigned Opcode = 0;
1520 
1521   ISD::LoadExtType ExtType = LD->getExtensionType();
1522   bool InsertTo64 = false;
1523   if (VT == MVT::i64)
1524     Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1525   else if (VT == MVT::i32) {
1526     if (ExtType == ISD::NON_EXTLOAD)
1527       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1528     else if (ExtType == ISD::SEXTLOAD)
1529       Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1530     else {
1531       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1532       InsertTo64 = true;
1533       // The result of the load is only i32. It's the subreg_to_reg that makes
1534       // it into an i64.
1535       DstVT = MVT::i32;
1536     }
1537   } else if (VT == MVT::i16) {
1538     if (ExtType == ISD::SEXTLOAD) {
1539       if (DstVT == MVT::i64)
1540         Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1541       else
1542         Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1543     } else {
1544       Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1545       InsertTo64 = DstVT == MVT::i64;
1546       // The result of the load is only i32. It's the subreg_to_reg that makes
1547       // it into an i64.
1548       DstVT = MVT::i32;
1549     }
1550   } else if (VT == MVT::i8) {
1551     if (ExtType == ISD::SEXTLOAD) {
1552       if (DstVT == MVT::i64)
1553         Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1554       else
1555         Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1556     } else {
1557       Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1558       InsertTo64 = DstVT == MVT::i64;
1559       // The result of the load is only i32. It's the subreg_to_reg that makes
1560       // it into an i64.
1561       DstVT = MVT::i32;
1562     }
1563   } else if (VT == MVT::f16) {
1564     Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1565   } else if (VT == MVT::bf16) {
1566     Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1567   } else if (VT == MVT::f32) {
1568     Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1569   } else if (VT == MVT::f64 || VT.is64BitVector()) {
1570     Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1571   } else if (VT.is128BitVector()) {
1572     Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1573   } else
1574     return false;
1575   SDValue Chain = LD->getChain();
1576   SDValue Base = LD->getBasePtr();
1577   ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1578   int OffsetVal = (int)OffsetOp->getZExtValue();
1579   SDLoc dl(N);
1580   SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1581   SDValue Ops[] = { Base, Offset, Chain };
1582   SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1583                                        MVT::Other, Ops);
1584 
1585   // Transfer memoperands.
1586   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1587   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1588 
1589   // Either way, we're replacing the node, so tell the caller that.
1590   SDValue LoadedVal = SDValue(Res, 1);
1591   if (InsertTo64) {
1592     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1593     LoadedVal =
1594         SDValue(CurDAG->getMachineNode(
1595                     AArch64::SUBREG_TO_REG, dl, MVT::i64,
1596                     CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1597                     SubReg),
1598                 0);
1599   }
1600 
1601   ReplaceUses(SDValue(N, 0), LoadedVal);
1602   ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1603   ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1604   CurDAG->RemoveDeadNode(N);
1605   return true;
1606 }
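// By way of example, following the table above: a post-incremented
// non-extending i32 load selects LDRWpost, whose results are the written-back
// base (i64), the loaded value and the chain; a pre-incremented sign-extending
// i8 -> i64 load selects LDRSBXpre; and a zero/any-extending i8 -> i64 load
// selects LDRBBpre/LDRBBpost plus a SUBREG_TO_REG to widen the i32 result.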
1607 
1608 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1609                                      unsigned SubRegIdx) {
1610   SDLoc dl(N);
1611   EVT VT = N->getValueType(0);
1612   SDValue Chain = N->getOperand(0);
1613 
1614   SDValue Ops[] = {N->getOperand(2), // Mem operand;
1615                    Chain};
1616 
1617   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1618 
1619   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1620   SDValue SuperReg = SDValue(Ld, 0);
1621   for (unsigned i = 0; i < NumVecs; ++i)
1622     ReplaceUses(SDValue(N, i),
1623         CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1624 
1625   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1626 
1627   // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1628   // because it's too simple to have needed special treatment during lowering.
1629   if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1630     MachineMemOperand *MemOp = MemIntr->getMemOperand();
1631     CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1632   }
1633 
1634   CurDAG->RemoveDeadNode(N);
1635 }
1636 
1637 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1638                                          unsigned Opc, unsigned SubRegIdx) {
1639   SDLoc dl(N);
1640   EVT VT = N->getValueType(0);
1641   SDValue Chain = N->getOperand(0);
1642 
1643   SDValue Ops[] = {N->getOperand(1), // Mem operand
1644                    N->getOperand(2), // Incremental
1645                    Chain};
1646 
1647   const EVT ResTys[] = {MVT::i64, // Type of the write back register
1648                         MVT::Untyped, MVT::Other};
1649 
1650   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1651 
1652   // Update uses of write back register
1653   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1654 
1655   // Update uses of vector list
1656   SDValue SuperReg = SDValue(Ld, 1);
1657   if (NumVecs == 1)
1658     ReplaceUses(SDValue(N, 0), SuperReg);
1659   else
1660     for (unsigned i = 0; i < NumVecs; ++i)
1661       ReplaceUses(SDValue(N, i),
1662           CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1663 
1664   // Update the chain
1665   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1666   CurDAG->RemoveDeadNode(N);
1667 }
1668 
1669 /// Optimize \param OldBase and \param OldOffset selecting the best addressing
1670 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1671 /// new Base and an SDValue representing the new offset.
1672 std::tuple<unsigned, SDValue, SDValue>
1673 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1674                                               unsigned Opc_ri,
1675                                               const SDValue &OldBase,
1676                                               const SDValue &OldOffset,
1677                                               unsigned Scale) {
1678   SDValue NewBase = OldBase;
1679   SDValue NewOffset = OldOffset;
1680   // Detect a possible Reg+Imm addressing mode.
1681   const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1682       N, OldBase, NewBase, NewOffset);
1683 
1684   // Detect a possible reg+reg addressing mode, but only if we haven't already
1685   // detected a Reg+Imm one.
1686   const bool IsRegReg =
1687       !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1688 
1689   // Select the instruction.
1690   return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1691 }
1692 
1693 enum class SelectTypeKind {
1694   Int1 = 0,
1695 };
1696 
1697 /// This function selects an opcode from a list of opcodes, which is
1698 /// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1699 /// element types, in this order.
1700 template <SelectTypeKind Kind>
1701 static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1702   // Only match scalable vector VTs
1703   if (!VT.isScalableVector())
1704     return 0;
1705 
1706   EVT EltVT = VT.getVectorElementType();
1707   switch (Kind) {
1708   case SelectTypeKind::Int1:
1709     if (EltVT != MVT::i1)
1710       return 0;
1711     break;
1712   }
1713 
1714   unsigned Offset;
1715   switch (VT.getVectorMinNumElements()) {
1716   case 16: // 8-bit
1717     Offset = 0;
1718     break;
1719   case 8: // 16-bit
1720     Offset = 1;
1721     break;
1722   case 4: // 32-bit
1723     Offset = 2;
1724     break;
1725   case 2: // 64-bit
1726     Offset = 3;
1727     break;
1728   default:
1729     return 0;
1730   }
1731 
1732   return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1733 }
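// For instance, with Kind == SelectTypeKind::Int1 and an opcode list
// { Op8, Op16, Op32, Op64 } (illustrative names only): nxv16i1 selects Op8,
// nxv8i1 selects Op16, nxv4i1 selects Op32 and nxv2i1 selects Op64; any other
// VT yields 0.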
1734 
1735 void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1736   SDLoc DL(N);
1737   EVT VT = N->getValueType(0);
1738 
1739   SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1740 
1741   SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1742   SDValue SuperReg = SDValue(WhilePair, 0);
1743 
1744   for (unsigned I = 0; I < 2; ++I)
1745     ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1746                                    AArch64::psub0 + I, DL, VT, SuperReg));
1747 
1748   CurDAG->RemoveDeadNode(N);
1749 }
1750 
1751 void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1752                                              unsigned Opcode) {
1753   EVT VT = N->getValueType(0);
1754   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1755   SDValue Ops = createZTuple(Regs);
1756   SDLoc DL(N);
1757   SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1758   SDValue SuperReg = SDValue(Intrinsic, 0);
1759   for (unsigned i = 0; i < NumVecs; ++i)
1760     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1761                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1762 
1763   CurDAG->RemoveDeadNode(N);
1764   return;
1765 }
1766 
1767 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1768                                                unsigned Scale, unsigned Opc_ri,
1769                                                unsigned Opc_rr, bool IsIntr) {
1770   assert(Scale < 4 && "Invalid scaling value.");
1771   SDLoc DL(N);
1772   EVT VT = N->getValueType(0);
1773   SDValue Chain = N->getOperand(0);
1774 
1775   // Optimize addressing mode.
1776   SDValue Base, Offset;
1777   unsigned Opc;
1778   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1779       N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1780       CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1781 
1782   SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1783                    Base,                          // Memory operand
1784                    Offset, Chain};
1785 
1786   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1787 
1788   SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1789   SDValue SuperReg = SDValue(Load, 0);
1790   for (unsigned i = 0; i < NumVecs; ++i)
1791     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1792                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1793 
1794   // Copy chain
1795   unsigned ChainIdx = NumVecs;
1796   ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1797   CurDAG->RemoveDeadNode(N);
1798 }
1799 
1800 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1801                                       unsigned Opc) {
1802   SDLoc dl(N);
1803   EVT VT = N->getOperand(2)->getValueType(0);
1804 
1805   // Form a REG_SEQUENCE to force register allocation.
1806   bool Is128Bit = VT.getSizeInBits() == 128;
1807   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1808   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1809 
1810   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1811   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1812 
1813   // Transfer memoperands.
1814   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1815   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1816 
1817   ReplaceNode(N, St);
1818 }
1819 
1820 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
1821                                                 unsigned Scale, unsigned Opc_rr,
1822                                                 unsigned Opc_ri) {
1823   SDLoc dl(N);
1824 
1825   // Form a REG_SEQUENCE to force register allocation.
1826   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1827   SDValue RegSeq = createZTuple(Regs);
1828 
1829   // Optimize addressing mode.
1830   unsigned Opc;
1831   SDValue Offset, Base;
1832   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1833       N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
1834       CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
1835 
1836   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
1837                    Base,                               // address
1838                    Offset,                             // offset
1839                    N->getOperand(0)};                  // chain
1840   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1841 
1842   ReplaceNode(N, St);
1843 }
1844 
1845 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
1846                                                       SDValue &OffImm) {
1847   SDLoc dl(N);
1848   const DataLayout &DL = CurDAG->getDataLayout();
1849   const TargetLowering *TLI = getTargetLowering();
1850 
1851   // Try to match it for the frame address
1852   if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
1853     int FI = FINode->getIndex();
1854     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1855     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1856     return true;
1857   }
1858 
1859   return false;
1860 }
1861 
1862 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1863                                           unsigned Opc) {
1864   SDLoc dl(N);
1865   EVT VT = N->getOperand(2)->getValueType(0);
1866   const EVT ResTys[] = {MVT::i64,    // Type of the write back register
1867                         MVT::Other}; // Type for the Chain
1868 
1869   // Form a REG_SEQUENCE to force register allocation.
1870   bool Is128Bit = VT.getSizeInBits() == 128;
1871   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1872   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1873 
1874   SDValue Ops[] = {RegSeq,
1875                    N->getOperand(NumVecs + 1), // base register
1876                    N->getOperand(NumVecs + 2), // Incremental
1877                    N->getOperand(0)};          // Chain
1878   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1879 
1880   ReplaceNode(N, St);
1881 }
1882 
1883 namespace {
1884 /// WidenVector - Given a value in the V64 register class, produce the
1885 /// equivalent value in the V128 register class.
1886 class WidenVector {
1887   SelectionDAG &DAG;
1888 
1889 public:
1890   WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1891 
1892   SDValue operator()(SDValue V64Reg) {
1893     EVT VT = V64Reg.getValueType();
1894     unsigned NarrowSize = VT.getVectorNumElements();
1895     MVT EltTy = VT.getVectorElementType().getSimpleVT();
1896     MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1897     SDLoc DL(V64Reg);
1898 
1899     SDValue Undef =
1900         SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1901     return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1902   }
1903 };
1904 } // namespace
1905 
1906 /// NarrowVector - Given a value in the V128 register class, produce the
1907 /// equivalent value in the V64 register class.
1908 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1909   EVT VT = V128Reg.getValueType();
1910   unsigned WideSize = VT.getVectorNumElements();
1911   MVT EltTy = VT.getVectorElementType().getSimpleVT();
1912   MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1913 
1914   return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1915                                     V128Reg);
1916 }
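// A short sketch of the two helpers above: a v2f32 value is widened to v4f32
// by inserting it as the dsub subregister of an IMPLICIT_DEF, and a v4f32
// value is narrowed back to v2f32 by extracting its dsub subregister.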
1917 
1918 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1919                                          unsigned Opc) {
1920   SDLoc dl(N);
1921   EVT VT = N->getValueType(0);
1922   bool Narrow = VT.getSizeInBits() == 64;
1923 
1924   // Form a REG_SEQUENCE to force register allocation.
1925   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1926 
1927   if (Narrow)
1928     transform(Regs, Regs.begin(),
1929                    WidenVector(*CurDAG));
1930 
1931   SDValue RegSeq = createQTuple(Regs);
1932 
1933   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1934 
1935   unsigned LaneNo =
1936       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1937 
1938   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1939                    N->getOperand(NumVecs + 3), N->getOperand(0)};
1940   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1941   SDValue SuperReg = SDValue(Ld, 0);
1942 
1943   EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1944   static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1945                                     AArch64::qsub2, AArch64::qsub3 };
1946   for (unsigned i = 0; i < NumVecs; ++i) {
1947     SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1948     if (Narrow)
1949       NV = NarrowVector(NV, *CurDAG);
1950     ReplaceUses(SDValue(N, i), NV);
1951   }
1952 
1953   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1954   CurDAG->RemoveDeadNode(N);
1955 }
1956 
1957 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1958                                              unsigned Opc) {
1959   SDLoc dl(N);
1960   EVT VT = N->getValueType(0);
1961   bool Narrow = VT.getSizeInBits() == 64;
1962 
1963   // Form a REG_SEQUENCE to force register allocation.
1964   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1965 
1966   if (Narrow)
1967     transform(Regs, Regs.begin(),
1968                    WidenVector(*CurDAG));
1969 
1970   SDValue RegSeq = createQTuple(Regs);
1971 
1972   const EVT ResTys[] = {MVT::i64, // Type of the write back register
1973                         RegSeq->getValueType(0), MVT::Other};
1974 
1975   unsigned LaneNo =
1976       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1977 
1978   SDValue Ops[] = {RegSeq,
1979                    CurDAG->getTargetConstant(LaneNo, dl,
1980                                              MVT::i64),         // Lane Number
1981                    N->getOperand(NumVecs + 2),                  // Base register
1982                    N->getOperand(NumVecs + 3),                  // Incremental
1983                    N->getOperand(0)};
1984   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1985 
1986   // Update uses of the write back register
1987   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1988 
1989   // Update uses of the vector list
1990   SDValue SuperReg = SDValue(Ld, 1);
1991   if (NumVecs == 1) {
1992     ReplaceUses(SDValue(N, 0),
1993                 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1994   } else {
1995     EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1996     static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1997                                       AArch64::qsub2, AArch64::qsub3 };
1998     for (unsigned i = 0; i < NumVecs; ++i) {
1999       SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2000                                                   SuperReg);
2001       if (Narrow)
2002         NV = NarrowVector(NV, *CurDAG);
2003       ReplaceUses(SDValue(N, i), NV);
2004     }
2005   }
2006 
2007   // Update the Chain
2008   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2009   CurDAG->RemoveDeadNode(N);
2010 }
2011 
2012 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2013                                           unsigned Opc) {
2014   SDLoc dl(N);
2015   EVT VT = N->getOperand(2)->getValueType(0);
2016   bool Narrow = VT.getSizeInBits() == 64;
2017 
2018   // Form a REG_SEQUENCE to force register allocation.
2019   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2020 
2021   if (Narrow)
2022     transform(Regs, Regs.begin(),
2023                    WidenVector(*CurDAG));
2024 
2025   SDValue RegSeq = createQTuple(Regs);
2026 
2027   unsigned LaneNo =
2028       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
2029 
2030   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2031                    N->getOperand(NumVecs + 3), N->getOperand(0)};
2032   SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2033 
2034   // Transfer memoperands.
2035   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2036   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2037 
2038   ReplaceNode(N, St);
2039 }
2040 
2041 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2042                                               unsigned Opc) {
2043   SDLoc dl(N);
2044   EVT VT = N->getOperand(2)->getValueType(0);
2045   bool Narrow = VT.getSizeInBits() == 64;
2046 
2047   // Form a REG_SEQUENCE to force register allocation.
2048   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2049 
2050   if (Narrow)
2051     transform(Regs, Regs.begin(),
2052                    WidenVector(*CurDAG));
2053 
2054   SDValue RegSeq = createQTuple(Regs);
2055 
2056   const EVT ResTys[] = {MVT::i64, // Type of the write back register
2057                         MVT::Other};
2058 
2059   unsigned LaneNo =
2060       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
2061 
2062   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2063                    N->getOperand(NumVecs + 2), // Base Register
2064                    N->getOperand(NumVecs + 3), // Incremental
2065                    N->getOperand(0)};
2066   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2067 
2068   // Transfer memoperands.
2069   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2070   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2071 
2072   ReplaceNode(N, St);
2073 }
2074 
2075 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2076                                        unsigned &Opc, SDValue &Opd0,
2077                                        unsigned &LSB, unsigned &MSB,
2078                                        unsigned NumberOfIgnoredLowBits,
2079                                        bool BiggerPattern) {
2080   assert(N->getOpcode() == ISD::AND &&
2081          "N must be a AND operation to call this function");
2082 
2083   EVT VT = N->getValueType(0);
2084 
2085   // Here we can test the type of VT and return false when the type does not
2086   // match, but since it is done prior to that call in the current context
2087   // we turned that into an assert to avoid redundant code.
2088   assert((VT == MVT::i32 || VT == MVT::i64) &&
2089          "Type checking must have been done before calling this function");
2090 
2091   // FIXME: simplify-demanded-bits in DAGCombine will probably have
2092   // changed the AND node to a 32-bit mask operation. We'll have to
2093   // undo that as part of the transform here if we want to catch all
2094   // the opportunities.
2095   // Currently the NumberOfIgnoredLowBits argument helps to recover
2096   // from these situations when matching the bigger pattern (bitfield insert).
2097 
2098   // For unsigned extracts, check for a shift right and mask
2099   uint64_t AndImm = 0;
2100   if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2101     return false;
2102 
2103   const SDNode *Op0 = N->getOperand(0).getNode();
2104 
2105   // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2106   // simplified. Try to undo that
2107   AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2108 
2109   // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2110   if (AndImm & (AndImm + 1))
2111     return false;
2112 
2113   bool ClampMSB = false;
2114   uint64_t SrlImm = 0;
2115   // Handle the SRL + ANY_EXTEND case.
2116   if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2117       isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2118     // Extend the incoming operand of the SRL to 64-bit.
2119     Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2120     // Make sure to clamp the MSB so that we preserve the semantics of the
2121     // original operations.
2122     ClampMSB = true;
2123   } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2124              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2125                                    SrlImm)) {
2126     // If the shift result was truncated, we can still combine them.
2127     Opd0 = Op0->getOperand(0).getOperand(0);
2128 
2129     // Use the type of SRL node.
2130     VT = Opd0->getValueType(0);
2131   } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2132     Opd0 = Op0->getOperand(0);
2133     ClampMSB = (VT == MVT::i32);
2134   } else if (BiggerPattern) {
2135     // Let's pretend a 0 shift right has been performed.
2136     // The resulting code will be at least as good as the original one, and
2137     // it may expose more opportunities for the bitfield insert pattern.
2138     // FIXME: Currently we limit this to the bigger pattern, because
2139     // some optimizations expect AND and not UBFM.
2140     Opd0 = N->getOperand(0);
2141   } else
2142     return false;
2143 
2144   // Bail out on large immediates. This happens when no proper
2145   // combining/constant folding was performed.
2146   if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2147     LLVM_DEBUG(
2148         (dbgs() << N
2149                 << ": Found large shift immediate, this should not happen\n"));
2150     return false;
2151   }
2152 
2153   LSB = SrlImm;
2154   MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
2155                                  : countTrailingOnes<uint64_t>(AndImm)) -
2156         1;
2157   if (ClampMSB)
2158     // Since we're moving the extend before the right shift operation, we need
2159     // to clamp the MSB to make sure we don't shift in undefined bits instead of
2160     // the zeros which would get shifted in with the original right shift
2161     // operation.
2162     MSB = MSB > 31 ? 31 : MSB;
2163 
2164   Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2165   return true;
2166 }
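// Worked example for the path above (values are illustrative): for
//   (and (srl x, 5), 0xff)
// the match produces LSB = 5 and MSB = 5 + 8 - 1 = 12, i.e. a
// UBFM(W|X)ri x, #5, #12 that extracts bits [12:5] of x.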
2167 
2168 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2169                                              SDValue &Opd0, unsigned &Immr,
2170                                              unsigned &Imms) {
2171   assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2172 
2173   EVT VT = N->getValueType(0);
2174   unsigned BitWidth = VT.getSizeInBits();
2175   assert((VT == MVT::i32 || VT == MVT::i64) &&
2176          "Type checking must have been done before calling this function");
2177 
2178   SDValue Op = N->getOperand(0);
2179   if (Op->getOpcode() == ISD::TRUNCATE) {
2180     Op = Op->getOperand(0);
2181     VT = Op->getValueType(0);
2182     BitWidth = VT.getSizeInBits();
2183   }
2184 
2185   uint64_t ShiftImm;
2186   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2187       !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2188     return false;
2189 
2190   unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2191   if (ShiftImm + Width > BitWidth)
2192     return false;
2193 
2194   Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2195   Opd0 = Op.getOperand(0);
2196   Immr = ShiftImm;
2197   Imms = ShiftImm + Width - 1;
2198   return true;
2199 }
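// Worked example (values are illustrative): for
//   (sign_extend_inreg (srl x, 3), i8)
// Width is 8 and ShiftImm is 3, so the node becomes SBFM(W|X)ri x, #3, #10,
// a signed extract of bits [10:3].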
2200 
2201 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2202                                           SDValue &Opd0, unsigned &LSB,
2203                                           unsigned &MSB) {
2204   // We are looking for the following pattern, which basically extracts several
2205   // contiguous bits from the source value and places them starting at the LSB
2206   // of the destination value; all other bits of the destination are set to zero:
2207   //
2208   // Value2 = AND Value, MaskImm
2209   // SRL Value2, ShiftImm
2210   //
2211   // with MaskImm >> ShiftImm to search for the bit width.
2212   // where MaskImm >> ShiftImm gives the width of the extracted bit field.
2213   // This gets selected into a single UBFM:
2214   //
2215   // UBFM Value, ShiftImm, findLastSet(MaskImm)
2216   //
2217 
2218   if (N->getOpcode() != ISD::SRL)
2219     return false;
2220 
2221   uint64_t AndMask = 0;
2222   if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2223     return false;
2224 
2225   Opd0 = N->getOperand(0).getOperand(0);
2226 
2227   uint64_t SrlImm = 0;
2228   if (!isIntImmediate(N->getOperand(1), SrlImm))
2229     return false;
2230 
2231   // Check whether we really have a several-bits extract here.
2232   if (!isMask_64(AndMask >> SrlImm))
2233     return false;
2234 
2235   Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2236   LSB = SrlImm;
2237   MSB = findLastSet(AndMask, ZB_Undefined);
2238   return true;
2239 }
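// Worked example (values are illustrative): for
//   (srl (and x, 0xff0), 4)
// AndMask >> SrlImm == 0xff is a mask, so this becomes UBFM(W|X)ri x, #4, #11
// (LSB = 4, MSB = findLastSet(0xff0) = 11).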
2240 
2241 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2242                                        unsigned &Immr, unsigned &Imms,
2243                                        bool BiggerPattern) {
2244   assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2245          "N must be a SHR/SRA operation to call this function");
2246 
2247   EVT VT = N->getValueType(0);
2248 
2249   // Here we can test the type of VT and return false when the type does not
2250   // match, but since it is done prior to that call in the current context
2251   // we turned that into an assert to avoid redundant code.
2252   assert((VT == MVT::i32 || VT == MVT::i64) &&
2253          "Type checking must have been done before calling this function");
2254 
2255   // Check for AND + SRL doing several bits extract.
2256   if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2257     return true;
2258 
2259   // We're looking for a shift of a shift.
2260   uint64_t ShlImm = 0;
2261   uint64_t TruncBits = 0;
2262   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2263     Opd0 = N->getOperand(0).getOperand(0);
2264   } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2265              N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2266     // We are looking for a shift of truncate. Truncate from i64 to i32 could
2267     // be considered as setting high 32 bits as zero. Our strategy here is to
2268     // always generate 64bit UBFM. This consistency will help the CSE pass
2269     // later find more redundancy.
2270     Opd0 = N->getOperand(0).getOperand(0);
2271     TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2272     VT = Opd0.getValueType();
2273     assert(VT == MVT::i64 && "the promoted type should be i64");
2274   } else if (BiggerPattern) {
2275     // Let's pretend a 0 shift left has been performed.
2276     // FIXME: Currently we limit this to the bigger pattern case,
2277     // because some optimizations expect AND and not UBFM
2278     Opd0 = N->getOperand(0);
2279   } else
2280     return false;
2281 
2282   // Missing combines/constant folding may have left us with strange
2283   // constants.
2284   if (ShlImm >= VT.getSizeInBits()) {
2285     LLVM_DEBUG(
2286         (dbgs() << N
2287                 << ": Found large shift immediate, this should not happen\n"));
2288     return false;
2289   }
2290 
2291   uint64_t SrlImm = 0;
2292   if (!isIntImmediate(N->getOperand(1), SrlImm))
2293     return false;
2294 
2295   assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2296          "bad amount in shift node!");
2297   int immr = SrlImm - ShlImm;
2298   Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2299   Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2300   // SRA requires a signed extraction
2301   if (VT == MVT::i32)
2302     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2303   else
2304     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2305   return true;
2306 }
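// Worked example (values are illustrative): for an i64
//   (srl (shl x, 4), 12)
// ShlImm = 4 and SrlImm = 12, giving Immr = 12 - 4 = 8 and
// Imms = 64 - 4 - 1 = 59, i.e. UBFMXri x, #8, #59, an unsigned extract of
// bits [59:8] of x.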
2307 
2308 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2309   assert(N->getOpcode() == ISD::SIGN_EXTEND);
2310 
2311   EVT VT = N->getValueType(0);
2312   EVT NarrowVT = N->getOperand(0)->getValueType(0);
2313   if (VT != MVT::i64 || NarrowVT != MVT::i32)
2314     return false;
2315 
2316   uint64_t ShiftImm;
2317   SDValue Op = N->getOperand(0);
2318   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2319     return false;
2320 
2321   SDLoc dl(N);
2322   // Extend the incoming operand of the shift to 64-bits.
2323   SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2324   unsigned Immr = ShiftImm;
2325   unsigned Imms = NarrowVT.getSizeInBits() - 1;
2326   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2327                    CurDAG->getTargetConstant(Imms, dl, VT)};
2328   CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2329   return true;
2330 }
2331 
2332 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half
2333 /// extract of a subvector.
2334 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
2335   assert(N->getOpcode() == ISD::FP_EXTEND);
2336 
2337   // There are 2 forms of fcvtl2 - extend to double or extend to float.
2338   SDValue Extract = N->getOperand(0);
2339   EVT VT = N->getValueType(0);
2340   EVT NarrowVT = Extract.getValueType();
2341   if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
2342       (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
2343     return false;
2344 
2345   // Optionally look past a bitcast.
2346   Extract = peekThroughBitcasts(Extract);
2347   if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2348     return false;
2349 
2350   // Match extract from start of high half index.
2351   // Example: v8i16 -> v4i16 means the extract must begin at index 4.
2352   unsigned ExtractIndex = Extract.getConstantOperandVal(1);
2353   if (ExtractIndex != Extract.getValueType().getVectorNumElements())
2354     return false;
2355 
2356   auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
2357   CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
2358   return true;
2359 }
2360 
2361 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2362                                 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2363                                 unsigned NumberOfIgnoredLowBits = 0,
2364                                 bool BiggerPattern = false) {
2365   if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2366     return false;
2367 
2368   switch (N->getOpcode()) {
2369   default:
2370     if (!N->isMachineOpcode())
2371       return false;
2372     break;
2373   case ISD::AND:
2374     return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2375                                       NumberOfIgnoredLowBits, BiggerPattern);
2376   case ISD::SRL:
2377   case ISD::SRA:
2378     return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2379 
2380   case ISD::SIGN_EXTEND_INREG:
2381     return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2382   }
2383 
2384   unsigned NOpc = N->getMachineOpcode();
2385   switch (NOpc) {
2386   default:
2387     return false;
2388   case AArch64::SBFMWri:
2389   case AArch64::UBFMWri:
2390   case AArch64::SBFMXri:
2391   case AArch64::UBFMXri:
2392     Opc = NOpc;
2393     Opd0 = N->getOperand(0);
2394     Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
2395     Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
2396     return true;
2397   }
2398   // Unreachable
2399   return false;
2400 }
2401 
2402 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2403   unsigned Opc, Immr, Imms;
2404   SDValue Opd0;
2405   if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2406     return false;
2407 
2408   EVT VT = N->getValueType(0);
2409   SDLoc dl(N);
2410 
2411   // If the bit extract operation is 64bit but the original type is 32bit, we
2412   // need to add one EXTRACT_SUBREG.
2413   if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2414     SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2415                        CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2416 
2417     SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2418     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
2419     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2420                                           MVT::i32, SDValue(BFM, 0), SubReg));
2421     return true;
2422   }
2423 
2424   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2425                    CurDAG->getTargetConstant(Imms, dl, VT)};
2426   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2427   return true;
2428 }
2429 
2430 /// Does DstMask form a complementary pair with the mask provided by
2431 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2432 /// this asks whether DstMask zeroes precisely those bits that will be set by
2433 /// the other half.
2434 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2435                               unsigned NumberOfIgnoredHighBits, EVT VT) {
2436   assert((VT == MVT::i32 || VT == MVT::i64) &&
2437          "i32 or i64 mask type expected!");
2438   unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2439 
2440   APInt SignificantDstMask = APInt(BitWidth, DstMask);
2441   APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2442 
2443   return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2444          (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2445 }
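// For instance (illustrative values): with an i32 VT and no ignored high
// bits, DstMask == 0xffff0000 is complementary to inserted bits whose
// possibly-set positions are exactly 0x0000ffff -- the AND of the two masks
// is zero and their OR is all ones, so a BFI/BFXIL can combine them.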
2446 
2447 // Look for bits that will be useful for later uses.
2448 // A bit is considered useless as soon as it is dropped and is never used
2449 // before being dropped.
2450 // E.g., looking for the useful bits of x:
2451 // 1. y = x & 0x7
2452 // 2. z = y >> 2
2453 // After #1, the useful bits of x are 0x7; these useful bits live through
2454 // y.
2455 // After #2, the useful bits of x are 0x4.
2456 // However, if x is used by an unpredictable instruction, then all of its
2457 // bits are useful.
2458 // E.g.
2459 // 1. y = x & 0x7
2460 // 2. z = y >> 2
2461 // 3. str x, [@x]
2462 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2463 
2464 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2465                                               unsigned Depth) {
2466   uint64_t Imm =
2467       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2468   Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2469   UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2470   getUsefulBits(Op, UsefulBits, Depth + 1);
2471 }
2472 
2473 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2474                                              uint64_t Imm, uint64_t MSB,
2475                                              unsigned Depth) {
2476   // inherit the bitwidth value
2477   APInt OpUsefulBits(UsefulBits);
2478   OpUsefulBits = 1;
2479 
2480   if (MSB >= Imm) {
2481     OpUsefulBits <<= MSB - Imm + 1;
2482     --OpUsefulBits;
2483     // The interesting part will be in the lower part of the result
2484     getUsefulBits(Op, OpUsefulBits, Depth + 1);
2485     // The interesting part was starting at Imm in the argument
2486     OpUsefulBits <<= Imm;
2487   } else {
2488     OpUsefulBits <<= MSB + 1;
2489     --OpUsefulBits;
2490     // The interesting part will be shifted in the result
2491     OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2492     getUsefulBits(Op, OpUsefulBits, Depth + 1);
2493     // The interesting part was at zero in the argument
2494     OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2495   }
2496 
2497   UsefulBits &= OpUsefulBits;
2498 }
2499 
2500 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2501                                   unsigned Depth) {
2502   uint64_t Imm =
2503       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2504   uint64_t MSB =
2505       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2506 
2507   getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2508 }
2509 
2510 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2511                                               unsigned Depth) {
2512   uint64_t ShiftTypeAndValue =
2513       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2514   APInt Mask(UsefulBits);
2515   Mask.clearAllBits();
2516   Mask.flipAllBits();
2517 
2518   if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2519     // Shift Left
2520     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2521     Mask <<= ShiftAmt;
2522     getUsefulBits(Op, Mask, Depth + 1);
2523     Mask.lshrInPlace(ShiftAmt);
2524   } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2525     // Shift Right
2526     // We do not handle AArch64_AM::ASR, because the sign will change the
2527     // number of useful bits
2528     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2529     Mask.lshrInPlace(ShiftAmt);
2530     getUsefulBits(Op, Mask, Depth + 1);
2531     Mask <<= ShiftAmt;
2532   } else
2533     return;
2534 
2535   UsefulBits &= Mask;
2536 }
2537 
2538 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2539                                  unsigned Depth) {
2540   uint64_t Imm =
2541       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2542   uint64_t MSB =
2543       cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2544 
2545   APInt OpUsefulBits(UsefulBits);
2546   OpUsefulBits = 1;
2547 
2548   APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2549   ResultUsefulBits.flipAllBits();
2550   APInt Mask(UsefulBits.getBitWidth(), 0);
2551 
2552   getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2553 
2554   if (MSB >= Imm) {
2555     // The instruction is a BFXIL.
2556     uint64_t Width = MSB - Imm + 1;
2557     uint64_t LSB = Imm;
2558 
2559     OpUsefulBits <<= Width;
2560     --OpUsefulBits;
2561 
2562     if (Op.getOperand(1) == Orig) {
2563       // Copy the low bits from the result to bits starting from LSB.
2564       Mask = ResultUsefulBits & OpUsefulBits;
2565       Mask <<= LSB;
2566     }
2567 
2568     if (Op.getOperand(0) == Orig)
2569       // Bits starting from LSB in the input contribute to the result.
2570       Mask |= (ResultUsefulBits & ~OpUsefulBits);
2571   } else {
2572     // The instruction is a BFI.
2573     uint64_t Width = MSB + 1;
2574     uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2575 
2576     OpUsefulBits <<= Width;
2577     --OpUsefulBits;
2578     OpUsefulBits <<= LSB;
2579 
2580     if (Op.getOperand(1) == Orig) {
2581       // Copy the bits from the result to the zero bits.
2582       Mask = ResultUsefulBits & OpUsefulBits;
2583       Mask.lshrInPlace(LSB);
2584     }
2585 
2586     if (Op.getOperand(0) == Orig)
2587       Mask |= (ResultUsefulBits & ~OpUsefulBits);
2588   }
2589 
2590   UsefulBits &= Mask;
2591 }
2592 
2593 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2594                                 SDValue Orig, unsigned Depth) {
2595 
2596   // Users of this node should have already been instruction selected
2597   // FIXME: Can we turn that into an assert?
2598   if (!UserNode->isMachineOpcode())
2599     return;
2600 
2601   switch (UserNode->getMachineOpcode()) {
2602   default:
2603     return;
2604   case AArch64::ANDSWri:
2605   case AArch64::ANDSXri:
2606   case AArch64::ANDWri:
2607   case AArch64::ANDXri:
2608     // We increment Depth only when we call getUsefulBits.
2609     return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2610                                              Depth);
2611   case AArch64::UBFMWri:
2612   case AArch64::UBFMXri:
2613     return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2614 
2615   case AArch64::ORRWrs:
2616   case AArch64::ORRXrs:
2617     if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2618       getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2619                                         Depth);
2620     return;
2621   case AArch64::BFMWri:
2622   case AArch64::BFMXri:
2623     return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2624 
2625   case AArch64::STRBBui:
2626   case AArch64::STURBBi:
2627     if (UserNode->getOperand(0) != Orig)
2628       return;
2629     UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2630     return;
2631 
2632   case AArch64::STRHHui:
2633   case AArch64::STURHHi:
2634     if (UserNode->getOperand(0) != Orig)
2635       return;
2636     UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2637     return;
2638   }
2639 }
2640 
2641 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2642   if (Depth >= SelectionDAG::MaxRecursionDepth)
2643     return;
2644   // Initialize UsefulBits
2645   if (!Depth) {
2646     unsigned Bitwidth = Op.getScalarValueSizeInBits();
2647     // At the beginning, assume every produced bit is useful.
2648     UsefulBits = APInt(Bitwidth, 0);
2649     UsefulBits.flipAllBits();
2650   }
2651   APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2652 
2653   for (SDNode *Node : Op.getNode()->uses()) {
2654     // A use cannot produce useful bits
2655     APInt UsefulBitsForUse = APInt(UsefulBits);
2656     getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2657     UsersUsefulBits |= UsefulBitsForUse;
2658   }
2659   // UsefulBits contains the produced bits that are meaningful for the
2660   // current definition, thus a user cannot make a bit meaningful at
2661   // this point
2662   UsefulBits &= UsersUsefulBits;
2663 }
2664 
2665 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2666 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2667 /// 0, return Op unchanged.
2668 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2669   if (ShlAmount == 0)
2670     return Op;
2671 
2672   EVT VT = Op.getValueType();
2673   SDLoc dl(Op);
2674   unsigned BitWidth = VT.getSizeInBits();
2675   unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2676 
2677   SDNode *ShiftNode;
2678   if (ShlAmount > 0) {
2679     // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2680     ShiftNode = CurDAG->getMachineNode(
2681         UBFMOpc, dl, VT, Op,
2682         CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2683         CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2684   } else {
2685     // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #31
2686     assert(ShlAmount < 0 && "expected right shift");
2687     int ShrAmount = -ShlAmount;
2688     ShiftNode = CurDAG->getMachineNode(
2689         UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2690         CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2691   }
2692 
2693   return SDValue(ShiftNode, 0);
2694 }
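// Worked example (values are illustrative): on i32, a notional left shift by
// 3 is emitted as UBFMWri Op, #29, #28 (i.e. LSL #3), while ShlAmount == -3
// is emitted as UBFMWri Op, #3, #31 (i.e. LSR #3).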
2695 
2696 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
2697 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2698                                            bool BiggerPattern,
2699                                            const uint64_t NonZeroBits,
2700                                            SDValue &Src, int &DstLSB,
2701                                            int &Width);
2702 
2703 // For bit-field-positioning pattern "(shl VAL, N)".
2704 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2705                                            bool BiggerPattern,
2706                                            const uint64_t NonZeroBits,
2707                                            SDValue &Src, int &DstLSB,
2708                                            int &Width);
2709 
2710 /// Does this tree qualify as an attempt to move a bitfield into position,
2711 /// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
2712 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2713                                     bool BiggerPattern, SDValue &Src,
2714                                     int &DstLSB, int &Width) {
2715   EVT VT = Op.getValueType();
2716   unsigned BitWidth = VT.getSizeInBits();
2717   (void)BitWidth;
2718   assert(BitWidth == 32 || BitWidth == 64);
2719 
2720   KnownBits Known = CurDAG->computeKnownBits(Op);
2721 
2722   // Non-zero in the sense that they're not provably zero, which is the key
2723   // point if we want to use this value
2724   const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2725   if (!isShiftedMask_64(NonZeroBits))
2726     return false;
2727 
2728   switch (Op.getOpcode()) {
2729   default:
2730     break;
2731   case ISD::AND:
2732     return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
2733                                           NonZeroBits, Src, DstLSB, Width);
2734   case ISD::SHL:
2735     return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
2736                                           NonZeroBits, Src, DstLSB, Width);
2737   }
2738 
2739   return false;
2740 }
2741 
2742 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2743                                            bool BiggerPattern,
2744                                            const uint64_t NonZeroBits,
2745                                            SDValue &Src, int &DstLSB,
2746                                            int &Width) {
2747   assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2748 
2749   EVT VT = Op.getValueType();
2750   assert((VT == MVT::i32 || VT == MVT::i64) &&
2751          "Caller guarantees VT is one of i32 or i64");
2752   (void)VT;
2753 
2754   uint64_t AndImm;
2755   if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
2756     return false;
2757 
2758   // If (~AndImm & NonZeroBits) is not zero at POS, we know that
2759   //   1) ((AndImm & (1 << POS)) == 0)
2760   //   2) the result of AND is not zero at POS bit (according to NonZeroBits)
2761   //
2762   // 1) and 2) don't agree so something must be wrong (e.g., in
2763   // 'SelectionDAG::computeKnownBits')
2764   assert((~AndImm & NonZeroBits) == 0 &&
2765          "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
2766 
2767   SDValue AndOp0 = Op.getOperand(0);
2768 
2769   uint64_t ShlImm;
2770   SDValue ShlOp0;
2771   if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
2772     // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
2773     ShlOp0 = AndOp0.getOperand(0);
2774   } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
2775              isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
2776                                    ShlImm)) {
2777     // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
2778 
2779     // ShlVal == shl(val, N), which is a left shift on a smaller type.
2780     SDValue ShlVal = AndOp0.getOperand(0);
2781 
2782     // Since this is after type legalization and ShlVal is extended to MVT::i64,
2783     // expect ShlVal's type to be MVT::i32.
2784     assert((ShlVal.getValueType() == MVT::i32) && "Expect ShlVal to be MVT::i32.");
2785 
2786     // Widens 'val' to MVT::i64 as the source of bit field positioning.
2787     ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
2788   } else
2789     return false;
2790 
2791   // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
2792   // then we'll end up generating AndOp0+UBFIZ instead of just keeping
2793   // AndOp0+AND.
2794   if (!BiggerPattern && !AndOp0.hasOneUse())
2795     return false;
2796 
2797   DstLSB = countTrailingZeros(NonZeroBits);
2798   Width = countTrailingOnes(NonZeroBits >> DstLSB);
2799 
2800   // Bail out on large Width. This happens when no proper combining / constant
2801   // folding was performed.
2802   if (Width >= (int)VT.getSizeInBits()) {
2803     // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t,
2804     // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
2805     // to "val".
2806     // If VT is i32, Width >= 32 means:
2807     // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
2808     //   demands at least 'Width' bits (after the dag-combiner). Together with the
2809     //   `any_extend` Op (undefined higher bits), this indicates a missed
2810     //   combination when lowering the 'and' IR instruction to a machine IR instruction.
2811     LLVM_DEBUG(
2812         dbgs()
2813         << "Found large Width in bit-field-positioning -- this indicates no "
2814            "proper combining / constant folding was performed\n");
2815     return false;
2816   }
2817 
2818   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2819   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2820   // amount.  BiggerPattern is true when this pattern is being matched for BFI,
2821   // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2822   // which case it is not profitable to insert an extra shift.
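       // For example (illustrative), with ShlImm == 4 and NonZeroBits == 0xff00 the
       // field lands at DstLSB == 8, so matching requires an extra LSR #4 on 'val'
       // (getLeftShift below with amount -4); that extra shift only pays off for BFI.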
2823   if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
2824     return false;
2825 
2826   Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
2827   return true;
2828 }
2829 
2830 // For node (shl (and val, mask), N)), returns true if the node is equivalent to
2831 // UBFIZ.
2832 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
2833                                               SDValue &Src, int &DstLSB,
2834                                               int &Width) {
2835   // Caller should have verified that Op is a left shift with a constant shift
2836   // amount; the asserts below check that.
2837   assert(Op.getOpcode() == ISD::SHL &&
2838          "Op.getNode() should be a SHL node to call this function");
2839   assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
2840          "Op.getNode() should shift ShlImm to call this function");
2841 
2842   uint64_t AndImm = 0;
2843   SDValue Op0 = Op.getOperand(0);
2844   if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
2845     return false;
2846 
2847   const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
2848   if (isMask_64(ShiftedAndImm)) {
2849     // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
2850     // should end with Mask, and could be prefixed with random bits if those
2851     // bits are shifted out.
2852     //
2853     // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
2854     // the bits of the AND result corresponding to {x,y,z} are shifted out, so
2855     // it's fine to not extract them.
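         // Concretely (illustrative, i64): (shl (and x, 0x1f), 8) matches here with
         // Width == 5, DstLSB == 8 and Src == x, i.e. a UBFIZ of 5 bits at bit 8.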
2856     Width = countTrailingOnes(ShiftedAndImm);
2857     DstLSB = ShlImm;
2858     Src = Op0.getOperand(0);
2859     return true;
2860   }
2861   return false;
2862 }
2863 
2864 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2865                                            bool BiggerPattern,
2866                                            const uint64_t NonZeroBits,
2867                                            SDValue &Src, int &DstLSB,
2868                                            int &Width) {
2869   assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2870 
2871   EVT VT = Op.getValueType();
2872   assert((VT == MVT::i32 || VT == MVT::i64) &&
2873          "Caller guarantees that type is i32 or i64");
2874   (void)VT;
2875 
2876   uint64_t ShlImm;
2877   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2878     return false;
2879 
2880   if (!BiggerPattern && !Op.hasOneUse())
2881     return false;
2882 
2883   if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
2884     return true;
2885 
2886   DstLSB = countTrailingZeros(NonZeroBits);
2887   Width = countTrailingOnes(NonZeroBits >> DstLSB);
2888 
2889   if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
2890     return false;
2891 
2892   Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
2893   return true;
2894 }
2895 
2896 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2897   assert(VT == MVT::i32 || VT == MVT::i64);
2898   if (VT == MVT::i32)
2899     return isShiftedMask_32(Mask);
2900   return isShiftedMask_64(Mask);
2901 }
2902 
2903 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2904 // inserted only sets known zero bits.
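     // Illustrative example (i32, assuming nothing is known about x):
     //   or (and x, 0xffff00ff), 0x5500
     // has LSB == 8 and Width == 8, and is selected below as a MOVi32imm of 0x55
     // followed by BFM (printed as "bfi") at bit 8.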
2905 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2906   assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2907 
2908   EVT VT = N->getValueType(0);
2909   if (VT != MVT::i32 && VT != MVT::i64)
2910     return false;
2911 
2912   unsigned BitWidth = VT.getSizeInBits();
2913 
2914   uint64_t OrImm;
2915   if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2916     return false;
2917 
2918   // Skip this transformation if the OR immediate can be encoded directly in an
2919   // ORR; in that case we would only trade an AND+ORR for ORR+BFI/BFXIL, which
2920   // is most likely performance neutral.
2921   if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2922     return false;
2923 
2924   uint64_t MaskImm;
2925   SDValue And = N->getOperand(0);
2926   // Must be a single-use AND with an immediate operand.
2927   if (!And.hasOneUse() ||
2928       !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2929     return false;
2930 
2931   // Compute the Known Zero for the AND as this allows us to catch more general
2932   // cases than just looking for AND with imm.
2933   KnownBits Known = CurDAG->computeKnownBits(And);
2934 
2935   // Non-zero in the sense that they're not provably zero, which is the key
2936   // point if we want to use this value.
2937   uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2938 
2939   // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2940   if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2941     return false;
2942 
2943   // The bits being inserted must only set those bits that are known to be zero.
2944   if ((OrImm & NotKnownZero) != 0) {
2945     // FIXME:  It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2946     // currently handle this case.
2947     return false;
2948   }
2949 
2950   // BFI/BFXIL dst, src, #lsb, #width.
2951   int LSB = countTrailingOnes(NotKnownZero);
2952   int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2953 
2954   // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2955   unsigned ImmR = (BitWidth - LSB) % BitWidth;
2956   unsigned ImmS = Width - 1;
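       // E.g., for a 32-bit insertion at LSB == 4 with Width == 16 this gives
       // ImmR == 28 and ImmS == 15; the BFM prints as "bfi Wd, Wn, #4, #16".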
2957 
2958   // If we're creating a BFI instruction, avoid cases where we need more
2959   // instructions to materialize the BFI constant as compared to the original
2960   // ORR.  A BFXIL will use the same constant as the original ORR, so the code
2961   // should be no worse in this case.
2962   bool IsBFI = LSB != 0;
2963   uint64_t BFIImm = OrImm >> LSB;
2964   if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2965     // We have a BFI instruction and we know the constant can't be materialized
2966     // with a ORR-immediate with the zero register.
2967     unsigned OrChunks = 0, BFIChunks = 0;
2968     for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2969       if (((OrImm >> Shift) & 0xFFFF) != 0)
2970         ++OrChunks;
2971       if (((BFIImm >> Shift) & 0xFFFF) != 0)
2972         ++BFIChunks;
2973     }
2974     if (BFIChunks > OrChunks)
2975       return false;
2976   }
2977 
2978   // Materialize the constant to be inserted.
2979   SDLoc DL(N);
2980   unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2981   SDNode *MOVI = CurDAG->getMachineNode(
2982       MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2983 
2984   // Create the BFI/BFXIL instruction.
2985   SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2986                    CurDAG->getTargetConstant(ImmR, DL, VT),
2987                    CurDAG->getTargetConstant(ImmS, DL, VT)};
2988   unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2989   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2990   return true;
2991 }
2992 
2993 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
2994                                            SDValue &ShiftedOperand,
2995                                            uint64_t &EncodedShiftImm) {
2996   // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
2997   if (!Dst.hasOneUse())
2998     return false;
2999 
3000   EVT VT = Dst.getValueType();
3001   assert((VT == MVT::i32 || VT == MVT::i64) &&
3002          "Caller should guarantee that VT is one of i32 or i64");
3003   const unsigned SizeInBits = VT.getSizeInBits();
3004 
3005   SDLoc DL(Dst.getNode());
3006   uint64_t AndImm, ShlImm;
3007   if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3008       isShiftedMask_64(AndImm)) {
3009     // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3010     SDValue DstOp0 = Dst.getOperand(0);
3011     if (!DstOp0.hasOneUse())
3012       return false;
3013 
3014     // An example to illustrate the transformation
3015     // From:
3016     //    lsr     x8, x1, #1
3017     //    and     x8, x8, #0x3f80
3018     //    bfxil   x8, x1, #0, #7
3019     // To:
3020     //    and    x8, x23, #0x7f
3021     //    ubfx   x9, x23, #8, #7
3022     //    orr    x23, x8, x9, lsl #7
3023     //
3024     // The number of instructions remains the same, but ORR is faster than BFXIL
3025     // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3026     // the dependency chain is improved after the transformation.
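         // In the example above (illustrative), SrlImm == 1 and AndImm == 0x3f80, so
         // the UBFM built below is a "ubfx #8, #7" and the ORR uses it with "lsl #7".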
3027     uint64_t SrlImm;
3028     if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3029       uint64_t NumTrailingZeroInShiftedMask = countTrailingZeros(AndImm);
3030       if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3031         unsigned MaskWidth =
3032             countTrailingOnes(AndImm >> NumTrailingZeroInShiftedMask);
3033         unsigned UBFMOpc =
3034             (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3035         SDNode *UBFMNode = CurDAG->getMachineNode(
3036             UBFMOpc, DL, VT, DstOp0.getOperand(0),
3037             CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3038                                       VT),
3039             CurDAG->getTargetConstant(
3040                 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3041         ShiftedOperand = SDValue(UBFMNode, 0);
3042         EncodedShiftImm = AArch64_AM::getShifterImm(
3043             AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3044         return true;
3045       }
3046     }
3047     return false;
3048   }
3049 
3050   if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3051     ShiftedOperand = Dst.getOperand(0);
3052     EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3053     return true;
3054   }
3055 
3056   uint64_t SrlImm;
3057   if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3058     ShiftedOperand = Dst.getOperand(0);
3059     EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3060     return true;
3061   }
3062   return false;
3063 }
3064 
3065 // Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3066 // the operands and select it to AArch64::ORR with shifted registers if
3067 // that's more efficient. Returns true iff selection to AArch64::ORR happens.
3068 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3069                             SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3070                             const bool BiggerPattern) {
3071   EVT VT = N->getValueType(0);
3072   assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3073   assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3074           (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3075          "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3076   assert((VT == MVT::i32 || VT == MVT::i64) &&
3077          "Expect result type to be i32 or i64 since N is combinable to BFM");
3078   SDLoc DL(N);
3079 
3080   // Bail out if BFM simplifies away one node in BFM Dst.
3081   if (OrOpd1 != Dst)
3082     return false;
3083 
3084   const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3085   // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3086   // nodes from Rn (or inserts an additional shift node) if BiggerPattern is true.
3087   if (BiggerPattern) {
3088     uint64_t SrcAndImm;
3089     if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3090         isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3091       // OrOpd0 = AND Src, #Mask
3092       // So BFM simplifies away one AND node from Src and doesn't simplify away
3093       // nodes from Dst. If ORR with left-shifted operand also simplifies away
3094       // one node (from Rd), ORR is better since it has higher throughput and
3095       // smaller latency than BFM on many AArch64 processors (and for the rest
3096       // ORR is at least as good as BFM).
3097       SDValue ShiftedOperand;
3098       uint64_t EncodedShiftImm;
3099       if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3100                                          EncodedShiftImm)) {
3101         SDValue Ops[] = {OrOpd0, ShiftedOperand,
3102                          CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3103         CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3104         return true;
3105       }
3106     }
3107     return false;
3108   }
3109 
3110   assert((!BiggerPattern) && "BiggerPattern should be handled above");
3111 
3112   uint64_t ShlImm;
3113   if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3114     if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3115       SDValue Ops[] = {
3116           Dst, Src,
3117           CurDAG->getTargetConstant(
3118               AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3119       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3120       return true;
3121     }
3122 
3123     // Select the following pattern to left-shifted operand rather than BFI.
3124     // %val1 = op ..
3125     // %val2 = shl %val1, #imm
3126     // %res = or %val1, %val2
3127     //
3128     // If N is selected to be BFI, we know that
3129     // 1) OrOpd0 would be the operand from which bits are extracted (i.e.,
3130     //    folded into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved)
3131     //
3132     // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
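         // The result is a single ORRWrs/ORRXrs, e.g. "orr w0, w1, w1, lsl #7" when
         // %val1 lives in w1 and #imm == 7 (register allocation permitting).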
3133     if (OrOpd0.getOperand(0) == OrOpd1) {
3134       SDValue Ops[] = {
3135           OrOpd1, OrOpd1,
3136           CurDAG->getTargetConstant(
3137               AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3138       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3139       return true;
3140     }
3141   }
3142 
3143   uint64_t SrlImm;
3144   if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3145     // Select the following pattern to right-shifted operand rather than BFXIL.
3146     // %val1 = op ..
3147     // %val2 = lshr %val1, #imm
3148     // %res = or %val1, %val2
3149     //
3150     // If N is selected to be BFXIL, we know that
3151     // 1) OrOpd0 would be the operand from which bits are extracted (i.e.,
3152     //    folded into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved)
3153     //
3154     // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3155     if (OrOpd0.getOperand(0) == OrOpd1) {
3156       SDValue Ops[] = {
3157           OrOpd1, OrOpd1,
3158           CurDAG->getTargetConstant(
3159               AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3160       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3161       return true;
3162     }
3163   }
3164 
3165   return false;
3166 }
3167 
3168 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3169                                       SelectionDAG *CurDAG) {
3170   assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3171 
3172   EVT VT = N->getValueType(0);
3173   if (VT != MVT::i32 && VT != MVT::i64)
3174     return false;
3175 
3176   unsigned BitWidth = VT.getSizeInBits();
3177 
3178   // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3179   // have the expected shape. Try to undo that.
3180 
3181   unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
3182   unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
3183 
3184   // Given an OR operation, check if we have the following pattern
3185   // ubfm c, b, imm, imm2 (or something that does the same job, see
3186   //                       isBitfieldExtractOp)
3187   // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3188   //                 countTrailingZeros(mask2) == imm2 - imm + 1
3189   // f = d | c
3190   // if yes, replace the OR instruction with:
3191   // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3192 
3193   // OR is commutative, check all combinations of operand order and values of
3194   // BiggerPattern, i.e.
3195   //     Opd0, Opd1, BiggerPattern=false
3196   //     Opd1, Opd0, BiggerPattern=false
3197   //     Opd0, Opd1, BiggerPattern=true
3198   //     Opd1, Opd0, BiggerPattern=true
3199   // Several of these combinations may match, so check with BiggerPattern=false
3200   // first since that will produce better results by matching more instructions
3201   // and/or inserting fewer extra instructions.
3202   for (int I = 0; I < 4; ++I) {
3203 
3204     SDValue Dst, Src;
3205     unsigned ImmR, ImmS;
3206     bool BiggerPattern = I / 2;
3207     SDValue OrOpd0Val = N->getOperand(I % 2);
3208     SDNode *OrOpd0 = OrOpd0Val.getNode();
3209     SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3210     SDNode *OrOpd1 = OrOpd1Val.getNode();
3211 
3212     unsigned BFXOpc;
3213     int DstLSB, Width;
3214     if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3215                             NumberOfIgnoredLowBits, BiggerPattern)) {
3216       // Check that the returned opcode is compatible with the pattern,
3217       // i.e., same type and zero extended (U and not S)
3218       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3219           (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3220         continue;
3221 
3222       // Compute the width of the bitfield insertion
3223       DstLSB = 0;
3224       Width = ImmS - ImmR + 1;
3225       // FIXME: This constraint is to catch bitfield insertion; we may
3226       // want to widen the pattern if we want to grab the general bitfield
3227       // move case.
3228       if (Width <= 0)
3229         continue;
3230 
3231       // If the mask on the insertee is correct, we have a BFXIL operation. We
3232       // can share the ImmR and ImmS values from the already-computed UBFM.
3233     } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3234                                        BiggerPattern,
3235                                        Src, DstLSB, Width)) {
3236       ImmR = (BitWidth - DstLSB) % BitWidth;
3237       ImmS = Width - 1;
3238     } else
3239       continue;
3240 
3241     // Check the second part of the pattern
3242     EVT VT = OrOpd1Val.getValueType();
3243     assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3244 
3245     // Compute the Known Zero for the candidate of the first operand.
3246     // This allows us to catch more general cases than just looking for
3247     // AND with imm. Indeed, simplify-demanded-bits may have removed
3248     // the AND instruction because it proved it was useless.
3249     KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3250 
3251     // Check if there is enough room for the second operand to appear
3252     // in the first one
3253     APInt BitsToBeInserted =
3254         APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3255 
3256     if ((BitsToBeInserted & ~Known.Zero) != 0)
3257       continue;
3258 
3259     // Set the first operand
3260     uint64_t Imm;
3261     if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3262         isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3263       // In that case, we can eliminate the AND
3264       Dst = OrOpd1->getOperand(0);
3265     else
3266       // Maybe the AND has been removed by simplify-demanded-bits
3267       // or is useful because it discards more bits
3268       Dst = OrOpd1Val;
3269 
3270     // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3271     // with shifted operand is more efficient.
3272     if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3273                         BiggerPattern))
3274       return true;
3275 
3276     // both parts match
3277     SDLoc DL(N);
3278     SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3279                      CurDAG->getTargetConstant(ImmS, DL, VT)};
3280     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3281     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3282     return true;
3283   }
3284 
3285   // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3286   // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3287   // mask (e.g., 0x000ffff0).
3288   uint64_t Mask0Imm, Mask1Imm;
3289   SDValue And0 = N->getOperand(0);
3290   SDValue And1 = N->getOperand(1);
3291   if (And0.hasOneUse() && And1.hasOneUse() &&
3292       isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3293       isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3294       APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3295       (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3296 
3297     // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3298     // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3299     // bits to be inserted.
3300     if (isShiftedMask(Mask0Imm, VT)) {
3301       std::swap(And0, And1);
3302       std::swap(Mask0Imm, Mask1Imm);
3303     }
3304 
3305     SDValue Src = And1->getOperand(0);
3306     SDValue Dst = And0->getOperand(0);
3307     unsigned LSB = countTrailingZeros(Mask1Imm);
3308     int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
3309 
3310     // The BFXIL inserts the low-order bits from a source register, so right
3311     // shift the needed bits into place.
3312     SDLoc DL(N);
3313     unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3314     uint64_t LsrImm = LSB;
3315     if (Src->hasOneUse() &&
3316         isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3317         (LsrImm + LSB) < BitWidth) {
3318       Src = Src->getOperand(0);
3319       LsrImm += LSB;
3320     }
3321 
3322     SDNode *LSR = CurDAG->getMachineNode(
3323         ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3324         CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3325 
3326     // BFXIL is an alias of BFM, so translate to BFM operands.
3327     unsigned ImmR = (BitWidth - LSB) % BitWidth;
3328     unsigned ImmS = Width - 1;
3329 
3330     // Create the BFXIL instruction.
3331     SDValue Ops[] = {Dst, SDValue(LSR, 0),
3332                      CurDAG->getTargetConstant(ImmR, DL, VT),
3333                      CurDAG->getTargetConstant(ImmS, DL, VT)};
3334     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3335     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3336     return true;
3337   }
3338 
3339   return false;
3340 }
3341 
3342 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3343   if (N->getOpcode() != ISD::OR)
3344     return false;
3345 
3346   APInt NUsefulBits;
3347   getUsefulBits(SDValue(N, 0), NUsefulBits);
3348 
3349   // If none of the bits are useful, just return UNDEF.
3350   if (!NUsefulBits) {
3351     CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3352     return true;
3353   }
3354 
3355   if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3356     return true;
3357 
3358   return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3359 }
3360 
3361 /// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3362 /// equivalent of a left shift by a constant amount followed by an and masking
3363 /// out a contiguous set of bits.
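     /// For example (i32), "(and (shl x, 8), 0x00ffff00)" is selected below as
     /// UBFMWri x, #24, #15, i.e. "ubfiz wD, wX, #8, #16".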
3364 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3365   if (N->getOpcode() != ISD::AND)
3366     return false;
3367 
3368   EVT VT = N->getValueType(0);
3369   if (VT != MVT::i32 && VT != MVT::i64)
3370     return false;
3371 
3372   SDValue Op0;
3373   int DstLSB, Width;
3374   if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3375                                Op0, DstLSB, Width))
3376     return false;
3377 
3378   // ImmR is the rotate right amount.
3379   unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3380   // ImmS is the most significant bit of the source to be moved.
3381   unsigned ImmS = Width - 1;
3382 
3383   SDLoc DL(N);
3384   SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3385                    CurDAG->getTargetConstant(ImmS, DL, VT)};
3386   unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3387   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3388   return true;
3389 }
3390 
3391 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3392 /// variable shift/rotate instructions.
3393 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3394   EVT VT = N->getValueType(0);
3395 
3396   unsigned Opc;
3397   switch (N->getOpcode()) {
3398   case ISD::ROTR:
3399     Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3400     break;
3401   case ISD::SHL:
3402     Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3403     break;
3404   case ISD::SRL:
3405     Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3406     break;
3407   case ISD::SRA:
3408     Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3409     break;
3410   default:
3411     return false;
3412   }
3413 
3414   uint64_t Size;
3415   uint64_t Bits;
3416   if (VT == MVT::i32) {
3417     Bits = 5;
3418     Size = 32;
3419   } else if (VT == MVT::i64) {
3420     Bits = 6;
3421     Size = 64;
3422   } else
3423     return false;
3424 
3425   SDValue ShiftAmt = N->getOperand(1);
3426   SDLoc DL(N);
3427   SDValue NewShiftAmt;
3428 
3429   // Skip over an extend of the shift amount.
3430   if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3431       ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3432     ShiftAmt = ShiftAmt->getOperand(0);
3433 
3434   if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3435     SDValue Add0 = ShiftAmt->getOperand(0);
3436     SDValue Add1 = ShiftAmt->getOperand(1);
3437     uint64_t Add0Imm;
3438     uint64_t Add1Imm;
3439     if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3440       // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3441       // to avoid the ADD/SUB.
3442       NewShiftAmt = Add0;
3443     } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3444                isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3445                (Add0Imm % Size == 0)) {
3446       // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3447       // to generate a NEG instead of a SUB from a constant.
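           // E.g., a 64-bit shift by (64 - x) equals a shift by -x because only the
           // low 6 bits of the amount are used, so a NEG of x can replace the SUB.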
3448       unsigned NegOpc;
3449       unsigned ZeroReg;
3450       EVT SubVT = ShiftAmt->getValueType(0);
3451       if (SubVT == MVT::i32) {
3452         NegOpc = AArch64::SUBWrr;
3453         ZeroReg = AArch64::WZR;
3454       } else {
3455         assert(SubVT == MVT::i64);
3456         NegOpc = AArch64::SUBXrr;
3457         ZeroReg = AArch64::XZR;
3458       }
3459       SDValue Zero =
3460           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3461       MachineSDNode *Neg =
3462           CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3463       NewShiftAmt = SDValue(Neg, 0);
3464     } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3465                isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3466       // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3467       // to generate a NOT instead of a SUB from a constant.
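           // E.g., for a 32-bit shift by (31 - x): 31 == -1 (mod 32) and ~x == -1 - x,
           // so shifting by ~x (an ORN/MVN of x) gives the same result.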
3468       unsigned NotOpc;
3469       unsigned ZeroReg;
3470       EVT SubVT = ShiftAmt->getValueType(0);
3471       if (SubVT == MVT::i32) {
3472         NotOpc = AArch64::ORNWrr;
3473         ZeroReg = AArch64::WZR;
3474       } else {
3475         assert(SubVT == MVT::i64);
3476         NotOpc = AArch64::ORNXrr;
3477         ZeroReg = AArch64::XZR;
3478       }
3479       SDValue Zero =
3480           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3481       MachineSDNode *Not =
3482           CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3483       NewShiftAmt = SDValue(Not, 0);
3484     } else
3485       return false;
3486   } else {
3487     // If the shift amount is masked with an AND, check that the mask covers the
3488     // bits that are implicitly ANDed off by the above opcodes and if so, skip
3489     // the AND.
3490     uint64_t MaskImm;
3491     if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3492         !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3493       return false;
3494 
3495     if (countTrailingOnes(MaskImm) < Bits)
3496       return false;
3497 
3498     NewShiftAmt = ShiftAmt->getOperand(0);
3499   }
3500 
3501   // Narrow/widen the shift amount to match the size of the shift operation.
3502   if (VT == MVT::i32)
3503     NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3504   else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3505     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3506     MachineSDNode *Ext = CurDAG->getMachineNode(
3507         AArch64::SUBREG_TO_REG, DL, VT,
3508         CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3509     NewShiftAmt = SDValue(Ext, 0);
3510   }
3511 
3512   SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3513   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3514   return true;
3515 }
3516 
3517 bool
3518 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3519                                               unsigned RegWidth) {
3520   APFloat FVal(0.0);
3521   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3522     FVal = CN->getValueAPF();
3523   else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3524     // Some otherwise illegal constants are allowed in this case.
3525     if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3526         !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3527       return false;
3528 
3529     ConstantPoolSDNode *CN =
3530         dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3531     FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3532   } else
3533     return false;
3534 
3535   // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3536   // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3537   // x-register.
3538   //
3539   // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3540   // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3541   // integers.
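       // E.g., (fp_to_sint (fmul Val, 256.0)) corresponds to fbits == 8 and can be
       // selected as a fixed-point "fcvtzs wD, sN, #8".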
3542   bool IsExact;
3543 
3544   // fbits is between 1 and 64 in the worst-case, which means the fmul
3545   // could have 2^64 as an actual operand. Need 65 bits of precision.
3546   APSInt IntVal(65, true);
3547   FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3548 
3549   // N.b. isPowerOf2 also checks for > 0.
3550   if (!IsExact || !IntVal.isPowerOf2()) return false;
3551   unsigned FBits = IntVal.logBase2();
3552 
3553   // Checks above should have guaranteed that we haven't lost information in
3554   // finding FBits, but it must still be in range.
3555   if (FBits == 0 || FBits > RegWidth) return false;
3556 
3557   FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3558   return true;
3559 }
3560 
3561 // Inspects a register string of the form op0:op1:CRn:CRm:op2, gets the fields
3562 // of the string, obtains the integer values from them and combines these
3563 // into a single value to be used in the MRS/MSR instruction.
3564 static int getIntOperandFromRegisterString(StringRef RegString) {
3565   SmallVector<StringRef, 5> Fields;
3566   RegString.split(Fields, ':');
3567 
3568   if (Fields.size() == 1)
3569     return -1;
3570 
3571   assert(Fields.size() == 5
3572             && "Invalid number of fields in read register string");
3573 
3574   SmallVector<int, 5> Ops;
3575   bool AllIntFields = true;
3576 
3577   for (StringRef Field : Fields) {
3578     unsigned IntField;
3579     AllIntFields &= !Field.getAsInteger(10, IntField);
3580     Ops.push_back(IntField);
3581   }
3582 
3583   assert(AllIntFields &&
3584           "Unexpected non-integer value in special register string.");
3585   (void)AllIntFields;
3586 
3587   // Need to combine the integer fields of the string into a single value
3588   // based on the bit encoding of MRS/MSR instruction.
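       // E.g., "3:3:13:0:2" (tpidr_el0) packs to (3<<14)|(3<<11)|(13<<7)|(0<<3)|2,
       // i.e. 0xde82.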
3589   return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3590          (Ops[3] << 3) | (Ops[4]);
3591 }
3592 
3593 // Lower the read_register intrinsic to an MRS instruction node if the special
3594 // register string argument is either of the form detailed in the ACLE (the
3595 // form described in getIntOperandFromRegisterString) or is a named register
3596 // known by the MRS SysReg mapper.
3597 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3598   const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3599   const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3600   SDLoc DL(N);
3601 
3602   bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3603 
3604   unsigned Opcode64Bit = AArch64::MRS;
3605   int Imm = getIntOperandFromRegisterString(RegString->getString());
3606   if (Imm == -1) {
3607     // No match; use the sysreg mapper to map the remaining possible strings to
3608     // the value for the register to be used for the instruction operand.
3609     const auto *TheReg =
3610         AArch64SysReg::lookupSysRegByName(RegString->getString());
3611     if (TheReg && TheReg->Readable &&
3612         TheReg->haveFeatures(Subtarget->getFeatureBits()))
3613       Imm = TheReg->Encoding;
3614     else
3615       Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3616 
3617     if (Imm == -1) {
3618       // Still no match, see if this is "pc" or give up.
3619       if (!ReadIs128Bit && RegString->getString() == "pc") {
3620         Opcode64Bit = AArch64::ADR;
3621         Imm = 0;
3622       } else {
3623         return false;
3624       }
3625     }
3626   }
3627 
3628   SDValue InChain = N->getOperand(0);
3629   SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3630   if (!ReadIs128Bit) {
3631     CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3632                          {SysRegImm, InChain});
3633   } else {
3634     SDNode *MRRS = CurDAG->getMachineNode(
3635         AArch64::MRRS, DL,
3636         {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3637         {SysRegImm, InChain});
3638 
3639     // Sysregs are not endian. The even register always contains the low half
3640     // of the register.
3641     SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3642                                                 SDValue(MRRS, 0));
3643     SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3644                                                 SDValue(MRRS, 0));
3645     SDValue OutChain = SDValue(MRRS, 1);
3646 
3647     ReplaceUses(SDValue(N, 0), Lo);
3648     ReplaceUses(SDValue(N, 1), Hi);
3649     ReplaceUses(SDValue(N, 2), OutChain);
3650   }
3651   return true;
3652 }
3653 
3654 // Lower the write_register intrinsic to an MSR instruction node if the special
3655 // register string argument is either of the form detailed in the ACLE (the
3656 // form described in getIntOperandFromRegisterString) or is a named register
3657 // known by the MSR SysReg mapper.
3658 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3659   const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3660   const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3661   SDLoc DL(N);
3662 
3663   bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3664 
3665   if (!WriteIs128Bit) {
3666     // Check if the register was one of those allowed as the pstatefield value
3667     // in the MSR (immediate) instruction. To accept the values allowed in the
3668     // pstatefield for the MSR (immediate) instruction, we also require that an
3669     // immediate value has been provided as an argument; we know that this is
3670     // the case as it has been ensured by semantic checking.
3671     auto trySelectPState = [&](auto PMapper, unsigned State) {
3672       if (PMapper) {
3673         assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3674                "Expected a constant integer expression.");
3675         unsigned Reg = PMapper->Encoding;
3676         uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
3677         CurDAG->SelectNodeTo(
3678             N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3679             CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3680         return true;
3681       }
3682       return false;
3683     };
3684 
3685     if (trySelectPState(
3686             AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3687             AArch64::MSRpstateImm4))
3688       return true;
3689     if (trySelectPState(
3690             AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3691             AArch64::MSRpstateImm1))
3692       return true;
3693   }
3694 
3695   int Imm = getIntOperandFromRegisterString(RegString->getString());
3696   if (Imm == -1) {
3697     // Use the sysreg mapper to attempt to map the remaining possible strings
3698     // to the value for the register to be used for the MSR (register)
3699     // instruction operand.
3700     auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3701     if (TheReg && TheReg->Writeable &&
3702         TheReg->haveFeatures(Subtarget->getFeatureBits()))
3703       Imm = TheReg->Encoding;
3704     else
3705       Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3706 
3707     if (Imm == -1)
3708       return false;
3709   }
3710 
3711   SDValue InChain = N->getOperand(0);
3712   if (!WriteIs128Bit) {
3713     CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3714                          CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3715                          N->getOperand(2), InChain);
3716   } else {
3717     // No endian swap. The lower half always goes into the even subreg, and the
3718     // higher half always into the odd subreg.
3719     SDNode *Pair = CurDAG->getMachineNode(
3720         TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3721         {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3722                                    MVT::i32),
3723          N->getOperand(2),
3724          CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3725          N->getOperand(3),
3726          CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3727 
3728     CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3729                          CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3730                          SDValue(Pair, 0), InChain);
3731   }
3732 
3733   return true;
3734 }
3735 
3736 /// We've got special pseudo-instructions for these
3737 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3738   unsigned Opcode;
3739   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3740 
3741   // Leave IR for LSE if subtarget supports it.
3742   if (Subtarget->hasLSE()) return false;
3743 
3744   if (MemTy == MVT::i8)
3745     Opcode = AArch64::CMP_SWAP_8;
3746   else if (MemTy == MVT::i16)
3747     Opcode = AArch64::CMP_SWAP_16;
3748   else if (MemTy == MVT::i32)
3749     Opcode = AArch64::CMP_SWAP_32;
3750   else if (MemTy == MVT::i64)
3751     Opcode = AArch64::CMP_SWAP_64;
3752   else
3753     llvm_unreachable("Unknown AtomicCmpSwap type");
3754 
3755   MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
3756   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3757                    N->getOperand(0)};
3758   SDNode *CmpSwap = CurDAG->getMachineNode(
3759       Opcode, SDLoc(N),
3760       CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
3761 
3762   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3763   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3764 
3765   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3766   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3767   CurDAG->RemoveDeadNode(N);
3768 
3769   return true;
3770 }
3771 
3772 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
3773                                              SDValue &Shift) {
3774   if (!isa<ConstantSDNode>(N))
3775     return false;
3776 
3777   SDLoc DL(N);
3778   uint64_t Val = cast<ConstantSDNode>(N)
3779                      ->getAPIntValue()
3780                      .trunc(VT.getFixedSizeInBits())
3781                      .getZExtValue();
3782 
3783   switch (VT.SimpleTy) {
3784   case MVT::i8:
3785     // All immediates are supported.
3786     Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3787     Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
3788     return true;
3789   case MVT::i16:
3790   case MVT::i32:
3791   case MVT::i64:
3792     // Support 8bit unsigned immediates.
3793     if (Val <= 255) {
3794       Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3795       Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
3796       return true;
3797     }
3798     // Support 16bit unsigned immediates that are a multiple of 256.
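         // E.g., Val == 0x2000 is encoded as Imm == 0x20 with Shift == 8.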
3799     if (Val <= 65280 && Val % 256 == 0) {
3800       Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3801       Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
3802       return true;
3803     }
3804     break;
3805   default:
3806     break;
3807   }
3808 
3809   return false;
3810 }
3811 
3812 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
3813                                              SDValue &Shift) {
3814   if (!isa<ConstantSDNode>(N))
3815     return false;
3816 
3817   SDLoc DL(N);
3818   int64_t Val = cast<ConstantSDNode>(N)
3819                     ->getAPIntValue()
3820                     .trunc(VT.getFixedSizeInBits())
3821                     .getSExtValue();
3822 
3823   switch (VT.SimpleTy) {
3824   case MVT::i8:
3825     // All immediates are supported.
3826     Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3827     Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
3828     return true;
3829   case MVT::i16:
3830   case MVT::i32:
3831   case MVT::i64:
3832     // Support 8bit signed immediates.
3833     if (Val >= -128 && Val <= 127) {
3834       Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3835       Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
3836       return true;
3837     }
3838     // Support 16bit signed immediates that are a multiple of 256.
3839     if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
3840       Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3841       Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
3842       return true;
3843     }
3844     break;
3845   default:
3846     break;
3847   }
3848 
3849   return false;
3850 }
3851 
3852 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
3853   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3854     int64_t ImmVal = CNode->getSExtValue();
3855     SDLoc DL(N);
3856     if (ImmVal >= -128 && ImmVal < 128) {
3857       Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3858       return true;
3859     }
3860   }
3861   return false;
3862 }
3863 
3864 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
3865   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3866     uint64_t ImmVal = CNode->getZExtValue();
3867 
3868     switch (VT.SimpleTy) {
3869     case MVT::i8:
3870       ImmVal &= 0xFF;
3871       break;
3872     case MVT::i16:
3873       ImmVal &= 0xFFFF;
3874       break;
3875     case MVT::i32:
3876       ImmVal &= 0xFFFFFFFF;
3877       break;
3878     case MVT::i64:
3879       break;
3880     default:
3881       llvm_unreachable("Unexpected type");
3882     }
3883 
3884     if (ImmVal < 256) {
3885       Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
3886       return true;
3887     }
3888   }
3889   return false;
3890 }
3891 
3892 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
3893                                               bool Invert) {
3894   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3895     uint64_t ImmVal = CNode->getZExtValue();
3896     SDLoc DL(N);
3897 
3898     if (Invert)
3899       ImmVal = ~ImmVal;
3900 
3901     // Shift mask depending on type size.
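         // E.g., for MVT::i16 the value 0x00ff is replicated to 0x00ff00ff00ff00ff
         // before being encoded as a 64-bit logical immediate.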
3902     switch (VT.SimpleTy) {
3903     case MVT::i8:
3904       ImmVal &= 0xFF;
3905       ImmVal |= ImmVal << 8;
3906       ImmVal |= ImmVal << 16;
3907       ImmVal |= ImmVal << 32;
3908       break;
3909     case MVT::i16:
3910       ImmVal &= 0xFFFF;
3911       ImmVal |= ImmVal << 16;
3912       ImmVal |= ImmVal << 32;
3913       break;
3914     case MVT::i32:
3915       ImmVal &= 0xFFFFFFFF;
3916       ImmVal |= ImmVal << 32;
3917       break;
3918     case MVT::i64:
3919       break;
3920     default:
3921       llvm_unreachable("Unexpected type");
3922     }
3923 
3924     uint64_t encoding;
3925     if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
3926       Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
3927       return true;
3928     }
3929   }
3930   return false;
3931 }
3932 
3933 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
3934 // Rather than attempt to normalise everything, we can sometimes saturate the
3935 // shift amount during selection. This function also allows for consistent
3936 // isel patterns by ensuring the resulting "Imm" node is of the i32 type
3937 // required by the instructions.
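     // E.g., with High == 7 and AllowSaturation set, a shift amount of 200 is
     // clamped to 7 rather than rejected.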
3938 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
3939                                             uint64_t High, bool AllowSaturation,
3940                                             SDValue &Imm) {
3941   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
3942     uint64_t ImmVal = CN->getZExtValue();
3943 
3944     // Reject shift amounts that are too small.
3945     if (ImmVal < Low)
3946       return false;
3947 
3948     // Reject or saturate shift amounts that are too big.
3949     if (ImmVal > High) {
3950       if (!AllowSaturation)
3951         return false;
3952       ImmVal = High;
3953     }
3954 
3955     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
3956     return true;
3957   }
3958 
3959   return false;
3960 }
3961 
3962 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
3963   // tagp(FrameIndex, IRGstack, tag_offset):
3964   // since the offset between FrameIndex and IRGstack is a compile-time
3965   // constant, this can be lowered to a single ADDG instruction.
3966   if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
3967     return false;
3968   }
3969 
3970   SDValue IRG_SP = N->getOperand(2);
3971   if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
3972       cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
3973           Intrinsic::aarch64_irg_sp) {
3974     return false;
3975   }
3976 
3977   const TargetLowering *TLI = getTargetLowering();
3978   SDLoc DL(N);
3979   int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
3980   SDValue FiOp = CurDAG->getTargetFrameIndex(
3981       FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3982   int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
3983 
3984   SDNode *Out = CurDAG->getMachineNode(
3985       AArch64::TAGPstack, DL, MVT::i64,
3986       {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
3987        CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
3988   ReplaceNode(N, Out);
3989   return true;
3990 }
3991 
3992 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
3993   assert(isa<ConstantSDNode>(N->getOperand(3)) &&
3994          "llvm.aarch64.tagp third argument must be an immediate");
3995   if (trySelectStackSlotTagP(N))
3996     return;
3997   // FIXME: above applies in any case when offset between Op1 and Op2 is a
3998   // compile-time constant, not just for stack allocations.
3999 
4000   // General case for unrelated pointers in Op1 and Op2.
4001   SDLoc DL(N);
4002   int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
4003   SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4004                                       {N->getOperand(1), N->getOperand(2)});
4005   SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4006                                       {SDValue(N1, 0), N->getOperand(2)});
4007   SDNode *N3 = CurDAG->getMachineNode(
4008       AArch64::ADDG, DL, MVT::i64,
4009       {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4010        CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4011   ReplaceNode(N, N3);
4012 }
4013 
4014 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
4015 // vector types larger than NEON don't have a matching SubRegIndex.
4016 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
4017   assert(V.getValueType().isScalableVector() &&
4018          V.getValueType().getSizeInBits().getKnownMinValue() ==
4019              AArch64::SVEBitsPerBlock &&
4020          "Expected to extract from a packed scalable vector!");
4021   assert(VT.isFixedLengthVector() &&
4022          "Expected to extract a fixed length vector!");
4023 
4024   SDLoc DL(V);
4025   switch (VT.getSizeInBits()) {
4026   case 64: {
4027     auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4028     return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
4029   }
4030   case 128: {
4031     auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4032     return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
4033   }
4034   default: {
4035     auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4036     return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
4037   }
4038   }
4039 }
4040 
4041 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
4042 // vector types larger than NEON don't have a matching SubRegIndex.
4043 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
4044   assert(VT.isScalableVector() &&
4045          VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4046          "Expected to insert into a packed scalable vector!");
4047   assert(V.getValueType().isFixedLengthVector() &&
4048          "Expected to insert a fixed length vector!");
4049 
4050   SDLoc DL(V);
4051   switch (V.getValueType().getSizeInBits()) {
4052   case 64: {
4053     auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4054     auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
4055     return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
4056                                SDValue(Container, 0), V, SubReg);
4057   }
4058   case 128: {
4059     auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4060     auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
4061     return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
4062                                SDValue(Container, 0), V, SubReg);
4063   }
4064   default: {
4065     auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4066     return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
4067   }
4068   }
4069 }
4070 
4071 void AArch64DAGToDAGISel::Select(SDNode *Node) {
4072   // If we have a custom node, we already have selected!
4073   if (Node->isMachineOpcode()) {
4074     LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4075     Node->setNodeId(-1);
4076     return;
4077   }
4078 
4079   // A few cases that need custom selection.
4080   EVT VT = Node->getValueType(0);
4081 
4082   switch (Node->getOpcode()) {
4083   default:
4084     break;
4085 
4086   case ISD::ATOMIC_CMP_SWAP:
4087     if (SelectCMP_SWAP(Node))
4088       return;
4089     break;
4090 
4091   case ISD::READ_REGISTER:
4092   case AArch64ISD::MRRS:
4093     if (tryReadRegister(Node))
4094       return;
4095     break;
4096 
4097   case ISD::WRITE_REGISTER:
4098   case AArch64ISD::MSRR:
4099     if (tryWriteRegister(Node))
4100       return;
4101     break;
4102 
4103   case ISD::ADD:
4104     if (tryMLAV64LaneV128(Node))
4105       return;
4106     break;
4107 
4108   case ISD::LOAD: {
4109     // Try to select as an indexed load. Fall through to normal processing
4110     // if we can't.
4111     if (tryIndexedLoad(Node))
4112       return;
4113     break;
4114   }
4115 
4116   case ISD::SRL:
4117   case ISD::AND:
4118   case ISD::SRA:
4119   case ISD::SIGN_EXTEND_INREG:
4120     if (tryBitfieldExtractOp(Node))
4121       return;
4122     if (tryBitfieldInsertInZeroOp(Node))
4123       return;
4124     [[fallthrough]];
4125   case ISD::ROTR:
4126   case ISD::SHL:
4127     if (tryShiftAmountMod(Node))
4128       return;
4129     break;
4130 
4131   case ISD::SIGN_EXTEND:
4132     if (tryBitfieldExtractOpFromSExt(Node))
4133       return;
4134     break;
4135 
4136   case ISD::FP_EXTEND:
4137     if (tryHighFPExt(Node))
4138       return;
4139     break;
4140 
4141   case ISD::OR:
4142     if (tryBitfieldInsertOp(Node))
4143       return;
4144     break;
4145 
4146   case ISD::EXTRACT_SUBVECTOR: {
4147     // Bail when this is not a "cast"-like extract_subvector (index 0).
4148     if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
4149       break;
4150 
4151     // Bail when normal isel can do the job.
4152     EVT InVT = Node->getOperand(0).getValueType();
4153     if (VT.isScalableVector() || InVT.isFixedLengthVector())
4154       break;
4155 
4156     // NOTE: We can only get here when doing fixed length SVE code generation.
4157     // We do manual selection because the types involved are not linked to real
4158     // registers (despite being legal) and must be coerced into SVE registers.
4159     //
4160     // NOTE: If the above changes, be aware that selection will still not work
4161     // because the td definition of extract_vector does not support extracting
4162     // a fixed length vector from a scalable vector.
4163 
4164     ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
4165     return;
4166   }
4167 
4168   case ISD::INSERT_SUBVECTOR: {
4169     // Bail when this is not a "cast"-like insert_subvector (index 0).
4170     if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
4171       break;
4172     if (!Node->getOperand(0).isUndef())
4173       break;
4174 
4175     // Bail when normal isel should do the job.
4176     EVT InVT = Node->getOperand(1).getValueType();
4177     if (VT.isFixedLengthVector() || InVT.isScalableVector())
4178       break;
4179 
4180     // NOTE: We can only get here when doing fixed length SVE code generation.
4181     // We do manual selection because the types involved are not linked to real
4182     // registers (despite being legal) and must be coerced into SVE registers.
4183     //
4184     // NOTE: If the above changes, be aware that selection will still not work
4185     // because the td definition of insert_vector does not support inserting a
4186     // fixed length vector into a scalable vector.
4187 
4188     ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
4189     return;
4190   }
4191 
4192   case ISD::Constant: {
4193     // Materialize zero constants as copies from WZR/XZR.  This allows
4194     // the coalescer to propagate these into other instructions.
4195     ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4196     if (ConstNode->isZero()) {
4197       if (VT == MVT::i32) {
4198         SDValue New = CurDAG->getCopyFromReg(
4199             CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4200         ReplaceNode(Node, New.getNode());
4201         return;
4202       } else if (VT == MVT::i64) {
4203         SDValue New = CurDAG->getCopyFromReg(
4204             CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4205         ReplaceNode(Node, New.getNode());
4206         return;
4207       }
4208     }
4209     break;
4210   }
4211 
4212   case ISD::FrameIndex: {
4213     // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4214     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4215     unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4216     const TargetLowering *TLI = getTargetLowering();
4217     SDValue TFI = CurDAG->getTargetFrameIndex(
4218         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4219     SDLoc DL(Node);
4220     SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4221                       CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4222     CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4223     return;
4224   }
4225   case ISD::INTRINSIC_W_CHAIN: {
4226     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
4227     switch (IntNo) {
4228     default:
4229       break;
4230     case Intrinsic::aarch64_ldaxp:
4231     case Intrinsic::aarch64_ldxp: {
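      // LDAXP/LDXP perform a 128-bit exclusive load-pair: the machine node
      // yields two i64 results (lo/hi) plus the chain, and the intrinsic's
      // memory operand is transferred so later passes still see the access.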
4232       unsigned Op =
4233           IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4234       SDValue MemAddr = Node->getOperand(2);
4235       SDLoc DL(Node);
4236       SDValue Chain = Node->getOperand(0);
4237 
4238       SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4239                                           MVT::Other, MemAddr, Chain);
4240 
4241       // Transfer memoperands.
4242       MachineMemOperand *MemOp =
4243           cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4244       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4245       ReplaceNode(Node, Ld);
4246       return;
4247     }
4248     case Intrinsic::aarch64_stlxp:
4249     case Intrinsic::aarch64_stxp: {
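      // STLXP/STXP perform a 128-bit exclusive store-pair. The i32 result is
      // the exclusive-store status (0 on success), and the memory operand is
      // transferred just like the load-pair case above.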
4250       unsigned Op =
4251           IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4252       SDLoc DL(Node);
4253       SDValue Chain = Node->getOperand(0);
4254       SDValue ValLo = Node->getOperand(2);
4255       SDValue ValHi = Node->getOperand(3);
4256       SDValue MemAddr = Node->getOperand(4);
4257 
4258       // Place arguments in the right order.
4259       SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4260 
4261       SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4262       // Transfer memoperands.
4263       MachineMemOperand *MemOp =
4264           cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4265       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4266 
4267       ReplaceNode(Node, St);
4268       return;
4269     }
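    // The NEON structured-load intrinsics below (ld1xN, ldN, ldNr, ldNlane)
    // dispatch purely on the result vector type: SelectLoad/SelectLoadLane
    // pick the matching LDn machine instruction and split the resulting
    // register tuple via the dsub0/qsub0 sub-registers. The .1d arrangement
    // is only available for LD1, hence the v1i64/v1f64 cases fall back to the
    // LD1 multi-register forms.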
4270     case Intrinsic::aarch64_neon_ld1x2:
4271       if (VT == MVT::v8i8) {
4272         SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4273         return;
4274       } else if (VT == MVT::v16i8) {
4275         SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4276         return;
4277       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4278         SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4279         return;
4280       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4281         SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4282         return;
4283       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4284         SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4285         return;
4286       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4287         SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4288         return;
4289       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4290         SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4291         return;
4292       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4293         SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4294         return;
4295       }
4296       break;
4297     case Intrinsic::aarch64_neon_ld1x3:
4298       if (VT == MVT::v8i8) {
4299         SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4300         return;
4301       } else if (VT == MVT::v16i8) {
4302         SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4303         return;
4304       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4305         SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4306         return;
4307       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4308         SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4309         return;
4310       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4311         SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4312         return;
4313       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4314         SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4315         return;
4316       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4317         SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4318         return;
4319       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4320         SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4321         return;
4322       }
4323       break;
4324     case Intrinsic::aarch64_neon_ld1x4:
4325       if (VT == MVT::v8i8) {
4326         SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4327         return;
4328       } else if (VT == MVT::v16i8) {
4329         SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4330         return;
4331       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4332         SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4333         return;
4334       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4335         SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4336         return;
4337       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4338         SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4339         return;
4340       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4341         SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4342         return;
4343       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4344         SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4345         return;
4346       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4347         SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4348         return;
4349       }
4350       break;
4351     case Intrinsic::aarch64_neon_ld2:
4352       if (VT == MVT::v8i8) {
4353         SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4354         return;
4355       } else if (VT == MVT::v16i8) {
4356         SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4357         return;
4358       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4359         SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4360         return;
4361       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4362         SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4363         return;
4364       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4365         SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4366         return;
4367       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4368         SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4369         return;
4370       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4371         SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4372         return;
4373       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4374         SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4375         return;
4376       }
4377       break;
4378     case Intrinsic::aarch64_neon_ld3:
4379       if (VT == MVT::v8i8) {
4380         SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4381         return;
4382       } else if (VT == MVT::v16i8) {
4383         SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4384         return;
4385       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4386         SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4387         return;
4388       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4389         SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4390         return;
4391       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4392         SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4393         return;
4394       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4395         SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4396         return;
4397       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4398         SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4399         return;
4400       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4401         SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4402         return;
4403       }
4404       break;
4405     case Intrinsic::aarch64_neon_ld4:
4406       if (VT == MVT::v8i8) {
4407         SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4408         return;
4409       } else if (VT == MVT::v16i8) {
4410         SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4411         return;
4412       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4413         SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4414         return;
4415       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4416         SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4417         return;
4418       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4419         SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4420         return;
4421       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4422         SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4423         return;
4424       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4425         SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4426         return;
4427       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4428         SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4429         return;
4430       }
4431       break;
4432     case Intrinsic::aarch64_neon_ld2r:
4433       if (VT == MVT::v8i8) {
4434         SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4435         return;
4436       } else if (VT == MVT::v16i8) {
4437         SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4438         return;
4439       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4440         SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4441         return;
4442       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4443         SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4444         return;
4445       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4446         SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4447         return;
4448       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4449         SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4450         return;
4451       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4452         SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4453         return;
4454       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4455         SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4456         return;
4457       }
4458       break;
4459     case Intrinsic::aarch64_neon_ld3r:
4460       if (VT == MVT::v8i8) {
4461         SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4462         return;
4463       } else if (VT == MVT::v16i8) {
4464         SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4465         return;
4466       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4467         SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4468         return;
4469       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4470         SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4471         return;
4472       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4473         SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4474         return;
4475       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4476         SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4477         return;
4478       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4479         SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4480         return;
4481       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4482         SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4483         return;
4484       }
4485       break;
4486     case Intrinsic::aarch64_neon_ld4r:
4487       if (VT == MVT::v8i8) {
4488         SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4489         return;
4490       } else if (VT == MVT::v16i8) {
4491         SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4492         return;
4493       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4494         SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4495         return;
4496       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4497         SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4498         return;
4499       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4500         SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4501         return;
4502       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4503         SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4504         return;
4505       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4506         SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4507         return;
4508       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4509         SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4510         return;
4511       }
4512       break;
4513     case Intrinsic::aarch64_neon_ld2lane:
4514       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4515         SelectLoadLane(Node, 2, AArch64::LD2i8);
4516         return;
4517       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4518                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4519         SelectLoadLane(Node, 2, AArch64::LD2i16);
4520         return;
4521       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4522                  VT == MVT::v2f32) {
4523         SelectLoadLane(Node, 2, AArch64::LD2i32);
4524         return;
4525       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4526                  VT == MVT::v1f64) {
4527         SelectLoadLane(Node, 2, AArch64::LD2i64);
4528         return;
4529       }
4530       break;
4531     case Intrinsic::aarch64_neon_ld3lane:
4532       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4533         SelectLoadLane(Node, 3, AArch64::LD3i8);
4534         return;
4535       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4536                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4537         SelectLoadLane(Node, 3, AArch64::LD3i16);
4538         return;
4539       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4540                  VT == MVT::v2f32) {
4541         SelectLoadLane(Node, 3, AArch64::LD3i32);
4542         return;
4543       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4544                  VT == MVT::v1f64) {
4545         SelectLoadLane(Node, 3, AArch64::LD3i64);
4546         return;
4547       }
4548       break;
4549     case Intrinsic::aarch64_neon_ld4lane:
4550       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4551         SelectLoadLane(Node, 4, AArch64::LD4i8);
4552         return;
4553       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4554                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4555         SelectLoadLane(Node, 4, AArch64::LD4i16);
4556         return;
4557       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4558                  VT == MVT::v2f32) {
4559         SelectLoadLane(Node, 4, AArch64::LD4i32);
4560         return;
4561       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4562                  VT == MVT::v1f64) {
4563         SelectLoadLane(Node, 4, AArch64::LD4i64);
4564         return;
4565       }
4566       break;
4567     case Intrinsic::aarch64_ld64b:
4568       SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
4569       return;
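    // SVE structured loads: the second and third arguments to
    // SelectPredicatedLoad are the number of vectors and log2 of the element
    // size in bytes (0=B, 1=H, 2=W, 3=D); both the immediate-offset and
    // register-offset opcodes are supplied so the routine can pick whichever
    // addressing form fits.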
4570     case Intrinsic::aarch64_sve_ld2_sret: {
4571       if (VT == MVT::nxv16i8) {
4572         SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
4573                              true);
4574         return;
4575       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4576                  VT == MVT::nxv8bf16) {
4577         SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
4578                              true);
4579         return;
4580       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4581         SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
4582                              true);
4583         return;
4584       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4585         SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
4586                              true);
4587         return;
4588       }
4589       break;
4590     }
4591     case Intrinsic::aarch64_sve_ld3_sret: {
4592       if (VT == MVT::nxv16i8) {
4593         SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
4594                              true);
4595         return;
4596       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4597                  VT == MVT::nxv8bf16) {
4598         SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
4599                              true);
4600         return;
4601       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4602         SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
4603                              true);
4604         return;
4605       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4606         SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
4607                              true);
4608         return;
4609       }
4610       break;
4611     }
4612     case Intrinsic::aarch64_sve_ld4_sret: {
4613       if (VT == MVT::nxv16i8) {
4614         SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
4615                              true);
4616         return;
4617       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4618                  VT == MVT::nxv8bf16) {
4619         SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
4620                              true);
4621         return;
4622       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4623         SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
4624                              true);
4625         return;
4626       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4627         SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
4628                              true);
4629         return;
4630       }
4631       break;
4632     }
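    // The address of the Swift async context is computed as FP - 8 (a SUBXri
    // of the copied frame pointer); the function is also marked as taking the
    // frame address and as having a Swift async context.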
4633     case Intrinsic::swift_async_context_addr: {
4634       SDLoc DL(Node);
4635       SDValue Chain = Node->getOperand(0);
4636       SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
4637       SDValue Res = SDValue(
4638           CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
4639                                  CurDAG->getTargetConstant(8, DL, MVT::i32),
4640                                  CurDAG->getTargetConstant(0, DL, MVT::i32)),
4641           0);
4642       ReplaceUses(SDValue(Node, 0), Res);
4643       ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
4644       CurDAG->RemoveDeadNode(Node);
4645 
4646       auto &MF = CurDAG->getMachineFunction();
4647       MF.getFrameInfo().setFrameAddressIsTaken(true);
4648       MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
4649       return;
4650     }
4651     }
4652   } break;
4653   case ISD::INTRINSIC_WO_CHAIN: {
4654     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
4655     switch (IntNo) {
4656     default:
4657       break;
4658     case Intrinsic::aarch64_tagp:
4659       SelectTagP(Node);
4660       return;
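    // TBL/TBX table lookups: the second argument is the number of table
    // registers, and the final bool selects the TBX form, which leaves
    // destination elements unchanged for out-of-range indices where TBL
    // writes zero.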
4661     case Intrinsic::aarch64_neon_tbl2:
4662       SelectTable(Node, 2,
4663                   VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
4664                   false);
4665       return;
4666     case Intrinsic::aarch64_neon_tbl3:
4667       SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
4668                                            : AArch64::TBLv16i8Three,
4669                   false);
4670       return;
4671     case Intrinsic::aarch64_neon_tbl4:
4672       SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
4673                                            : AArch64::TBLv16i8Four,
4674                   false);
4675       return;
4676     case Intrinsic::aarch64_neon_tbx2:
4677       SelectTable(Node, 2,
4678                   VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
4679                   true);
4680       return;
4681     case Intrinsic::aarch64_neon_tbx3:
4682       SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
4683                                            : AArch64::TBXv16i8Three,
4684                   true);
4685       return;
4686     case Intrinsic::aarch64_neon_tbx4:
4687       SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
4688                                            : AArch64::TBXv16i8Four,
4689                   true);
4690       return;
4691     case Intrinsic::aarch64_neon_smull:
4692     case Intrinsic::aarch64_neon_umull:
4693       if (tryMULLV64LaneV128(IntNo, Node))
4694         return;
4695       break;
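    // Multi-vector WHILE* predicate-pair intrinsics: SelectOpcodeFromVT picks
    // the B/H/S/D opcode variant from the predicate element type, and
    // SelectWhilePair then emits the chosen two-predicate instruction.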
4696     case Intrinsic::aarch64_sve_whilege_x2:
4697       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4698               Node->getValueType(0),
4699               {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
4700                AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
4701         SelectWhilePair(Node, Op);
4702       return;
4703     case Intrinsic::aarch64_sve_whilegt_x2:
4704       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4705               Node->getValueType(0),
4706               {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
4707                AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
4708         SelectWhilePair(Node, Op);
4709       return;
4710     case Intrinsic::aarch64_sve_whilehi_x2:
4711       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4712               Node->getValueType(0),
4713               {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
4714                AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
4715         SelectWhilePair(Node, Op);
4716       return;
4717     case Intrinsic::aarch64_sve_whilehs_x2:
4718       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4719               Node->getValueType(0),
4720               {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
4721                AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
4722         SelectWhilePair(Node, Op);
4723       return;
4724     case Intrinsic::aarch64_sve_whilele_x2:
4725       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4726               Node->getValueType(0),
4727               {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
4728                AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
4729         SelectWhilePair(Node, Op);
4730       return;
4731     case Intrinsic::aarch64_sve_whilelo_x2:
4732       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4733               Node->getValueType(0),
4734               {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
4735                AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
4736         SelectWhilePair(Node, Op);
4737       return;
4738     case Intrinsic::aarch64_sve_whilels_x2:
4739       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4740               Node->getValueType(0),
4741               {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
4742                AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
4743         SelectWhilePair(Node, Op);
4744       return;
4745     case Intrinsic::aarch64_sve_whilelt_x2:
4746       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4747               Node->getValueType(0),
4748               {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
4749                AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
4750         SelectWhilePair(Node, Op);
4751       return;
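    // Multi-vector float<->int conversions: the _x2/_x4 intrinsics map to the
    // 2- and 4-register FCVTZS/FCVTZU/SCVTF/UCVTF forms operating on .S
    // elements (the "StoS" suffix).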
4752     case Intrinsic::aarch64_sve_fcvts_x2:
4753       SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
4754       return;
4755     case Intrinsic::aarch64_sve_scvtf_x2:
4756       SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
4757       return;
4758     case Intrinsic::aarch64_sve_fcvtu_x2:
4759       SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
4760       return;
4761     case Intrinsic::aarch64_sve_ucvtf_x2:
4762       SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
4763       return;
4764     case Intrinsic::aarch64_sve_fcvts_x4:
4765       SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
4766       return;
4767     case Intrinsic::aarch64_sve_scvtf_x4:
4768       SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
4769       return;
4770     case Intrinsic::aarch64_sve_fcvtu_x4:
4771       SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
4772       return;
4773     case Intrinsic::aarch64_sve_ucvtf_x4:
4774       SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
4775       return;
4776     }
4777     break;
4778   }
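  // Void intrinsics (the stores below) have no result, so the dispatch type
  // is taken from operand 2, the first value being stored.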
4779   case ISD::INTRINSIC_VOID: {
4780     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
4781     if (Node->getNumOperands() >= 3)
4782       VT = Node->getOperand(2)->getValueType(0);
4783     switch (IntNo) {
4784     default:
4785       break;
4786     case Intrinsic::aarch64_neon_st1x2: {
4787       if (VT == MVT::v8i8) {
4788         SelectStore(Node, 2, AArch64::ST1Twov8b);
4789         return;
4790       } else if (VT == MVT::v16i8) {
4791         SelectStore(Node, 2, AArch64::ST1Twov16b);
4792         return;
4793       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4794                  VT == MVT::v4bf16) {
4795         SelectStore(Node, 2, AArch64::ST1Twov4h);
4796         return;
4797       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4798                  VT == MVT::v8bf16) {
4799         SelectStore(Node, 2, AArch64::ST1Twov8h);
4800         return;
4801       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4802         SelectStore(Node, 2, AArch64::ST1Twov2s);
4803         return;
4804       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4805         SelectStore(Node, 2, AArch64::ST1Twov4s);
4806         return;
4807       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4808         SelectStore(Node, 2, AArch64::ST1Twov2d);
4809         return;
4810       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4811         SelectStore(Node, 2, AArch64::ST1Twov1d);
4812         return;
4813       }
4814       break;
4815     }
4816     case Intrinsic::aarch64_neon_st1x3: {
4817       if (VT == MVT::v8i8) {
4818         SelectStore(Node, 3, AArch64::ST1Threev8b);
4819         return;
4820       } else if (VT == MVT::v16i8) {
4821         SelectStore(Node, 3, AArch64::ST1Threev16b);
4822         return;
4823       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4824                  VT == MVT::v4bf16) {
4825         SelectStore(Node, 3, AArch64::ST1Threev4h);
4826         return;
4827       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4828                  VT == MVT::v8bf16) {
4829         SelectStore(Node, 3, AArch64::ST1Threev8h);
4830         return;
4831       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4832         SelectStore(Node, 3, AArch64::ST1Threev2s);
4833         return;
4834       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4835         SelectStore(Node, 3, AArch64::ST1Threev4s);
4836         return;
4837       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4838         SelectStore(Node, 3, AArch64::ST1Threev2d);
4839         return;
4840       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4841         SelectStore(Node, 3, AArch64::ST1Threev1d);
4842         return;
4843       }
4844       break;
4845     }
4846     case Intrinsic::aarch64_neon_st1x4: {
4847       if (VT == MVT::v8i8) {
4848         SelectStore(Node, 4, AArch64::ST1Fourv8b);
4849         return;
4850       } else if (VT == MVT::v16i8) {
4851         SelectStore(Node, 4, AArch64::ST1Fourv16b);
4852         return;
4853       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4854                  VT == MVT::v4bf16) {
4855         SelectStore(Node, 4, AArch64::ST1Fourv4h);
4856         return;
4857       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4858                  VT == MVT::v8bf16) {
4859         SelectStore(Node, 4, AArch64::ST1Fourv8h);
4860         return;
4861       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4862         SelectStore(Node, 4, AArch64::ST1Fourv2s);
4863         return;
4864       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4865         SelectStore(Node, 4, AArch64::ST1Fourv4s);
4866         return;
4867       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4868         SelectStore(Node, 4, AArch64::ST1Fourv2d);
4869         return;
4870       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4871         SelectStore(Node, 4, AArch64::ST1Fourv1d);
4872         return;
4873       }
4874       break;
4875     }
4876     case Intrinsic::aarch64_neon_st2: {
4877       if (VT == MVT::v8i8) {
4878         SelectStore(Node, 2, AArch64::ST2Twov8b);
4879         return;
4880       } else if (VT == MVT::v16i8) {
4881         SelectStore(Node, 2, AArch64::ST2Twov16b);
4882         return;
4883       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4884                  VT == MVT::v4bf16) {
4885         SelectStore(Node, 2, AArch64::ST2Twov4h);
4886         return;
4887       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4888                  VT == MVT::v8bf16) {
4889         SelectStore(Node, 2, AArch64::ST2Twov8h);
4890         return;
4891       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4892         SelectStore(Node, 2, AArch64::ST2Twov2s);
4893         return;
4894       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4895         SelectStore(Node, 2, AArch64::ST2Twov4s);
4896         return;
4897       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4898         SelectStore(Node, 2, AArch64::ST2Twov2d);
4899         return;
4900       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4901         SelectStore(Node, 2, AArch64::ST1Twov1d);
4902         return;
4903       }
4904       break;
4905     }
4906     case Intrinsic::aarch64_neon_st3: {
4907       if (VT == MVT::v8i8) {
4908         SelectStore(Node, 3, AArch64::ST3Threev8b);
4909         return;
4910       } else if (VT == MVT::v16i8) {
4911         SelectStore(Node, 3, AArch64::ST3Threev16b);
4912         return;
4913       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4914                  VT == MVT::v4bf16) {
4915         SelectStore(Node, 3, AArch64::ST3Threev4h);
4916         return;
4917       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4918                  VT == MVT::v8bf16) {
4919         SelectStore(Node, 3, AArch64::ST3Threev8h);
4920         return;
4921       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4922         SelectStore(Node, 3, AArch64::ST3Threev2s);
4923         return;
4924       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4925         SelectStore(Node, 3, AArch64::ST3Threev4s);
4926         return;
4927       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4928         SelectStore(Node, 3, AArch64::ST3Threev2d);
4929         return;
4930       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4931         SelectStore(Node, 3, AArch64::ST1Threev1d);
4932         return;
4933       }
4934       break;
4935     }
4936     case Intrinsic::aarch64_neon_st4: {
4937       if (VT == MVT::v8i8) {
4938         SelectStore(Node, 4, AArch64::ST4Fourv8b);
4939         return;
4940       } else if (VT == MVT::v16i8) {
4941         SelectStore(Node, 4, AArch64::ST4Fourv16b);
4942         return;
4943       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4944                  VT == MVT::v4bf16) {
4945         SelectStore(Node, 4, AArch64::ST4Fourv4h);
4946         return;
4947       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4948                  VT == MVT::v8bf16) {
4949         SelectStore(Node, 4, AArch64::ST4Fourv8h);
4950         return;
4951       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4952         SelectStore(Node, 4, AArch64::ST4Fourv2s);
4953         return;
4954       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4955         SelectStore(Node, 4, AArch64::ST4Fourv4s);
4956         return;
4957       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4958         SelectStore(Node, 4, AArch64::ST4Fourv2d);
4959         return;
4960       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4961         SelectStore(Node, 4, AArch64::ST1Fourv1d);
4962         return;
4963       }
4964       break;
4965     }
4966     case Intrinsic::aarch64_neon_st2lane: {
4967       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4968         SelectStoreLane(Node, 2, AArch64::ST2i8);
4969         return;
4970       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4971                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4972         SelectStoreLane(Node, 2, AArch64::ST2i16);
4973         return;
4974       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4975                  VT == MVT::v2f32) {
4976         SelectStoreLane(Node, 2, AArch64::ST2i32);
4977         return;
4978       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4979                  VT == MVT::v1f64) {
4980         SelectStoreLane(Node, 2, AArch64::ST2i64);
4981         return;
4982       }
4983       break;
4984     }
4985     case Intrinsic::aarch64_neon_st3lane: {
4986       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4987         SelectStoreLane(Node, 3, AArch64::ST3i8);
4988         return;
4989       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4990                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4991         SelectStoreLane(Node, 3, AArch64::ST3i16);
4992         return;
4993       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4994                  VT == MVT::v2f32) {
4995         SelectStoreLane(Node, 3, AArch64::ST3i32);
4996         return;
4997       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4998                  VT == MVT::v1f64) {
4999         SelectStoreLane(Node, 3, AArch64::ST3i64);
5000         return;
5001       }
5002       break;
5003     }
5004     case Intrinsic::aarch64_neon_st4lane: {
5005       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5006         SelectStoreLane(Node, 4, AArch64::ST4i8);
5007         return;
5008       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5009                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5010         SelectStoreLane(Node, 4, AArch64::ST4i16);
5011         return;
5012       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5013                  VT == MVT::v2f32) {
5014         SelectStoreLane(Node, 4, AArch64::ST4i32);
5015         return;
5016       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5017                  VT == MVT::v1f64) {
5018         SelectStoreLane(Node, 4, AArch64::ST4i64);
5019         return;
5020       }
5021       break;
5022     }
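    // SVE structured stores mirror the loads above: the vector count plus
    // log2 of the element size select the STnB/H/W/D instruction, with both
    // register- and immediate-offset opcodes supplied.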
5023     case Intrinsic::aarch64_sve_st2: {
5024       if (VT == MVT::nxv16i8) {
5025         SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
5026         return;
5027       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5028                  VT == MVT::nxv8bf16) {
5029         SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
5030         return;
5031       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5032         SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
5033         return;
5034       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5035         SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
5036         return;
5037       }
5038       break;
5039     }
5040     case Intrinsic::aarch64_sve_st3: {
5041       if (VT == MVT::nxv16i8) {
5042         SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
5043         return;
5044       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5045                  VT == MVT::nxv8bf16) {
5046         SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
5047         return;
5048       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5049         SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
5050         return;
5051       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5052         SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
5053         return;
5054       }
5055       break;
5056     }
5057     case Intrinsic::aarch64_sve_st4: {
5058       if (VT == MVT::nxv16i8) {
5059         SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
5060         return;
5061       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5062                  VT == MVT::nxv8bf16) {
5063         SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
5064         return;
5065       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5066         SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
5067         return;
5068       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5069         SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
5070         return;
5071       }
5072       break;
5073     }
5074     }
5075     break;
5076   }
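  // The AArch64ISD::*post nodes below are produced earlier (by target DAG
  // combines) when a structured NEON load or store is fused with a
  // post-increment of its address; selection again dispatches purely on the
  // vector type.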
5077   case AArch64ISD::LD2post: {
5078     if (VT == MVT::v8i8) {
5079       SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
5080       return;
5081     } else if (VT == MVT::v16i8) {
5082       SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
5083       return;
5084     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5085       SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
5086       return;
5087     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5088       SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
5089       return;
5090     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5091       SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
5092       return;
5093     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5094       SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
5095       return;
5096     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5097       SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
5098       return;
5099     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5100       SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
5101       return;
5102     }
5103     break;
5104   }
5105   case AArch64ISD::LD3post: {
5106     if (VT == MVT::v8i8) {
5107       SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
5108       return;
5109     } else if (VT == MVT::v16i8) {
5110       SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
5111       return;
5112     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5113       SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
5114       return;
5115     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5116       SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
5117       return;
5118     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5119       SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
5120       return;
5121     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5122       SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
5123       return;
5124     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5125       SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
5126       return;
5127     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5128       SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
5129       return;
5130     }
5131     break;
5132   }
5133   case AArch64ISD::LD4post: {
5134     if (VT == MVT::v8i8) {
5135       SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
5136       return;
5137     } else if (VT == MVT::v16i8) {
5138       SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
5139       return;
5140     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5141       SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
5142       return;
5143     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5144       SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
5145       return;
5146     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5147       SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
5148       return;
5149     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5150       SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
5151       return;
5152     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5153       SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
5154       return;
5155     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5156       SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
5157       return;
5158     }
5159     break;
5160   }
5161   case AArch64ISD::LD1x2post: {
5162     if (VT == MVT::v8i8) {
5163       SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
5164       return;
5165     } else if (VT == MVT::v16i8) {
5166       SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
5167       return;
5168     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5169       SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
5170       return;
5171     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5172       SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
5173       return;
5174     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5175       SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
5176       return;
5177     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5178       SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
5179       return;
5180     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5181       SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
5182       return;
5183     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5184       SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
5185       return;
5186     }
5187     break;
5188   }
5189   case AArch64ISD::LD1x3post: {
5190     if (VT == MVT::v8i8) {
5191       SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
5192       return;
5193     } else if (VT == MVT::v16i8) {
5194       SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
5195       return;
5196     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5197       SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
5198       return;
5199     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5200       SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
5201       return;
5202     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5203       SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
5204       return;
5205     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5206       SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
5207       return;
5208     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5209       SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
5210       return;
5211     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5212       SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
5213       return;
5214     }
5215     break;
5216   }
5217   case AArch64ISD::LD1x4post: {
5218     if (VT == MVT::v8i8) {
5219       SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
5220       return;
5221     } else if (VT == MVT::v16i8) {
5222       SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
5223       return;
5224     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5225       SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
5226       return;
5227     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5228       SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
5229       return;
5230     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5231       SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
5232       return;
5233     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5234       SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
5235       return;
5236     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5237       SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
5238       return;
5239     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5240       SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
5241       return;
5242     }
5243     break;
5244   }
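  // The *DUPpost nodes select the post-indexed load-and-replicate (LDnR)
  // forms, which broadcast each loaded element to every lane.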
5245   case AArch64ISD::LD1DUPpost: {
5246     if (VT == MVT::v8i8) {
5247       SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
5248       return;
5249     } else if (VT == MVT::v16i8) {
5250       SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
5251       return;
5252     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5253       SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
5254       return;
5255     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5256       SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
5257       return;
5258     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5259       SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
5260       return;
5261     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5262       SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
5263       return;
5264     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5265       SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
5266       return;
5267     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5268       SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
5269       return;
5270     }
5271     break;
5272   }
5273   case AArch64ISD::LD2DUPpost: {
5274     if (VT == MVT::v8i8) {
5275       SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
5276       return;
5277     } else if (VT == MVT::v16i8) {
5278       SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
5279       return;
5280     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5281       SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
5282       return;
5283     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5284       SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
5285       return;
5286     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5287       SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
5288       return;
5289     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5290       SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
5291       return;
5292     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5293       SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
5294       return;
5295     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5296       SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
5297       return;
5298     }
5299     break;
5300   }
5301   case AArch64ISD::LD3DUPpost: {
5302     if (VT == MVT::v8i8) {
5303       SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
5304       return;
5305     } else if (VT == MVT::v16i8) {
5306       SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
5307       return;
5308     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5309       SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
5310       return;
5311     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5312       SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
5313       return;
5314     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5315       SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
5316       return;
5317     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5318       SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
5319       return;
5320     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5321       SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
5322       return;
5323     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5324       SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
5325       return;
5326     }
5327     break;
5328   }
5329   case AArch64ISD::LD4DUPpost: {
5330     if (VT == MVT::v8i8) {
5331       SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
5332       return;
5333     } else if (VT == MVT::v16i8) {
5334       SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
5335       return;
5336     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5337       SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
5338       return;
5339     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
5340       SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
5341       return;
5342     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5343       SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
5344       return;
5345     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5346       SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
5347       return;
5348     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5349       SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
5350       return;
5351     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5352       SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
5353       return;
5354     }
5355     break;
5356   }
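  // Post-indexed single-lane loads: only the element size matters here, so
  // the integer and FP vector types of each width share a single
  // LDni8/i16/i32/i64_POST opcode.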
5357   case AArch64ISD::LD1LANEpost: {
5358     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5359       SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
5360       return;
5361     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5362                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5363       SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
5364       return;
5365     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5366                VT == MVT::v2f32) {
5367       SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
5368       return;
5369     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5370                VT == MVT::v1f64) {
5371       SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
5372       return;
5373     }
5374     break;
5375   }
5376   case AArch64ISD::LD2LANEpost: {
5377     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5378       SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
5379       return;
5380     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5381                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5382       SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
5383       return;
5384     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5385                VT == MVT::v2f32) {
5386       SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
5387       return;
5388     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5389                VT == MVT::v1f64) {
5390       SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
5391       return;
5392     }
5393     break;
5394   }
5395   case AArch64ISD::LD3LANEpost: {
5396     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5397       SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
5398       return;
5399     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5400                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5401       SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
5402       return;
5403     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5404                VT == MVT::v2f32) {
5405       SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
5406       return;
5407     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5408                VT == MVT::v1f64) {
5409       SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
5410       return;
5411     }
5412     break;
5413   }
5414   case AArch64ISD::LD4LANEpost: {
5415     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5416       SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
5417       return;
5418     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5419                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5420       SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
5421       return;
5422     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5423                VT == MVT::v2f32) {
5424       SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
5425       return;
5426     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5427                VT == MVT::v1f64) {
5428       SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
5429       return;
5430     }
5431     break;
5432   }
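       // Post-indexed interleaving stores (ST2/ST3/ST4). These have no .1d
       // arrangement, so 64-bit single-element vectors fall back to the
       // equivalent multi-register ST1 forms.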
5433   case AArch64ISD::ST2post: {
5434     VT = Node->getOperand(1).getValueType();
5435     if (VT == MVT::v8i8) {
5436       SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
5437       return;
5438     } else if (VT == MVT::v16i8) {
5439       SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
5440       return;
5441     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5442       SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
5443       return;
5444     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5445       SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
5446       return;
5447     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5448       SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
5449       return;
5450     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5451       SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
5452       return;
5453     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5454       SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
5455       return;
5456     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5457       SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
5458       return;
5459     }
5460     break;
5461   }
5462   case AArch64ISD::ST3post: {
5463     VT = Node->getOperand(1).getValueType();
5464     if (VT == MVT::v8i8) {
5465       SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
5466       return;
5467     } else if (VT == MVT::v16i8) {
5468       SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
5469       return;
5470     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5471       SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
5472       return;
5473     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5474       SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
5475       return;
5476     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5477       SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
5478       return;
5479     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5480       SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
5481       return;
5482     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5483       SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
5484       return;
5485     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5486       SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
5487       return;
5488     }
5489     break;
5490   }
5491   case AArch64ISD::ST4post: {
5492     VT = Node->getOperand(1).getValueType();
5493     if (VT == MVT::v8i8) {
5494       SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
5495       return;
5496     } else if (VT == MVT::v16i8) {
5497       SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
5498       return;
5499     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5500       SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
5501       return;
5502     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5503       SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
5504       return;
5505     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5506       SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
5507       return;
5508     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5509       SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
5510       return;
5511     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5512       SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
5513       return;
5514     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5515       SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
5516       return;
5517     }
5518     break;
5519   }
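       // Post-indexed ST1 of two to four consecutive registers, stored without
       // interleaving.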
5520   case AArch64ISD::ST1x2post: {
5521     VT = Node->getOperand(1).getValueType();
5522     if (VT == MVT::v8i8) {
5523       SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
5524       return;
5525     } else if (VT == MVT::v16i8) {
5526       SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
5527       return;
5528     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5529       SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
5530       return;
5531     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5532       SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
5533       return;
5534     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5535       SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
5536       return;
5537     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5538       SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
5539       return;
5540     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5541       SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
5542       return;
5543     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5544       SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
5545       return;
5546     }
5547     break;
5548   }
5549   case AArch64ISD::ST1x3post: {
5550     VT = Node->getOperand(1).getValueType();
5551     if (VT == MVT::v8i8) {
5552       SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
5553       return;
5554     } else if (VT == MVT::v16i8) {
5555       SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
5556       return;
5557     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5558       SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
5559       return;
5560     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5561       SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
5562       return;
5563     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5564       SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
5565       return;
5566     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5567       SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
5568       return;
5569     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5570       SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
5571       return;
5572     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5573       SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
5574       return;
5575     }
5576     break;
5577   }
5578   case AArch64ISD::ST1x4post: {
5579     VT = Node->getOperand(1).getValueType();
5580     if (VT == MVT::v8i8) {
5581       SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
5582       return;
5583     } else if (VT == MVT::v16i8) {
5584       SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
5585       return;
5586     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5587       SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
5588       return;
5589     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5590       SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
5591       return;
5592     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5593       SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
5594       return;
5595     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5596       SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
5597       return;
5598     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5599       SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
5600       return;
5601     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5602       SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
5603       return;
5604     }
5605     break;
5606   }
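       // Post-indexed single-lane stores: store one lane from each source
       // register.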
5607   case AArch64ISD::ST2LANEpost: {
5608     VT = Node->getOperand(1).getValueType();
5609     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5610       SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
5611       return;
5612     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5613                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5614       SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
5615       return;
5616     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5617                VT == MVT::v2f32) {
5618       SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
5619       return;
5620     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5621                VT == MVT::v1f64) {
5622       SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
5623       return;
5624     }
5625     break;
5626   }
5627   case AArch64ISD::ST3LANEpost: {
5628     VT = Node->getOperand(1).getValueType();
5629     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5630       SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
5631       return;
5632     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5633                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5634       SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
5635       return;
5636     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5637                VT == MVT::v2f32) {
5638       SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
5639       return;
5640     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5641                VT == MVT::v1f64) {
5642       SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
5643       return;
5644     }
5645     break;
5646   }
5647   case AArch64ISD::ST4LANEpost: {
5648     VT = Node->getOperand(1).getValueType();
5649     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5650       SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
5651       return;
5652     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5653                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5654       SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
5655       return;
5656     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5657                VT == MVT::v2f32) {
5658       SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
5659       return;
5660     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5661                VT == MVT::v1f64) {
5662       SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
5663       return;
5664     }
5665     break;
5666   }
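       // SVE structured loads that zero inactive lanes. Each element width passes
       // an immediate-offset (_IMM) opcode and a register-offset opcode;
       // SelectPredicatedLoad chooses between them based on the addressing mode.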
5667   case AArch64ISD::SVE_LD2_MERGE_ZERO: {
5668     if (VT == MVT::nxv16i8) {
5669       SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
5670       return;
5671     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5672                VT == MVT::nxv8bf16) {
5673       SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
5674       return;
5675     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5676       SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
5677       return;
5678     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5679       SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
5680       return;
5681     }
5682     break;
5683   }
5684   case AArch64ISD::SVE_LD3_MERGE_ZERO: {
5685     if (VT == MVT::nxv16i8) {
5686       SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
5687       return;
5688     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5689                VT == MVT::nxv8bf16) {
5690       SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
5691       return;
5692     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5693       SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
5694       return;
5695     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5696       SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
5697       return;
5698     }
5699     break;
5700   }
5701   case AArch64ISD::SVE_LD4_MERGE_ZERO: {
5702     if (VT == MVT::nxv16i8) {
5703       SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
5704       return;
5705     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5706                VT == MVT::nxv8bf16) {
5707       SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
5708       return;
5709     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5710       SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
5711       return;
5712     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5713       SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
5714       return;
5715     }
5716     break;
5717   }
5718   }
5719 
5720   // Select the default instruction
5721   SelectCode(Node);
5722 }
5723 
5724 /// createAArch64ISelDag - This pass converts a legalized DAG into a
5725 /// AArch64-specific DAG, ready for instruction scheduling.
5726 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
5727                                          CodeGenOpt::Level OptLevel) {
5728   return new AArch64DAGToDAGISel(TM, OptLevel);
5729 }
5730 
5731 /// When \p PredVT is a scalable vector predicate in the form
5732 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
5733 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
5734 /// structured vectors (NumVec > 1), the output data type is
5735 /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
5736 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
5737 /// EVT.
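     /// For example, PredVT == MVT::nxv4i1 yields MVT::nxv4i32 when NumVec == 1
     /// and MVT::nxv8i32 when NumVec == 2.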
5738 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
5739                                                 unsigned NumVec) {
5740   assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
5741   if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
5742     return EVT();
5743 
5744   if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
5745       PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
5746     return EVT();
5747 
5748   ElementCount EC = PredVT.getVectorElementCount();
5749   EVT ScalarVT =
5750       EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
5751   EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
5752 
5753   return MemVT;
5754 }
5755 
5756 /// Return the EVT of the data associated with a memory operation in \p
5757 /// Root. If no such EVT can be retrieved, it returns an invalid EVT.
5758 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
5759   if (isa<MemSDNode>(Root))
5760     return cast<MemSDNode>(Root)->getMemoryVT();
5761 
5762   if (isa<MemIntrinsicSDNode>(Root))
5763     return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
5764 
5765   const unsigned Opcode = Root->getOpcode();
5766   // For custom ISD nodes, we have to look at them individually to extract the
5767   // type of the data moved to/from memory.
5768   switch (Opcode) {
5769   case AArch64ISD::LD1_MERGE_ZERO:
5770   case AArch64ISD::LD1S_MERGE_ZERO:
5771   case AArch64ISD::LDNF1_MERGE_ZERO:
5772   case AArch64ISD::LDNF1S_MERGE_ZERO:
5773     return cast<VTSDNode>(Root->getOperand(3))->getVT();
5774   case AArch64ISD::ST1_PRED:
5775     return cast<VTSDNode>(Root->getOperand(4))->getVT();
5776   case AArch64ISD::SVE_LD2_MERGE_ZERO:
5777     return getPackedVectorTypeFromPredicateType(
5778         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
5779   case AArch64ISD::SVE_LD3_MERGE_ZERO:
5780     return getPackedVectorTypeFromPredicateType(
5781         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
5782   case AArch64ISD::SVE_LD4_MERGE_ZERO:
5783     return getPackedVectorTypeFromPredicateType(
5784         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
5785   default:
5786     break;
5787   }
5788 
5789   if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
5790     return EVT();
5791 
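       // For intrinsic nodes the intrinsic ID is operand 1; the memory type is
       // derived per intrinsic, either directly or from the predicate width.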
5792   switch (cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue()) {
5793   default:
5794     return EVT();
5795   case Intrinsic::aarch64_sme_ldr:
5796   case Intrinsic::aarch64_sme_str:
5797     return MVT::nxv16i8;
5798   case Intrinsic::aarch64_sve_prf:
5799     // We are using an SVE prefetch intrinsic. Type must be inferred from the
5800     // width of the predicate.
5801     return getPackedVectorTypeFromPredicateType(
5802         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
5803   case Intrinsic::aarch64_sve_ld2_sret:
5804     return getPackedVectorTypeFromPredicateType(
5805         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
5806   case Intrinsic::aarch64_sve_ld3_sret:
5807     return getPackedVectorTypeFromPredicateType(
5808         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
5809   case Intrinsic::aarch64_sve_ld4_sret:
5810     return getPackedVectorTypeFromPredicateType(
5811         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
5812   }
5813 }
5814 
5815 /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
5816 /// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max,
5817 /// where Root is the memory access using N for its address.
5818 template <int64_t Min, int64_t Max>
5819 bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
5820                                                    SDValue &Base,
5821                                                    SDValue &OffImm) {
5822   const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
5823   const DataLayout &DL = CurDAG->getDataLayout();
5824   const MachineFrameInfo &MFI = MF->getFrameInfo();
5825 
5826   if (N.getOpcode() == ISD::FrameIndex) {
5827     int FI = cast<FrameIndexSDNode>(N)->getIndex();
5828     // We can only encode VL scaled offsets, so only fold in frame indexes
5829     // referencing SVE objects.
5830     if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
5831       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
5832       OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
5833       return true;
5834     }
5835 
5836     return false;
5837   }
5838 
5839   if (MemVT == EVT())
5840     return false;
5841 
5842   if (N.getOpcode() != ISD::ADD)
5843     return false;
5844 
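       // Otherwise match Base + (vscale * MulImm). MulImm must be a multiple of
       // the memory access width in bytes so it can be encoded as a VL-scaled
       // element offset in the range [Min, Max].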
5845   SDValue VScale = N.getOperand(1);
5846   if (VScale.getOpcode() != ISD::VSCALE)
5847     return false;
5848 
5849   TypeSize TS = MemVT.getSizeInBits();
5850   int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
5851   int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
5852 
5853   if ((MulImm % MemWidthBytes) != 0)
5854     return false;
5855 
5856   int64_t Offset = MulImm / MemWidthBytes;
5857   if (Offset < Min || Offset > Max)
5858     return false;
5859 
5860   Base = N.getOperand(0);
5861   if (Base.getOpcode() == ISD::FrameIndex) {
5862     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
5863     // We can only encode VL scaled offsets, so only fold in frame indexes
5864     // referencing SVE objects.
5865     if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
5866       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
5867   }
5868 
5869   OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
5870   return true;
5871 }
5872 
5873 /// Select register plus register addressing mode for SVE, with scaled
5874 /// offset.
5875 bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
5876                                                   SDValue &Base,
5877                                                   SDValue &Offset) {
5878   if (N.getOpcode() != ISD::ADD)
5879     return false;
5880 
5881   // Process an ADD node.
5882   const SDValue LHS = N.getOperand(0);
5883   const SDValue RHS = N.getOperand(1);
5884 
5885   // 8-bit data does not come with an SHL node, so it is treated
5886   // separately.
5887   if (Scale == 0) {
5888     Base = LHS;
5889     Offset = RHS;
5890     return true;
5891   }
5892 
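       // A constant offset can still use the reg+reg form: if it is a multiple of
       // the element size, materialize ImmOff >> Scale with MOVi64imm and use the
       // result as the index register.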
5893   if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
5894     int64_t ImmOff = C->getSExtValue();
5895     unsigned Size = 1 << Scale;
5896 
5897     // To use the reg+reg addressing mode, the immediate must be a multiple of
5898     // the vector element's byte size.
5899     if (ImmOff % Size)
5900       return false;
5901 
5902     SDLoc DL(N);
5903     Base = LHS;
5904     Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
5905     SDValue Ops[] = {Offset};
5906     SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
5907     Offset = SDValue(MI, 0);
5908     return true;
5909   }
5910 
5911   // Check if the RHS is a shift node with a constant.
5912   if (RHS.getOpcode() != ISD::SHL)
5913     return false;
5914 
5915   const SDValue ShiftRHS = RHS.getOperand(1);
5916   if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
5917     if (C->getZExtValue() == Scale) {
5918       Base = LHS;
5919       Offset = RHS.getOperand(0);
5920       return true;
5921     }
5922 
5923   return false;
5924 }
5925 
5926 bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
5927   const AArch64TargetLowering *TLI =
5928       static_cast<const AArch64TargetLowering *>(getTargetLowering());
5929 
5930   return TLI->isAllActivePredicate(*CurDAG, N);
5931 }
5932 
5933 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
5934                                              SDValue &Base, SDValue &Offset,
5935                                              unsigned Scale) {
5936   if (N.getOpcode() != ISD::ADD) {
5937     Base = N;
5938     Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
5939     return true;
5940   }
5941 
5942   // Process an ADD node.
5943   const SDValue LHS = N.getOperand(0);
5944   const SDValue RHS = N.getOperand(1);
5945 
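       // Fold a constant offset into the slice if it lies in [0, MaxSize] and is
       // a multiple of Scale; the encoded offset is ImmOff / Scale.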
5946   if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
5947     int64_t ImmOff = C->getSExtValue();
5948 
5949     if ((ImmOff < 0 || ImmOff > MaxSize) || (ImmOff % Scale != 0))
5950       return false;
5951 
5952     Base = LHS;
5953     Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
5954     return true;
5955   }
5956 
5957   return false;
5958 }
5959