xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64MachineFunctionInfo.h"
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/ISDOpcodes.h"
18 #include "llvm/CodeGen/SelectionDAGISel.h"
19 #include "llvm/IR/Function.h" // To access function attributes.
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/IntrinsicsAArch64.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/KnownBits.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "aarch64-isel"
32 #define PASS_NAME "AArch64 Instruction Selection"
33 
34 //===--------------------------------------------------------------------===//
35 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36 /// instructions for SelectionDAG operations.
37 ///
38 namespace {
39 
40 class AArch64DAGToDAGISel : public SelectionDAGISel {
41 
42   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43   /// make the right decision when generating code for different targets.
44   const AArch64Subtarget *Subtarget;
45 
46 public:
47   AArch64DAGToDAGISel() = delete;
48 
49   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
50                                CodeGenOptLevel OptLevel)
51       : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
52 
53   bool runOnMachineFunction(MachineFunction &MF) override {
54     Subtarget = &MF.getSubtarget<AArch64Subtarget>();
55     return SelectionDAGISel::runOnMachineFunction(MF);
56   }
57 
58   void Select(SDNode *Node) override;
59 
60   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
61   /// inline asm expressions.
62   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
63                                     InlineAsm::ConstraintCode ConstraintID,
64                                     std::vector<SDValue> &OutOps) override;
65 
66   template <signed Low, signed High, signed Scale>
67   bool SelectRDVLImm(SDValue N, SDValue &Imm);
68 
69   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
70   bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
71   bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
72   bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
73   bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
74     return SelectShiftedRegister(N, false, Reg, Shift);
75   }
76   bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
77     return SelectShiftedRegister(N, true, Reg, Shift);
78   }
79   bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
80     return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
81   }
82   bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
83     return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
84   }
85   bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
86     return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
87   }
88   bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
89     return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
90   }
91   bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
92     return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
93   }
94   bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
95     return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
96   }
97   bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
98     return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
99   }
100   bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
101     return SelectAddrModeIndexed(N, 1, Base, OffImm);
102   }
103   bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
104     return SelectAddrModeIndexed(N, 2, Base, OffImm);
105   }
106   bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
107     return SelectAddrModeIndexed(N, 4, Base, OffImm);
108   }
109   bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
110     return SelectAddrModeIndexed(N, 8, Base, OffImm);
111   }
112   bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
113     return SelectAddrModeIndexed(N, 16, Base, OffImm);
114   }
115   bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
116     return SelectAddrModeUnscaled(N, 1, Base, OffImm);
117   }
118   bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
119     return SelectAddrModeUnscaled(N, 2, Base, OffImm);
120   }
121   bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
122     return SelectAddrModeUnscaled(N, 4, Base, OffImm);
123   }
124   bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
125     return SelectAddrModeUnscaled(N, 8, Base, OffImm);
126   }
127   bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
128     return SelectAddrModeUnscaled(N, 16, Base, OffImm);
129   }
130   template <unsigned Size, unsigned Max>
131   bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
132     // Test if there is an appropriate addressing mode and check if the
133     // immediate fits.
134     bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
135     if (Found) {
136       if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
137         int64_t C = CI->getSExtValue();
138         if (C <= Max)
139           return true;
140       }
141     }
142 
143     // Otherwise, base only, materialize address in register.
144     Base = N;
145     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
146     return true;
147   }
148 
149   template<int Width>
150   bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
151                          SDValue &SignExtend, SDValue &DoShift) {
152     return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
153   }
154 
155   template<int Width>
156   bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
157                          SDValue &SignExtend, SDValue &DoShift) {
158     return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
159   }
160 
161   bool SelectExtractHigh(SDValue N, SDValue &Res) {
162     if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
163       N = N->getOperand(0);
164     if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
165         !isa<ConstantSDNode>(N->getOperand(1)))
166       return false;
167     EVT VT = N->getValueType(0);
168     EVT LVT = N->getOperand(0).getValueType();
169     unsigned Index = N->getConstantOperandVal(1);
170     if (!VT.is64BitVector() || !LVT.is128BitVector() ||
171         Index != VT.getVectorNumElements())
172       return false;
173     Res = N->getOperand(0);
174     return true;
175   }
176 
177   bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
178     if (N.getOpcode() != AArch64ISD::VLSHR)
179       return false;
180     SDValue Op = N->getOperand(0);
181     EVT VT = Op.getValueType();
182     unsigned ShtAmt = N->getConstantOperandVal(1);
183     if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
184       return false;
185 
186     APInt Imm;
187     if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
188       Imm = APInt(VT.getScalarSizeInBits(),
189                   Op.getOperand(1).getConstantOperandVal(0)
190                       << Op.getOperand(1).getConstantOperandVal(1));
191     else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
192              isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
193       Imm = APInt(VT.getScalarSizeInBits(),
194                   Op.getOperand(1).getConstantOperandVal(0));
195     else
196       return false;
197 
198     if (Imm != 1ULL << (ShtAmt - 1))
199       return false;
200 
201     Res1 = Op.getOperand(0);
202     Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
203     return true;
204   }
205 
206   bool SelectDupZeroOrUndef(SDValue N) {
207     switch(N->getOpcode()) {
208     case ISD::UNDEF:
209       return true;
210     case AArch64ISD::DUP:
211     case ISD::SPLAT_VECTOR: {
212       auto Opnd0 = N->getOperand(0);
213       if (isNullConstant(Opnd0))
214         return true;
215       if (isNullFPConstant(Opnd0))
216         return true;
217       break;
218     }
219     default:
220       break;
221     }
222 
223     return false;
224   }
225 
226   bool SelectDupZero(SDValue N) {
227     switch(N->getOpcode()) {
228     case AArch64ISD::DUP:
229     case ISD::SPLAT_VECTOR: {
230       auto Opnd0 = N->getOperand(0);
231       if (isNullConstant(Opnd0))
232         return true;
233       if (isNullFPConstant(Opnd0))
234         return true;
235       break;
236     }
237     }
238 
239     return false;
240   }
241 
242   bool SelectDupNegativeZero(SDValue N) {
243     switch(N->getOpcode()) {
244     case AArch64ISD::DUP:
245     case ISD::SPLAT_VECTOR: {
246       ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
247       return Const && Const->isZero() && Const->isNegative();
248     }
249     }
250 
251     return false;
252   }
253 
254   template<MVT::SimpleValueType VT>
255   bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
256     return SelectSVEAddSubImm(N, VT, Imm, Shift);
257   }
258 
259   template <MVT::SimpleValueType VT, bool Negate>
260   bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
261     return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
262   }
263 
264   template <MVT::SimpleValueType VT>
265   bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
266     return SelectSVECpyDupImm(N, VT, Imm, Shift);
267   }
268 
269   template <MVT::SimpleValueType VT, bool Invert = false>
270   bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
271     return SelectSVELogicalImm(N, VT, Imm, Invert);
272   }
273 
274   template <MVT::SimpleValueType VT>
275   bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
276     return SelectSVEArithImm(N, VT, Imm);
277   }
278 
279   template <unsigned Low, unsigned High, bool AllowSaturation = false>
280   bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
281     return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
282   }
283 
284   bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
285     if (N->getOpcode() != ISD::SPLAT_VECTOR)
286       return false;
287 
288     EVT EltVT = N->getValueType(0).getVectorElementType();
289     return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
290                              /* High */ EltVT.getFixedSizeInBits(),
291                              /* AllowSaturation */ true, Imm);
292   }
293 
294   // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
295   template<signed Min, signed Max, signed Scale, bool Shift>
296   bool SelectCntImm(SDValue N, SDValue &Imm) {
297     if (!isa<ConstantSDNode>(N))
298       return false;
299 
300     int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
301     if (Shift)
302       MulImm = 1LL << MulImm;
303 
304     if ((MulImm % std::abs(Scale)) != 0)
305       return false;
306 
307     MulImm /= Scale;
308     if ((MulImm >= Min) && (MulImm <= Max)) {
309       Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
310       return true;
311     }
312 
313     return false;
314   }
315 
316   template <signed Max, signed Scale>
317   bool SelectEXTImm(SDValue N, SDValue &Imm) {
318     if (!isa<ConstantSDNode>(N))
319       return false;
320 
321     int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
322 
323     if (MulImm >= 0 && MulImm <= Max) {
324       MulImm *= Scale;
325       Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
326       return true;
327     }
328 
329     return false;
330   }
331 
332   template <unsigned BaseReg, unsigned Max>
333   bool ImmToReg(SDValue N, SDValue &Imm) {
334     if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
335       uint64_t C = CI->getZExtValue();
336 
337       if (C > Max)
338         return false;
339 
340       Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
341       return true;
342     }
343     return false;
344   }
345 
346   /// Form sequences of consecutive 64/128-bit registers for use in NEON
347   /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
348   /// between 1 and 4 elements. If it contains a single element that is returned
349   /// unchanged; otherwise a REG_SEQUENCE value is returned.
350   SDValue createDTuple(ArrayRef<SDValue> Vecs);
351   SDValue createQTuple(ArrayRef<SDValue> Vecs);
352   // Form a sequence of SVE registers for instructions using list of vectors,
353   // e.g. structured loads and stores (ldN, stN).
354   SDValue createZTuple(ArrayRef<SDValue> Vecs);
355 
356   // Similar to above, except the register must start at a multiple of the
357   // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
358   SDValue createZMulTuple(ArrayRef<SDValue> Regs);
359 
360   /// Generic helper for the createDTuple/createQTuple
361   /// functions. Those should almost always be called instead.
362   SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
363                       const unsigned SubRegs[]);
364 
365   void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
366 
367   bool tryIndexedLoad(SDNode *N);
368 
369   void SelectPtrauthAuth(SDNode *N);
370   void SelectPtrauthResign(SDNode *N);
371 
372   bool trySelectStackSlotTagP(SDNode *N);
373   void SelectTagP(SDNode *N);
374 
375   void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
376                      unsigned SubRegIdx);
377   void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
378                          unsigned SubRegIdx);
379   void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380   void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
381   void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
382                             unsigned Opc_rr, unsigned Opc_ri,
383                             bool IsIntr = false);
384   void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
385                                        unsigned Scale, unsigned Opc_ri,
386                                        unsigned Opc_rr);
387   void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
388                                        bool IsZmMulti, unsigned Opcode,
389                                        bool HasPred = false);
390   void SelectPExtPair(SDNode *N, unsigned Opc);
391   void SelectWhilePair(SDNode *N, unsigned Opc);
392   void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
393   void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
394   void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
395                                  bool IsTupleInput, unsigned Opc);
396   void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
397 
398   template <unsigned MaxIdx, unsigned Scale>
399   void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
400                              unsigned Op);
401   void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
402                               unsigned Op, unsigned MaxIdx, unsigned Scale,
403                               unsigned BaseReg = 0);
404   bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
405   /// SVE Reg+Imm addressing mode.
406   template <int64_t Min, int64_t Max>
407   bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
408                                 SDValue &OffImm);
409   /// SVE Reg+Reg address mode.
410   template <unsigned Scale>
411   bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
412     return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
413   }
414 
415   void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
416                              uint32_t MaxImm);
417 
418   template <unsigned MaxIdx, unsigned Scale>
419   bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
420     return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
421   }
422 
423   void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
424   void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
425   void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
426   void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
427   void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
428                              unsigned Opc_rr, unsigned Opc_ri);
429   std::tuple<unsigned, SDValue, SDValue>
430   findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
431                            const SDValue &OldBase, const SDValue &OldOffset,
432                            unsigned Scale);
433 
434   bool tryBitfieldExtractOp(SDNode *N);
435   bool tryBitfieldExtractOpFromSExt(SDNode *N);
436   bool tryBitfieldInsertOp(SDNode *N);
437   bool tryBitfieldInsertInZeroOp(SDNode *N);
438   bool tryShiftAmountMod(SDNode *N);
439 
440   bool tryReadRegister(SDNode *N);
441   bool tryWriteRegister(SDNode *N);
442 
443   bool trySelectCastFixedLengthToScalableVector(SDNode *N);
444   bool trySelectCastScalableToFixedLengthVector(SDNode *N);
445 
446   bool trySelectXAR(SDNode *N);
447 
448 // Include the pieces autogenerated from the target description.
449 #include "AArch64GenDAGISel.inc"
450 
451 private:
452   bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
453                              SDValue &Shift);
454   bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
455   bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
456                                SDValue &OffImm) {
457     return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
458   }
459   bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
460                                      unsigned Size, SDValue &Base,
461                                      SDValue &OffImm);
462   bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
463                              SDValue &OffImm);
464   bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
465                               SDValue &OffImm);
466   bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
467                          SDValue &Offset, SDValue &SignExtend,
468                          SDValue &DoShift);
469   bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
470                          SDValue &Offset, SDValue &SignExtend,
471                          SDValue &DoShift);
472   bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
473   bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
474   bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
475                          SDValue &Offset, SDValue &SignExtend);
476 
477   template<unsigned RegWidth>
478   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
479     return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
480   }
481 
482   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
483 
484   template<unsigned RegWidth>
485   bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
486     return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
487   }
488 
489   bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
490                                      unsigned Width);
491 
492   bool SelectCMP_SWAP(SDNode *N);
493 
494   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
495   bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
496                               bool Negate);
497   bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
498   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
499 
500   bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
501   bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
502                          bool AllowSaturation, SDValue &Imm);
503 
504   bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
505   bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
506                                SDValue &Offset);
507   bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
508                           SDValue &Offset, unsigned Scale = 1);
509 
510   bool SelectAllActivePredicate(SDValue N);
511   bool SelectAnyPredicate(SDValue N);
512 };
513 
514 class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
515 public:
516   static char ID;
517   explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
518                                      CodeGenOptLevel OptLevel)
519       : SelectionDAGISelLegacy(
520             ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
521 };
522 } // end anonymous namespace
523 
524 char AArch64DAGToDAGISelLegacy::ID = 0;
525 
526 INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
527 
528 /// isIntImmediate - This method tests to see if the node is a constant
529 /// operand. If so Imm will receive the 32-bit value.
530 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
531   if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
532     Imm = C->getZExtValue();
533     return true;
534   }
535   return false;
536 }
537 
538 // isIntImmediate - This method tests to see if a constant operand.
539 // If so Imm will receive the value.
540 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
541   return isIntImmediate(N.getNode(), Imm);
542 }
543 
544 // isOpcWithIntImmediate - This method tests to see if the node is a specific
545 // opcode and that it has a immediate integer right operand.
546 // If so Imm will receive the 32 bit value.
547 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
548                                   uint64_t &Imm) {
549   return N->getOpcode() == Opc &&
550          isIntImmediate(N->getOperand(1).getNode(), Imm);
551 }
552 
553 // isIntImmediateEq - This method tests to see if N is a constant operand that
554 // is equivalent to 'ImmExpected'.
555 #ifndef NDEBUG
556 static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
557   uint64_t Imm;
558   if (!isIntImmediate(N.getNode(), Imm))
559     return false;
560   return Imm == ImmExpected;
561 }
562 #endif
563 
564 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
565     const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
566     std::vector<SDValue> &OutOps) {
567   switch(ConstraintID) {
568   default:
569     llvm_unreachable("Unexpected asm memory constraint");
570   case InlineAsm::ConstraintCode::m:
571   case InlineAsm::ConstraintCode::o:
572   case InlineAsm::ConstraintCode::Q:
573     // We need to make sure that this one operand does not end up in XZR, thus
574     // require the address to be in a PointerRegClass register.
575     const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
576     const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
577     SDLoc dl(Op);
578     SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
579     SDValue NewOp =
580         SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
581                                        dl, Op.getValueType(),
582                                        Op, RC), 0);
583     OutOps.push_back(NewOp);
584     return false;
585   }
586   return true;
587 }
588 
589 /// SelectArithImmed - Select an immediate value that can be represented as
590 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
591 /// Val set to the 12-bit value and Shift set to the shifter operand.
592 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
593                                            SDValue &Shift) {
594   // This function is called from the addsub_shifted_imm ComplexPattern,
595   // which lists [imm] as the list of opcode it's interested in, however
596   // we still need to check whether the operand is actually an immediate
597   // here because the ComplexPattern opcode list is only used in
598   // root-level opcode matching.
599   if (!isa<ConstantSDNode>(N.getNode()))
600     return false;
601 
602   uint64_t Immed = N.getNode()->getAsZExtVal();
603   unsigned ShiftAmt;
604 
605   if (Immed >> 12 == 0) {
606     ShiftAmt = 0;
607   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
608     ShiftAmt = 12;
609     Immed = Immed >> 12;
610   } else
611     return false;
612 
613   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
614   SDLoc dl(N);
615   Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
616   Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
617   return true;
618 }
619 
620 /// SelectNegArithImmed - As above, but negates the value before trying to
621 /// select it.
622 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
623                                               SDValue &Shift) {
624   // This function is called from the addsub_shifted_imm ComplexPattern,
625   // which lists [imm] as the list of opcode it's interested in, however
626   // we still need to check whether the operand is actually an immediate
627   // here because the ComplexPattern opcode list is only used in
628   // root-level opcode matching.
629   if (!isa<ConstantSDNode>(N.getNode()))
630     return false;
631 
632   // The immediate operand must be a 24-bit zero-extended immediate.
633   uint64_t Immed = N.getNode()->getAsZExtVal();
634 
635   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
636   // have the opposite effect on the C flag, so this pattern mustn't match under
637   // those circumstances.
638   if (Immed == 0)
639     return false;
640 
641   if (N.getValueType() == MVT::i32)
642     Immed = ~((uint32_t)Immed) + 1;
643   else
644     Immed = ~Immed + 1ULL;
645   if (Immed & 0xFFFFFFFFFF000000ULL)
646     return false;
647 
648   Immed &= 0xFFFFFFULL;
649   return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
650                           Shift);
651 }
652 
653 /// getShiftTypeForNode - Translate a shift node to the corresponding
654 /// ShiftType value.
655 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
656   switch (N.getOpcode()) {
657   default:
658     return AArch64_AM::InvalidShiftExtend;
659   case ISD::SHL:
660     return AArch64_AM::LSL;
661   case ISD::SRL:
662     return AArch64_AM::LSR;
663   case ISD::SRA:
664     return AArch64_AM::ASR;
665   case ISD::ROTR:
666     return AArch64_AM::ROR;
667   }
668 }
669 
670 /// Determine whether it is worth it to fold SHL into the addressing
671 /// mode.
672 static bool isWorthFoldingSHL(SDValue V) {
673   assert(V.getOpcode() == ISD::SHL && "invalid opcode");
674   // It is worth folding logical shift of up to three places.
675   auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
676   if (!CSD)
677     return false;
678   unsigned ShiftVal = CSD->getZExtValue();
679   if (ShiftVal > 3)
680     return false;
681 
682   // Check if this particular node is reused in any non-memory related
683   // operation.  If yes, do not try to fold this node into the address
684   // computation, since the computation will be kept.
685   const SDNode *Node = V.getNode();
686   for (SDNode *UI : Node->uses())
687     if (!isa<MemSDNode>(*UI))
688       for (SDNode *UII : UI->uses())
689         if (!isa<MemSDNode>(*UII))
690           return false;
691   return true;
692 }
693 
694 /// Determine whether it is worth to fold V into an extended register addressing
695 /// mode.
696 bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
697   // Trivial if we are optimizing for code size or if there is only
698   // one use of the value.
699   if (CurDAG->shouldOptForSize() || V.hasOneUse())
700     return true;
701 
702   // If a subtarget has a slow shift, folding a shift into multiple loads
703   // costs additional micro-ops.
704   if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
705     return false;
706 
707   // Check whether we're going to emit the address arithmetic anyway because
708   // it's used by a non-address operation.
709   if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
710     return true;
711   if (V.getOpcode() == ISD::ADD) {
712     const SDValue LHS = V.getOperand(0);
713     const SDValue RHS = V.getOperand(1);
714     if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
715       return true;
716     if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
717       return true;
718   }
719 
720   // It hurts otherwise, since the value will be reused.
721   return false;
722 }
723 
724 /// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
725 /// to select more shifted register
726 bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
727                                                        SDValue &Shift) {
728   EVT VT = N.getValueType();
729   if (VT != MVT::i32 && VT != MVT::i64)
730     return false;
731 
732   if (N->getOpcode() != ISD::AND || !N->hasOneUse())
733     return false;
734   SDValue LHS = N.getOperand(0);
735   if (!LHS->hasOneUse())
736     return false;
737 
738   unsigned LHSOpcode = LHS->getOpcode();
739   if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
740     return false;
741 
742   ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
743   if (!ShiftAmtNode)
744     return false;
745 
746   uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
747   ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
748   if (!RHSC)
749     return false;
750 
751   APInt AndMask = RHSC->getAPIntValue();
752   unsigned LowZBits, MaskLen;
753   if (!AndMask.isShiftedMask(LowZBits, MaskLen))
754     return false;
755 
756   unsigned BitWidth = N.getValueSizeInBits();
757   SDLoc DL(LHS);
758   uint64_t NewShiftC;
759   unsigned NewShiftOp;
760   if (LHSOpcode == ISD::SHL) {
761     // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
762     // BitWidth != LowZBits + MaskLen doesn't match the pattern
763     if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
764       return false;
765 
766     NewShiftC = LowZBits - ShiftAmtC;
767     NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
768   } else {
769     if (LowZBits == 0)
770       return false;
771 
772     // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
773     NewShiftC = LowZBits + ShiftAmtC;
774     if (NewShiftC >= BitWidth)
775       return false;
776 
777     // SRA need all high bits
778     if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
779       return false;
780 
781     // SRL high bits can be 0 or 1
782     if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
783       return false;
784 
785     if (LHSOpcode == ISD::SRL)
786       NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
787     else
788       NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
789   }
790 
791   assert(NewShiftC < BitWidth && "Invalid shift amount");
792   SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
793   SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
794   Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
795                                        NewShiftAmt, BitWidthMinus1),
796                 0);
797   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
798   Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
799   return true;
800 }
801 
802 /// getExtendTypeForNode - Translate an extend node to the corresponding
803 /// ExtendType value.
804 static AArch64_AM::ShiftExtendType
805 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
806   if (N.getOpcode() == ISD::SIGN_EXTEND ||
807       N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
808     EVT SrcVT;
809     if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
810       SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
811     else
812       SrcVT = N.getOperand(0).getValueType();
813 
814     if (!IsLoadStore && SrcVT == MVT::i8)
815       return AArch64_AM::SXTB;
816     else if (!IsLoadStore && SrcVT == MVT::i16)
817       return AArch64_AM::SXTH;
818     else if (SrcVT == MVT::i32)
819       return AArch64_AM::SXTW;
820     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
821 
822     return AArch64_AM::InvalidShiftExtend;
823   } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
824              N.getOpcode() == ISD::ANY_EXTEND) {
825     EVT SrcVT = N.getOperand(0).getValueType();
826     if (!IsLoadStore && SrcVT == MVT::i8)
827       return AArch64_AM::UXTB;
828     else if (!IsLoadStore && SrcVT == MVT::i16)
829       return AArch64_AM::UXTH;
830     else if (SrcVT == MVT::i32)
831       return AArch64_AM::UXTW;
832     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
833 
834     return AArch64_AM::InvalidShiftExtend;
835   } else if (N.getOpcode() == ISD::AND) {
836     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
837     if (!CSD)
838       return AArch64_AM::InvalidShiftExtend;
839     uint64_t AndMask = CSD->getZExtValue();
840 
841     switch (AndMask) {
842     default:
843       return AArch64_AM::InvalidShiftExtend;
844     case 0xFF:
845       return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
846     case 0xFFFF:
847       return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
848     case 0xFFFFFFFF:
849       return AArch64_AM::UXTW;
850     }
851   }
852 
853   return AArch64_AM::InvalidShiftExtend;
854 }
855 
856 /// Determine whether it is worth to fold V into an extended register of an
857 /// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
858 /// instruction, and the shift should be treated as worth folding even if has
859 /// multiple uses.
860 bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
861   // Trivial if we are optimizing for code size or if there is only
862   // one use of the value.
863   if (CurDAG->shouldOptForSize() || V.hasOneUse())
864     return true;
865 
866   // If a subtarget has a fastpath LSL we can fold a logical shift into
867   // the add/sub and save a cycle.
868   if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
869       V.getConstantOperandVal(1) <= 4 &&
870       getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
871     return true;
872 
873   // It hurts otherwise, since the value will be reused.
874   return false;
875 }
876 
877 /// SelectShiftedRegister - Select a "shifted register" operand.  If the value
878 /// is not shifted, set the Shift operand to default of "LSL 0".  The logical
879 /// instructions allow the shifted register to be rotated, but the arithmetic
880 /// instructions do not.  The AllowROR parameter specifies whether ROR is
881 /// supported.
882 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
883                                                 SDValue &Reg, SDValue &Shift) {
884   if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
885     return true;
886 
887   AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
888   if (ShType == AArch64_AM::InvalidShiftExtend)
889     return false;
890   if (!AllowROR && ShType == AArch64_AM::ROR)
891     return false;
892 
893   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
894     unsigned BitSize = N.getValueSizeInBits();
895     unsigned Val = RHS->getZExtValue() & (BitSize - 1);
896     unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
897 
898     Reg = N.getOperand(0);
899     Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
900     return isWorthFoldingALU(N, true);
901   }
902 
903   return false;
904 }
905 
906 /// Instructions that accept extend modifiers like UXTW expect the register
907 /// being extended to be a GPR32, but the incoming DAG might be acting on a
908 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
909 /// this is the case.
910 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
911   if (N.getValueType() == MVT::i32)
912     return N;
913 
914   SDLoc dl(N);
915   return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
916 }
917 
918 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
919 template<signed Low, signed High, signed Scale>
920 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
921   if (!isa<ConstantSDNode>(N))
922     return false;
923 
924   int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
925   if ((MulImm % std::abs(Scale)) == 0) {
926     int64_t RDVLImm = MulImm / Scale;
927     if ((RDVLImm >= Low) && (RDVLImm <= High)) {
928       Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
929       return true;
930     }
931   }
932 
933   return false;
934 }
935 
936 /// SelectArithExtendedRegister - Select a "extended register" operand.  This
937 /// operand folds in an extend followed by an optional left shift.
938 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
939                                                       SDValue &Shift) {
940   unsigned ShiftVal = 0;
941   AArch64_AM::ShiftExtendType Ext;
942 
943   if (N.getOpcode() == ISD::SHL) {
944     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
945     if (!CSD)
946       return false;
947     ShiftVal = CSD->getZExtValue();
948     if (ShiftVal > 4)
949       return false;
950 
951     Ext = getExtendTypeForNode(N.getOperand(0));
952     if (Ext == AArch64_AM::InvalidShiftExtend)
953       return false;
954 
955     Reg = N.getOperand(0).getOperand(0);
956   } else {
957     Ext = getExtendTypeForNode(N);
958     if (Ext == AArch64_AM::InvalidShiftExtend)
959       return false;
960 
961     Reg = N.getOperand(0);
962 
963     // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
964     // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
965     auto isDef32 = [](SDValue N) {
966       unsigned Opc = N.getOpcode();
967       return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
968              Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
969              Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
970              Opc != ISD::FREEZE;
971     };
972     if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
973         isDef32(Reg))
974       return false;
975   }
976 
977   // AArch64 mandates that the RHS of the operation must use the smallest
978   // register class that could contain the size being extended from.  Thus,
979   // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
980   // there might not be an actual 32-bit value in the program.  We can
981   // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
982   assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
983   Reg = narrowIfNeeded(CurDAG, Reg);
984   Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
985                                     MVT::i32);
986   return isWorthFoldingALU(N);
987 }
988 
989 /// SelectArithUXTXRegister - Select a "UXTX register" operand. This
990 /// operand is refered by the instructions have SP operand
991 bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
992                                                   SDValue &Shift) {
993   unsigned ShiftVal = 0;
994   AArch64_AM::ShiftExtendType Ext;
995 
996   if (N.getOpcode() != ISD::SHL)
997     return false;
998 
999   ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1000   if (!CSD)
1001     return false;
1002   ShiftVal = CSD->getZExtValue();
1003   if (ShiftVal > 4)
1004     return false;
1005 
1006   Ext = AArch64_AM::UXTX;
1007   Reg = N.getOperand(0);
1008   Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1009                                     MVT::i32);
1010   return isWorthFoldingALU(N);
1011 }
1012 
1013 /// If there's a use of this ADDlow that's not itself a load/store then we'll
1014 /// need to create a real ADD instruction from it anyway and there's no point in
1015 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1016 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1017 /// leads to duplicated ADRP instructions.
1018 static bool isWorthFoldingADDlow(SDValue N) {
1019   for (auto *Use : N->uses()) {
1020     if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1021         Use->getOpcode() != ISD::ATOMIC_LOAD &&
1022         Use->getOpcode() != ISD::ATOMIC_STORE)
1023       return false;
1024 
1025     // ldar and stlr have much more restrictive addressing modes (just a
1026     // register).
1027     if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1028       return false;
1029   }
1030 
1031   return true;
1032 }
1033 
1034 /// Check if the immediate offset is valid as a scaled immediate.
1035 static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1036                                      unsigned Size) {
1037   if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1038       Offset < (Range << Log2_32(Size)))
1039     return true;
1040   return false;
1041 }
1042 
1043 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1044 /// immediate" address.  The "Size" argument is the size in bytes of the memory
1045 /// reference, which determines the scale.
1046 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1047                                                         unsigned BW, unsigned Size,
1048                                                         SDValue &Base,
1049                                                         SDValue &OffImm) {
1050   SDLoc dl(N);
1051   const DataLayout &DL = CurDAG->getDataLayout();
1052   const TargetLowering *TLI = getTargetLowering();
1053   if (N.getOpcode() == ISD::FrameIndex) {
1054     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1055     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1056     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1057     return true;
1058   }
1059 
1060   // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1061   // selected here doesn't support labels/immediates, only base+offset.
1062   if (CurDAG->isBaseWithConstantOffset(N)) {
1063     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1064       if (IsSignedImm) {
1065         int64_t RHSC = RHS->getSExtValue();
1066         unsigned Scale = Log2_32(Size);
1067         int64_t Range = 0x1LL << (BW - 1);
1068 
1069         if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1070             RHSC < (Range << Scale)) {
1071           Base = N.getOperand(0);
1072           if (Base.getOpcode() == ISD::FrameIndex) {
1073             int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1074             Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1075           }
1076           OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1077           return true;
1078         }
1079       } else {
1080         // unsigned Immediate
1081         uint64_t RHSC = RHS->getZExtValue();
1082         unsigned Scale = Log2_32(Size);
1083         uint64_t Range = 0x1ULL << BW;
1084 
1085         if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1086           Base = N.getOperand(0);
1087           if (Base.getOpcode() == ISD::FrameIndex) {
1088             int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1089             Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1090           }
1091           OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1092           return true;
1093         }
1094       }
1095     }
1096   }
1097   // Base only. The address will be materialized into a register before
1098   // the memory is accessed.
1099   //    add x0, Xbase, #offset
1100   //    stp x1, x2, [x0]
1101   Base = N;
1102   OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1103   return true;
1104 }
1105 
1106 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1107 /// immediate" address.  The "Size" argument is the size in bytes of the memory
1108 /// reference, which determines the scale.
1109 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1110                                               SDValue &Base, SDValue &OffImm) {
1111   SDLoc dl(N);
1112   const DataLayout &DL = CurDAG->getDataLayout();
1113   const TargetLowering *TLI = getTargetLowering();
1114   if (N.getOpcode() == ISD::FrameIndex) {
1115     int FI = cast<FrameIndexSDNode>(N)->getIndex();
1116     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1117     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1118     return true;
1119   }
1120 
1121   if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1122     GlobalAddressSDNode *GAN =
1123         dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1124     Base = N.getOperand(0);
1125     OffImm = N.getOperand(1);
1126     if (!GAN)
1127       return true;
1128 
1129     if (GAN->getOffset() % Size == 0 &&
1130         GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1131       return true;
1132   }
1133 
1134   if (CurDAG->isBaseWithConstantOffset(N)) {
1135     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1136       int64_t RHSC = (int64_t)RHS->getZExtValue();
1137       unsigned Scale = Log2_32(Size);
1138       if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1139         Base = N.getOperand(0);
1140         if (Base.getOpcode() == ISD::FrameIndex) {
1141           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1142           Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1143         }
1144         OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1145         return true;
1146       }
1147     }
1148   }
1149 
1150   // Before falling back to our general case, check if the unscaled
1151   // instructions can handle this. If so, that's preferable.
1152   if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1153     return false;
1154 
1155   // Base only. The address will be materialized into a register before
1156   // the memory is accessed.
1157   //    add x0, Xbase, #offset
1158   //    ldr x0, [x0]
1159   Base = N;
1160   OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1161   return true;
1162 }
1163 
1164 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1165 /// immediate" address.  This should only match when there is an offset that
1166 /// is not valid for a scaled immediate addressing mode.  The "Size" argument
1167 /// is the size in bytes of the memory reference, which is needed here to know
1168 /// what is valid for a scaled immediate.
1169 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1170                                                  SDValue &Base,
1171                                                  SDValue &OffImm) {
1172   if (!CurDAG->isBaseWithConstantOffset(N))
1173     return false;
1174   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1175     int64_t RHSC = RHS->getSExtValue();
1176     if (RHSC >= -256 && RHSC < 256) {
1177       Base = N.getOperand(0);
1178       if (Base.getOpcode() == ISD::FrameIndex) {
1179         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1180         const TargetLowering *TLI = getTargetLowering();
1181         Base = CurDAG->getTargetFrameIndex(
1182             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1183       }
1184       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1185       return true;
1186     }
1187   }
1188   return false;
1189 }
1190 
1191 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1192   SDLoc dl(N);
1193   SDValue ImpDef = SDValue(
1194       CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1195   return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1196                                        N);
1197 }
1198 
1199 /// Check if the given SHL node (\p N), can be used to form an
1200 /// extended register for an addressing mode.
1201 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1202                                             bool WantExtend, SDValue &Offset,
1203                                             SDValue &SignExtend) {
1204   assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1205   ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1206   if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1207     return false;
1208 
1209   SDLoc dl(N);
1210   if (WantExtend) {
1211     AArch64_AM::ShiftExtendType Ext =
1212         getExtendTypeForNode(N.getOperand(0), true);
1213     if (Ext == AArch64_AM::InvalidShiftExtend)
1214       return false;
1215 
1216     Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1217     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1218                                            MVT::i32);
1219   } else {
1220     Offset = N.getOperand(0);
1221     SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1222   }
1223 
1224   unsigned LegalShiftVal = Log2_32(Size);
1225   unsigned ShiftVal = CSD->getZExtValue();
1226 
1227   if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1228     return false;
1229 
1230   return isWorthFoldingAddr(N, Size);
1231 }
1232 
1233 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1234                                             SDValue &Base, SDValue &Offset,
1235                                             SDValue &SignExtend,
1236                                             SDValue &DoShift) {
1237   if (N.getOpcode() != ISD::ADD)
1238     return false;
1239   SDValue LHS = N.getOperand(0);
1240   SDValue RHS = N.getOperand(1);
1241   SDLoc dl(N);
1242 
1243   // We don't want to match immediate adds here, because they are better lowered
1244   // to the register-immediate addressing modes.
1245   if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1246     return false;
1247 
1248   // Check if this particular node is reused in any non-memory related
1249   // operation.  If yes, do not try to fold this node into the address
1250   // computation, since the computation will be kept.
1251   const SDNode *Node = N.getNode();
1252   for (SDNode *UI : Node->uses()) {
1253     if (!isa<MemSDNode>(*UI))
1254       return false;
1255   }
1256 
1257   // Remember if it is worth folding N when it produces extended register.
1258   bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1259 
1260   // Try to match a shifted extend on the RHS.
1261   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1262       SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1263     Base = LHS;
1264     DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1265     return true;
1266   }
1267 
1268   // Try to match a shifted extend on the LHS.
1269   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1270       SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1271     Base = RHS;
1272     DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1273     return true;
1274   }
1275 
1276   // There was no shift, whatever else we find.
1277   DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1278 
1279   AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1280   // Try to match an unshifted extend on the LHS.
1281   if (IsExtendedRegisterWorthFolding &&
1282       (Ext = getExtendTypeForNode(LHS, true)) !=
1283           AArch64_AM::InvalidShiftExtend) {
1284     Base = RHS;
1285     Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1286     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1287                                            MVT::i32);
1288     if (isWorthFoldingAddr(LHS, Size))
1289       return true;
1290   }
1291 
1292   // Try to match an unshifted extend on the RHS.
1293   if (IsExtendedRegisterWorthFolding &&
1294       (Ext = getExtendTypeForNode(RHS, true)) !=
1295           AArch64_AM::InvalidShiftExtend) {
1296     Base = LHS;
1297     Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1298     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1299                                            MVT::i32);
1300     if (isWorthFoldingAddr(RHS, Size))
1301       return true;
1302   }
1303 
1304   return false;
1305 }
1306 
// Check if the given immediate is preferred by ADD. Returns true when the
// immediate can be encoded directly in an ADD, or when it can be encoded in
// an "ADD LSL #12" and cannot be produced by one MOVZ.
static bool isPreferredADD(int64_t ImmOff) {
  const uint64_t Bits = ImmOff;

  // Constant in [0x0, 0xfff] can be encoded in ADD.
  const uint64_t Imm12 = 0xfffULL;
  if ((Bits & ~Imm12) == 0)
    return true;

  // Anything with bits outside [12, 23] cannot use "ADD LSL #12" either.
  const uint64_t Imm12Lsl12 = Imm12 << 12;
  if ((Bits & ~Imm12Lsl12) != 0)
    return false;

  // As a single MOVZ is faster than a "ADD of LSL #12", ignore constants
  // that live entirely in bits [16, 23] or entirely in bits [12, 15].
  const bool OnlyBits16To23 = (Bits & ~0xff0000ULL) == 0;
  const bool OnlyBits12To15 = (Bits & ~0xf000ULL) == 0;
  return !OnlyBits16To23 && !OnlyBits12To15;
}
1321 
1322 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1323                                             SDValue &Base, SDValue &Offset,
1324                                             SDValue &SignExtend,
1325                                             SDValue &DoShift) {
1326   if (N.getOpcode() != ISD::ADD)
1327     return false;
1328   SDValue LHS = N.getOperand(0);
1329   SDValue RHS = N.getOperand(1);
1330   SDLoc DL(N);
1331 
1332   // Check if this particular node is reused in any non-memory related
1333   // operation.  If yes, do not try to fold this node into the address
1334   // computation, since the computation will be kept.
1335   const SDNode *Node = N.getNode();
1336   for (SDNode *UI : Node->uses()) {
1337     if (!isa<MemSDNode>(*UI))
1338       return false;
1339   }
1340 
1341   // Watch out if RHS is a wide immediate, it can not be selected into
1342   // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1343   // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1344   // instructions like:
1345   //     MOV  X0, WideImmediate
1346   //     ADD  X1, BaseReg, X0
1347   //     LDR  X2, [X1, 0]
1348   // For such situation, using [BaseReg, XReg] addressing mode can save one
1349   // ADD/SUB:
1350   //     MOV  X0, WideImmediate
1351   //     LDR  X2, [BaseReg, X0]
1352   if (isa<ConstantSDNode>(RHS)) {
1353     int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1354     // Skip the immediate can be selected by load/store addressing mode.
1355     // Also skip the immediate can be encoded by a single ADD (SUB is also
1356     // checked by using -ImmOff).
1357     if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1358         isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1359       return false;
1360 
1361     SDValue Ops[] = { RHS };
1362     SDNode *MOVI =
1363         CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1364     SDValue MOVIV = SDValue(MOVI, 0);
1365     // This ADD of two X register will be selected into [Reg+Reg] mode.
1366     N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1367   }
1368 
1369   // Remember if it is worth folding N when it produces extended register.
1370   bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1371 
1372   // Try to match a shifted extend on the RHS.
1373   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1374       SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1375     Base = LHS;
1376     DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1377     return true;
1378   }
1379 
1380   // Try to match a shifted extend on the LHS.
1381   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1382       SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1383     Base = RHS;
1384     DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1385     return true;
1386   }
1387 
1388   // Match any non-shifted, non-extend, non-immediate add expression.
1389   Base = LHS;
1390   Offset = RHS;
1391   SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1392   DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1393   // Reg1 + Reg2 is free: no check needed.
1394   return true;
1395 }
1396 
1397 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1398   static const unsigned RegClassIDs[] = {
1399       AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1400   static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1401                                      AArch64::dsub2, AArch64::dsub3};
1402 
1403   return createTuple(Regs, RegClassIDs, SubRegs);
1404 }
1405 
1406 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1407   static const unsigned RegClassIDs[] = {
1408       AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1409   static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1410                                      AArch64::qsub2, AArch64::qsub3};
1411 
1412   return createTuple(Regs, RegClassIDs, SubRegs);
1413 }
1414 
1415 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1416   static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1417                                          AArch64::ZPR3RegClassID,
1418                                          AArch64::ZPR4RegClassID};
1419   static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1420                                      AArch64::zsub2, AArch64::zsub3};
1421 
1422   return createTuple(Regs, RegClassIDs, SubRegs);
1423 }
1424 
1425 SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1426   assert(Regs.size() == 2 || Regs.size() == 4);
1427 
1428   // The createTuple interface requires 3 RegClassIDs for each possible
1429   // tuple type even though we only have them for ZPR2 and ZPR4.
1430   static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1431                                          AArch64::ZPR4Mul4RegClassID};
1432   static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1433                                      AArch64::zsub2, AArch64::zsub3};
1434   return createTuple(Regs, RegClassIDs, SubRegs);
1435 }
1436 
1437 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1438                                          const unsigned RegClassIDs[],
1439                                          const unsigned SubRegs[]) {
1440   // There's no special register-class for a vector-list of 1 element: it's just
1441   // a vector.
1442   if (Regs.size() == 1)
1443     return Regs[0];
1444 
1445   assert(Regs.size() >= 2 && Regs.size() <= 4);
1446 
1447   SDLoc DL(Regs[0]);
1448 
1449   SmallVector<SDValue, 4> Ops;
1450 
1451   // First operand of REG_SEQUENCE is the desired RegClass.
1452   Ops.push_back(
1453       CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1454 
1455   // Then we get pairs of source & subregister-position for the components.
1456   for (unsigned i = 0; i < Regs.size(); ++i) {
1457     Ops.push_back(Regs[i]);
1458     Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1459   }
1460 
1461   SDNode *N =
1462       CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1463   return SDValue(N, 0);
1464 }
1465 
1466 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1467                                       bool isExt) {
1468   SDLoc dl(N);
1469   EVT VT = N->getValueType(0);
1470 
1471   unsigned ExtOff = isExt;
1472 
1473   // Form a REG_SEQUENCE to force register allocation.
1474   unsigned Vec0Off = ExtOff + 1;
1475   SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1476                                N->op_begin() + Vec0Off + NumVecs);
1477   SDValue RegSeq = createQTuple(Regs);
1478 
1479   SmallVector<SDValue, 6> Ops;
1480   if (isExt)
1481     Ops.push_back(N->getOperand(1));
1482   Ops.push_back(RegSeq);
1483   Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1484   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1485 }
1486 
1487 static std::tuple<SDValue, SDValue>
1488 extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
1489   SDLoc DL(Disc);
1490   SDValue AddrDisc;
1491   SDValue ConstDisc;
1492 
1493   // If this is a blend, remember the constant and address discriminators.
1494   // Otherwise, it's either a constant discriminator, or a non-blended
1495   // address discriminator.
1496   if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1497       Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1498     AddrDisc = Disc->getOperand(1);
1499     ConstDisc = Disc->getOperand(2);
1500   } else {
1501     ConstDisc = Disc;
1502   }
1503 
1504   // If the constant discriminator (either the blend RHS, or the entire
1505   // discriminator value) isn't a 16-bit constant, bail out, and let the
1506   // discriminator be computed separately.
1507   auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1508   if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1509     return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1510 
1511   // If there's no address discriminator, use XZR directly.
1512   if (!AddrDisc)
1513     AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1514 
1515   return std::make_tuple(
1516       DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1517       AddrDisc);
1518 }
1519 
1520 void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1521   SDLoc DL(N);
1522   // IntrinsicID is operand #0
1523   SDValue Val = N->getOperand(1);
1524   SDValue AUTKey = N->getOperand(2);
1525   SDValue AUTDisc = N->getOperand(3);
1526 
1527   unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1528   AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1529 
1530   SDValue AUTAddrDisc, AUTConstDisc;
1531   std::tie(AUTConstDisc, AUTAddrDisc) =
1532       extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1533 
1534   SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1535                                          AArch64::X16, Val, SDValue());
1536   SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1537 
1538   SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
1539   ReplaceNode(N, AUT);
1540   return;
1541 }
1542 
1543 void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1544   SDLoc DL(N);
1545   // IntrinsicID is operand #0
1546   SDValue Val = N->getOperand(1);
1547   SDValue AUTKey = N->getOperand(2);
1548   SDValue AUTDisc = N->getOperand(3);
1549   SDValue PACKey = N->getOperand(4);
1550   SDValue PACDisc = N->getOperand(5);
1551 
1552   unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1553   unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1554 
1555   AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1556   PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1557 
1558   SDValue AUTAddrDisc, AUTConstDisc;
1559   std::tie(AUTConstDisc, AUTAddrDisc) =
1560       extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1561 
1562   SDValue PACAddrDisc, PACConstDisc;
1563   std::tie(PACConstDisc, PACAddrDisc) =
1564       extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1565 
1566   SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1567                                          AArch64::X16, Val, SDValue());
1568 
1569   SDValue Ops[] = {AUTKey,       AUTConstDisc, AUTAddrDisc,        PACKey,
1570                    PACConstDisc, PACAddrDisc,  X16Copy.getValue(1)};
1571 
1572   SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1573   ReplaceNode(N, AUTPAC);
1574   return;
1575 }
1576 
1577 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1578   LoadSDNode *LD = cast<LoadSDNode>(N);
1579   if (LD->isUnindexed())
1580     return false;
1581   EVT VT = LD->getMemoryVT();
1582   EVT DstVT = N->getValueType(0);
1583   ISD::MemIndexedMode AM = LD->getAddressingMode();
1584   bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1585 
1586   // We're not doing validity checking here. That was done when checking
1587   // if we should mark the load as indexed or not. We're just selecting
1588   // the right instruction.
1589   unsigned Opcode = 0;
1590 
1591   ISD::LoadExtType ExtType = LD->getExtensionType();
1592   bool InsertTo64 = false;
1593   if (VT == MVT::i64)
1594     Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1595   else if (VT == MVT::i32) {
1596     if (ExtType == ISD::NON_EXTLOAD)
1597       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1598     else if (ExtType == ISD::SEXTLOAD)
1599       Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1600     else {
1601       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1602       InsertTo64 = true;
1603       // The result of the load is only i32. It's the subreg_to_reg that makes
1604       // it into an i64.
1605       DstVT = MVT::i32;
1606     }
1607   } else if (VT == MVT::i16) {
1608     if (ExtType == ISD::SEXTLOAD) {
1609       if (DstVT == MVT::i64)
1610         Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1611       else
1612         Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1613     } else {
1614       Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1615       InsertTo64 = DstVT == MVT::i64;
1616       // The result of the load is only i32. It's the subreg_to_reg that makes
1617       // it into an i64.
1618       DstVT = MVT::i32;
1619     }
1620   } else if (VT == MVT::i8) {
1621     if (ExtType == ISD::SEXTLOAD) {
1622       if (DstVT == MVT::i64)
1623         Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1624       else
1625         Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1626     } else {
1627       Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1628       InsertTo64 = DstVT == MVT::i64;
1629       // The result of the load is only i32. It's the subreg_to_reg that makes
1630       // it into an i64.
1631       DstVT = MVT::i32;
1632     }
1633   } else if (VT == MVT::f16) {
1634     Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1635   } else if (VT == MVT::bf16) {
1636     Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1637   } else if (VT == MVT::f32) {
1638     Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1639   } else if (VT == MVT::f64 || VT.is64BitVector()) {
1640     Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1641   } else if (VT.is128BitVector()) {
1642     Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1643   } else
1644     return false;
1645   SDValue Chain = LD->getChain();
1646   SDValue Base = LD->getBasePtr();
1647   ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1648   int OffsetVal = (int)OffsetOp->getZExtValue();
1649   SDLoc dl(N);
1650   SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1651   SDValue Ops[] = { Base, Offset, Chain };
1652   SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1653                                        MVT::Other, Ops);
1654 
1655   // Transfer memoperands.
1656   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1657   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1658 
1659   // Either way, we're replacing the node, so tell the caller that.
1660   SDValue LoadedVal = SDValue(Res, 1);
1661   if (InsertTo64) {
1662     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1663     LoadedVal =
1664         SDValue(CurDAG->getMachineNode(
1665                     AArch64::SUBREG_TO_REG, dl, MVT::i64,
1666                     CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1667                     SubReg),
1668                 0);
1669   }
1670 
1671   ReplaceUses(SDValue(N, 0), LoadedVal);
1672   ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1673   ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1674   CurDAG->RemoveDeadNode(N);
1675   return true;
1676 }
1677 
1678 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1679                                      unsigned SubRegIdx) {
1680   SDLoc dl(N);
1681   EVT VT = N->getValueType(0);
1682   SDValue Chain = N->getOperand(0);
1683 
1684   SDValue Ops[] = {N->getOperand(2), // Mem operand;
1685                    Chain};
1686 
1687   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1688 
1689   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1690   SDValue SuperReg = SDValue(Ld, 0);
1691   for (unsigned i = 0; i < NumVecs; ++i)
1692     ReplaceUses(SDValue(N, i),
1693         CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1694 
1695   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1696 
1697   // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1698   // because it's too simple to have needed special treatment during lowering.
1699   if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1700     MachineMemOperand *MemOp = MemIntr->getMemOperand();
1701     CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1702   }
1703 
1704   CurDAG->RemoveDeadNode(N);
1705 }
1706 
1707 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1708                                          unsigned Opc, unsigned SubRegIdx) {
1709   SDLoc dl(N);
1710   EVT VT = N->getValueType(0);
1711   SDValue Chain = N->getOperand(0);
1712 
1713   SDValue Ops[] = {N->getOperand(1), // Mem operand
1714                    N->getOperand(2), // Incremental
1715                    Chain};
1716 
1717   const EVT ResTys[] = {MVT::i64, // Type of the write back register
1718                         MVT::Untyped, MVT::Other};
1719 
1720   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1721 
1722   // Update uses of write back register
1723   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1724 
1725   // Update uses of vector list
1726   SDValue SuperReg = SDValue(Ld, 1);
1727   if (NumVecs == 1)
1728     ReplaceUses(SDValue(N, 0), SuperReg);
1729   else
1730     for (unsigned i = 0; i < NumVecs; ++i)
1731       ReplaceUses(SDValue(N, i),
1732           CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1733 
1734   // Update the chain
1735   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1736   CurDAG->RemoveDeadNode(N);
1737 }
1738 
1739 /// Optimize \param OldBase and \param OldOffset selecting the best addressing
1740 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1741 /// new Base and an SDValue representing the new offset.
1742 std::tuple<unsigned, SDValue, SDValue>
1743 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1744                                               unsigned Opc_ri,
1745                                               const SDValue &OldBase,
1746                                               const SDValue &OldOffset,
1747                                               unsigned Scale) {
1748   SDValue NewBase = OldBase;
1749   SDValue NewOffset = OldOffset;
1750   // Detect a possible Reg+Imm addressing mode.
1751   const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1752       N, OldBase, NewBase, NewOffset);
1753 
1754   // Detect a possible reg+reg addressing mode, but only if we haven't already
1755   // detected a Reg+Imm one.
1756   const bool IsRegReg =
1757       !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1758 
1759   // Select the instruction.
1760   return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1761 }
1762 
/// Element-type filter applied by SelectOpcodeFromVT before it picks an
/// opcode. Enumerators are numbered 0..3 in declaration order.
enum class SelectTypeKind {
  Int1,    // Element type must be i1.
  Int,     // Element type must be i8, i16, i32 or i64.
  FP,      // Element type must be bf16, f16, f32 or f64.
  AnyType, // No restriction on the element type.
};
1769 
1770 /// This function selects an opcode from a list of opcodes, which is
1771 /// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
1772 /// element types, in this order.
1773 template <SelectTypeKind Kind>
1774 static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1775   // Only match scalable vector VTs
1776   if (!VT.isScalableVector())
1777     return 0;
1778 
1779   EVT EltVT = VT.getVectorElementType();
1780   unsigned Key = VT.getVectorMinNumElements();
1781   switch (Kind) {
1782   case SelectTypeKind::AnyType:
1783     break;
1784   case SelectTypeKind::Int:
1785     if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
1786         EltVT != MVT::i64)
1787       return 0;
1788     break;
1789   case SelectTypeKind::Int1:
1790     if (EltVT != MVT::i1)
1791       return 0;
1792     break;
1793   case SelectTypeKind::FP:
1794     if (EltVT == MVT::bf16)
1795       Key = 16;
1796     else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
1797              EltVT != MVT::f64)
1798       return 0;
1799     break;
1800   }
1801 
1802   unsigned Offset;
1803   switch (Key) {
1804   case 16: // 8-bit or bf16
1805     Offset = 0;
1806     break;
1807   case 8: // 16-bit
1808     Offset = 1;
1809     break;
1810   case 4: // 32-bit
1811     Offset = 2;
1812     break;
1813   case 2: // 64-bit
1814     Offset = 3;
1815     break;
1816   default:
1817     return 0;
1818   }
1819 
1820   return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1821 }
1822 
1823 // This function is almost identical to SelectWhilePair, but has an
1824 // extra check on the range of the immediate operand.
1825 // TODO: Merge these two functions together at some point?
1826 void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
1827   // Immediate can be either 0 or 1.
1828   if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1829     if (Imm->getZExtValue() > 1)
1830       return;
1831 
1832   SDLoc DL(N);
1833   EVT VT = N->getValueType(0);
1834   SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1835   SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1836   SDValue SuperReg = SDValue(WhilePair, 0);
1837 
1838   for (unsigned I = 0; I < 2; ++I)
1839     ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1840                                    AArch64::psub0 + I, DL, VT, SuperReg));
1841 
1842   CurDAG->RemoveDeadNode(N);
1843 }
1844 
1845 void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1846   SDLoc DL(N);
1847   EVT VT = N->getValueType(0);
1848 
1849   SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1850 
1851   SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1852   SDValue SuperReg = SDValue(WhilePair, 0);
1853 
1854   for (unsigned I = 0; I < 2; ++I)
1855     ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1856                                    AArch64::psub0 + I, DL, VT, SuperReg));
1857 
1858   CurDAG->RemoveDeadNode(N);
1859 }
1860 
1861 void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1862                                              unsigned Opcode) {
1863   EVT VT = N->getValueType(0);
1864   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1865   SDValue Ops = createZTuple(Regs);
1866   SDLoc DL(N);
1867   SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1868   SDValue SuperReg = SDValue(Intrinsic, 0);
1869   for (unsigned i = 0; i < NumVecs; ++i)
1870     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1871                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1872 
1873   CurDAG->RemoveDeadNode(N);
1874 }
1875 
1876 void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
1877                                                           unsigned NumVecs,
1878                                                           bool IsZmMulti,
1879                                                           unsigned Opcode,
1880                                                           bool HasPred) {
1881   assert(Opcode != 0 && "Unexpected opcode");
1882 
1883   SDLoc DL(N);
1884   EVT VT = N->getValueType(0);
1885   unsigned FirstVecIdx = HasPred ? 2 : 1;
1886 
1887   auto GetMultiVecOperand = [=](unsigned StartIdx) {
1888     SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
1889                                  N->op_begin() + StartIdx + NumVecs);
1890     return createZMulTuple(Regs);
1891   };
1892 
1893   SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
1894 
1895   SDValue Zm;
1896   if (IsZmMulti)
1897     Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
1898   else
1899     Zm = N->getOperand(NumVecs + FirstVecIdx);
1900 
1901   SDNode *Intrinsic;
1902   if (HasPred)
1903     Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
1904                                        N->getOperand(1), Zdn, Zm);
1905   else
1906     Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
1907   SDValue SuperReg = SDValue(Intrinsic, 0);
1908   for (unsigned i = 0; i < NumVecs; ++i)
1909     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1910                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1911 
1912   CurDAG->RemoveDeadNode(N);
1913 }
1914 
1915 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1916                                                unsigned Scale, unsigned Opc_ri,
1917                                                unsigned Opc_rr, bool IsIntr) {
1918   assert(Scale < 5 && "Invalid scaling value.");
1919   SDLoc DL(N);
1920   EVT VT = N->getValueType(0);
1921   SDValue Chain = N->getOperand(0);
1922 
1923   // Optimize addressing mode.
1924   SDValue Base, Offset;
1925   unsigned Opc;
1926   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1927       N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1928       CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1929 
1930   SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1931                    Base,                          // Memory operand
1932                    Offset, Chain};
1933 
1934   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1935 
1936   SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1937   SDValue SuperReg = SDValue(Load, 0);
1938   for (unsigned i = 0; i < NumVecs; ++i)
1939     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1940                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1941 
1942   // Copy chain
1943   unsigned ChainIdx = NumVecs;
1944   ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1945   CurDAG->RemoveDeadNode(N);
1946 }
1947 
1948 void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
1949                                                           unsigned NumVecs,
1950                                                           unsigned Scale,
1951                                                           unsigned Opc_ri,
1952                                                           unsigned Opc_rr) {
1953   assert(Scale < 4 && "Invalid scaling value.");
1954   SDLoc DL(N);
1955   EVT VT = N->getValueType(0);
1956   SDValue Chain = N->getOperand(0);
1957 
1958   SDValue PNg = N->getOperand(2);
1959   SDValue Base = N->getOperand(3);
1960   SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
1961   unsigned Opc;
1962   std::tie(Opc, Base, Offset) =
1963       findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
1964 
1965   SDValue Ops[] = {PNg,            // Predicate-as-counter
1966                    Base,           // Memory operand
1967                    Offset, Chain};
1968 
1969   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1970 
1971   SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1972   SDValue SuperReg = SDValue(Load, 0);
1973   for (unsigned i = 0; i < NumVecs; ++i)
1974     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1975                                    AArch64::zsub0 + i, DL, VT, SuperReg));
1976 
1977   // Copy chain
1978   unsigned ChainIdx = NumVecs;
1979   ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1980   CurDAG->RemoveDeadNode(N);
1981 }
1982 
1983 void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1984                                             unsigned Opcode) {
1985   if (N->getValueType(0) != MVT::nxv4f32)
1986     return;
1987   SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1988 }
1989 
1990 void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
1991                                                 unsigned NumOutVecs,
1992                                                 unsigned Opc, uint32_t MaxImm) {
1993   if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
1994     if (Imm->getZExtValue() > MaxImm)
1995       return;
1996 
1997   SDValue ZtValue;
1998   if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
1999     return;
2000   SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
2001   SDLoc DL(Node);
2002   EVT VT = Node->getValueType(0);
2003 
2004   SDNode *Instruction =
2005       CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2006   SDValue SuperReg = SDValue(Instruction, 0);
2007 
2008   for (unsigned I = 0; I < NumOutVecs; ++I)
2009     ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2010                                       AArch64::zsub0 + I, DL, VT, SuperReg));
2011 
2012   // Copy chain
2013   unsigned ChainIdx = NumOutVecs;
2014   ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2015   CurDAG->RemoveDeadNode(Node);
2016 }
2017 
2018 void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2019                                       unsigned Op) {
2020   SDLoc DL(N);
2021   EVT VT = N->getValueType(0);
2022 
2023   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2024   SDValue Zd = createZMulTuple(Regs);
2025   SDValue Zn = N->getOperand(1 + NumVecs);
2026   SDValue Zm = N->getOperand(2 + NumVecs);
2027 
2028   SDValue Ops[] = {Zd, Zn, Zm};
2029 
2030   SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2031   SDValue SuperReg = SDValue(Intrinsic, 0);
2032   for (unsigned i = 0; i < NumVecs; ++i)
2033     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2034                                    AArch64::zsub0 + i, DL, VT, SuperReg));
2035 
2036   CurDAG->RemoveDeadNode(N);
2037 }
2038 
2039 bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2040   switch (BaseReg) {
2041   default:
2042     return false;
2043   case AArch64::ZA:
2044   case AArch64::ZAB0:
2045     if (TileNum == 0)
2046       break;
2047     return false;
2048   case AArch64::ZAH0:
2049     if (TileNum <= 1)
2050       break;
2051     return false;
2052   case AArch64::ZAS0:
2053     if (TileNum <= 3)
2054       break;
2055     return false;
2056   case AArch64::ZAD0:
2057     if (TileNum <= 7)
2058       break;
2059     return false;
2060   }
2061 
2062   BaseReg += TileNum;
2063   return true;
2064 }
2065 
2066 template <unsigned MaxIdx, unsigned Scale>
2067 void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2068                                                 unsigned BaseReg, unsigned Op) {
2069   unsigned TileNum = 0;
2070   if (BaseReg != AArch64::ZA)
2071     TileNum = N->getConstantOperandVal(2);
2072 
2073   if (!SelectSMETile(BaseReg, TileNum))
2074     return;
2075 
2076   SDValue SliceBase, Base, Offset;
2077   if (BaseReg == AArch64::ZA)
2078     SliceBase = N->getOperand(2);
2079   else
2080     SliceBase = N->getOperand(3);
2081 
2082   if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2083     return;
2084 
2085   SDLoc DL(N);
2086   SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2087   SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2088   SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2089 
2090   EVT VT = N->getValueType(0);
2091   for (unsigned I = 0; I < NumVecs; ++I)
2092     ReplaceUses(SDValue(N, I),
2093                 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2094                                                SDValue(Mov, 0)));
2095   // Copy chain
2096   unsigned ChainIdx = NumVecs;
2097   ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2098   CurDAG->RemoveDeadNode(N);
2099 }
2100 
2101 void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2102                                                  unsigned Op, unsigned MaxIdx,
2103                                                  unsigned Scale, unsigned BaseReg) {
2104   // Slice can be in different positions
2105   // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2106   // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2107   SDValue SliceBase = N->getOperand(2);
2108   if (BaseReg != AArch64::ZA)
2109     SliceBase = N->getOperand(3);
2110 
2111   SDValue Base, Offset;
2112   if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2113     return;
2114   // The correct Za tile number is computed in Machine Instruction
2115   // See EmitZAInstr
2116   // DAG cannot select Za tile as an output register with ZReg
2117   SDLoc DL(N);
2118   SmallVector<SDValue, 6> Ops;
2119   if (BaseReg != AArch64::ZA )
2120     Ops.push_back(N->getOperand(2));
2121   Ops.push_back(Base);
2122   Ops.push_back(Offset);
2123   Ops.push_back(N->getOperand(0)); //Chain
2124   SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2125 
2126   EVT VT = N->getValueType(0);
2127   for (unsigned I = 0; I < NumVecs; ++I)
2128     ReplaceUses(SDValue(N, I),
2129                 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2130                                                SDValue(Mov, 0)));
2131 
2132   // Copy chain
2133   unsigned ChainIdx = NumVecs;
2134   ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2135   CurDAG->RemoveDeadNode(N);
2136 }
2137 
2138 void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2139                                                     unsigned NumOutVecs,
2140                                                     bool IsTupleInput,
2141                                                     unsigned Opc) {
2142   SDLoc DL(N);
2143   EVT VT = N->getValueType(0);
2144   unsigned NumInVecs = N->getNumOperands() - 1;
2145 
2146   SmallVector<SDValue, 6> Ops;
2147   if (IsTupleInput) {
2148     assert((NumInVecs == 2 || NumInVecs == 4) &&
2149            "Don't know how to handle multi-register input!");
2150     SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
2151                                  N->op_begin() + 1 + NumInVecs);
2152     Ops.push_back(createZMulTuple(Regs));
2153   } else {
2154     // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2155     for (unsigned I = 0; I < NumInVecs; I++)
2156       Ops.push_back(N->getOperand(1 + I));
2157   }
2158 
2159   SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2160   SDValue SuperReg = SDValue(Res, 0);
2161 
2162   for (unsigned I = 0; I < NumOutVecs; I++)
2163     ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2164                                    AArch64::zsub0 + I, DL, VT, SuperReg));
2165   CurDAG->RemoveDeadNode(N);
2166 }
2167 
2168 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2169                                       unsigned Opc) {
2170   SDLoc dl(N);
2171   EVT VT = N->getOperand(2)->getValueType(0);
2172 
2173   // Form a REG_SEQUENCE to force register allocation.
2174   bool Is128Bit = VT.getSizeInBits() == 128;
2175   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2176   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2177 
2178   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2179   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2180 
2181   // Transfer memoperands.
2182   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2183   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2184 
2185   ReplaceNode(N, St);
2186 }
2187 
2188 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2189                                                 unsigned Scale, unsigned Opc_rr,
2190                                                 unsigned Opc_ri) {
2191   SDLoc dl(N);
2192 
2193   // Form a REG_SEQUENCE to force register allocation.
2194   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2195   SDValue RegSeq = createZTuple(Regs);
2196 
2197   // Optimize addressing mode.
2198   unsigned Opc;
2199   SDValue Offset, Base;
2200   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2201       N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2202       CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2203 
2204   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2205                    Base,                               // address
2206                    Offset,                             // offset
2207                    N->getOperand(0)};                  // chain
2208   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2209 
2210   ReplaceNode(N, St);
2211 }
2212 
2213 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2214                                                       SDValue &OffImm) {
2215   SDLoc dl(N);
2216   const DataLayout &DL = CurDAG->getDataLayout();
2217   const TargetLowering *TLI = getTargetLowering();
2218 
2219   // Try to match it for the frame address
2220   if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2221     int FI = FINode->getIndex();
2222     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2223     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2224     return true;
2225   }
2226 
2227   return false;
2228 }
2229 
2230 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2231                                           unsigned Opc) {
2232   SDLoc dl(N);
2233   EVT VT = N->getOperand(2)->getValueType(0);
2234   const EVT ResTys[] = {MVT::i64,    // Type of the write back register
2235                         MVT::Other}; // Type for the Chain
2236 
2237   // Form a REG_SEQUENCE to force register allocation.
2238   bool Is128Bit = VT.getSizeInBits() == 128;
2239   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2240   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2241 
2242   SDValue Ops[] = {RegSeq,
2243                    N->getOperand(NumVecs + 1), // base register
2244                    N->getOperand(NumVecs + 2), // Incremental
2245                    N->getOperand(0)};          // Chain
2246   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2247 
2248   ReplaceNode(N, St);
2249 }
2250 
2251 namespace {
2252 /// WidenVector - Given a value in the V64 register class, produce the
2253 /// equivalent value in the V128 register class.
2254 class WidenVector {
2255   SelectionDAG &DAG;
2256 
2257 public:
2258   WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2259 
2260   SDValue operator()(SDValue V64Reg) {
2261     EVT VT = V64Reg.getValueType();
2262     unsigned NarrowSize = VT.getVectorNumElements();
2263     MVT EltTy = VT.getVectorElementType().getSimpleVT();
2264     MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2265     SDLoc DL(V64Reg);
2266 
2267     SDValue Undef =
2268         SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2269     return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2270   }
2271 };
2272 } // namespace
2273 
2274 /// NarrowVector - Given a value in the V128 register class, produce the
2275 /// equivalent value in the V64 register class.
2276 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
2277   EVT VT = V128Reg.getValueType();
2278   unsigned WideSize = VT.getVectorNumElements();
2279   MVT EltTy = VT.getVectorElementType().getSimpleVT();
2280   MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2281 
2282   return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2283                                     V128Reg);
2284 }
2285 
2286 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2287                                          unsigned Opc) {
2288   SDLoc dl(N);
2289   EVT VT = N->getValueType(0);
2290   bool Narrow = VT.getSizeInBits() == 64;
2291 
2292   // Form a REG_SEQUENCE to force register allocation.
2293   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2294 
2295   if (Narrow)
2296     transform(Regs, Regs.begin(),
2297                    WidenVector(*CurDAG));
2298 
2299   SDValue RegSeq = createQTuple(Regs);
2300 
2301   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2302 
2303   unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2304 
2305   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2306                    N->getOperand(NumVecs + 3), N->getOperand(0)};
2307   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2308   SDValue SuperReg = SDValue(Ld, 0);
2309 
2310   EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2311   static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2312                                     AArch64::qsub2, AArch64::qsub3 };
2313   for (unsigned i = 0; i < NumVecs; ++i) {
2314     SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2315     if (Narrow)
2316       NV = NarrowVector(NV, *CurDAG);
2317     ReplaceUses(SDValue(N, i), NV);
2318   }
2319 
2320   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2321   CurDAG->RemoveDeadNode(N);
2322 }
2323 
2324 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2325                                              unsigned Opc) {
2326   SDLoc dl(N);
2327   EVT VT = N->getValueType(0);
2328   bool Narrow = VT.getSizeInBits() == 64;
2329 
2330   // Form a REG_SEQUENCE to force register allocation.
2331   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2332 
2333   if (Narrow)
2334     transform(Regs, Regs.begin(),
2335                    WidenVector(*CurDAG));
2336 
2337   SDValue RegSeq = createQTuple(Regs);
2338 
2339   const EVT ResTys[] = {MVT::i64, // Type of the write back register
2340                         RegSeq->getValueType(0), MVT::Other};
2341 
2342   unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2343 
2344   SDValue Ops[] = {RegSeq,
2345                    CurDAG->getTargetConstant(LaneNo, dl,
2346                                              MVT::i64),         // Lane Number
2347                    N->getOperand(NumVecs + 2),                  // Base register
2348                    N->getOperand(NumVecs + 3),                  // Incremental
2349                    N->getOperand(0)};
2350   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2351 
2352   // Update uses of the write back register
2353   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2354 
2355   // Update uses of the vector list
2356   SDValue SuperReg = SDValue(Ld, 1);
2357   if (NumVecs == 1) {
2358     ReplaceUses(SDValue(N, 0),
2359                 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2360   } else {
2361     EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2362     static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2363                                       AArch64::qsub2, AArch64::qsub3 };
2364     for (unsigned i = 0; i < NumVecs; ++i) {
2365       SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2366                                                   SuperReg);
2367       if (Narrow)
2368         NV = NarrowVector(NV, *CurDAG);
2369       ReplaceUses(SDValue(N, i), NV);
2370     }
2371   }
2372 
2373   // Update the Chain
2374   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2375   CurDAG->RemoveDeadNode(N);
2376 }
2377 
2378 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2379                                           unsigned Opc) {
2380   SDLoc dl(N);
2381   EVT VT = N->getOperand(2)->getValueType(0);
2382   bool Narrow = VT.getSizeInBits() == 64;
2383 
2384   // Form a REG_SEQUENCE to force register allocation.
2385   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2386 
2387   if (Narrow)
2388     transform(Regs, Regs.begin(),
2389                    WidenVector(*CurDAG));
2390 
2391   SDValue RegSeq = createQTuple(Regs);
2392 
2393   unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2394 
2395   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2396                    N->getOperand(NumVecs + 3), N->getOperand(0)};
2397   SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2398 
2399   // Transfer memoperands.
2400   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2401   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2402 
2403   ReplaceNode(N, St);
2404 }
2405 
2406 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2407                                               unsigned Opc) {
2408   SDLoc dl(N);
2409   EVT VT = N->getOperand(2)->getValueType(0);
2410   bool Narrow = VT.getSizeInBits() == 64;
2411 
2412   // Form a REG_SEQUENCE to force register allocation.
2413   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2414 
2415   if (Narrow)
2416     transform(Regs, Regs.begin(),
2417                    WidenVector(*CurDAG));
2418 
2419   SDValue RegSeq = createQTuple(Regs);
2420 
2421   const EVT ResTys[] = {MVT::i64, // Type of the write back register
2422                         MVT::Other};
2423 
2424   unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2425 
2426   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2427                    N->getOperand(NumVecs + 2), // Base Register
2428                    N->getOperand(NumVecs + 3), // Incremental
2429                    N->getOperand(0)};
2430   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2431 
2432   // Transfer memoperands.
2433   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2434   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2435 
2436   ReplaceNode(N, St);
2437 }
2438 
2439 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2440                                        unsigned &Opc, SDValue &Opd0,
2441                                        unsigned &LSB, unsigned &MSB,
2442                                        unsigned NumberOfIgnoredLowBits,
2443                                        bool BiggerPattern) {
2444   assert(N->getOpcode() == ISD::AND &&
2445          "N must be a AND operation to call this function");
2446 
2447   EVT VT = N->getValueType(0);
2448 
2449   // Here we can test the type of VT and return false when the type does not
2450   // match, but since it is done prior to that call in the current context
2451   // we turned that into an assert to avoid redundant code.
2452   assert((VT == MVT::i32 || VT == MVT::i64) &&
2453          "Type checking must have been done before calling this function");
2454 
2455   // FIXME: simplify-demanded-bits in DAGCombine will probably have
2456   // changed the AND node to a 32-bit mask operation. We'll have to
2457   // undo that as part of the transform here if we want to catch all
2458   // the opportunities.
2459   // Currently the NumberOfIgnoredLowBits argument helps to recover
2460   // from these situations when matching bigger pattern (bitfield insert).
2461 
2462   // For unsigned extracts, check for a shift right and mask
2463   uint64_t AndImm = 0;
2464   if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2465     return false;
2466 
2467   const SDNode *Op0 = N->getOperand(0).getNode();
2468 
2469   // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2470   // simplified. Try to undo that
2471   AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2472 
2473   // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2474   if (AndImm & (AndImm + 1))
2475     return false;
2476 
2477   bool ClampMSB = false;
2478   uint64_t SrlImm = 0;
2479   // Handle the SRL + ANY_EXTEND case.
2480   if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2481       isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2482     // Extend the incoming operand of the SRL to 64-bit.
2483     Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2484     // Make sure to clamp the MSB so that we preserve the semantics of the
2485     // original operations.
2486     ClampMSB = true;
2487   } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2488              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2489                                    SrlImm)) {
2490     // If the shift result was truncated, we can still combine them.
2491     Opd0 = Op0->getOperand(0).getOperand(0);
2492 
2493     // Use the type of SRL node.
2494     VT = Opd0->getValueType(0);
2495   } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2496     Opd0 = Op0->getOperand(0);
2497     ClampMSB = (VT == MVT::i32);
2498   } else if (BiggerPattern) {
2499     // Let's pretend a 0 shift right has been performed.
2500     // The resulting code will be at least as good as the original one
2501     // plus it may expose more opportunities for bitfield insert pattern.
2502     // FIXME: Currently we limit this to the bigger pattern, because
2503     // some optimizations expect AND and not UBFM.
2504     Opd0 = N->getOperand(0);
2505   } else
2506     return false;
2507 
2508   // Bail out on large immediates. This happens when no proper
2509   // combining/constant folding was performed.
2510   if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2511     LLVM_DEBUG(
2512         (dbgs() << N
2513                 << ": Found large shift immediate, this should not happen\n"));
2514     return false;
2515   }
2516 
2517   LSB = SrlImm;
2518   MSB = SrlImm +
2519         (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2520                         : llvm::countr_one<uint64_t>(AndImm)) -
2521         1;
2522   if (ClampMSB)
2523     // Since we're moving the extend before the right shift operation, we need
2524     // to clamp the MSB to make sure we don't shift in undefined bits instead of
2525     // the zeros which would get shifted in with the original right shift
2526     // operation.
2527     MSB = MSB > 31 ? 31 : MSB;
2528 
2529   Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2530   return true;
2531 }
2532 
2533 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2534                                              SDValue &Opd0, unsigned &Immr,
2535                                              unsigned &Imms) {
2536   assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2537 
2538   EVT VT = N->getValueType(0);
2539   unsigned BitWidth = VT.getSizeInBits();
2540   assert((VT == MVT::i32 || VT == MVT::i64) &&
2541          "Type checking must have been done before calling this function");
2542 
2543   SDValue Op = N->getOperand(0);
2544   if (Op->getOpcode() == ISD::TRUNCATE) {
2545     Op = Op->getOperand(0);
2546     VT = Op->getValueType(0);
2547     BitWidth = VT.getSizeInBits();
2548   }
2549 
2550   uint64_t ShiftImm;
2551   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2552       !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2553     return false;
2554 
2555   unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2556   if (ShiftImm + Width > BitWidth)
2557     return false;
2558 
2559   Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2560   Opd0 = Op.getOperand(0);
2561   Immr = ShiftImm;
2562   Imms = ShiftImm + Width - 1;
2563   return true;
2564 }
2565 
2566 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2567                                           SDValue &Opd0, unsigned &LSB,
2568                                           unsigned &MSB) {
2569   // We are looking for the following pattern which basically extracts several
2570   // continuous bits from the source value and places it from the LSB of the
2571   // destination value, all other bits of the destination value or set to zero:
2572   //
2573   // Value2 = AND Value, MaskImm
2574   // SRL Value2, ShiftImm
2575   //
2576   // with MaskImm >> ShiftImm to search for the bit width.
2577   //
2578   // This gets selected into a single UBFM:
2579   //
2580   // UBFM Value, ShiftImm, Log2_64(MaskImm)
2581   //
2582 
2583   if (N->getOpcode() != ISD::SRL)
2584     return false;
2585 
2586   uint64_t AndMask = 0;
2587   if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2588     return false;
2589 
2590   Opd0 = N->getOperand(0).getOperand(0);
2591 
2592   uint64_t SrlImm = 0;
2593   if (!isIntImmediate(N->getOperand(1), SrlImm))
2594     return false;
2595 
2596   // Check whether we really have several bits extract here.
2597   if (!isMask_64(AndMask >> SrlImm))
2598     return false;
2599 
2600   Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2601   LSB = SrlImm;
2602   MSB = llvm::Log2_64(AndMask);
2603   return true;
2604 }
2605 
2606 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2607                                        unsigned &Immr, unsigned &Imms,
2608                                        bool BiggerPattern) {
2609   assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2610          "N must be a SHR/SRA operation to call this function");
2611 
2612   EVT VT = N->getValueType(0);
2613 
2614   // Here we can test the type of VT and return false when the type does not
2615   // match, but since it is done prior to that call in the current context
2616   // we turned that into an assert to avoid redundant code.
2617   assert((VT == MVT::i32 || VT == MVT::i64) &&
2618          "Type checking must have been done before calling this function");
2619 
2620   // Check for AND + SRL doing several bits extract.
2621   if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2622     return true;
2623 
2624   // We're looking for a shift of a shift.
2625   uint64_t ShlImm = 0;
2626   uint64_t TruncBits = 0;
2627   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2628     Opd0 = N->getOperand(0).getOperand(0);
2629   } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2630              N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2631     // We are looking for a shift of truncate. Truncate from i64 to i32 could
2632     // be considered as setting high 32 bits as zero. Our strategy here is to
2633     // always generate 64bit UBFM. This consistency will help the CSE pass
2634     // later find more redundancy.
2635     Opd0 = N->getOperand(0).getOperand(0);
2636     TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2637     VT = Opd0.getValueType();
2638     assert(VT == MVT::i64 && "the promoted type should be i64");
2639   } else if (BiggerPattern) {
2640     // Let's pretend a 0 shift left has been performed.
2641     // FIXME: Currently we limit this to the bigger pattern case,
2642     // because some optimizations expect AND and not UBFM
2643     Opd0 = N->getOperand(0);
2644   } else
2645     return false;
2646 
2647   // Missing combines/constant folding may have left us with strange
2648   // constants.
2649   if (ShlImm >= VT.getSizeInBits()) {
2650     LLVM_DEBUG(
2651         (dbgs() << N
2652                 << ": Found large shift immediate, this should not happen\n"));
2653     return false;
2654   }
2655 
2656   uint64_t SrlImm = 0;
2657   if (!isIntImmediate(N->getOperand(1), SrlImm))
2658     return false;
2659 
2660   assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2661          "bad amount in shift node!");
2662   int immr = SrlImm - ShlImm;
2663   Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2664   Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2665   // SRA requires a signed extraction
2666   if (VT == MVT::i32)
2667     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2668   else
2669     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2670   return true;
2671 }
2672 
2673 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2674   assert(N->getOpcode() == ISD::SIGN_EXTEND);
2675 
2676   EVT VT = N->getValueType(0);
2677   EVT NarrowVT = N->getOperand(0)->getValueType(0);
2678   if (VT != MVT::i64 || NarrowVT != MVT::i32)
2679     return false;
2680 
2681   uint64_t ShiftImm;
2682   SDValue Op = N->getOperand(0);
2683   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2684     return false;
2685 
2686   SDLoc dl(N);
2687   // Extend the incoming operand of the shift to 64-bits.
2688   SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2689   unsigned Immr = ShiftImm;
2690   unsigned Imms = NarrowVT.getSizeInBits() - 1;
2691   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2692                    CurDAG->getTargetConstant(Imms, dl, VT)};
2693   CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2694   return true;
2695 }
2696 
2697 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2698                                 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2699                                 unsigned NumberOfIgnoredLowBits = 0,
2700                                 bool BiggerPattern = false) {
2701   if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2702     return false;
2703 
2704   switch (N->getOpcode()) {
2705   default:
2706     if (!N->isMachineOpcode())
2707       return false;
2708     break;
2709   case ISD::AND:
2710     return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2711                                       NumberOfIgnoredLowBits, BiggerPattern);
2712   case ISD::SRL:
2713   case ISD::SRA:
2714     return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2715 
2716   case ISD::SIGN_EXTEND_INREG:
2717     return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2718   }
2719 
2720   unsigned NOpc = N->getMachineOpcode();
2721   switch (NOpc) {
2722   default:
2723     return false;
2724   case AArch64::SBFMWri:
2725   case AArch64::UBFMWri:
2726   case AArch64::SBFMXri:
2727   case AArch64::UBFMXri:
2728     Opc = NOpc;
2729     Opd0 = N->getOperand(0);
2730     Immr = N->getConstantOperandVal(1);
2731     Imms = N->getConstantOperandVal(2);
2732     return true;
2733   }
2734   // Unreachable
2735   return false;
2736 }
2737 
2738 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2739   unsigned Opc, Immr, Imms;
2740   SDValue Opd0;
2741   if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2742     return false;
2743 
2744   EVT VT = N->getValueType(0);
2745   SDLoc dl(N);
2746 
2747   // If the bit extract operation is 64bit but the original type is 32bit, we
2748   // need to add one EXTRACT_SUBREG.
2749   if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2750     SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2751                        CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2752 
2753     SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2754     SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
2755                                                    MVT::i32, SDValue(BFM, 0));
2756     ReplaceNode(N, Inner.getNode());
2757     return true;
2758   }
2759 
2760   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2761                    CurDAG->getTargetConstant(Imms, dl, VT)};
2762   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2763   return true;
2764 }
2765 
2766 /// Does DstMask form a complementary pair with the mask provided by
2767 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2768 /// this asks whether DstMask zeroes precisely those bits that will be set by
2769 /// the other half.
2770 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2771                               unsigned NumberOfIgnoredHighBits, EVT VT) {
2772   assert((VT == MVT::i32 || VT == MVT::i64) &&
2773          "i32 or i64 mask type expected!");
2774   unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2775 
2776   APInt SignificantDstMask = APInt(BitWidth, DstMask);
2777   APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2778 
2779   return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2780          (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2781 }
2782 
// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for the useful bits of x
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; they then live on through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used by an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
2798 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2799 
2800 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2801                                               unsigned Depth) {
2802   uint64_t Imm =
2803       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2804   Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2805   UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2806   getUsefulBits(Op, UsefulBits, Depth + 1);
2807 }
2808 
2809 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2810                                              uint64_t Imm, uint64_t MSB,
2811                                              unsigned Depth) {
2812   // inherit the bitwidth value
2813   APInt OpUsefulBits(UsefulBits);
2814   OpUsefulBits = 1;
2815 
2816   if (MSB >= Imm) {
2817     OpUsefulBits <<= MSB - Imm + 1;
2818     --OpUsefulBits;
2819     // The interesting part will be in the lower part of the result
2820     getUsefulBits(Op, OpUsefulBits, Depth + 1);
2821     // The interesting part was starting at Imm in the argument
2822     OpUsefulBits <<= Imm;
2823   } else {
2824     OpUsefulBits <<= MSB + 1;
2825     --OpUsefulBits;
2826     // The interesting part will be shifted in the result
2827     OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2828     getUsefulBits(Op, OpUsefulBits, Depth + 1);
2829     // The interesting part was at zero in the argument
2830     OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2831   }
2832 
2833   UsefulBits &= OpUsefulBits;
2834 }
2835 
2836 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2837                                   unsigned Depth) {
2838   uint64_t Imm =
2839       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2840   uint64_t MSB =
2841       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2842 
2843   getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2844 }
2845 
2846 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2847                                               unsigned Depth) {
2848   uint64_t ShiftTypeAndValue =
2849       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2850   APInt Mask(UsefulBits);
2851   Mask.clearAllBits();
2852   Mask.flipAllBits();
2853 
2854   if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2855     // Shift Left
2856     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2857     Mask <<= ShiftAmt;
2858     getUsefulBits(Op, Mask, Depth + 1);
2859     Mask.lshrInPlace(ShiftAmt);
2860   } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2861     // Shift Right
2862     // We do not handle AArch64_AM::ASR, because the sign will change the
2863     // number of useful bits
2864     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2865     Mask.lshrInPlace(ShiftAmt);
2866     getUsefulBits(Op, Mask, Depth + 1);
2867     Mask <<= ShiftAmt;
2868   } else
2869     return;
2870 
2871   UsefulBits &= Mask;
2872 }
2873 
2874 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2875                                  unsigned Depth) {
2876   uint64_t Imm =
2877       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2878   uint64_t MSB =
2879       cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2880 
2881   APInt OpUsefulBits(UsefulBits);
2882   OpUsefulBits = 1;
2883 
2884   APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2885   ResultUsefulBits.flipAllBits();
2886   APInt Mask(UsefulBits.getBitWidth(), 0);
2887 
2888   getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2889 
2890   if (MSB >= Imm) {
2891     // The instruction is a BFXIL.
2892     uint64_t Width = MSB - Imm + 1;
2893     uint64_t LSB = Imm;
2894 
2895     OpUsefulBits <<= Width;
2896     --OpUsefulBits;
2897 
2898     if (Op.getOperand(1) == Orig) {
2899       // Copy the low bits from the result to bits starting from LSB.
2900       Mask = ResultUsefulBits & OpUsefulBits;
2901       Mask <<= LSB;
2902     }
2903 
2904     if (Op.getOperand(0) == Orig)
2905       // Bits starting from LSB in the input contribute to the result.
2906       Mask |= (ResultUsefulBits & ~OpUsefulBits);
2907   } else {
2908     // The instruction is a BFI.
2909     uint64_t Width = MSB + 1;
2910     uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2911 
2912     OpUsefulBits <<= Width;
2913     --OpUsefulBits;
2914     OpUsefulBits <<= LSB;
2915 
2916     if (Op.getOperand(1) == Orig) {
2917       // Copy the bits from the result to the zero bits.
2918       Mask = ResultUsefulBits & OpUsefulBits;
2919       Mask.lshrInPlace(LSB);
2920     }
2921 
2922     if (Op.getOperand(0) == Orig)
2923       Mask |= (ResultUsefulBits & ~OpUsefulBits);
2924   }
2925 
2926   UsefulBits &= Mask;
2927 }
2928 
2929 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2930                                 SDValue Orig, unsigned Depth) {
2931 
2932   // Users of this node should have already been instruction selected
2933   // FIXME: Can we turn that into an assert?
2934   if (!UserNode->isMachineOpcode())
2935     return;
2936 
2937   switch (UserNode->getMachineOpcode()) {
2938   default:
2939     return;
2940   case AArch64::ANDSWri:
2941   case AArch64::ANDSXri:
2942   case AArch64::ANDWri:
2943   case AArch64::ANDXri:
2944     // We increment Depth only when we call the getUsefulBits
2945     return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2946                                              Depth);
2947   case AArch64::UBFMWri:
2948   case AArch64::UBFMXri:
2949     return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2950 
2951   case AArch64::ORRWrs:
2952   case AArch64::ORRXrs:
2953     if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2954       getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2955                                         Depth);
2956     return;
2957   case AArch64::BFMWri:
2958   case AArch64::BFMXri:
2959     return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2960 
2961   case AArch64::STRBBui:
2962   case AArch64::STURBBi:
2963     if (UserNode->getOperand(0) != Orig)
2964       return;
2965     UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2966     return;
2967 
2968   case AArch64::STRHHui:
2969   case AArch64::STURHHi:
2970     if (UserNode->getOperand(0) != Orig)
2971       return;
2972     UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2973     return;
2974   }
2975 }
2976 
2977 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2978   if (Depth >= SelectionDAG::MaxRecursionDepth)
2979     return;
2980   // Initialize UsefulBits
2981   if (!Depth) {
2982     unsigned Bitwidth = Op.getScalarValueSizeInBits();
2983     // At the beginning, assume every produced bits is useful
2984     UsefulBits = APInt(Bitwidth, 0);
2985     UsefulBits.flipAllBits();
2986   }
2987   APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2988 
2989   for (SDNode *Node : Op.getNode()->uses()) {
2990     // A use cannot produce useful bits
2991     APInt UsefulBitsForUse = APInt(UsefulBits);
2992     getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2993     UsersUsefulBits |= UsefulBitsForUse;
2994   }
2995   // UsefulBits contains the produced bits that are meaningful for the
2996   // current definition, thus a user cannot make a bit meaningful at
2997   // this point
2998   UsefulBits &= UsersUsefulBits;
2999 }
3000 
3001 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
3002 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3003 /// 0, return Op unchanged.
3004 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3005   if (ShlAmount == 0)
3006     return Op;
3007 
3008   EVT VT = Op.getValueType();
3009   SDLoc dl(Op);
3010   unsigned BitWidth = VT.getSizeInBits();
3011   unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3012 
3013   SDNode *ShiftNode;
3014   if (ShlAmount > 0) {
3015     // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3016     ShiftNode = CurDAG->getMachineNode(
3017         UBFMOpc, dl, VT, Op,
3018         CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3019         CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3020   } else {
3021     // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3022     assert(ShlAmount < 0 && "expected right shift");
3023     int ShrAmount = -ShlAmount;
3024     ShiftNode = CurDAG->getMachineNode(
3025         UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3026         CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3027   }
3028 
3029   return SDValue(ShiftNode, 0);
3030 }
3031 
3032 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3033 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3034                                            bool BiggerPattern,
3035                                            const uint64_t NonZeroBits,
3036                                            SDValue &Src, int &DstLSB,
3037                                            int &Width);
3038 
// For bit-field-positioning pattern "(shl VAL, N)".
3040 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3041                                            bool BiggerPattern,
3042                                            const uint64_t NonZeroBits,
3043                                            SDValue &Src, int &DstLSB,
3044                                            int &Width);
3045 
3046 /// Does this tree qualify as an attempt to move a bitfield into position,
3047 /// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3048 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
3049                                     bool BiggerPattern, SDValue &Src,
3050                                     int &DstLSB, int &Width) {
3051   EVT VT = Op.getValueType();
3052   unsigned BitWidth = VT.getSizeInBits();
3053   (void)BitWidth;
3054   assert(BitWidth == 32 || BitWidth == 64);
3055 
3056   KnownBits Known = CurDAG->computeKnownBits(Op);
3057 
3058   // Non-zero in the sense that they're not provably zero, which is the key
3059   // point if we want to use this value
3060   const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3061   if (!isShiftedMask_64(NonZeroBits))
3062     return false;
3063 
3064   switch (Op.getOpcode()) {
3065   default:
3066     break;
3067   case ISD::AND:
3068     return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3069                                           NonZeroBits, Src, DstLSB, Width);
3070   case ISD::SHL:
3071     return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3072                                           NonZeroBits, Src, DstLSB, Width);
3073   }
3074 
3075   return false;
3076 }
3077 
3078 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3079                                            bool BiggerPattern,
3080                                            const uint64_t NonZeroBits,
3081                                            SDValue &Src, int &DstLSB,
3082                                            int &Width) {
3083   assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3084 
3085   EVT VT = Op.getValueType();
3086   assert((VT == MVT::i32 || VT == MVT::i64) &&
3087          "Caller guarantees VT is one of i32 or i64");
3088   (void)VT;
3089 
3090   uint64_t AndImm;
3091   if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3092     return false;
3093 
3094   // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3095   //   1) (AndImm & (1 << POS) == 0)
3096   //   2) the result of AND is not zero at POS bit (according to NonZeroBits)
3097   //
3098   // 1) and 2) don't agree so something must be wrong (e.g., in
3099   // 'SelectionDAG::computeKnownBits')
3100   assert((~AndImm & NonZeroBits) == 0 &&
3101          "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3102 
3103   SDValue AndOp0 = Op.getOperand(0);
3104 
3105   uint64_t ShlImm;
3106   SDValue ShlOp0;
3107   if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3108     // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3109     ShlOp0 = AndOp0.getOperand(0);
3110   } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3111              isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
3112                                    ShlImm)) {
3113     // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3114 
3115     // ShlVal == shl(val, N), which is a left shift on a smaller type.
3116     SDValue ShlVal = AndOp0.getOperand(0);
3117 
3118     // Since this is after type legalization and ShlVal is extended to MVT::i64,
3119     // expect VT to be MVT::i32.
3120     assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3121 
3122     // Widens 'val' to MVT::i64 as the source of bit field positioning.
3123     ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3124   } else
3125     return false;
3126 
3127   // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3128   // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3129   // AndOp0+AND.
3130   if (!BiggerPattern && !AndOp0.hasOneUse())
3131     return false;
3132 
3133   DstLSB = llvm::countr_zero(NonZeroBits);
3134   Width = llvm::countr_one(NonZeroBits >> DstLSB);
3135 
3136   // Bail out on large Width. This happens when no proper combining / constant
3137   // folding was performed.
3138   if (Width >= (int)VT.getSizeInBits()) {
3139     // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3140     // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3141     // "val".
3142     // If VT is i32, what Width >= 32 means:
3143     // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3144     //   demands at least 'Width' bits (after dag-combiner). This together with
3145     //   `any_extend` Op (undefined higher bits) indicates missed combination
3146     //   when lowering the 'and' IR instruction to an machine IR instruction.
3147     LLVM_DEBUG(
3148         dbgs()
3149         << "Found large Width in bit-field-positioning -- this indicates no "
3150            "proper combining / constant folding was performed\n");
3151     return false;
3152   }
3153 
3154   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3155   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3156   // amount.  BiggerPattern is true when this pattern is being matched for BFI,
3157   // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3158   // which case it is not profitable to insert an extra shift.
3159   if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3160     return false;
3161 
3162   Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3163   return true;
3164 }
3165 
3166 // For node (shl (and val, mask), N)), returns true if the node is equivalent to
3167 // UBFIZ.
3168 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
3169                                               SDValue &Src, int &DstLSB,
3170                                               int &Width) {
3171   // Caller should have verified that N is a left shift with constant shift
3172   // amount; asserts that.
3173   assert(Op.getOpcode() == ISD::SHL &&
3174          "Op.getNode() should be a SHL node to call this function");
3175   assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3176          "Op.getNode() should shift ShlImm to call this function");
3177 
3178   uint64_t AndImm = 0;
3179   SDValue Op0 = Op.getOperand(0);
3180   if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3181     return false;
3182 
3183   const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3184   if (isMask_64(ShiftedAndImm)) {
3185     // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3186     // should end with Mask, and could be prefixed with random bits if those
3187     // bits are shifted out.
3188     //
3189     // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3190     // the AND result corresponding to those bits are shifted out, so it's fine
3191     // to not extract them.
3192     Width = llvm::countr_one(ShiftedAndImm);
3193     DstLSB = ShlImm;
3194     Src = Op0.getOperand(0);
3195     return true;
3196   }
3197   return false;
3198 }
3199 
3200 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3201                                            bool BiggerPattern,
3202                                            const uint64_t NonZeroBits,
3203                                            SDValue &Src, int &DstLSB,
3204                                            int &Width) {
3205   assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3206 
3207   EVT VT = Op.getValueType();
3208   assert((VT == MVT::i32 || VT == MVT::i64) &&
3209          "Caller guarantees that type is i32 or i64");
3210   (void)VT;
3211 
3212   uint64_t ShlImm;
3213   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3214     return false;
3215 
3216   if (!BiggerPattern && !Op.hasOneUse())
3217     return false;
3218 
3219   if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3220     return true;
3221 
3222   DstLSB = llvm::countr_zero(NonZeroBits);
3223   Width = llvm::countr_one(NonZeroBits >> DstLSB);
3224 
3225   if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3226     return false;
3227 
3228   Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3229   return true;
3230 }
3231 
3232 static bool isShiftedMask(uint64_t Mask, EVT VT) {
3233   assert(VT == MVT::i32 || VT == MVT::i64);
3234   if (VT == MVT::i32)
3235     return isShiftedMask_32(Mask);
3236   return isShiftedMask_64(Mask);
3237 }
3238 
3239 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3240 // inserted only sets known zero bits.
3241 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
3242   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3243 
3244   EVT VT = N->getValueType(0);
3245   if (VT != MVT::i32 && VT != MVT::i64)
3246     return false;
3247 
3248   unsigned BitWidth = VT.getSizeInBits();
3249 
3250   uint64_t OrImm;
3251   if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3252     return false;
3253 
3254   // Skip this transformation if the ORR immediate can be encoded in the ORR.
3255   // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3256   // performance neutral.
3257   if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
3258     return false;
3259 
3260   uint64_t MaskImm;
3261   SDValue And = N->getOperand(0);
3262   // Must be a single use AND with an immediate operand.
3263   if (!And.hasOneUse() ||
3264       !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3265     return false;
3266 
3267   // Compute the Known Zero for the AND as this allows us to catch more general
3268   // cases than just looking for AND with imm.
3269   KnownBits Known = CurDAG->computeKnownBits(And);
3270 
3271   // Non-zero in the sense that they're not provably zero, which is the key
3272   // point if we want to use this value.
3273   uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3274 
3275   // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3276   if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3277     return false;
3278 
3279   // The bits being inserted must only set those bits that are known to be zero.
3280   if ((OrImm & NotKnownZero) != 0) {
3281     // FIXME:  It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3282     // currently handle this case.
3283     return false;
3284   }
3285 
3286   // BFI/BFXIL dst, src, #lsb, #width.
3287   int LSB = llvm::countr_one(NotKnownZero);
3288   int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3289 
3290   // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3291   unsigned ImmR = (BitWidth - LSB) % BitWidth;
3292   unsigned ImmS = Width - 1;
3293 
3294   // If we're creating a BFI instruction avoid cases where we need more
3295   // instructions to materialize the BFI constant as compared to the original
3296   // ORR.  A BFXIL will use the same constant as the original ORR, so the code
3297   // should be no worse in this case.
3298   bool IsBFI = LSB != 0;
3299   uint64_t BFIImm = OrImm >> LSB;
3300   if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3301     // We have a BFI instruction and we know the constant can't be materialized
3302     // with a ORR-immediate with the zero register.
3303     unsigned OrChunks = 0, BFIChunks = 0;
3304     for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3305       if (((OrImm >> Shift) & 0xFFFF) != 0)
3306         ++OrChunks;
3307       if (((BFIImm >> Shift) & 0xFFFF) != 0)
3308         ++BFIChunks;
3309     }
3310     if (BFIChunks > OrChunks)
3311       return false;
3312   }
3313 
3314   // Materialize the constant to be inserted.
3315   SDLoc DL(N);
3316   unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3317   SDNode *MOVI = CurDAG->getMachineNode(
3318       MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3319 
3320   // Create the BFI/BFXIL instruction.
3321   SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3322                    CurDAG->getTargetConstant(ImmR, DL, VT),
3323                    CurDAG->getTargetConstant(ImmS, DL, VT)};
3324   unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3325   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3326   return true;
3327 }
3328 
3329 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
3330                                            SDValue &ShiftedOperand,
3331                                            uint64_t &EncodedShiftImm) {
3332   // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3333   if (!Dst.hasOneUse())
3334     return false;
3335 
3336   EVT VT = Dst.getValueType();
3337   assert((VT == MVT::i32 || VT == MVT::i64) &&
3338          "Caller should guarantee that VT is one of i32 or i64");
3339   const unsigned SizeInBits = VT.getSizeInBits();
3340 
3341   SDLoc DL(Dst.getNode());
3342   uint64_t AndImm, ShlImm;
3343   if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3344       isShiftedMask_64(AndImm)) {
3345     // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3346     SDValue DstOp0 = Dst.getOperand(0);
3347     if (!DstOp0.hasOneUse())
3348       return false;
3349 
3350     // An example to illustrate the transformation
3351     // From:
3352     //    lsr     x8, x1, #1
3353     //    and     x8, x8, #0x3f80
3354     //    bfxil   x8, x1, #0, #7
3355     // To:
3356     //    and    x8, x23, #0x7f
3357     //    ubfx   x9, x23, #8, #7
3358     //    orr    x23, x8, x9, lsl #7
3359     //
3360     // The number of instructions remains the same, but ORR is faster than BFXIL
3361     // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3362     // the dependency chain is improved after the transformation.
3363     uint64_t SrlImm;
3364     if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3365       uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
3366       if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3367         unsigned MaskWidth =
3368             llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3369         unsigned UBFMOpc =
3370             (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3371         SDNode *UBFMNode = CurDAG->getMachineNode(
3372             UBFMOpc, DL, VT, DstOp0.getOperand(0),
3373             CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3374                                       VT),
3375             CurDAG->getTargetConstant(
3376                 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3377         ShiftedOperand = SDValue(UBFMNode, 0);
3378         EncodedShiftImm = AArch64_AM::getShifterImm(
3379             AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3380         return true;
3381       }
3382     }
3383     return false;
3384   }
3385 
3386   if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3387     ShiftedOperand = Dst.getOperand(0);
3388     EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3389     return true;
3390   }
3391 
3392   uint64_t SrlImm;
3393   if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3394     ShiftedOperand = Dst.getOperand(0);
3395     EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3396     return true;
3397   }
3398   return false;
3399 }
3400 
3401 // Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3402 // the operands and select it to AArch64::ORR with shifted registers if
3403 // that's more efficient. Returns true iff selection to AArch64::ORR happens.
3404 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3405                             SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3406                             const bool BiggerPattern) {
3407   EVT VT = N->getValueType(0);
3408   assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3409   assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3410           (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3411          "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3412   assert((VT == MVT::i32 || VT == MVT::i64) &&
3413          "Expect result type to be i32 or i64 since N is combinable to BFM");
3414   SDLoc DL(N);
3415 
3416   // Bail out if BFM simplifies away one node in BFM Dst.
3417   if (OrOpd1 != Dst)
3418     return false;
3419 
3420   const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3421   // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3422   // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3423   if (BiggerPattern) {
3424     uint64_t SrcAndImm;
3425     if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3426         isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3427       // OrOpd0 = AND Src, #Mask
3428       // So BFM simplifies away one AND node from Src and doesn't simplify away
3429       // nodes from Dst. If ORR with left-shifted operand also simplifies away
3430       // one node (from Rd), ORR is better since it has higher throughput and
3431       // smaller latency than BFM on many AArch64 processors (and for the rest
3432       // ORR is at least as good as BFM).
3433       SDValue ShiftedOperand;
3434       uint64_t EncodedShiftImm;
3435       if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3436                                          EncodedShiftImm)) {
3437         SDValue Ops[] = {OrOpd0, ShiftedOperand,
3438                          CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3439         CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3440         return true;
3441       }
3442     }
3443     return false;
3444   }
3445 
3446   assert((!BiggerPattern) && "BiggerPattern should be handled above");
3447 
3448   uint64_t ShlImm;
3449   if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3450     if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3451       SDValue Ops[] = {
3452           Dst, Src,
3453           CurDAG->getTargetConstant(
3454               AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3455       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3456       return true;
3457     }
3458 
3459     // Select the following pattern to left-shifted operand rather than BFI.
3460     // %val1 = op ..
3461     // %val2 = shl %val1, #imm
3462     // %res = or %val1, %val2
3463     //
3464     // If N is selected to be BFI, we know that
3465     // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3466     // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3467     //
3468     // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3469     if (OrOpd0.getOperand(0) == OrOpd1) {
3470       SDValue Ops[] = {
3471           OrOpd1, OrOpd1,
3472           CurDAG->getTargetConstant(
3473               AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3474       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3475       return true;
3476     }
3477   }
3478 
3479   uint64_t SrlImm;
3480   if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3481     // Select the following pattern to right-shifted operand rather than BFXIL.
3482     // %val1 = op ..
3483     // %val2 = lshr %val1, #imm
3484     // %res = or %val1, %val2
3485     //
3486     // If N is selected to be BFXIL, we know that
3487     // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3488     // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3489     //
3490     // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3491     if (OrOpd0.getOperand(0) == OrOpd1) {
3492       SDValue Ops[] = {
3493           OrOpd1, OrOpd1,
3494           CurDAG->getTargetConstant(
3495               AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3496       CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3497       return true;
3498     }
3499   }
3500 
3501   return false;
3502 }
3503 
3504 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3505                                       SelectionDAG *CurDAG) {
3506   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3507 
3508   EVT VT = N->getValueType(0);
3509   if (VT != MVT::i32 && VT != MVT::i64)
3510     return false;
3511 
3512   unsigned BitWidth = VT.getSizeInBits();
3513 
3514   // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3515   // have the expected shape. Try to undo that.
3516 
3517   unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3518   unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3519 
3520   // Given a OR operation, check if we have the following pattern
3521   // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3522   //                       isBitfieldExtractOp)
3523   // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3524   //                 countTrailingZeros(mask2) == imm2 - imm + 1
3525   // f = d | c
3526   // if yes, replace the OR instruction with:
3527   // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3528 
3529   // OR is commutative, check all combinations of operand order and values of
3530   // BiggerPattern, i.e.
3531   //     Opd0, Opd1, BiggerPattern=false
3532   //     Opd1, Opd0, BiggerPattern=false
3533   //     Opd0, Opd1, BiggerPattern=true
3534   //     Opd1, Opd0, BiggerPattern=true
3535   // Several of these combinations may match, so check with BiggerPattern=false
3536   // first since that will produce better results by matching more instructions
3537   // and/or inserting fewer extra instructions.
3538   for (int I = 0; I < 4; ++I) {
3539 
3540     SDValue Dst, Src;
3541     unsigned ImmR, ImmS;
3542     bool BiggerPattern = I / 2;
3543     SDValue OrOpd0Val = N->getOperand(I % 2);
3544     SDNode *OrOpd0 = OrOpd0Val.getNode();
3545     SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3546     SDNode *OrOpd1 = OrOpd1Val.getNode();
3547 
3548     unsigned BFXOpc;
3549     int DstLSB, Width;
3550     if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3551                             NumberOfIgnoredLowBits, BiggerPattern)) {
3552       // Check that the returned opcode is compatible with the pattern,
3553       // i.e., same type and zero extended (U and not S)
3554       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3555           (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3556         continue;
3557 
3558       // Compute the width of the bitfield insertion
3559       DstLSB = 0;
3560       Width = ImmS - ImmR + 1;
3561       // FIXME: This constraint is to catch bitfield insertion we may
3562       // want to widen the pattern if we want to grab general bitfied
3563       // move case
3564       if (Width <= 0)
3565         continue;
3566 
3567       // If the mask on the insertee is correct, we have a BFXIL operation. We
3568       // can share the ImmR and ImmS values from the already-computed UBFM.
3569     } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3570                                        BiggerPattern,
3571                                        Src, DstLSB, Width)) {
3572       ImmR = (BitWidth - DstLSB) % BitWidth;
3573       ImmS = Width - 1;
3574     } else
3575       continue;
3576 
3577     // Check the second part of the pattern
3578     EVT VT = OrOpd1Val.getValueType();
3579     assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3580 
3581     // Compute the Known Zero for the candidate of the first operand.
3582     // This allows to catch more general case than just looking for
3583     // AND with imm. Indeed, simplify-demanded-bits may have removed
3584     // the AND instruction because it proves it was useless.
3585     KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3586 
3587     // Check if there is enough room for the second operand to appear
3588     // in the first one
3589     APInt BitsToBeInserted =
3590         APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3591 
3592     if ((BitsToBeInserted & ~Known.Zero) != 0)
3593       continue;
3594 
3595     // Set the first operand
3596     uint64_t Imm;
3597     if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3598         isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3599       // In that case, we can eliminate the AND
3600       Dst = OrOpd1->getOperand(0);
3601     else
3602       // Maybe the AND has been removed by simplify-demanded-bits
3603       // or is useful because it discards more bits
3604       Dst = OrOpd1Val;
3605 
3606     // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3607     // with shifted operand is more efficient.
3608     if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3609                         BiggerPattern))
3610       return true;
3611 
3612     // both parts match
3613     SDLoc DL(N);
3614     SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3615                      CurDAG->getTargetConstant(ImmS, DL, VT)};
3616     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3617     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3618     return true;
3619   }
3620 
3621   // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3622   // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3623   // mask (e.g., 0x000ffff0).
3624   uint64_t Mask0Imm, Mask1Imm;
3625   SDValue And0 = N->getOperand(0);
3626   SDValue And1 = N->getOperand(1);
3627   if (And0.hasOneUse() && And1.hasOneUse() &&
3628       isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3629       isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3630       APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3631       (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3632 
3633     // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3634     // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3635     // bits to be inserted.
3636     if (isShiftedMask(Mask0Imm, VT)) {
3637       std::swap(And0, And1);
3638       std::swap(Mask0Imm, Mask1Imm);
3639     }
3640 
3641     SDValue Src = And1->getOperand(0);
3642     SDValue Dst = And0->getOperand(0);
3643     unsigned LSB = llvm::countr_zero(Mask1Imm);
3644     int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3645 
3646     // The BFXIL inserts the low-order bits from a source register, so right
3647     // shift the needed bits into place.
3648     SDLoc DL(N);
3649     unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3650     uint64_t LsrImm = LSB;
3651     if (Src->hasOneUse() &&
3652         isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3653         (LsrImm + LSB) < BitWidth) {
3654       Src = Src->getOperand(0);
3655       LsrImm += LSB;
3656     }
3657 
3658     SDNode *LSR = CurDAG->getMachineNode(
3659         ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3660         CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3661 
3662     // BFXIL is an alias of BFM, so translate to BFM operands.
3663     unsigned ImmR = (BitWidth - LSB) % BitWidth;
3664     unsigned ImmS = Width - 1;
3665 
3666     // Create the BFXIL instruction.
3667     SDValue Ops[] = {Dst, SDValue(LSR, 0),
3668                      CurDAG->getTargetConstant(ImmR, DL, VT),
3669                      CurDAG->getTargetConstant(ImmS, DL, VT)};
3670     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3671     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3672     return true;
3673   }
3674 
3675   return false;
3676 }
3677 
3678 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3679   if (N->getOpcode() != ISD::OR)
3680     return false;
3681 
3682   APInt NUsefulBits;
3683   getUsefulBits(SDValue(N, 0), NUsefulBits);
3684 
3685   // If all bits are not useful, just return UNDEF.
3686   if (!NUsefulBits) {
3687     CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3688     return true;
3689   }
3690 
3691   if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3692     return true;
3693 
3694   return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3695 }
3696 
3697 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3698 /// equivalent of a left shift by a constant amount followed by an and masking
3699 /// out a contiguous set of bits.
3700 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3701   if (N->getOpcode() != ISD::AND)
3702     return false;
3703 
3704   EVT VT = N->getValueType(0);
3705   if (VT != MVT::i32 && VT != MVT::i64)
3706     return false;
3707 
3708   SDValue Op0;
3709   int DstLSB, Width;
3710   if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3711                                Op0, DstLSB, Width))
3712     return false;
3713 
3714   // ImmR is the rotate right amount.
3715   unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3716   // ImmS is the most significant bit of the source to be moved.
3717   unsigned ImmS = Width - 1;
3718 
3719   SDLoc DL(N);
3720   SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3721                    CurDAG->getTargetConstant(ImmS, DL, VT)};
3722   unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3723   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3724   return true;
3725 }
3726 
3727 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3728 /// variable shift/rotate instructions.
3729 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3730   EVT VT = N->getValueType(0);
3731 
3732   unsigned Opc;
3733   switch (N->getOpcode()) {
3734   case ISD::ROTR:
3735     Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3736     break;
3737   case ISD::SHL:
3738     Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3739     break;
3740   case ISD::SRL:
3741     Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3742     break;
3743   case ISD::SRA:
3744     Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3745     break;
3746   default:
3747     return false;
3748   }
3749 
3750   uint64_t Size;
3751   uint64_t Bits;
3752   if (VT == MVT::i32) {
3753     Bits = 5;
3754     Size = 32;
3755   } else if (VT == MVT::i64) {
3756     Bits = 6;
3757     Size = 64;
3758   } else
3759     return false;
3760 
3761   SDValue ShiftAmt = N->getOperand(1);
3762   SDLoc DL(N);
3763   SDValue NewShiftAmt;
3764 
3765   // Skip over an extend of the shift amount.
3766   if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3767       ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3768     ShiftAmt = ShiftAmt->getOperand(0);
3769 
3770   if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3771     SDValue Add0 = ShiftAmt->getOperand(0);
3772     SDValue Add1 = ShiftAmt->getOperand(1);
3773     uint64_t Add0Imm;
3774     uint64_t Add1Imm;
3775     if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3776       // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3777       // to avoid the ADD/SUB.
3778       NewShiftAmt = Add0;
3779     } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3780                isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3781                (Add0Imm % Size == 0)) {
3782       // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3783       // to generate a NEG instead of a SUB from a constant.
3784       unsigned NegOpc;
3785       unsigned ZeroReg;
3786       EVT SubVT = ShiftAmt->getValueType(0);
3787       if (SubVT == MVT::i32) {
3788         NegOpc = AArch64::SUBWrr;
3789         ZeroReg = AArch64::WZR;
3790       } else {
3791         assert(SubVT == MVT::i64);
3792         NegOpc = AArch64::SUBXrr;
3793         ZeroReg = AArch64::XZR;
3794       }
3795       SDValue Zero =
3796           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3797       MachineSDNode *Neg =
3798           CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3799       NewShiftAmt = SDValue(Neg, 0);
3800     } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3801                isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3802       // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3803       // to generate a NOT instead of a SUB from a constant.
3804       unsigned NotOpc;
3805       unsigned ZeroReg;
3806       EVT SubVT = ShiftAmt->getValueType(0);
3807       if (SubVT == MVT::i32) {
3808         NotOpc = AArch64::ORNWrr;
3809         ZeroReg = AArch64::WZR;
3810       } else {
3811         assert(SubVT == MVT::i64);
3812         NotOpc = AArch64::ORNXrr;
3813         ZeroReg = AArch64::XZR;
3814       }
3815       SDValue Zero =
3816           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3817       MachineSDNode *Not =
3818           CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3819       NewShiftAmt = SDValue(Not, 0);
3820     } else
3821       return false;
3822   } else {
3823     // If the shift amount is masked with an AND, check that the mask covers the
3824     // bits that are implicitly ANDed off by the above opcodes and if so, skip
3825     // the AND.
3826     uint64_t MaskImm;
3827     if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3828         !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3829       return false;
3830 
3831     if ((unsigned)llvm::countr_one(MaskImm) < Bits)
3832       return false;
3833 
3834     NewShiftAmt = ShiftAmt->getOperand(0);
3835   }
3836 
3837   // Narrow/widen the shift amount to match the size of the shift operation.
3838   if (VT == MVT::i32)
3839     NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3840   else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3841     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3842     MachineSDNode *Ext = CurDAG->getMachineNode(
3843         AArch64::SUBREG_TO_REG, DL, VT,
3844         CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3845     NewShiftAmt = SDValue(Ext, 0);
3846   }
3847 
3848   SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3849   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3850   return true;
3851 }
3852 
3853 static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,
3854                                                SDValue &FixedPos,
3855                                                unsigned RegWidth,
3856                                                bool isReciprocal) {
3857   APFloat FVal(0.0);
3858   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3859     FVal = CN->getValueAPF();
3860   else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3861     // Some otherwise illegal constants are allowed in this case.
3862     if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3863         !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3864       return false;
3865 
3866     ConstantPoolSDNode *CN =
3867         dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3868     FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3869   } else
3870     return false;
3871 
3872   // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3873   // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3874   // x-register.
3875   //
3876   // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3877   // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3878   // integers.
3879   bool IsExact;
3880 
3881   if (isReciprocal)
3882     if (!FVal.getExactInverse(&FVal))
3883       return false;
3884 
3885   // fbits is between 1 and 64 in the worst-case, which means the fmul
3886   // could have 2^64 as an actual operand. Need 65 bits of precision.
3887   APSInt IntVal(65, true);
3888   FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3889 
3890   // N.b. isPowerOf2 also checks for > 0.
3891   if (!IsExact || !IntVal.isPowerOf2())
3892     return false;
3893   unsigned FBits = IntVal.logBase2();
3894 
3895   // Checks above should have guaranteed that we haven't lost information in
3896   // finding FBits, but it must still be in range.
3897   if (FBits == 0 || FBits > RegWidth) return false;
3898 
3899   FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3900   return true;
3901 }
3902 
3903 bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3904                                                    unsigned RegWidth) {
3905   return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3906                                             false);
3907 }
3908 
3909 bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
3910                                                         SDValue &FixedPos,
3911                                                         unsigned RegWidth) {
3912   return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
3913                                             true);
3914 }
3915 
3916 // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
3917 // of the string and obtains the integer values from them and combines these
3918 // into a single value to be used in the MRS/MSR instruction.
3919 static int getIntOperandFromRegisterString(StringRef RegString) {
3920   SmallVector<StringRef, 5> Fields;
3921   RegString.split(Fields, ':');
3922 
3923   if (Fields.size() == 1)
3924     return -1;
3925 
3926   assert(Fields.size() == 5
3927             && "Invalid number of fields in read register string");
3928 
3929   SmallVector<int, 5> Ops;
3930   bool AllIntFields = true;
3931 
3932   for (StringRef Field : Fields) {
3933     unsigned IntField;
3934     AllIntFields &= !Field.getAsInteger(10, IntField);
3935     Ops.push_back(IntField);
3936   }
3937 
3938   assert(AllIntFields &&
3939           "Unexpected non-integer value in special register string.");
3940   (void)AllIntFields;
3941 
3942   // Need to combine the integer fields of the string into a single value
3943   // based on the bit encoding of MRS/MSR instruction.
3944   return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3945          (Ops[3] << 3) | (Ops[4]);
3946 }
3947 
3948 // Lower the read_register intrinsic to an MRS instruction node if the special
3949 // register string argument is either of the form detailed in the ALCE (the
3950 // form described in getIntOperandsFromRegsterString) or is a named register
3951 // known by the MRS SysReg mapper.
3952 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3953   const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3954   const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3955   SDLoc DL(N);
3956 
3957   bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3958 
3959   unsigned Opcode64Bit = AArch64::MRS;
3960   int Imm = getIntOperandFromRegisterString(RegString->getString());
3961   if (Imm == -1) {
3962     // No match, Use the sysreg mapper to map the remaining possible strings to
3963     // the value for the register to be used for the instruction operand.
3964     const auto *TheReg =
3965         AArch64SysReg::lookupSysRegByName(RegString->getString());
3966     if (TheReg && TheReg->Readable &&
3967         TheReg->haveFeatures(Subtarget->getFeatureBits()))
3968       Imm = TheReg->Encoding;
3969     else
3970       Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3971 
3972     if (Imm == -1) {
3973       // Still no match, see if this is "pc" or give up.
3974       if (!ReadIs128Bit && RegString->getString() == "pc") {
3975         Opcode64Bit = AArch64::ADR;
3976         Imm = 0;
3977       } else {
3978         return false;
3979       }
3980     }
3981   }
3982 
3983   SDValue InChain = N->getOperand(0);
3984   SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3985   if (!ReadIs128Bit) {
3986     CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3987                          {SysRegImm, InChain});
3988   } else {
3989     SDNode *MRRS = CurDAG->getMachineNode(
3990         AArch64::MRRS, DL,
3991         {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3992         {SysRegImm, InChain});
3993 
3994     // Sysregs are not endian. The even register always contains the low half
3995     // of the register.
3996     SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3997                                                 SDValue(MRRS, 0));
3998     SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3999                                                 SDValue(MRRS, 0));
4000     SDValue OutChain = SDValue(MRRS, 1);
4001 
4002     ReplaceUses(SDValue(N, 0), Lo);
4003     ReplaceUses(SDValue(N, 1), Hi);
4004     ReplaceUses(SDValue(N, 2), OutChain);
4005   };
4006   return true;
4007 }
4008 
4009 // Lower the write_register intrinsic to an MSR instruction node if the special
4010 // register string argument is either of the form detailed in the ALCE (the
4011 // form described in getIntOperandsFromRegsterString) or is a named register
4012 // known by the MSR SysReg mapper.
4013 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4014   const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4015   const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4016   SDLoc DL(N);
4017 
4018   bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4019 
4020   if (!WriteIs128Bit) {
4021     // Check if the register was one of those allowed as the pstatefield value
4022     // in the MSR (immediate) instruction. To accept the values allowed in the
4023     // pstatefield for the MSR (immediate) instruction, we also require that an
4024     // immediate value has been provided as an argument, we know that this is
4025     // the case as it has been ensured by semantic checking.
4026     auto trySelectPState = [&](auto PMapper, unsigned State) {
4027       if (PMapper) {
4028         assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4029                "Expected a constant integer expression.");
4030         unsigned Reg = PMapper->Encoding;
4031         uint64_t Immed = N->getConstantOperandVal(2);
4032         CurDAG->SelectNodeTo(
4033             N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4034             CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4035         return true;
4036       }
4037       return false;
4038     };
4039 
4040     if (trySelectPState(
4041             AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4042             AArch64::MSRpstateImm4))
4043       return true;
4044     if (trySelectPState(
4045             AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4046             AArch64::MSRpstateImm1))
4047       return true;
4048   }
4049 
4050   int Imm = getIntOperandFromRegisterString(RegString->getString());
4051   if (Imm == -1) {
4052     // Use the sysreg mapper to attempt to map the remaining possible strings
4053     // to the value for the register to be used for the MSR (register)
4054     // instruction operand.
4055     auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4056     if (TheReg && TheReg->Writeable &&
4057         TheReg->haveFeatures(Subtarget->getFeatureBits()))
4058       Imm = TheReg->Encoding;
4059     else
4060       Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4061 
4062     if (Imm == -1)
4063       return false;
4064   }
4065 
4066   SDValue InChain = N->getOperand(0);
4067   if (!WriteIs128Bit) {
4068     CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4069                          CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4070                          N->getOperand(2), InChain);
4071   } else {
4072     // No endian swap. The lower half always goes into the even subreg, and the
4073     // higher half always into the odd supreg.
4074     SDNode *Pair = CurDAG->getMachineNode(
4075         TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4076         {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4077                                    MVT::i32),
4078          N->getOperand(2),
4079          CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4080          N->getOperand(3),
4081          CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4082 
4083     CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4084                          CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4085                          SDValue(Pair, 0), InChain);
4086   }
4087 
4088   return true;
4089 }
4090 
4091 /// We've got special pseudo-instructions for these
4092 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4093   unsigned Opcode;
4094   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4095 
4096   // Leave IR for LSE if subtarget supports it.
4097   if (Subtarget->hasLSE()) return false;
4098 
4099   if (MemTy == MVT::i8)
4100     Opcode = AArch64::CMP_SWAP_8;
4101   else if (MemTy == MVT::i16)
4102     Opcode = AArch64::CMP_SWAP_16;
4103   else if (MemTy == MVT::i32)
4104     Opcode = AArch64::CMP_SWAP_32;
4105   else if (MemTy == MVT::i64)
4106     Opcode = AArch64::CMP_SWAP_64;
4107   else
4108     llvm_unreachable("Unknown AtomicCmpSwap type");
4109 
4110   MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4111   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4112                    N->getOperand(0)};
4113   SDNode *CmpSwap = CurDAG->getMachineNode(
4114       Opcode, SDLoc(N),
4115       CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4116 
4117   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4118   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4119 
4120   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4121   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4122   CurDAG->RemoveDeadNode(N);
4123 
4124   return true;
4125 }
4126 
4127 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4128                                              SDValue &Shift) {
4129   if (!isa<ConstantSDNode>(N))
4130     return false;
4131 
4132   SDLoc DL(N);
4133   uint64_t Val = cast<ConstantSDNode>(N)
4134                      ->getAPIntValue()
4135                      .trunc(VT.getFixedSizeInBits())
4136                      .getZExtValue();
4137 
4138   switch (VT.SimpleTy) {
4139   case MVT::i8:
4140     // All immediates are supported.
4141     Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4142     Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4143     return true;
4144   case MVT::i16:
4145   case MVT::i32:
4146   case MVT::i64:
4147     // Support 8bit unsigned immediates.
4148     if (Val <= 255) {
4149       Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4150       Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4151       return true;
4152     }
4153     // Support 16bit unsigned immediates that are a multiple of 256.
4154     if (Val <= 65280 && Val % 256 == 0) {
4155       Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4156       Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4157       return true;
4158     }
4159     break;
4160   default:
4161     break;
4162   }
4163 
4164   return false;
4165 }
4166 
4167 bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4168                                                  SDValue &Imm, SDValue &Shift,
4169                                                  bool Negate) {
4170   if (!isa<ConstantSDNode>(N))
4171     return false;
4172 
4173   SDLoc DL(N);
4174   int64_t Val = cast<ConstantSDNode>(N)
4175                     ->getAPIntValue()
4176                     .trunc(VT.getFixedSizeInBits())
4177                     .getSExtValue();
4178 
4179   if (Negate)
4180     Val = -Val;
4181 
4182   // Signed saturating instructions treat their immediate operand as unsigned,
4183   // whereas the related intrinsics define their operands to be signed. This
4184   // means we can only use the immediate form when the operand is non-negative.
4185   if (Val < 0)
4186     return false;
4187 
4188   switch (VT.SimpleTy) {
4189   case MVT::i8:
4190     // All positive immediates are supported.
4191     Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4192     Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4193     return true;
4194   case MVT::i16:
4195   case MVT::i32:
4196   case MVT::i64:
4197     // Support 8bit positive immediates.
4198     if (Val <= 255) {
4199       Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4200       Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4201       return true;
4202     }
4203     // Support 16bit positive immediates that are a multiple of 256.
4204     if (Val <= 65280 && Val % 256 == 0) {
4205       Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4206       Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4207       return true;
4208     }
4209     break;
4210   default:
4211     break;
4212   }
4213 
4214   return false;
4215 }
4216 
4217 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4218                                              SDValue &Shift) {
4219   if (!isa<ConstantSDNode>(N))
4220     return false;
4221 
4222   SDLoc DL(N);
4223   int64_t Val = cast<ConstantSDNode>(N)
4224                     ->getAPIntValue()
4225                     .trunc(VT.getFixedSizeInBits())
4226                     .getSExtValue();
4227 
4228   switch (VT.SimpleTy) {
4229   case MVT::i8:
4230     // All immediates are supported.
4231     Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4232     Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4233     return true;
4234   case MVT::i16:
4235   case MVT::i32:
4236   case MVT::i64:
4237     // Support 8bit signed immediates.
4238     if (Val >= -128 && Val <= 127) {
4239       Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4240       Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
4241       return true;
4242     }
4243     // Support 16bit signed immediates that are a multiple of 256.
4244     if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
4245       Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4246       Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
4247       return true;
4248     }
4249     break;
4250   default:
4251     break;
4252   }
4253 
4254   return false;
4255 }
4256 
4257 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4258   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4259     int64_t ImmVal = CNode->getSExtValue();
4260     SDLoc DL(N);
4261     if (ImmVal >= -128 && ImmVal < 128) {
4262       Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4263       return true;
4264     }
4265   }
4266   return false;
4267 }
4268 
4269 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4270   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4271     uint64_t ImmVal = CNode->getZExtValue();
4272 
4273     switch (VT.SimpleTy) {
4274     case MVT::i8:
4275       ImmVal &= 0xFF;
4276       break;
4277     case MVT::i16:
4278       ImmVal &= 0xFFFF;
4279       break;
4280     case MVT::i32:
4281       ImmVal &= 0xFFFFFFFF;
4282       break;
4283     case MVT::i64:
4284       break;
4285     default:
4286       llvm_unreachable("Unexpected type");
4287     }
4288 
4289     if (ImmVal < 256) {
4290       Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4291       return true;
4292     }
4293   }
4294   return false;
4295 }
4296 
4297 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4298                                               bool Invert) {
4299   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4300     uint64_t ImmVal = CNode->getZExtValue();
4301     SDLoc DL(N);
4302 
4303     if (Invert)
4304       ImmVal = ~ImmVal;
4305 
4306     // Shift mask depending on type size.
4307     switch (VT.SimpleTy) {
4308     case MVT::i8:
4309       ImmVal &= 0xFF;
4310       ImmVal |= ImmVal << 8;
4311       ImmVal |= ImmVal << 16;
4312       ImmVal |= ImmVal << 32;
4313       break;
4314     case MVT::i16:
4315       ImmVal &= 0xFFFF;
4316       ImmVal |= ImmVal << 16;
4317       ImmVal |= ImmVal << 32;
4318       break;
4319     case MVT::i32:
4320       ImmVal &= 0xFFFFFFFF;
4321       ImmVal |= ImmVal << 32;
4322       break;
4323     case MVT::i64:
4324       break;
4325     default:
4326       llvm_unreachable("Unexpected type");
4327     }
4328 
4329     uint64_t encoding;
4330     if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
4331       Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
4332       return true;
4333     }
4334   }
4335   return false;
4336 }
4337 
4338 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4339 // Rather than attempt to normalise everything we can sometimes saturate the
4340 // shift amount during selection. This function also allows for consistent
4341 // isel patterns by ensuring the resulting "Imm" node is of the i32 type
4342 // required by the instructions.
4343 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4344                                             uint64_t High, bool AllowSaturation,
4345                                             SDValue &Imm) {
4346   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4347     uint64_t ImmVal = CN->getZExtValue();
4348 
4349     // Reject shift amounts that are too small.
4350     if (ImmVal < Low)
4351       return false;
4352 
4353     // Reject or saturate shift amounts that are too big.
4354     if (ImmVal > High) {
4355       if (!AllowSaturation)
4356         return false;
4357       ImmVal = High;
4358     }
4359 
4360     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4361     return true;
4362   }
4363 
4364   return false;
4365 }
4366 
4367 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4368   // tagp(FrameIndex, IRGstack, tag_offset):
4369   // since the offset between FrameIndex and IRGstack is a compile-time
4370   // constant, this can be lowered to a single ADDG instruction.
4371   if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4372     return false;
4373   }
4374 
4375   SDValue IRG_SP = N->getOperand(2);
4376   if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4377       IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4378     return false;
4379   }
4380 
4381   const TargetLowering *TLI = getTargetLowering();
4382   SDLoc DL(N);
4383   int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4384   SDValue FiOp = CurDAG->getTargetFrameIndex(
4385       FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4386   int TagOffset = N->getConstantOperandVal(3);
4387 
4388   SDNode *Out = CurDAG->getMachineNode(
4389       AArch64::TAGPstack, DL, MVT::i64,
4390       {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4391        CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4392   ReplaceNode(N, Out);
4393   return true;
4394 }
4395 
4396 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4397   assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4398          "llvm.aarch64.tagp third argument must be an immediate");
4399   if (trySelectStackSlotTagP(N))
4400     return;
4401   // FIXME: above applies in any case when offset between Op1 and Op2 is a
4402   // compile-time constant, not just for stack allocations.
4403 
4404   // General case for unrelated pointers in Op1 and Op2.
4405   SDLoc DL(N);
4406   int TagOffset = N->getConstantOperandVal(3);
4407   SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4408                                       {N->getOperand(1), N->getOperand(2)});
4409   SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4410                                       {SDValue(N1, 0), N->getOperand(2)});
4411   SDNode *N3 = CurDAG->getMachineNode(
4412       AArch64::ADDG, DL, MVT::i64,
4413       {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4414        CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4415   ReplaceNode(N, N3);
4416 }
4417 
4418 bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4419   assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4420 
4421   // Bail when not a "cast" like insert_subvector.
4422   if (N->getConstantOperandVal(2) != 0)
4423     return false;
4424   if (!N->getOperand(0).isUndef())
4425     return false;
4426 
4427   // Bail when normal isel should do the job.
4428   EVT VT = N->getValueType(0);
4429   EVT InVT = N->getOperand(1).getValueType();
4430   if (VT.isFixedLengthVector() || InVT.isScalableVector())
4431     return false;
4432   if (InVT.getSizeInBits() <= 128)
4433     return false;
4434 
4435   // NOTE: We can only get here when doing fixed length SVE code generation.
4436   // We do manual selection because the types involved are not linked to real
4437   // registers (despite being legal) and must be coerced into SVE registers.
4438 
4439   assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4440          "Expected to insert into a packed scalable vector!");
4441 
4442   SDLoc DL(N);
4443   auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4444   ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4445                                         N->getOperand(1), RC));
4446   return true;
4447 }
4448 
4449 bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4450   assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4451 
4452   // Bail when not a "cast" like extract_subvector.
4453   if (N->getConstantOperandVal(1) != 0)
4454     return false;
4455 
4456   // Bail when normal isel can do the job.
4457   EVT VT = N->getValueType(0);
4458   EVT InVT = N->getOperand(0).getValueType();
4459   if (VT.isScalableVector() || InVT.isFixedLengthVector())
4460     return false;
4461   if (VT.getSizeInBits() <= 128)
4462     return false;
4463 
4464   // NOTE: We can only get here when doing fixed length SVE code generation.
4465   // We do manual selection because the types involved are not linked to real
4466   // registers (despite being legal) and must be coerced into SVE registers.
4467 
4468   assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4469          "Expected to extract from a packed scalable vector!");
4470 
4471   SDLoc DL(N);
4472   auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4473   ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4474                                         N->getOperand(0), RC));
4475   return true;
4476 }
4477 
4478 bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4479   assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4480 
4481   SDValue N0 = N->getOperand(0);
4482   SDValue N1 = N->getOperand(1);
4483   EVT VT = N->getValueType(0);
4484 
4485   // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4486   // Rotate by a constant is a funnel shift in IR which is exanded to
4487   // an OR with shifted operands.
4488   // We do the following transform:
4489   //   OR N0, N1 -> xar (x, y, imm)
4490   // Where:
4491   //   N1 = SRL_PRED true, V, splat(imm)  --> rotr amount
4492   //   N0 = SHL_PRED true, V, splat(bits-imm)
4493   //   V = (xor x, y)
4494   if (VT.isScalableVector() &&
4495       (Subtarget->hasSVE2() ||
4496        (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4497     if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4498         N1.getOpcode() != AArch64ISD::SRL_PRED)
4499       std::swap(N0, N1);
4500     if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4501         N1.getOpcode() != AArch64ISD::SRL_PRED)
4502       return false;
4503 
4504     auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4505     if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4506         !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4507       return false;
4508 
4509     SDValue XOR = N0.getOperand(1);
4510     if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
4511       return false;
4512 
4513     APInt ShlAmt, ShrAmt;
4514     if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4515         !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
4516       return false;
4517 
4518     if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4519       return false;
4520 
4521     SDLoc DL(N);
4522     SDValue Imm =
4523         CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4524 
4525     SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
4526     if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
4527             VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4528                  AArch64::XAR_ZZZI_D})) {
4529       CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4530       return true;
4531     }
4532     return false;
4533   }
4534 
4535   if (!Subtarget->hasSHA3())
4536     return false;
4537 
4538   if (N0->getOpcode() != AArch64ISD::VSHL ||
4539       N1->getOpcode() != AArch64ISD::VLSHR)
4540     return false;
4541 
4542   if (N0->getOperand(0) != N1->getOperand(0) ||
4543       N1->getOperand(0)->getOpcode() != ISD::XOR)
4544     return false;
4545 
4546   SDValue XOR = N0.getOperand(0);
4547   SDValue R1 = XOR.getOperand(0);
4548   SDValue R2 = XOR.getOperand(1);
4549 
4550   unsigned HsAmt = N0.getConstantOperandVal(1);
4551   unsigned ShAmt = N1.getConstantOperandVal(1);
4552 
4553   SDLoc DL = SDLoc(N0.getOperand(1));
4554   SDValue Imm = CurDAG->getTargetConstant(
4555       ShAmt, DL, N0.getOperand(1).getValueType(), false);
4556 
4557   if (ShAmt + HsAmt != 64)
4558     return false;
4559 
4560   SDValue Ops[] = {R1, R2, Imm};
4561   CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);
4562 
4563   return true;
4564 }
4565 
4566 void AArch64DAGToDAGISel::Select(SDNode *Node) {
4567   // If we have a custom node, we already have selected!
4568   if (Node->isMachineOpcode()) {
4569     LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4570     Node->setNodeId(-1);
4571     return;
4572   }
4573 
4574   // Few custom selection stuff.
4575   EVT VT = Node->getValueType(0);
4576 
4577   switch (Node->getOpcode()) {
4578   default:
4579     break;
4580 
4581   case ISD::ATOMIC_CMP_SWAP:
4582     if (SelectCMP_SWAP(Node))
4583       return;
4584     break;
4585 
4586   case ISD::READ_REGISTER:
4587   case AArch64ISD::MRRS:
4588     if (tryReadRegister(Node))
4589       return;
4590     break;
4591 
4592   case ISD::WRITE_REGISTER:
4593   case AArch64ISD::MSRR:
4594     if (tryWriteRegister(Node))
4595       return;
4596     break;
4597 
4598   case ISD::LOAD: {
4599     // Try to select as an indexed load. Fall through to normal processing
4600     // if we can't.
4601     if (tryIndexedLoad(Node))
4602       return;
4603     break;
4604   }
4605 
4606   case ISD::SRL:
4607   case ISD::AND:
4608   case ISD::SRA:
4609   case ISD::SIGN_EXTEND_INREG:
4610     if (tryBitfieldExtractOp(Node))
4611       return;
4612     if (tryBitfieldInsertInZeroOp(Node))
4613       return;
4614     [[fallthrough]];
4615   case ISD::ROTR:
4616   case ISD::SHL:
4617     if (tryShiftAmountMod(Node))
4618       return;
4619     break;
4620 
4621   case ISD::SIGN_EXTEND:
4622     if (tryBitfieldExtractOpFromSExt(Node))
4623       return;
4624     break;
4625 
4626   case ISD::OR:
4627     if (tryBitfieldInsertOp(Node))
4628       return;
4629     if (trySelectXAR(Node))
4630       return;
4631     break;
4632 
4633   case ISD::EXTRACT_SUBVECTOR: {
4634     if (trySelectCastScalableToFixedLengthVector(Node))
4635       return;
4636     break;
4637   }
4638 
4639   case ISD::INSERT_SUBVECTOR: {
4640     if (trySelectCastFixedLengthToScalableVector(Node))
4641       return;
4642     break;
4643   }
4644 
4645   case ISD::Constant: {
4646     // Materialize zero constants as copies from WZR/XZR.  This allows
4647     // the coalescer to propagate these into other instructions.
4648     ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4649     if (ConstNode->isZero()) {
4650       if (VT == MVT::i32) {
4651         SDValue New = CurDAG->getCopyFromReg(
4652             CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4653         ReplaceNode(Node, New.getNode());
4654         return;
4655       } else if (VT == MVT::i64) {
4656         SDValue New = CurDAG->getCopyFromReg(
4657             CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4658         ReplaceNode(Node, New.getNode());
4659         return;
4660       }
4661     }
4662     break;
4663   }
4664 
4665   case ISD::FrameIndex: {
4666     // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4667     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4668     unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4669     const TargetLowering *TLI = getTargetLowering();
4670     SDValue TFI = CurDAG->getTargetFrameIndex(
4671         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4672     SDLoc DL(Node);
4673     SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4674                       CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4675     CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4676     return;
4677   }
4678   case ISD::INTRINSIC_W_CHAIN: {
4679     unsigned IntNo = Node->getConstantOperandVal(1);
4680     switch (IntNo) {
4681     default:
4682       break;
4683     case Intrinsic::aarch64_gcsss: {
4684       SDLoc DL(Node);
4685       SDValue Chain = Node->getOperand(0);
4686       SDValue Val = Node->getOperand(2);
4687       SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4688       SDNode *SS1 =
4689           CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4690       SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4691                                            MVT::Other, Zero, SDValue(SS1, 0));
4692       ReplaceNode(Node, SS2);
4693       return;
4694     }
4695     case Intrinsic::aarch64_ldaxp:
4696     case Intrinsic::aarch64_ldxp: {
4697       unsigned Op =
4698           IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4699       SDValue MemAddr = Node->getOperand(2);
4700       SDLoc DL(Node);
4701       SDValue Chain = Node->getOperand(0);
4702 
4703       SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4704                                           MVT::Other, MemAddr, Chain);
4705 
4706       // Transfer memoperands.
4707       MachineMemOperand *MemOp =
4708           cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4709       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4710       ReplaceNode(Node, Ld);
4711       return;
4712     }
4713     case Intrinsic::aarch64_stlxp:
4714     case Intrinsic::aarch64_stxp: {
4715       unsigned Op =
4716           IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4717       SDLoc DL(Node);
4718       SDValue Chain = Node->getOperand(0);
4719       SDValue ValLo = Node->getOperand(2);
4720       SDValue ValHi = Node->getOperand(3);
4721       SDValue MemAddr = Node->getOperand(4);
4722 
4723       // Place arguments in the right order.
4724       SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4725 
4726       SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4727       // Transfer memoperands.
4728       MachineMemOperand *MemOp =
4729           cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4730       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4731 
4732       ReplaceNode(Node, St);
4733       return;
4734     }
4735     case Intrinsic::aarch64_neon_ld1x2:
4736       if (VT == MVT::v8i8) {
4737         SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4738         return;
4739       } else if (VT == MVT::v16i8) {
4740         SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4741         return;
4742       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4743         SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4744         return;
4745       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4746         SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4747         return;
4748       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4749         SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4750         return;
4751       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4752         SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4753         return;
4754       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4755         SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4756         return;
4757       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4758         SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4759         return;
4760       }
4761       break;
4762     case Intrinsic::aarch64_neon_ld1x3:
4763       if (VT == MVT::v8i8) {
4764         SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4765         return;
4766       } else if (VT == MVT::v16i8) {
4767         SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4768         return;
4769       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4770         SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4771         return;
4772       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4773         SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4774         return;
4775       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4776         SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4777         return;
4778       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4779         SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4780         return;
4781       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4782         SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4783         return;
4784       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4785         SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4786         return;
4787       }
4788       break;
4789     case Intrinsic::aarch64_neon_ld1x4:
4790       if (VT == MVT::v8i8) {
4791         SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4792         return;
4793       } else if (VT == MVT::v16i8) {
4794         SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4795         return;
4796       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4797         SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4798         return;
4799       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4800         SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4801         return;
4802       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4803         SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4804         return;
4805       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4806         SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4807         return;
4808       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4809         SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4810         return;
4811       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4812         SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4813         return;
4814       }
4815       break;
4816     case Intrinsic::aarch64_neon_ld2:
4817       if (VT == MVT::v8i8) {
4818         SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4819         return;
4820       } else if (VT == MVT::v16i8) {
4821         SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4822         return;
4823       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4824         SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4825         return;
4826       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4827         SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4828         return;
4829       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4830         SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4831         return;
4832       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4833         SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4834         return;
4835       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4836         SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4837         return;
4838       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4839         SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4840         return;
4841       }
4842       break;
4843     case Intrinsic::aarch64_neon_ld3:
4844       if (VT == MVT::v8i8) {
4845         SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4846         return;
4847       } else if (VT == MVT::v16i8) {
4848         SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4849         return;
4850       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4851         SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4852         return;
4853       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4854         SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4855         return;
4856       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4857         SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4858         return;
4859       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4860         SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4861         return;
4862       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4863         SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4864         return;
4865       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4866         SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4867         return;
4868       }
4869       break;
4870     case Intrinsic::aarch64_neon_ld4:
4871       if (VT == MVT::v8i8) {
4872         SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4873         return;
4874       } else if (VT == MVT::v16i8) {
4875         SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4876         return;
4877       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4878         SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4879         return;
4880       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4881         SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4882         return;
4883       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4884         SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4885         return;
4886       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4887         SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4888         return;
4889       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4890         SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4891         return;
4892       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4893         SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4894         return;
4895       }
4896       break;
4897     case Intrinsic::aarch64_neon_ld2r:
4898       if (VT == MVT::v8i8) {
4899         SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4900         return;
4901       } else if (VT == MVT::v16i8) {
4902         SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4903         return;
4904       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4905         SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4906         return;
4907       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4908         SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4909         return;
4910       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4911         SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4912         return;
4913       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4914         SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4915         return;
4916       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4917         SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4918         return;
4919       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4920         SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4921         return;
4922       }
4923       break;
4924     case Intrinsic::aarch64_neon_ld3r:
4925       if (VT == MVT::v8i8) {
4926         SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4927         return;
4928       } else if (VT == MVT::v16i8) {
4929         SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4930         return;
4931       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4932         SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4933         return;
4934       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4935         SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4936         return;
4937       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4938         SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4939         return;
4940       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4941         SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4942         return;
4943       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4944         SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4945         return;
4946       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4947         SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4948         return;
4949       }
4950       break;
4951     case Intrinsic::aarch64_neon_ld4r:
4952       if (VT == MVT::v8i8) {
4953         SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4954         return;
4955       } else if (VT == MVT::v16i8) {
4956         SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4957         return;
4958       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4959         SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4960         return;
4961       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4962         SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4963         return;
4964       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4965         SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4966         return;
4967       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4968         SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4969         return;
4970       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4971         SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4972         return;
4973       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4974         SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4975         return;
4976       }
4977       break;
4978     case Intrinsic::aarch64_neon_ld2lane:
4979       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4980         SelectLoadLane(Node, 2, AArch64::LD2i8);
4981         return;
4982       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4983                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4984         SelectLoadLane(Node, 2, AArch64::LD2i16);
4985         return;
4986       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4987                  VT == MVT::v2f32) {
4988         SelectLoadLane(Node, 2, AArch64::LD2i32);
4989         return;
4990       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4991                  VT == MVT::v1f64) {
4992         SelectLoadLane(Node, 2, AArch64::LD2i64);
4993         return;
4994       }
4995       break;
4996     case Intrinsic::aarch64_neon_ld3lane:
4997       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4998         SelectLoadLane(Node, 3, AArch64::LD3i8);
4999         return;
5000       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5001                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5002         SelectLoadLane(Node, 3, AArch64::LD3i16);
5003         return;
5004       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5005                  VT == MVT::v2f32) {
5006         SelectLoadLane(Node, 3, AArch64::LD3i32);
5007         return;
5008       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5009                  VT == MVT::v1f64) {
5010         SelectLoadLane(Node, 3, AArch64::LD3i64);
5011         return;
5012       }
5013       break;
5014     case Intrinsic::aarch64_neon_ld4lane:
5015       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5016         SelectLoadLane(Node, 4, AArch64::LD4i8);
5017         return;
5018       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5019                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5020         SelectLoadLane(Node, 4, AArch64::LD4i16);
5021         return;
5022       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5023                  VT == MVT::v2f32) {
5024         SelectLoadLane(Node, 4, AArch64::LD4i32);
5025         return;
5026       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5027                  VT == MVT::v1f64) {
5028         SelectLoadLane(Node, 4, AArch64::LD4i64);
5029         return;
5030       }
5031       break;
5032     case Intrinsic::aarch64_ld64b:
5033       SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5034       return;
5035     case Intrinsic::aarch64_sve_ld2q_sret: {
5036       SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5037       return;
5038     }
5039     case Intrinsic::aarch64_sve_ld3q_sret: {
5040       SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5041       return;
5042     }
5043     case Intrinsic::aarch64_sve_ld4q_sret: {
5044       SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5045       return;
5046     }
5047     case Intrinsic::aarch64_sve_ld2_sret: {
5048       if (VT == MVT::nxv16i8) {
5049         SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5050                              true);
5051         return;
5052       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5053                  VT == MVT::nxv8bf16) {
5054         SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5055                              true);
5056         return;
5057       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5058         SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5059                              true);
5060         return;
5061       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5062         SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5063                              true);
5064         return;
5065       }
5066       break;
5067     }
5068     case Intrinsic::aarch64_sve_ld1_pn_x2: {
5069       if (VT == MVT::nxv16i8) {
5070         if (Subtarget->hasSME2())
5071           SelectContiguousMultiVectorLoad(
5072               Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5073         else if (Subtarget->hasSVE2p1())
5074           SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5075                                           AArch64::LD1B_2Z);
5076         else
5077           break;
5078         return;
5079       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5080                  VT == MVT::nxv8bf16) {
5081         if (Subtarget->hasSME2())
5082           SelectContiguousMultiVectorLoad(
5083               Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5084         else if (Subtarget->hasSVE2p1())
5085           SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5086                                           AArch64::LD1H_2Z);
5087         else
5088           break;
5089         return;
5090       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5091         if (Subtarget->hasSME2())
5092           SelectContiguousMultiVectorLoad(
5093               Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5094         else if (Subtarget->hasSVE2p1())
5095           SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5096                                           AArch64::LD1W_2Z);
5097         else
5098           break;
5099         return;
5100       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5101         if (Subtarget->hasSME2())
5102           SelectContiguousMultiVectorLoad(
5103               Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5104         else if (Subtarget->hasSVE2p1())
5105           SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5106                                           AArch64::LD1D_2Z);
5107         else
5108           break;
5109         return;
5110       }
5111       break;
5112     }
5113     case Intrinsic::aarch64_sve_ld1_pn_x4: {
5114       if (VT == MVT::nxv16i8) {
5115         if (Subtarget->hasSME2())
5116           SelectContiguousMultiVectorLoad(
5117               Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5118         else if (Subtarget->hasSVE2p1())
5119           SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5120                                           AArch64::LD1B_4Z);
5121         else
5122           break;
5123         return;
5124       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5125                  VT == MVT::nxv8bf16) {
5126         if (Subtarget->hasSME2())
5127           SelectContiguousMultiVectorLoad(
5128               Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5129         else if (Subtarget->hasSVE2p1())
5130           SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5131                                           AArch64::LD1H_4Z);
5132         else
5133           break;
5134         return;
5135       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5136         if (Subtarget->hasSME2())
5137           SelectContiguousMultiVectorLoad(
5138               Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5139         else if (Subtarget->hasSVE2p1())
5140           SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5141                                           AArch64::LD1W_4Z);
5142         else
5143           break;
5144         return;
5145       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5146         if (Subtarget->hasSME2())
5147           SelectContiguousMultiVectorLoad(
5148               Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5149         else if (Subtarget->hasSVE2p1())
5150           SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5151                                           AArch64::LD1D_4Z);
5152         else
5153           break;
5154         return;
5155       }
5156       break;
5157     }
5158     case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5159       if (VT == MVT::nxv16i8) {
5160         if (Subtarget->hasSME2())
5161           SelectContiguousMultiVectorLoad(Node, 2, 0,
5162                                           AArch64::LDNT1B_2Z_IMM_PSEUDO,
5163                                           AArch64::LDNT1B_2Z_PSEUDO);
5164         else if (Subtarget->hasSVE2p1())
5165           SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5166                                           AArch64::LDNT1B_2Z);
5167         else
5168           break;
5169         return;
5170       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5171                  VT == MVT::nxv8bf16) {
5172         if (Subtarget->hasSME2())
5173           SelectContiguousMultiVectorLoad(Node, 2, 1,
5174                                           AArch64::LDNT1H_2Z_IMM_PSEUDO,
5175                                           AArch64::LDNT1H_2Z_PSEUDO);
5176         else if (Subtarget->hasSVE2p1())
5177           SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5178                                           AArch64::LDNT1H_2Z);
5179         else
5180           break;
5181         return;
5182       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5183         if (Subtarget->hasSME2())
5184           SelectContiguousMultiVectorLoad(Node, 2, 2,
5185                                           AArch64::LDNT1W_2Z_IMM_PSEUDO,
5186                                           AArch64::LDNT1W_2Z_PSEUDO);
5187         else if (Subtarget->hasSVE2p1())
5188           SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5189                                           AArch64::LDNT1W_2Z);
5190         else
5191           break;
5192         return;
5193       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5194         if (Subtarget->hasSME2())
5195           SelectContiguousMultiVectorLoad(Node, 2, 3,
5196                                           AArch64::LDNT1D_2Z_IMM_PSEUDO,
5197                                           AArch64::LDNT1D_2Z_PSEUDO);
5198         else if (Subtarget->hasSVE2p1())
5199           SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5200                                           AArch64::LDNT1D_2Z);
5201         else
5202           break;
5203         return;
5204       }
5205       break;
5206     }
5207     case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5208       if (VT == MVT::nxv16i8) {
5209         if (Subtarget->hasSME2())
5210           SelectContiguousMultiVectorLoad(Node, 4, 0,
5211                                           AArch64::LDNT1B_4Z_IMM_PSEUDO,
5212                                           AArch64::LDNT1B_4Z_PSEUDO);
5213         else if (Subtarget->hasSVE2p1())
5214           SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5215                                           AArch64::LDNT1B_4Z);
5216         else
5217           break;
5218         return;
5219       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5220                  VT == MVT::nxv8bf16) {
5221         if (Subtarget->hasSME2())
5222           SelectContiguousMultiVectorLoad(Node, 4, 1,
5223                                           AArch64::LDNT1H_4Z_IMM_PSEUDO,
5224                                           AArch64::LDNT1H_4Z_PSEUDO);
5225         else if (Subtarget->hasSVE2p1())
5226           SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5227                                           AArch64::LDNT1H_4Z);
5228         else
5229           break;
5230         return;
5231       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5232         if (Subtarget->hasSME2())
5233           SelectContiguousMultiVectorLoad(Node, 4, 2,
5234                                           AArch64::LDNT1W_4Z_IMM_PSEUDO,
5235                                           AArch64::LDNT1W_4Z_PSEUDO);
5236         else if (Subtarget->hasSVE2p1())
5237           SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5238                                           AArch64::LDNT1W_4Z);
5239         else
5240           break;
5241         return;
5242       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5243         if (Subtarget->hasSME2())
5244           SelectContiguousMultiVectorLoad(Node, 4, 3,
5245                                           AArch64::LDNT1D_4Z_IMM_PSEUDO,
5246                                           AArch64::LDNT1D_4Z_PSEUDO);
5247         else if (Subtarget->hasSVE2p1())
5248           SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5249                                           AArch64::LDNT1D_4Z);
5250         else
5251           break;
5252         return;
5253       }
5254       break;
5255     }
5256     case Intrinsic::aarch64_sve_ld3_sret: {
5257       if (VT == MVT::nxv16i8) {
5258         SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5259                              true);
5260         return;
5261       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5262                  VT == MVT::nxv8bf16) {
5263         SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5264                              true);
5265         return;
5266       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5267         SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5268                              true);
5269         return;
5270       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5271         SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5272                              true);
5273         return;
5274       }
5275       break;
5276     }
5277     case Intrinsic::aarch64_sve_ld4_sret: {
5278       if (VT == MVT::nxv16i8) {
5279         SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5280                              true);
5281         return;
5282       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5283                  VT == MVT::nxv8bf16) {
5284         SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5285                              true);
5286         return;
5287       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5288         SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5289                              true);
5290         return;
5291       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5292         SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5293                              true);
5294         return;
5295       }
5296       break;
5297     }
5298     case Intrinsic::aarch64_sme_read_hor_vg2: {
5299       if (VT == MVT::nxv16i8) {
5300         SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5301                                      AArch64::MOVA_2ZMXI_H_B);
5302         return;
5303       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5304                  VT == MVT::nxv8bf16) {
5305         SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5306                                     AArch64::MOVA_2ZMXI_H_H);
5307         return;
5308       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5309         SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5310                                     AArch64::MOVA_2ZMXI_H_S);
5311         return;
5312       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5313         SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5314                                     AArch64::MOVA_2ZMXI_H_D);
5315         return;
5316       }
5317       break;
5318     }
5319     case Intrinsic::aarch64_sme_read_ver_vg2: {
5320       if (VT == MVT::nxv16i8) {
5321         SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5322                                      AArch64::MOVA_2ZMXI_V_B);
5323         return;
5324       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5325                  VT == MVT::nxv8bf16) {
5326         SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5327                                     AArch64::MOVA_2ZMXI_V_H);
5328         return;
5329       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5330         SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5331                                     AArch64::MOVA_2ZMXI_V_S);
5332         return;
5333       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5334         SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5335                                     AArch64::MOVA_2ZMXI_V_D);
5336         return;
5337       }
5338       break;
5339     }
5340     case Intrinsic::aarch64_sme_read_hor_vg4: {
5341       if (VT == MVT::nxv16i8) {
5342         SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5343                                      AArch64::MOVA_4ZMXI_H_B);
5344         return;
5345       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5346                  VT == MVT::nxv8bf16) {
5347         SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5348                                     AArch64::MOVA_4ZMXI_H_H);
5349         return;
5350       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5351         SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5352                                     AArch64::MOVA_4ZMXI_H_S);
5353         return;
5354       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5355         SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5356                                     AArch64::MOVA_4ZMXI_H_D);
5357         return;
5358       }
5359       break;
5360     }
5361     case Intrinsic::aarch64_sme_read_ver_vg4: {
5362       if (VT == MVT::nxv16i8) {
5363         SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5364                                      AArch64::MOVA_4ZMXI_V_B);
5365         return;
5366       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5367                  VT == MVT::nxv8bf16) {
5368         SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5369                                     AArch64::MOVA_4ZMXI_V_H);
5370         return;
5371       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5372         SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5373                                     AArch64::MOVA_4ZMXI_V_S);
5374         return;
5375       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5376         SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5377                                     AArch64::MOVA_4ZMXI_V_D);
5378         return;
5379       }
5380       break;
5381     }
5382     case Intrinsic::aarch64_sme_read_vg1x2: {
5383       SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5384                                   AArch64::MOVA_VG2_2ZMXI);
5385       return;
5386     }
5387     case Intrinsic::aarch64_sme_read_vg1x4: {
5388       SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5389                                   AArch64::MOVA_VG4_4ZMXI);
5390       return;
5391     }
5392     case Intrinsic::aarch64_sme_readz_horiz_x2: {
5393       if (VT == MVT::nxv16i8) {
5394         SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5395         return;
5396       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5397                  VT == MVT::nxv8bf16) {
5398         SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5399         return;
5400       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5401         SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5402         return;
5403       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5404         SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5405         return;
5406       }
5407       break;
5408     }
5409     case Intrinsic::aarch64_sme_readz_vert_x2: {
5410       if (VT == MVT::nxv16i8) {
5411         SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5412         return;
5413       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5414                  VT == MVT::nxv8bf16) {
5415         SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5416         return;
5417       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5418         SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5419         return;
5420       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5421         SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5422         return;
5423       }
5424       break;
5425     }
5426     case Intrinsic::aarch64_sme_readz_horiz_x4: {
5427       if (VT == MVT::nxv16i8) {
5428         SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5429         return;
5430       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5431                  VT == MVT::nxv8bf16) {
5432         SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5433         return;
5434       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5435         SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5436         return;
5437       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5438         SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5439         return;
5440       }
5441       break;
5442     }
5443     case Intrinsic::aarch64_sme_readz_vert_x4: {
5444       if (VT == MVT::nxv16i8) {
5445         SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5446         return;
5447       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5448                  VT == MVT::nxv8bf16) {
5449         SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5450         return;
5451       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5452         SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5453         return;
5454       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5455         SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5456         return;
5457       }
5458       break;
5459     }
5460     case Intrinsic::aarch64_sme_readz_x2: {
5461       SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5462                              AArch64::ZA);
5463       return;
5464     }
5465     case Intrinsic::aarch64_sme_readz_x4: {
5466       SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5467                              AArch64::ZA);
5468       return;
5469     }
5470     case Intrinsic::swift_async_context_addr: {
5471       SDLoc DL(Node);
5472       SDValue Chain = Node->getOperand(0);
5473       SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5474       SDValue Res = SDValue(
5475           CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5476                                  CurDAG->getTargetConstant(8, DL, MVT::i32),
5477                                  CurDAG->getTargetConstant(0, DL, MVT::i32)),
5478           0);
5479       ReplaceUses(SDValue(Node, 0), Res);
5480       ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5481       CurDAG->RemoveDeadNode(Node);
5482 
5483       auto &MF = CurDAG->getMachineFunction();
5484       MF.getFrameInfo().setFrameAddressIsTaken(true);
5485       MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5486       return;
5487     }
5488     case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5489       if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5490               Node->getValueType(0),
5491               {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5492                AArch64::LUTI2_4ZTZI_S}))
5493         // Second Immediate must be <= 3:
5494         SelectMultiVectorLuti(Node, 4, Opc, 3);
5495       return;
5496     }
5497     case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5498       if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5499               Node->getValueType(0),
5500               {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5501         // Second Immediate must be <= 1:
5502         SelectMultiVectorLuti(Node, 4, Opc, 1);
5503       return;
5504     }
5505     case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5506       if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5507               Node->getValueType(0),
5508               {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5509                AArch64::LUTI2_2ZTZI_S}))
5510         // Second Immediate must be <= 7:
5511         SelectMultiVectorLuti(Node, 2, Opc, 7);
5512       return;
5513     }
5514     case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5515       if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
5516               Node->getValueType(0),
5517               {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5518                AArch64::LUTI4_2ZTZI_S}))
5519         // Second Immediate must be <= 3:
5520         SelectMultiVectorLuti(Node, 2, Opc, 3);
5521       return;
5522     }
5523     }
5524   } break;
5525   case ISD::INTRINSIC_WO_CHAIN: {
5526     unsigned IntNo = Node->getConstantOperandVal(0);
5527     switch (IntNo) {
5528     default:
5529       break;
5530     case Intrinsic::aarch64_tagp:
5531       SelectTagP(Node);
5532       return;
5533 
5534     case Intrinsic::ptrauth_auth:
5535       SelectPtrauthAuth(Node);
5536       return;
5537 
5538     case Intrinsic::ptrauth_resign:
5539       SelectPtrauthResign(Node);
5540       return;
5541 
5542     case Intrinsic::aarch64_neon_tbl2:
5543       SelectTable(Node, 2,
5544                   VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
5545                   false);
5546       return;
5547     case Intrinsic::aarch64_neon_tbl3:
5548       SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
5549                                            : AArch64::TBLv16i8Three,
5550                   false);
5551       return;
5552     case Intrinsic::aarch64_neon_tbl4:
5553       SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
5554                                            : AArch64::TBLv16i8Four,
5555                   false);
5556       return;
5557     case Intrinsic::aarch64_neon_tbx2:
5558       SelectTable(Node, 2,
5559                   VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
5560                   true);
5561       return;
5562     case Intrinsic::aarch64_neon_tbx3:
5563       SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
5564                                            : AArch64::TBXv16i8Three,
5565                   true);
5566       return;
5567     case Intrinsic::aarch64_neon_tbx4:
5568       SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
5569                                            : AArch64::TBXv16i8Four,
5570                   true);
5571       return;
5572     case Intrinsic::aarch64_sve_srshl_single_x2:
5573       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5574               Node->getValueType(0),
5575               {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
5576                AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
5577         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5578       return;
5579     case Intrinsic::aarch64_sve_srshl_single_x4:
5580       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5581               Node->getValueType(0),
5582               {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
5583                AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
5584         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5585       return;
5586     case Intrinsic::aarch64_sve_urshl_single_x2:
5587       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5588               Node->getValueType(0),
5589               {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
5590                AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
5591         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5592       return;
5593     case Intrinsic::aarch64_sve_urshl_single_x4:
5594       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5595               Node->getValueType(0),
5596               {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
5597                AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
5598         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5599       return;
5600     case Intrinsic::aarch64_sve_srshl_x2:
5601       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5602               Node->getValueType(0),
5603               {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
5604                AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
5605         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5606       return;
5607     case Intrinsic::aarch64_sve_srshl_x4:
5608       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5609               Node->getValueType(0),
5610               {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
5611                AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
5612         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5613       return;
5614     case Intrinsic::aarch64_sve_urshl_x2:
5615       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5616               Node->getValueType(0),
5617               {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
5618                AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
5619         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5620       return;
5621     case Intrinsic::aarch64_sve_urshl_x4:
5622       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5623               Node->getValueType(0),
5624               {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
5625                AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
5626         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5627       return;
5628     case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
5629       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5630               Node->getValueType(0),
5631               {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
5632                AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
5633         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5634       return;
5635     case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
5636       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5637               Node->getValueType(0),
5638               {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
5639                AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
5640         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5641       return;
5642     case Intrinsic::aarch64_sve_sqdmulh_vgx2:
5643       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5644               Node->getValueType(0),
5645               {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
5646                AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
5647         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5648       return;
5649     case Intrinsic::aarch64_sve_sqdmulh_vgx4:
5650       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5651               Node->getValueType(0),
5652               {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
5653                AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
5654         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5655       return;
5656     case Intrinsic::aarch64_sve_whilege_x2:
5657       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5658               Node->getValueType(0),
5659               {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
5660                AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
5661         SelectWhilePair(Node, Op);
5662       return;
5663     case Intrinsic::aarch64_sve_whilegt_x2:
5664       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5665               Node->getValueType(0),
5666               {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
5667                AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
5668         SelectWhilePair(Node, Op);
5669       return;
5670     case Intrinsic::aarch64_sve_whilehi_x2:
5671       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5672               Node->getValueType(0),
5673               {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
5674                AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
5675         SelectWhilePair(Node, Op);
5676       return;
5677     case Intrinsic::aarch64_sve_whilehs_x2:
5678       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5679               Node->getValueType(0),
5680               {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
5681                AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
5682         SelectWhilePair(Node, Op);
5683       return;
5684     case Intrinsic::aarch64_sve_whilele_x2:
5685       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5686               Node->getValueType(0),
5687               {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
5688                AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
5689       SelectWhilePair(Node, Op);
5690       return;
5691     case Intrinsic::aarch64_sve_whilelo_x2:
5692       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5693               Node->getValueType(0),
5694               {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
5695                AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
5696       SelectWhilePair(Node, Op);
5697       return;
5698     case Intrinsic::aarch64_sve_whilels_x2:
5699       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5700               Node->getValueType(0),
5701               {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
5702                AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
5703         SelectWhilePair(Node, Op);
5704       return;
5705     case Intrinsic::aarch64_sve_whilelt_x2:
5706       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
5707               Node->getValueType(0),
5708               {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
5709                AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
5710         SelectWhilePair(Node, Op);
5711       return;
5712     case Intrinsic::aarch64_sve_smax_single_x2:
5713       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5714               Node->getValueType(0),
5715               {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
5716                AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
5717         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5718       return;
5719     case Intrinsic::aarch64_sve_umax_single_x2:
5720       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5721               Node->getValueType(0),
5722               {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
5723                AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
5724         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5725       return;
5726     case Intrinsic::aarch64_sve_fmax_single_x2:
5727       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5728               Node->getValueType(0),
5729               {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
5730                AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
5731         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5732       return;
5733     case Intrinsic::aarch64_sve_smax_single_x4:
5734       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5735               Node->getValueType(0),
5736               {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
5737                AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
5738         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5739       return;
5740     case Intrinsic::aarch64_sve_umax_single_x4:
5741       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5742               Node->getValueType(0),
5743               {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
5744                AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
5745         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5746       return;
5747     case Intrinsic::aarch64_sve_fmax_single_x4:
5748       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5749               Node->getValueType(0),
5750               {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
5751                AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
5752         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5753       return;
5754     case Intrinsic::aarch64_sve_smin_single_x2:
5755       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5756               Node->getValueType(0),
5757               {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
5758                AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
5759         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5760       return;
5761     case Intrinsic::aarch64_sve_umin_single_x2:
5762       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5763               Node->getValueType(0),
5764               {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
5765                AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
5766         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5767       return;
5768     case Intrinsic::aarch64_sve_fmin_single_x2:
5769       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5770               Node->getValueType(0),
5771               {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
5772                AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
5773         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5774       return;
5775     case Intrinsic::aarch64_sve_smin_single_x4:
5776       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5777               Node->getValueType(0),
5778               {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
5779                AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
5780         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5781       return;
5782     case Intrinsic::aarch64_sve_umin_single_x4:
5783       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5784               Node->getValueType(0),
5785               {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
5786                AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
5787         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5788       return;
5789     case Intrinsic::aarch64_sve_fmin_single_x4:
5790       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5791               Node->getValueType(0),
5792               {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
5793                AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
5794         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5795       return;
5796     case Intrinsic::aarch64_sve_smax_x2:
5797       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5798               Node->getValueType(0),
5799               {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
5800                AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
5801         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5802       return;
5803     case Intrinsic::aarch64_sve_umax_x2:
5804       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5805               Node->getValueType(0),
5806               {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
5807                AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
5808         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5809       return;
5810     case Intrinsic::aarch64_sve_fmax_x2:
5811       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5812               Node->getValueType(0),
5813               {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
5814                AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
5815         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5816       return;
5817     case Intrinsic::aarch64_sve_smax_x4:
5818       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5819               Node->getValueType(0),
5820               {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
5821                AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
5822         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5823       return;
5824     case Intrinsic::aarch64_sve_umax_x4:
5825       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5826               Node->getValueType(0),
5827               {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
5828                AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
5829         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5830       return;
5831     case Intrinsic::aarch64_sve_fmax_x4:
5832       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5833               Node->getValueType(0),
5834               {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
5835                AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
5836         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5837       return;
5838     case Intrinsic::aarch64_sve_smin_x2:
5839       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5840               Node->getValueType(0),
5841               {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
5842                AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
5843         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5844       return;
5845     case Intrinsic::aarch64_sve_umin_x2:
5846       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5847               Node->getValueType(0),
5848               {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
5849                AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
5850         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5851       return;
5852     case Intrinsic::aarch64_sve_fmin_x2:
5853       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5854               Node->getValueType(0),
5855               {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
5856                AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
5857         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5858       return;
5859     case Intrinsic::aarch64_sve_smin_x4:
5860       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5861               Node->getValueType(0),
5862               {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
5863                AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
5864         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5865       return;
5866     case Intrinsic::aarch64_sve_umin_x4:
5867       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5868               Node->getValueType(0),
5869               {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
5870                AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
5871         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5872       return;
5873     case Intrinsic::aarch64_sve_fmin_x4:
5874       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5875               Node->getValueType(0),
5876               {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
5877                AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
5878         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5879       return;
5880     case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
5881       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5882               Node->getValueType(0),
5883               {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
5884                AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
5885         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5886       return;
5887     case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
5888       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5889               Node->getValueType(0),
5890               {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
5891                AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
5892         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5893       return;
5894     case Intrinsic::aarch64_sve_fminnm_single_x2:
5895       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5896               Node->getValueType(0),
5897               {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
5898                AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
5899         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
5900       return;
5901     case Intrinsic::aarch64_sve_fminnm_single_x4:
5902       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5903               Node->getValueType(0),
5904               {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
5905                AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
5906         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
5907       return;
5908     case Intrinsic::aarch64_sve_fmaxnm_x2:
5909       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5910               Node->getValueType(0),
5911               {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
5912                AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
5913         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5914       return;
5915     case Intrinsic::aarch64_sve_fmaxnm_x4:
5916       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5917               Node->getValueType(0),
5918               {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
5919                AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
5920         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5921       return;
5922     case Intrinsic::aarch64_sve_fminnm_x2:
5923       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5924               Node->getValueType(0),
5925               {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
5926                AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
5927         SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
5928       return;
5929     case Intrinsic::aarch64_sve_fminnm_x4:
5930       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5931               Node->getValueType(0),
5932               {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
5933                AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
5934         SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
5935       return;
5936     case Intrinsic::aarch64_sve_fcvtzs_x2:
5937       SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
5938       return;
5939     case Intrinsic::aarch64_sve_scvtf_x2:
5940       SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
5941       return;
5942     case Intrinsic::aarch64_sve_fcvtzu_x2:
5943       SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
5944       return;
5945     case Intrinsic::aarch64_sve_ucvtf_x2:
5946       SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
5947       return;
5948     case Intrinsic::aarch64_sve_fcvtzs_x4:
5949       SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
5950       return;
5951     case Intrinsic::aarch64_sve_scvtf_x4:
5952       SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
5953       return;
5954     case Intrinsic::aarch64_sve_fcvtzu_x4:
5955       SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
5956       return;
5957     case Intrinsic::aarch64_sve_ucvtf_x4:
5958       SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
5959       return;
5960     case Intrinsic::aarch64_sve_fcvt_widen_x2:
5961       SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
5962       return;
5963     case Intrinsic::aarch64_sve_fcvtl_widen_x2:
5964       SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
5965       return;
5966     case Intrinsic::aarch64_sve_sclamp_single_x2:
5967       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5968               Node->getValueType(0),
5969               {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
5970                AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
5971         SelectClamp(Node, 2, Op);
5972       return;
5973     case Intrinsic::aarch64_sve_uclamp_single_x2:
5974       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5975               Node->getValueType(0),
5976               {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
5977                AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
5978         SelectClamp(Node, 2, Op);
5979       return;
5980     case Intrinsic::aarch64_sve_fclamp_single_x2:
5981       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
5982               Node->getValueType(0),
5983               {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
5984                AArch64::FCLAMP_VG2_2Z2Z_D}))
5985         SelectClamp(Node, 2, Op);
5986       return;
5987     case Intrinsic::aarch64_sve_bfclamp_single_x2:
5988       SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
5989       return;
5990     case Intrinsic::aarch64_sve_sclamp_single_x4:
5991       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5992               Node->getValueType(0),
5993               {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
5994                AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
5995         SelectClamp(Node, 4, Op);
5996       return;
5997     case Intrinsic::aarch64_sve_uclamp_single_x4:
5998       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
5999               Node->getValueType(0),
6000               {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6001                AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6002         SelectClamp(Node, 4, Op);
6003       return;
6004     case Intrinsic::aarch64_sve_fclamp_single_x4:
6005       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
6006               Node->getValueType(0),
6007               {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6008                AArch64::FCLAMP_VG4_4Z4Z_D}))
6009         SelectClamp(Node, 4, Op);
6010       return;
6011     case Intrinsic::aarch64_sve_bfclamp_single_x4:
6012       SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6013       return;
6014     case Intrinsic::aarch64_sve_add_single_x2:
6015       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6016               Node->getValueType(0),
6017               {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6018                AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6019         SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6020       return;
6021     case Intrinsic::aarch64_sve_add_single_x4:
6022       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6023               Node->getValueType(0),
6024               {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6025                AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6026         SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6027       return;
6028     case Intrinsic::aarch64_sve_zip_x2:
6029       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6030               Node->getValueType(0),
6031               {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6032                AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6033         SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6034       return;
6035     case Intrinsic::aarch64_sve_zipq_x2:
6036       SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6037                                 AArch64::ZIP_VG2_2ZZZ_Q);
6038       return;
6039     case Intrinsic::aarch64_sve_zip_x4:
6040       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6041               Node->getValueType(0),
6042               {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6043                AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6044         SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6045       return;
6046     case Intrinsic::aarch64_sve_zipq_x4:
6047       SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6048                                 AArch64::ZIP_VG4_4Z4Z_Q);
6049       return;
6050     case Intrinsic::aarch64_sve_uzp_x2:
6051       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6052               Node->getValueType(0),
6053               {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6054                AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6055         SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6056       return;
6057     case Intrinsic::aarch64_sve_uzpq_x2:
6058       SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6059                                 AArch64::UZP_VG2_2ZZZ_Q);
6060       return;
6061     case Intrinsic::aarch64_sve_uzp_x4:
6062       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6063               Node->getValueType(0),
6064               {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6065                AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6066         SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6067       return;
6068     case Intrinsic::aarch64_sve_uzpq_x4:
6069       SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6070                                 AArch64::UZP_VG4_4Z4Z_Q);
6071       return;
6072     case Intrinsic::aarch64_sve_sel_x2:
6073       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6074               Node->getValueType(0),
6075               {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6076                AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6077         SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6078       return;
6079     case Intrinsic::aarch64_sve_sel_x4:
6080       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6081               Node->getValueType(0),
6082               {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6083                AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6084         SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6085       return;
6086     case Intrinsic::aarch64_sve_frinta_x2:
6087       SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6088       return;
6089     case Intrinsic::aarch64_sve_frinta_x4:
6090       SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6091       return;
6092     case Intrinsic::aarch64_sve_frintm_x2:
6093       SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6094       return;
6095     case Intrinsic::aarch64_sve_frintm_x4:
6096       SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6097       return;
6098     case Intrinsic::aarch64_sve_frintn_x2:
6099       SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6100       return;
6101     case Intrinsic::aarch64_sve_frintn_x4:
6102       SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6103       return;
6104     case Intrinsic::aarch64_sve_frintp_x2:
6105       SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6106       return;
6107     case Intrinsic::aarch64_sve_frintp_x4:
6108       SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6109       return;
6110     case Intrinsic::aarch64_sve_sunpk_x2:
6111       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6112               Node->getValueType(0),
6113               {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6114                AArch64::SUNPK_VG2_2ZZ_D}))
6115         SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6116       return;
6117     case Intrinsic::aarch64_sve_uunpk_x2:
6118       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6119               Node->getValueType(0),
6120               {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6121                AArch64::UUNPK_VG2_2ZZ_D}))
6122         SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6123       return;
6124     case Intrinsic::aarch64_sve_sunpk_x4:
6125       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6126               Node->getValueType(0),
6127               {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6128                AArch64::SUNPK_VG4_4Z2Z_D}))
6129         SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6130       return;
6131     case Intrinsic::aarch64_sve_uunpk_x4:
6132       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
6133               Node->getValueType(0),
6134               {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6135                AArch64::UUNPK_VG4_4Z2Z_D}))
6136         SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6137       return;
6138     case Intrinsic::aarch64_sve_pext_x2: {
6139       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
6140               Node->getValueType(0),
6141               {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6142                AArch64::PEXT_2PCI_D}))
6143         SelectPExtPair(Node, Op);
6144       return;
6145     }
6146     }
6147     break;
6148   }
6149   case ISD::INTRINSIC_VOID: {
6150     unsigned IntNo = Node->getConstantOperandVal(1);
6151     if (Node->getNumOperands() >= 3)
6152       VT = Node->getOperand(2)->getValueType(0);
6153     switch (IntNo) {
6154     default:
6155       break;
6156     case Intrinsic::aarch64_neon_st1x2: {
6157       if (VT == MVT::v8i8) {
6158         SelectStore(Node, 2, AArch64::ST1Twov8b);
6159         return;
6160       } else if (VT == MVT::v16i8) {
6161         SelectStore(Node, 2, AArch64::ST1Twov16b);
6162         return;
6163       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6164                  VT == MVT::v4bf16) {
6165         SelectStore(Node, 2, AArch64::ST1Twov4h);
6166         return;
6167       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6168                  VT == MVT::v8bf16) {
6169         SelectStore(Node, 2, AArch64::ST1Twov8h);
6170         return;
6171       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6172         SelectStore(Node, 2, AArch64::ST1Twov2s);
6173         return;
6174       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6175         SelectStore(Node, 2, AArch64::ST1Twov4s);
6176         return;
6177       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6178         SelectStore(Node, 2, AArch64::ST1Twov2d);
6179         return;
6180       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6181         SelectStore(Node, 2, AArch64::ST1Twov1d);
6182         return;
6183       }
6184       break;
6185     }
6186     case Intrinsic::aarch64_neon_st1x3: {
6187       if (VT == MVT::v8i8) {
6188         SelectStore(Node, 3, AArch64::ST1Threev8b);
6189         return;
6190       } else if (VT == MVT::v16i8) {
6191         SelectStore(Node, 3, AArch64::ST1Threev16b);
6192         return;
6193       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6194                  VT == MVT::v4bf16) {
6195         SelectStore(Node, 3, AArch64::ST1Threev4h);
6196         return;
6197       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6198                  VT == MVT::v8bf16) {
6199         SelectStore(Node, 3, AArch64::ST1Threev8h);
6200         return;
6201       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6202         SelectStore(Node, 3, AArch64::ST1Threev2s);
6203         return;
6204       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6205         SelectStore(Node, 3, AArch64::ST1Threev4s);
6206         return;
6207       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6208         SelectStore(Node, 3, AArch64::ST1Threev2d);
6209         return;
6210       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6211         SelectStore(Node, 3, AArch64::ST1Threev1d);
6212         return;
6213       }
6214       break;
6215     }
6216     case Intrinsic::aarch64_neon_st1x4: {
6217       if (VT == MVT::v8i8) {
6218         SelectStore(Node, 4, AArch64::ST1Fourv8b);
6219         return;
6220       } else if (VT == MVT::v16i8) {
6221         SelectStore(Node, 4, AArch64::ST1Fourv16b);
6222         return;
6223       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6224                  VT == MVT::v4bf16) {
6225         SelectStore(Node, 4, AArch64::ST1Fourv4h);
6226         return;
6227       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6228                  VT == MVT::v8bf16) {
6229         SelectStore(Node, 4, AArch64::ST1Fourv8h);
6230         return;
6231       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6232         SelectStore(Node, 4, AArch64::ST1Fourv2s);
6233         return;
6234       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6235         SelectStore(Node, 4, AArch64::ST1Fourv4s);
6236         return;
6237       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6238         SelectStore(Node, 4, AArch64::ST1Fourv2d);
6239         return;
6240       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6241         SelectStore(Node, 4, AArch64::ST1Fourv1d);
6242         return;
6243       }
6244       break;
6245     }
6246     case Intrinsic::aarch64_neon_st2: {
6247       if (VT == MVT::v8i8) {
6248         SelectStore(Node, 2, AArch64::ST2Twov8b);
6249         return;
6250       } else if (VT == MVT::v16i8) {
6251         SelectStore(Node, 2, AArch64::ST2Twov16b);
6252         return;
6253       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6254                  VT == MVT::v4bf16) {
6255         SelectStore(Node, 2, AArch64::ST2Twov4h);
6256         return;
6257       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6258                  VT == MVT::v8bf16) {
6259         SelectStore(Node, 2, AArch64::ST2Twov8h);
6260         return;
6261       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6262         SelectStore(Node, 2, AArch64::ST2Twov2s);
6263         return;
6264       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6265         SelectStore(Node, 2, AArch64::ST2Twov4s);
6266         return;
6267       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6268         SelectStore(Node, 2, AArch64::ST2Twov2d);
6269         return;
6270       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6271         SelectStore(Node, 2, AArch64::ST1Twov1d);
6272         return;
6273       }
6274       break;
6275     }
6276     case Intrinsic::aarch64_neon_st3: {
6277       if (VT == MVT::v8i8) {
6278         SelectStore(Node, 3, AArch64::ST3Threev8b);
6279         return;
6280       } else if (VT == MVT::v16i8) {
6281         SelectStore(Node, 3, AArch64::ST3Threev16b);
6282         return;
6283       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6284                  VT == MVT::v4bf16) {
6285         SelectStore(Node, 3, AArch64::ST3Threev4h);
6286         return;
6287       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6288                  VT == MVT::v8bf16) {
6289         SelectStore(Node, 3, AArch64::ST3Threev8h);
6290         return;
6291       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6292         SelectStore(Node, 3, AArch64::ST3Threev2s);
6293         return;
6294       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6295         SelectStore(Node, 3, AArch64::ST3Threev4s);
6296         return;
6297       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6298         SelectStore(Node, 3, AArch64::ST3Threev2d);
6299         return;
6300       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6301         SelectStore(Node, 3, AArch64::ST1Threev1d);
6302         return;
6303       }
6304       break;
6305     }
6306     case Intrinsic::aarch64_neon_st4: {
6307       if (VT == MVT::v8i8) {
6308         SelectStore(Node, 4, AArch64::ST4Fourv8b);
6309         return;
6310       } else if (VT == MVT::v16i8) {
6311         SelectStore(Node, 4, AArch64::ST4Fourv16b);
6312         return;
6313       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6314                  VT == MVT::v4bf16) {
6315         SelectStore(Node, 4, AArch64::ST4Fourv4h);
6316         return;
6317       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6318                  VT == MVT::v8bf16) {
6319         SelectStore(Node, 4, AArch64::ST4Fourv8h);
6320         return;
6321       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6322         SelectStore(Node, 4, AArch64::ST4Fourv2s);
6323         return;
6324       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6325         SelectStore(Node, 4, AArch64::ST4Fourv4s);
6326         return;
6327       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6328         SelectStore(Node, 4, AArch64::ST4Fourv2d);
6329         return;
6330       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6331         SelectStore(Node, 4, AArch64::ST1Fourv1d);
6332         return;
6333       }
6334       break;
6335     }
6336     case Intrinsic::aarch64_neon_st2lane: {
6337       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6338         SelectStoreLane(Node, 2, AArch64::ST2i8);
6339         return;
6340       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6341                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6342         SelectStoreLane(Node, 2, AArch64::ST2i16);
6343         return;
6344       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6345                  VT == MVT::v2f32) {
6346         SelectStoreLane(Node, 2, AArch64::ST2i32);
6347         return;
6348       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6349                  VT == MVT::v1f64) {
6350         SelectStoreLane(Node, 2, AArch64::ST2i64);
6351         return;
6352       }
6353       break;
6354     }
6355     case Intrinsic::aarch64_neon_st3lane: {
6356       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6357         SelectStoreLane(Node, 3, AArch64::ST3i8);
6358         return;
6359       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6360                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6361         SelectStoreLane(Node, 3, AArch64::ST3i16);
6362         return;
6363       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6364                  VT == MVT::v2f32) {
6365         SelectStoreLane(Node, 3, AArch64::ST3i32);
6366         return;
6367       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6368                  VT == MVT::v1f64) {
6369         SelectStoreLane(Node, 3, AArch64::ST3i64);
6370         return;
6371       }
6372       break;
6373     }
6374     case Intrinsic::aarch64_neon_st4lane: {
6375       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6376         SelectStoreLane(Node, 4, AArch64::ST4i8);
6377         return;
6378       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6379                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6380         SelectStoreLane(Node, 4, AArch64::ST4i16);
6381         return;
6382       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6383                  VT == MVT::v2f32) {
6384         SelectStoreLane(Node, 4, AArch64::ST4i32);
6385         return;
6386       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6387                  VT == MVT::v1f64) {
6388         SelectStoreLane(Node, 4, AArch64::ST4i64);
6389         return;
6390       }
6391       break;
6392     }
6393     case Intrinsic::aarch64_sve_st2q: {
6394       SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
6395       return;
6396     }
6397     case Intrinsic::aarch64_sve_st3q: {
6398       SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
6399       return;
6400     }
6401     case Intrinsic::aarch64_sve_st4q: {
6402       SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
6403       return;
6404     }
6405     case Intrinsic::aarch64_sve_st2: {
6406       if (VT == MVT::nxv16i8) {
6407         SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
6408         return;
6409       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6410                  VT == MVT::nxv8bf16) {
6411         SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
6412         return;
6413       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6414         SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
6415         return;
6416       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6417         SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
6418         return;
6419       }
6420       break;
6421     }
6422     case Intrinsic::aarch64_sve_st3: {
6423       if (VT == MVT::nxv16i8) {
6424         SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
6425         return;
6426       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6427                  VT == MVT::nxv8bf16) {
6428         SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
6429         return;
6430       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6431         SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
6432         return;
6433       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6434         SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
6435         return;
6436       }
6437       break;
6438     }
6439     case Intrinsic::aarch64_sve_st4: {
6440       if (VT == MVT::nxv16i8) {
6441         SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
6442         return;
6443       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
6444                  VT == MVT::nxv8bf16) {
6445         SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
6446         return;
6447       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
6448         SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
6449         return;
6450       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
6451         SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
6452         return;
6453       }
6454       break;
6455     }
6456     }
6457     break;
6458   }
6459   case AArch64ISD::LD2post: {
6460     if (VT == MVT::v8i8) {
6461       SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
6462       return;
6463     } else if (VT == MVT::v16i8) {
6464       SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
6465       return;
6466     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6467       SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
6468       return;
6469     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6470       SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
6471       return;
6472     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6473       SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
6474       return;
6475     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6476       SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
6477       return;
6478     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6479       SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6480       return;
6481     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6482       SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
6483       return;
6484     }
6485     break;
6486   }
6487   case AArch64ISD::LD3post: {
6488     if (VT == MVT::v8i8) {
6489       SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
6490       return;
6491     } else if (VT == MVT::v16i8) {
6492       SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
6493       return;
6494     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6495       SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
6496       return;
6497     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6498       SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
6499       return;
6500     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6501       SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
6502       return;
6503     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6504       SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
6505       return;
6506     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6507       SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6508       return;
6509     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6510       SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
6511       return;
6512     }
6513     break;
6514   }
6515   case AArch64ISD::LD4post: {
6516     if (VT == MVT::v8i8) {
6517       SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
6518       return;
6519     } else if (VT == MVT::v16i8) {
6520       SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
6521       return;
6522     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6523       SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
6524       return;
6525     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6526       SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
6527       return;
6528     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6529       SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
6530       return;
6531     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6532       SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
6533       return;
6534     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6535       SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6536       return;
6537     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6538       SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
6539       return;
6540     }
6541     break;
6542   }
6543   case AArch64ISD::LD1x2post: {
6544     if (VT == MVT::v8i8) {
6545       SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
6546       return;
6547     } else if (VT == MVT::v16i8) {
6548       SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
6549       return;
6550     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6551       SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
6552       return;
6553     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6554       SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
6555       return;
6556     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6557       SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
6558       return;
6559     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6560       SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
6561       return;
6562     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6563       SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
6564       return;
6565     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6566       SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
6567       return;
6568     }
6569     break;
6570   }
6571   case AArch64ISD::LD1x3post: {
6572     if (VT == MVT::v8i8) {
6573       SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
6574       return;
6575     } else if (VT == MVT::v16i8) {
6576       SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
6577       return;
6578     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6579       SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
6580       return;
6581     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6582       SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
6583       return;
6584     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6585       SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
6586       return;
6587     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6588       SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
6589       return;
6590     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6591       SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
6592       return;
6593     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6594       SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
6595       return;
6596     }
6597     break;
6598   }
6599   case AArch64ISD::LD1x4post: {
6600     if (VT == MVT::v8i8) {
6601       SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
6602       return;
6603     } else if (VT == MVT::v16i8) {
6604       SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
6605       return;
6606     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6607       SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
6608       return;
6609     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6610       SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
6611       return;
6612     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6613       SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
6614       return;
6615     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6616       SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
6617       return;
6618     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6619       SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
6620       return;
6621     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6622       SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
6623       return;
6624     }
6625     break;
6626   }
6627   case AArch64ISD::LD1DUPpost: {
6628     if (VT == MVT::v8i8) {
6629       SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
6630       return;
6631     } else if (VT == MVT::v16i8) {
6632       SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
6633       return;
6634     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6635       SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
6636       return;
6637     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6638       SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
6639       return;
6640     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6641       SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
6642       return;
6643     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6644       SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
6645       return;
6646     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6647       SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
6648       return;
6649     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6650       SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
6651       return;
6652     }
6653     break;
6654   }
6655   case AArch64ISD::LD2DUPpost: {
6656     if (VT == MVT::v8i8) {
6657       SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
6658       return;
6659     } else if (VT == MVT::v16i8) {
6660       SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
6661       return;
6662     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6663       SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
6664       return;
6665     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6666       SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
6667       return;
6668     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6669       SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
6670       return;
6671     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6672       SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
6673       return;
6674     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6675       SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
6676       return;
6677     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6678       SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
6679       return;
6680     }
6681     break;
6682   }
6683   case AArch64ISD::LD3DUPpost: {
6684     if (VT == MVT::v8i8) {
6685       SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
6686       return;
6687     } else if (VT == MVT::v16i8) {
6688       SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
6689       return;
6690     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6691       SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
6692       return;
6693     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6694       SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
6695       return;
6696     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6697       SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
6698       return;
6699     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6700       SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
6701       return;
6702     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6703       SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
6704       return;
6705     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6706       SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
6707       return;
6708     }
6709     break;
6710   }
6711   case AArch64ISD::LD4DUPpost: {
6712     if (VT == MVT::v8i8) {
6713       SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
6714       return;
6715     } else if (VT == MVT::v16i8) {
6716       SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
6717       return;
6718     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6719       SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
6720       return;
6721     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
6722       SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
6723       return;
6724     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6725       SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
6726       return;
6727     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6728       SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
6729       return;
6730     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6731       SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
6732       return;
6733     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6734       SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
6735       return;
6736     }
6737     break;
6738   }
6739   case AArch64ISD::LD1LANEpost: {
6740     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6741       SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
6742       return;
6743     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6744                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6745       SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
6746       return;
6747     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6748                VT == MVT::v2f32) {
6749       SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
6750       return;
6751     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6752                VT == MVT::v1f64) {
6753       SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
6754       return;
6755     }
6756     break;
6757   }
6758   case AArch64ISD::LD2LANEpost: {
6759     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6760       SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
6761       return;
6762     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6763                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6764       SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
6765       return;
6766     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6767                VT == MVT::v2f32) {
6768       SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
6769       return;
6770     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6771                VT == MVT::v1f64) {
6772       SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
6773       return;
6774     }
6775     break;
6776   }
6777   case AArch64ISD::LD3LANEpost: {
6778     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6779       SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
6780       return;
6781     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6782                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6783       SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
6784       return;
6785     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6786                VT == MVT::v2f32) {
6787       SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
6788       return;
6789     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6790                VT == MVT::v1f64) {
6791       SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
6792       return;
6793     }
6794     break;
6795   }
6796   case AArch64ISD::LD4LANEpost: {
6797     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6798       SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
6799       return;
6800     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6801                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6802       SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
6803       return;
6804     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6805                VT == MVT::v2f32) {
6806       SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
6807       return;
6808     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6809                VT == MVT::v1f64) {
6810       SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
6811       return;
6812     }
6813     break;
6814   }
6815   case AArch64ISD::ST2post: {
6816     VT = Node->getOperand(1).getValueType();
6817     if (VT == MVT::v8i8) {
6818       SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
6819       return;
6820     } else if (VT == MVT::v16i8) {
6821       SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
6822       return;
6823     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6824       SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
6825       return;
6826     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6827       SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
6828       return;
6829     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6830       SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
6831       return;
6832     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6833       SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
6834       return;
6835     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6836       SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
6837       return;
6838     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6839       SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6840       return;
6841     }
6842     break;
6843   }
6844   case AArch64ISD::ST3post: {
6845     VT = Node->getOperand(1).getValueType();
6846     if (VT == MVT::v8i8) {
6847       SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
6848       return;
6849     } else if (VT == MVT::v16i8) {
6850       SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
6851       return;
6852     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6853       SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
6854       return;
6855     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6856       SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
6857       return;
6858     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6859       SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
6860       return;
6861     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6862       SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
6863       return;
6864     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6865       SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
6866       return;
6867     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6868       SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6869       return;
6870     }
6871     break;
6872   }
6873   case AArch64ISD::ST4post: {
6874     VT = Node->getOperand(1).getValueType();
6875     if (VT == MVT::v8i8) {
6876       SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
6877       return;
6878     } else if (VT == MVT::v16i8) {
6879       SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
6880       return;
6881     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6882       SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
6883       return;
6884     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6885       SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
6886       return;
6887     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6888       SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
6889       return;
6890     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6891       SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
6892       return;
6893     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6894       SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
6895       return;
6896     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6897       SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6898       return;
6899     }
6900     break;
6901   }
6902   case AArch64ISD::ST1x2post: {
6903     VT = Node->getOperand(1).getValueType();
6904     if (VT == MVT::v8i8) {
6905       SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
6906       return;
6907     } else if (VT == MVT::v16i8) {
6908       SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
6909       return;
6910     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6911       SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
6912       return;
6913     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6914       SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
6915       return;
6916     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6917       SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
6918       return;
6919     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6920       SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
6921       return;
6922     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6923       SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
6924       return;
6925     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6926       SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
6927       return;
6928     }
6929     break;
6930   }
6931   case AArch64ISD::ST1x3post: {
6932     VT = Node->getOperand(1).getValueType();
6933     if (VT == MVT::v8i8) {
6934       SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
6935       return;
6936     } else if (VT == MVT::v16i8) {
6937       SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
6938       return;
6939     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6940       SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
6941       return;
6942     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
6943       SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
6944       return;
6945     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6946       SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
6947       return;
6948     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6949       SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
6950       return;
6951     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6952       SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
6953       return;
6954     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6955       SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
6956       return;
6957     }
6958     break;
6959   }
6960   case AArch64ISD::ST1x4post: {
6961     VT = Node->getOperand(1).getValueType();
6962     if (VT == MVT::v8i8) {
6963       SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
6964       return;
6965     } else if (VT == MVT::v16i8) {
6966       SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
6967       return;
6968     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
6969       SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
6970       return;
6971     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
6972       SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
6973       return;
6974     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6975       SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
6976       return;
6977     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6978       SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
6979       return;
6980     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6981       SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
6982       return;
6983     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6984       SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
6985       return;
6986     }
6987     break;
6988   }
6989   case AArch64ISD::ST2LANEpost: {
6990     VT = Node->getOperand(1).getValueType();
6991     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6992       SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
6993       return;
6994     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6995                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6996       SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
6997       return;
6998     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6999                VT == MVT::v2f32) {
7000       SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7001       return;
7002     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7003                VT == MVT::v1f64) {
7004       SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7005       return;
7006     }
7007     break;
7008   }
7009   case AArch64ISD::ST3LANEpost: {
7010     VT = Node->getOperand(1).getValueType();
7011     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7012       SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7013       return;
7014     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7015                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7016       SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7017       return;
7018     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7019                VT == MVT::v2f32) {
7020       SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7021       return;
7022     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7023                VT == MVT::v1f64) {
7024       SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7025       return;
7026     }
7027     break;
7028   }
7029   case AArch64ISD::ST4LANEpost: {
7030     VT = Node->getOperand(1).getValueType();
7031     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7032       SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7033       return;
7034     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7035                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7036       SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7037       return;
7038     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7039                VT == MVT::v2f32) {
7040       SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7041       return;
7042     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7043                VT == MVT::v1f64) {
7044       SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7045       return;
7046     }
7047     break;
7048   }
7049   case AArch64ISD::SVE_LD2_MERGE_ZERO: {
7050     if (VT == MVT::nxv16i8) {
7051       SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
7052       return;
7053     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7054                VT == MVT::nxv8bf16) {
7055       SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
7056       return;
7057     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7058       SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
7059       return;
7060     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7061       SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
7062       return;
7063     }
7064     break;
7065   }
7066   case AArch64ISD::SVE_LD3_MERGE_ZERO: {
7067     if (VT == MVT::nxv16i8) {
7068       SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
7069       return;
7070     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7071                VT == MVT::nxv8bf16) {
7072       SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
7073       return;
7074     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7075       SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
7076       return;
7077     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7078       SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
7079       return;
7080     }
7081     break;
7082   }
7083   case AArch64ISD::SVE_LD4_MERGE_ZERO: {
7084     if (VT == MVT::nxv16i8) {
7085       SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
7086       return;
7087     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7088                VT == MVT::nxv8bf16) {
7089       SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
7090       return;
7091     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7092       SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
7093       return;
7094     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7095       SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
7096       return;
7097     }
7098     break;
7099   }
7100   }
7101 
7102   // Select the default instruction
7103   SelectCode(Node);
7104 }
7105 
7106 /// createAArch64ISelDag - This pass converts a legalized DAG into a
7107 /// AArch64-specific DAG, ready for instruction scheduling.
7108 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
7109                                          CodeGenOptLevel OptLevel) {
7110   return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7111 }
7112 
7113 /// When \p PredVT is a scalable vector predicate in the form
7114 /// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7115 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7116 /// structured vectors (NumVec >1), the output data type is
7117 /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7118 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7119 /// EVT.
7120 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
7121                                                 unsigned NumVec) {
7122   assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7123   if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
7124     return EVT();
7125 
7126   if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7127       PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7128     return EVT();
7129 
7130   ElementCount EC = PredVT.getVectorElementCount();
7131   EVT ScalarVT =
7132       EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7133   EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7134 
7135   return MemVT;
7136 }
7137 
7138 /// Return the EVT of the data associated to a memory operation in \p
7139 /// Root. If such EVT cannot be retrived, it returns an invalid EVT.
7140 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
7141   if (isa<MemSDNode>(Root))
7142     return cast<MemSDNode>(Root)->getMemoryVT();
7143 
7144   if (isa<MemIntrinsicSDNode>(Root))
7145     return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
7146 
7147   const unsigned Opcode = Root->getOpcode();
7148   // For custom ISD nodes, we have to look at them individually to extract the
7149   // type of the data moved to/from memory.
7150   switch (Opcode) {
7151   case AArch64ISD::LD1_MERGE_ZERO:
7152   case AArch64ISD::LD1S_MERGE_ZERO:
7153   case AArch64ISD::LDNF1_MERGE_ZERO:
7154   case AArch64ISD::LDNF1S_MERGE_ZERO:
7155     return cast<VTSDNode>(Root->getOperand(3))->getVT();
7156   case AArch64ISD::ST1_PRED:
7157     return cast<VTSDNode>(Root->getOperand(4))->getVT();
7158   case AArch64ISD::SVE_LD2_MERGE_ZERO:
7159     return getPackedVectorTypeFromPredicateType(
7160         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
7161   case AArch64ISD::SVE_LD3_MERGE_ZERO:
7162     return getPackedVectorTypeFromPredicateType(
7163         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
7164   case AArch64ISD::SVE_LD4_MERGE_ZERO:
7165     return getPackedVectorTypeFromPredicateType(
7166         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
7167   default:
7168     break;
7169   }
7170 
7171   if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7172     return EVT();
7173 
7174   switch (Root->getConstantOperandVal(1)) {
7175   default:
7176     return EVT();
7177   case Intrinsic::aarch64_sme_ldr:
7178   case Intrinsic::aarch64_sme_str:
7179     return MVT::nxv16i8;
7180   case Intrinsic::aarch64_sve_prf:
7181     // We are using an SVE prefetch intrinsic. Type must be inferred from the
7182     // width of the predicate.
7183     return getPackedVectorTypeFromPredicateType(
7184         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7185   case Intrinsic::aarch64_sve_ld2_sret:
7186   case Intrinsic::aarch64_sve_ld2q_sret:
7187     return getPackedVectorTypeFromPredicateType(
7188         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7189   case Intrinsic::aarch64_sve_st2q:
7190     return getPackedVectorTypeFromPredicateType(
7191         Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7192   case Intrinsic::aarch64_sve_ld3_sret:
7193   case Intrinsic::aarch64_sve_ld3q_sret:
7194     return getPackedVectorTypeFromPredicateType(
7195         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7196   case Intrinsic::aarch64_sve_st3q:
7197     return getPackedVectorTypeFromPredicateType(
7198         Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7199   case Intrinsic::aarch64_sve_ld4_sret:
7200   case Intrinsic::aarch64_sve_ld4q_sret:
7201     return getPackedVectorTypeFromPredicateType(
7202         Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7203   case Intrinsic::aarch64_sve_st4q:
7204     return getPackedVectorTypeFromPredicateType(
7205         Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7206   case Intrinsic::aarch64_sve_ld1udq:
7207   case Intrinsic::aarch64_sve_st1dq:
7208     return EVT(MVT::nxv1i64);
7209   case Intrinsic::aarch64_sve_ld1uwq:
7210   case Intrinsic::aarch64_sve_st1wq:
7211     return EVT(MVT::nxv1i32);
7212   }
7213 }
7214 
7215 /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7216 /// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
7217 /// where Root is the memory access using N for its address.
7218 template <int64_t Min, int64_t Max>
7219 bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7220                                                    SDValue &Base,
7221                                                    SDValue &OffImm) {
7222   const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7223   const DataLayout &DL = CurDAG->getDataLayout();
7224   const MachineFrameInfo &MFI = MF->getFrameInfo();
7225 
7226   if (N.getOpcode() == ISD::FrameIndex) {
7227     int FI = cast<FrameIndexSDNode>(N)->getIndex();
7228     // We can only encode VL scaled offsets, so only fold in frame indexes
7229     // referencing SVE objects.
7230     if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
7231       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7232       OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7233       return true;
7234     }
7235 
7236     return false;
7237   }
7238 
7239   if (MemVT == EVT())
7240     return false;
7241 
7242   if (N.getOpcode() != ISD::ADD)
7243     return false;
7244 
7245   SDValue VScale = N.getOperand(1);
7246   if (VScale.getOpcode() != ISD::VSCALE)
7247     return false;
7248 
7249   TypeSize TS = MemVT.getSizeInBits();
7250   int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7251   int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7252 
7253   if ((MulImm % MemWidthBytes) != 0)
7254     return false;
7255 
7256   int64_t Offset = MulImm / MemWidthBytes;
7257   if (Offset < Min || Offset > Max)
7258     return false;
7259 
7260   Base = N.getOperand(0);
7261   if (Base.getOpcode() == ISD::FrameIndex) {
7262     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7263     // We can only encode VL scaled offsets, so only fold in frame indexes
7264     // referencing SVE objects.
7265     if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
7266       Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7267   }
7268 
7269   OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7270   return true;
7271 }
7272 
7273 /// Select register plus register addressing mode for SVE, with scaled
7274 /// offset.
7275 bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7276                                                   SDValue &Base,
7277                                                   SDValue &Offset) {
7278   if (N.getOpcode() != ISD::ADD)
7279     return false;
7280 
7281   // Process an ADD node.
7282   const SDValue LHS = N.getOperand(0);
7283   const SDValue RHS = N.getOperand(1);
7284 
7285   // 8 bit data does not come with the SHL node, so it is treated
7286   // separately.
7287   if (Scale == 0) {
7288     Base = LHS;
7289     Offset = RHS;
7290     return true;
7291   }
7292 
7293   if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7294     int64_t ImmOff = C->getSExtValue();
7295     unsigned Size = 1 << Scale;
7296 
7297     // To use the reg+reg addressing mode, the immediate must be a multiple of
7298     // the vector element's byte size.
7299     if (ImmOff % Size)
7300       return false;
7301 
7302     SDLoc DL(N);
7303     Base = LHS;
7304     Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7305     SDValue Ops[] = {Offset};
7306     SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7307     Offset = SDValue(MI, 0);
7308     return true;
7309   }
7310 
7311   // Check if the RHS is a shift node with a constant.
7312   if (RHS.getOpcode() != ISD::SHL)
7313     return false;
7314 
7315   const SDValue ShiftRHS = RHS.getOperand(1);
7316   if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7317     if (C->getZExtValue() == Scale) {
7318       Base = LHS;
7319       Offset = RHS.getOperand(0);
7320       return true;
7321     }
7322 
7323   return false;
7324 }
7325 
7326 bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7327   const AArch64TargetLowering *TLI =
7328       static_cast<const AArch64TargetLowering *>(getTargetLowering());
7329 
7330   return TLI->isAllActivePredicate(*CurDAG, N);
7331 }
7332 
7333 bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7334   EVT VT = N.getValueType();
7335   return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
7336 }
7337 
7338 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7339                                              SDValue &Base, SDValue &Offset,
7340                                              unsigned Scale) {
7341   // Try to untangle an ADD node into a 'reg + offset'
7342   if (N.getOpcode() == ISD::ADD)
7343     if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
7344       int64_t ImmOff = C->getSExtValue();
7345       if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0))) {
7346         Base = N.getOperand(0);
7347         Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7348         return true;
7349       }
7350     }
7351 
7352   // By default, just match reg + 0.
7353   Base = N;
7354   Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7355   return true;
7356 }
7357