xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1//=-- SMEInstrFormats.td -  AArch64 SME Instruction classes -*- tablegen -*--=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
10//
11//===----------------------------------------------------------------------===//
12
// Complex patterns that match an i32 and produce one operand through the C++
// selector ImmToReg<BaseReg, N>. There is one per ZA tile element size
// (ZAB0/ZAH0/ZAS0/ZAD0/ZAQ0) plus one for ZT0.
// NOTE(review): the second template argument looks like the largest valid
// immediate for that register family -- confirm against ImmToReg.
def imm_to_tile8   : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAB0, 0>",  []>;
def imm_to_tile16  : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAH0, 1>",  []>;
def imm_to_tile32  : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAS0, 3>",  []>;
def imm_to_tile64  : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAD0, 7>",  []>;
def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAQ0, 15>", []>;
def imm_to_zt      : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZT0,  0>",  []>;
19
// Complex patterns that split an i32 tile-slice index into two operands (used
// below as a base register and an immediate offset) via
// SelectSMETileSlice<A, B>. One per element size; B is 1 for all of them.
// NOTE(review): A appears to be the largest offset that can be folded and B a
// scale factor -- confirm against SelectSMETileSlice.
def tileslice8   : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>;
def tileslice16  : ComplexPattern<i32 , 2, "SelectSMETileSlice<7,  1>", []>;
def tileslice32  : ComplexPattern<i32 , 2, "SelectSMETileSlice<3,  1>", []>;
def tileslice64  : ComplexPattern<i32 , 2, "SelectSMETileSlice<1,  1>", []>;
def tileslice128 : ComplexPattern<i32 , 2, "SelectSMETileSlice<0,  1>", []>; // nop
25
// Two-operand tile-slice complex patterns for slice ranges. These use the same
// SelectSMETileSlice selector as the single-slice forms, but with a second
// template argument of 2 (the *s2 variants) ...
def tileslicerange3s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<14, 2>", []>;
def tileslicerange2s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<6,  2>", []>;
def tileslicerange1s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<2,  2>", []>;
def tileslicerange0s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<0,  2>", []>;

// ... or 4 (the *s4 variants).
def tileslicerange2s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<12, 4>", []>;
def tileslicerange1s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<4,  4>", []>;
def tileslicerange0s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<0,  4>", []>;
34
// Pointer-typed addressing-mode pattern yielding two operands, selected by
// SelectAddrModeIndexedSVE<0,15>. SDNPWantRoot passes the root node to the
// selector.
def am_sme_indexed_b4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [], [SDNPWantRoot]>;
36
// Type profile shared by the ZA load/store nodes: no results and three
// operands, where operand 0 is an integer, operand 1 is pointer-typed and
// operand 2 is an integer.
def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
// AArch64ISD::SME_ZA_LDR: chained, side-effecting node that may load.
def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore,
                             [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
// AArch64ISD::SME_ZA_STR: chained, side-effecting node that may store.
def AArch64SMEStr : SDNode<"AArch64ISD::SME_ZA_STR", SDTZALoadStore,
                             [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
42
43//===----------------------------------------------------------------------===//
44// SME Pseudo Classes
45//===----------------------------------------------------------------------===//
46
// Instruction mapping over all records that derive from SMEPseudo2Instr.
// Records sharing the same PseudoName form one row; the IsInstr field selects
// the column. The key column is IsInstr = 0 and the single value column is
// IsInstr = 1, so the map goes from the IsInstr = 0 record to its IsInstr = 1
// counterpart.
def getSMEPseudoMap : InstrMapping {
  let FilterClass = "SMEPseudo2Instr";
  let RowFields = ["PseudoName"];
  let ColFields = ["IsInstr"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}
54
// Mix-in that tags a record for getSMEPseudoMap.
//   name  - row key shared by a pseudo and its real instruction.
//   instr - column key: 0 or 1; in this file pseudos pass 0 and real
//           instructions pass 1.
class SMEPseudo2Instr<string name, bit instr> {
  string PseudoName = name;
  bit IsInstr = instr;
}
59
// Pseudo used when selecting outer-product intrinsics. It takes the tile as an
// i32 immediate followed by two predicates and two vectors of type zpr_ty, and
// records za_flag as its SMEMatrixType.
class sme_outer_product_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
    : Pseudo<(outs),
             (ins i32imm:$tile,
                  PPR3bAny:$pn, PPR3bAny:$pm,
                  zpr_ty:$zn, zpr_ty:$zm),
             []>,
      Sched<[]> {
  // Rewritten to the real instruction by the custom inserter in
  // AArch64ISelLowering.cpp.
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
68
// Pseudo (IsInstr = 0 entry for `name` in getSMEPseudoMap) taking a slice
// register, an index immediate, a multi-vector operand and a single vector.
// It is expanded by a custom inserter.
class sme2_za_array_2op_multi_single_pseudo<string name, Operand index_ty,
                                            RegisterOperand multi_vector_ty,
                                            ZPRRegOp zpr_ty,
                                            SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3,
                  multi_vector_ty:$Zn, zpr_ty:$Zm),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
76
// Pseudo (IsInstr = 0 entry for `name` in getSMEPseudoMap) taking a slice
// register, an index immediate and two multi-vector operands of the same
// type. It is expanded by a custom inserter.
class sme2_za_array_2op_multi_multi_pseudo<string name, Operand index_ty,
                                           RegisterOperand multi_vector_ty,
                                           SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3,
                  multi_vector_ty:$Zn, multi_vector_ty:$Zm),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
84
// Pseudo (IsInstr = 0 entry for `name` in getSMEPseudoMap) taking a slice
// register, an index immediate, a multi-vector operand, a single vector and a
// trailing immediate $i of type imm_ty. It is expanded by a custom inserter.
class sme2_za_array_2op_multi_index_pseudo<string name, Operand index_ty,
                                           RegisterOperand multi_vector_ty,
                                           ZPRRegOp zpr_ty, Operand imm_ty,
                                           SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3,
                  multi_vector_ty:$Zn, zpr_ty:$Zm, imm_ty:$i),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
92
// Pseudo (IsInstr = 0 entry for `name` in getSMEPseudoMap) taking a slice
// register (W8-W11 operand class), an immediate and a multi-vector source.
// It is expanded by a custom inserter.
class sme2_move_to_za_pseudo<string name, Operand imm_ty,
                             RegisterOperand multi_vector_ty,
                             SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rs, imm_ty:$imm,
                  multi_vector_ty:$Zn),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
99
// Pseudo (IsInstr = 0 entry for `name` in getSMEPseudoMap) taking a tile
// immediate, a slice register (W12-W15 operand class), an immediate and a
// multi-vector source. It is expanded by a custom inserter.
class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty,
                               RegisterOperand multi_vector_ty,
                               SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm,
                  multi_vector_ty:$Zn),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
106
// Pseudo (IsInstr = 0 entry for `name` in getSMEPseudoMap) taking only a
// slice register and an index immediate. It is expanded by a custom inserter.
// NOTE(review): the "sem2p1" prefix looks like a typo for "sme2p1"; the name
// is kept because users elsewhere refer to it.
class sem2p1_zero_matrix_pseudo<string name, Operand index_ty,
                                SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rs, index_ty:$imm),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
113
// Pseudo (IsInstr = 0 entry for `name` in getSMEPseudoMap) that defines a
// vector result $Zn from a tile immediate, a slice register (W12-W15 operand
// class) and an immediate. It is expanded by a custom inserter.
class sme2_movez_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty,
                                RegisterOperand vector_ty,
                                SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs vector_ty:$Zn),
             (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
120
// Pseudo (IsInstr = 0 entry for `name` in getSMEPseudoMap) that defines a
// multi-vector result $Zd from a slice register (W8-W11 operand class) and an
// index immediate. It is expanded by a custom inserter.
class sme2_movaz_array_to_tile_pseudo<string name, Operand index_ty,
                                      RegisterOperand multi_vector_ty,
                                      SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs multi_vector_ty:$Zd),
             (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3),
             []> {
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
128
129//===----------------------------------------------------------------------===//
// SME2 pattern match helpers.
131//===----------------------------------------------------------------------===//
132
// Selects `intrinsic(slice, Zn, Zm)` to the instruction named `name`_PSEUDO.
// The i32 slice operand is split by `tileslice` into $base (W8-W11 operand
// class) and $offset (index_ty); both vectors have value type `vt`.
class SME2_ZA_TwoOp_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
                                     ValueType vt, ComplexPattern tileslice>
    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm),
          (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm)>;
137
138
// Two-vector form: selects `intrinsic(slice, Zn1, Zn2, Zm)` to `name`_PSEUDO.
// $Zn1/$Zn2 are packed into a ZPR2 tuple (zsub0, zsub1) with REG_SEQUENCE;
// $Zm is passed as a single zpr_ty register.
class SME2_ZA_TwoOp_VG2_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
                                         ValueType vt, ComplexPattern tileslice>
    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm),
          (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
                                              zpr_ty:$Zm)>;
// Four-vector form: selects `intrinsic(slice, Zn1..Zn4, Zm)` to `name`_PSEUDO.
// $Zn1..$Zn4 are packed into a ZPR4 tuple (zsub0..zsub3) with REG_SEQUENCE;
// $Zm is passed as a single zpr_ty register.
class SME2_ZA_TwoOp_VG4_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty,
                                         ValueType vt, ComplexPattern tileslice>
    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
                     vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm),
          (!cast<Instruction>(name # _PSEUDO) $base, $offset,
                                              (REG_SEQUENCE ZPR4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
                                              zpr_ty:$Zm)>;
151
// Selects `intrinsic(slice, Zn1, Zn2, Zm1, Zm2)` to `name`_PSEUDO. Each pair
// of vectors is packed into its own ZPR2Mul2 tuple (zsub0, zsub1).
class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2),
          (!cast<Instruction>(name # _PSEUDO) $base, $offset,
                                              (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1),
                                              (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>;
157
// Selects `intrinsic(slice, Zn1..Zn4, Zm1..Zm4)` to `name`_PSEUDO. Each group
// of four vectors is packed into its own ZPR4Mul4 tuple (zsub0..zsub3).
class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice>
    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
                     vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4),
          (!cast<Instruction>(name # _PSEUDO) $base, $offset,
                                              (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
                                              (REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1, vt:$Zm3, zsub2, vt:$Zm4, zsub3))>;
164
// Selects `intrinsic(slice, Zn, Zm, i)` to `name`_PSEUDO, forwarding the
// trailing i32 immediate $i (constrained by imm_ty) unchanged.
class SME2_ZA_TwoOp_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
                                    Operand imm_ty, ComplexPattern tileslice>
   : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm, (i32 imm_ty:$i)),
         (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm, (i32 imm_ty:$i))>;
169
170
// Two-vector indexed form: selects `intrinsic(slice, Zn1, Zn2, Zm, i)` to
// `name`_PSEUDO, packing $Zn1/$Zn2 into a ZPR2Mul2 tuple.
class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
                                        Operand imm_ty, ComplexPattern tileslice>
    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)),
          (!cast<Instruction>(name # _PSEUDO) $base, $offset,
                                              (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), zpr_ty:$Zm, imm_ty:$i)>;
176
// Four-vector indexed form: selects `intrinsic(slice, Zn1..Zn4, Zm, i)` to
// `name`_PSEUDO, packing $Zn1..$Zn4 into a ZPR4Mul4 tuple.
class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt,
                                        Operand imm_ty, ComplexPattern tileslice>
    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
                     vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)),
          (!cast<Instruction>(name # _PSEUDO) $base, $offset,
                                              (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
                                              zpr_ty:$Zm, imm_ty:$i)>;
184
// Selects `out_vt intrinsic(Zn1, Zn2, i)` directly to instruction `name` (no
// _PSEUDO suffix), packing the two in_vt inputs into a ZPR2Mul2 tuple and
// forwarding the immediate $i.
class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
                  (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>;
188
// Selects `out_vt intrinsic(Zn1..Zn4, i)` directly to instruction `name`,
// packing the four in_vt inputs into a ZPR4Mul4 tuple and forwarding the
// immediate $i.
class SME2_Sat_Shift_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))),
                  (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3),
                                            imm_ty:$i)>;
193
// Selects `out_vt intrinsic(Zn1..Zn4)` directly to instruction `name`, packing
// the four in_vt inputs into a ZPR4Mul4 tuple.
class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
                  (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;
197
// Selects `intrinsic(slice, Zn1, Zn2)` to `name`_PSEUDO. The slice is split
// into $base (W8-W11 operand class) and $offset; the vectors are packed into a
// ZPR2Mul2 tuple.
class SME2_ZA_VG1x2_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
          (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
201
// Selects `intrinsic(slice, Zn1..Zn4)` to `name`_PSEUDO. The slice is split
// into $base (W8-W11 operand class) and $offset; the vectors are packed into a
// ZPR4Mul4 tuple.
class SME2_ZA_VG1x4_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
          (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
205
// Tile form: selects `intrinsic(tile, slice, Zn1, Zn2)` to `name`_PSEUDO. The
// tile immediate is forwarded, the slice is split into $base (W12-W15 operand
// class) and $offset, and the vectors are packed into a ZPR2Mul2 tuple.
class SME2_Tile_VG2_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
    : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
          (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
209
// Tile form: selects `intrinsic(tile, slice, Zn1..Zn4)` to `name`_PSEUDO. The
// tile immediate is forwarded, the slice is split into $base (W12-W15 operand
// class) and $offset, and the vectors are packed into a ZPR4Mul4 tuple.
class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
    : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
          (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
213
// Selects `intrinsic(slice)` to the instruction named exactly `name`. Unlike
// the neighbouring helpers, no _PSEUDO suffix is appended here, so the caller
// must pass the full record name.
class SME2_Zero_Matrix_Pat<string name, SDPatternOperator intrinsic, Operand offset_ty, ComplexPattern tileslice>
    : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, offset_ty:$offset))),
    (!cast<Instruction>(name) $base, $offset)>;
217
// Selects `out_vt intrinsic(tile, slice)` to `name`_PSEUDO, forwarding the
// tile immediate and splitting the slice into $base (W12-W15 operand class)
// and $offset.
class SME2_Tile_Movaz_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, Operand tile_imm, Operand index_ty, ComplexPattern tileslice>
    : Pat<(out_vt (intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)))),
          (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset)>;
221
222//===----------------------------------------------------------------------===//
223// SME pattern match helpers.
224//===----------------------------------------------------------------------===//
225
// Selects `intrinsic(tile, Pn, Pm, Zn, Zm)` to `name`_PSEUDO, passing all five
// operands through in the same order. Both predicates have type pg_ty and
// both vectors have type vt.
class SME_ZA_Tile_TwoPred_TwoVec_Pat<string name, SDPatternOperator intrinsic, Operand imm_ty, ValueType pg_ty, ValueType vt>
    : Pat<(intrinsic imm_ty:$tile, (pg_ty PPR3bAny:$Pn), (pg_ty PPR3bAny:$Pm), vt:$Zn, vt:$Zm),
          (!cast<Instruction>(name # _PSEUDO) $tile, $Pn, $Pm, $Zn, $Zm)>;
229
230
231//===----------------------------------------------------------------------===//
232// SME smstart/smstop
233//===----------------------------------------------------------------------===//
234
235// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
236// both fields:
237//
238//   MSR SVCRSM, #<imm1>
239//   MSR SVCRZA, #<imm1>
240//   MSR SVCRSMZA, #<imm1>
241//
// It's tricky to use the existing pstate operand defined in
// AArch64SystemOperands.td, since it only encodes 5 bits (op1 and op2),
// whereas these fields are also encoded in CRm[3:1].
// "msr <pstatefield>, #<imm>" for the SVCR pstate fields. Takes a 3-bit
// svcr_op field selector and a 1-bit immediate.
def MSRpstatesvcrImm1
  : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr",
                      "\t$pstatefield, $imm">,
    Sched<[WriteSys]> {
  bits<3> pstatefield;
  bit imm;
  let Inst{18-16} = 0b011; // op1
  // The field selector and the immediate occupy the four bits Inst{11-8}.
  let Inst{11-9} = pstatefield;
  let Inst{8} = imm;
  let Inst{7-5} = 0b011; // op2
  // Request the post-instruction-selection hook for this instruction.
  let hasPostISelHook = 1;
}
257
// Assembly aliases for MSRpstatesvcrImm1. The first operand is the field
// selector: 0b001 for "sm", 0b010 for "za", and 0b011 when no operand is
// written. The second operand is the immediate: 1 for smstart.
def : InstAlias<"smstart",    (MSRpstatesvcrImm1 0b011, 0b1)>;
def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;

// Same field selectors with immediate 0 for smstop.
def : InstAlias<"smstop",     (MSRpstatesvcrImm1 0b011, 0b0)>;
def : InstAlias<"smstop sm",  (MSRpstatesvcrImm1 0b001, 0b0)>;
def : InstAlias<"smstop za",  (MSRpstatesvcrImm1 0b010, 0b0)>;
265
266
267//===----------------------------------------------------------------------===//
268// SME Outer Products
269//===----------------------------------------------------------------------===//
270
// Encoding class for the floating-point outer-product instructions.
//   S, sz, op - opcode bits placed at Inst{4}, Inst{22-21} and
//               Inst{24}/Inst{3} (op{1}/op{0}) respectively.
//   za_ty     - tile operand class; the tile is both read ($_ZAda) and
//               written ($ZAda), tied together by the constraint below.
//   zpr_ty    - register class of both vector sources.
// Inst{2-0} are not set here; the users of this class in this file assign
// them.
class sme_fp_outer_product_inst<bit S, bits<2> sz, bits<2> op, MatrixTileOperand za_ty,
                                ZPRRegOp zpr_ty, string mnemonic>
    : I<(outs za_ty:$ZAda),
      (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-25} = 0b1000000;
  let Inst{24}    = op{1};
  let Inst{23}    = 0b1;
  let Inst{22-21} = sz;
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = S;
  let Inst{3}     = op{0};

  // The accumulator tile is an in/out operand.
  let Constraints = "$ZAda = $_ZAda";
}
295
// Defines a 32-bit-tile FP outer product: the real instruction (NAME), its
// pseudo (NAME_PSEUDO) and the selection pattern from `op` to the pseudo.
multiclass sme_outer_product_fp32<bit S, bits<2> sz, ZPRRegOp zpr_ty, string mnemonic, SDPatternOperator op> {
  def NAME : sme_fp_outer_product_inst<S, sz, 0b00, TileOp32, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    // 2-bit tile number in Inst{1-0}; Inst{2} is fixed to zero.
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2}   = 0b0;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<zpr_ty, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  // Tile immediate in [0, 3]; nxv4i1 predicates and nxv4f32 vectors.
  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv4i1, nxv4f32>;
}
307
// Defines a 64-bit-tile FP outer product: the real instruction (NAME), its
// pseudo (NAME_PSEUDO) and the selection pattern from `op` to the pseudo.
multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
  def NAME : sme_fp_outer_product_inst<S, 0b10, 0b00, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    // 3-bit tile number fills Inst{2-0}.
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;

  // Tile immediate in [0, 7]; nxv2i1 predicates and nxv2f64 vectors.
  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv2i1, nxv2f64>;
}
318
// Defines only the real instruction (no pseudo or pattern) for an outer
// product with ZPR8 sources and a TileOp16 destination. `bf` is the low bit of
// the 2-bit sz field ({0,bf}); `op` is forwarded unchanged.
multiclass sme2p1_fmop_tile_f8f16<string mnemonic, bit bf, bit s, bits<2> op> {
  def NAME : sme_fp_outer_product_inst<s, {0,bf}, op, TileOp16, ZPR8, mnemonic> {
    // 1-bit tile number in Inst{0}; Inst{2-1} are fixed to zero.
    bits<1> ZAda;
    let Inst{2-1} = 0b00;
    let Inst{0}   = ZAda;
  }
}
326
// Defines a 16-bit-tile outer product with ZPR16 sources: the real
// instruction, its pseudo and the selection pattern. `intrinsic` defaults to
// null_frag.
multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s, ValueType vt, SDPatternOperator intrinsic = null_frag> {
  def NAME : sme_fp_outer_product_inst<s, {0,bf}, 0b11, TileOp16, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    // 1-bit tile number in Inst{0}; Inst{2-1} are fixed to zero.
    bits<1> ZAda;
    let Inst{2-1} = 0b00;
    let Inst{0}   = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileH>, SMEPseudo2Instr<NAME, 0>;

  // Tile immediate in [0, 1]; nxv8i1 predicates and `vt` vectors.
  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_1, nxv8i1, vt>;
}
338
// Encoding class for the integer outer-product instructions.
//   opc  - 3-bit opcode scattered over Inst{24} (opc{2}), Inst{21} (opc{1})
//          and Inst{4} (opc{0}).
//   sz   - placed at Inst{22}.
//   sme2 - placed at Inst{3}.
// The tile is an in/out operand; Inst{2-0} are assigned by the users of this
// class in this file.
class sme_int_outer_product_inst<bits<3> opc, bit sz, bit sme2,
                                 MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
                                 string mnemonic>
    : I<(outs za_ty:$ZAda),
        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-25} = 0b1010000;
  let Inst{24}    = opc{2}; // u0
  let Inst{23}    = 0b1;
  let Inst{22}    = sz;
  let Inst{21}    = opc{1}; // u1
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = opc{0};  //S;
  let Inst{3}     = sme2;

  // The accumulator tile is an in/out operand.
  let Constraints = "$ZAda = $_ZAda";
}
365
// Defines an integer outer product with ZPR8 sources and a 32-bit tile: the
// real instruction, its pseudo and the selection pattern from `op`.
multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
                                     SDPatternOperator op> {
  def NAME : sme_int_outer_product_inst<opc, 0b0, 0b0,  TileOp32,
                                        ZPR8, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    // 2-bit tile number in Inst{1-0}; Inst{2} is fixed to zero.
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2}   = 0b0;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  // Tile immediate in [0, 3]; nxv16i1 predicates and nxv16i8 vectors.
  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv16i1, nxv16i8>;
}
379
// Defines an integer outer product with ZPR16 sources and a 64-bit tile: the
// real instruction, its pseudo and the selection pattern from `op`.
multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
                                     SDPatternOperator op> {
  def NAME : sme_int_outer_product_inst<opc, 0b1, 0b0, TileOp64,
                                        ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
    // 3-bit tile number fills Inst{2-0}.
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;

  // Tile immediate in [0, 7]; nxv8i1 predicates and nxv8i16 vectors.
  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv8i1, nxv8i16>;
}
392
// Encoding class for outer products that accumulate into a 32-bit tile
// (TileOp32) from zpr_ty sources.
//   opc - 3-bit opcode: opc{1} goes to Inst{21}, opc{0} to Inst{4}, and opc{2}
//         goes to Inst{3} while its inverse goes to Inst{24}.
// The 2-bit tile number is encoded in Inst{1-0}; Inst{2} is zero.
class sme_outer_product_widening_inst<bits<3> opc, ZPRRegOp zpr_ty, string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins  TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  bits<2> ZAda;
  let Inst{31-25} = 0b1000000;
  // Inst{24} is the logical inverse of opc{2}.
  let Inst{24}    = !if(opc{2}, 0, 1);
  let Inst{23-22} = 0b10;
  let Inst{21}    = opc{1};
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = opc{0};
  let Inst{3}     = opc{2};
  let Inst{2}     = 0b0;
  let Inst{1-0}   = ZAda;

  // The accumulator tile is an in/out operand.
  let Constraints = "$ZAda = $_ZAda";
}
419
// Defines a widening outer product with ZPR16 sources: the real instruction,
// its pseudo and a selection pattern for nxv8bf16 vectors with nxv8i1
// predicates and a tile immediate in [0, 3].
multiclass sme_bf16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
  def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8bf16>;
}
427
// Same as sme_bf16_outer_product except that the selection pattern uses
// nxv8f16 vectors.
multiclass sme_f16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
  def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8f16>;
}
435
436//===----------------------------------------------------------------------===//
437// SME Add Vector to Tile
438//===----------------------------------------------------------------------===//
439
// Encoding class for the add-vector-to-tile instructions.
//   op - placed at Inst{22}.
//   V  - placed at Inst{16}.
// The instruction takes two predicates and one vector and updates the tile in
// place. Inst{2-0} are assigned by the users of this class in this file.
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
                                  ZPRRegOp zpr_ty, string mnemonic>
    : I<(outs tile_ty:$ZAda),
        (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
        "", []>, Sched<[]> {
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-23} = 0b110000001;
  let Inst{22}    = op;
  let Inst{21-17} = 0b01000;
  let Inst{16}    = V;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4-3}   = 0b00;

  // The accumulator tile is an in/out operand.
  let Constraints = "$ZAda = $_ZAda";
}
460
// Pseudo used when selecting the add-vector-to-tile intrinsics. It takes the
// tile as an i32 immediate followed by two predicates and one vector, and
// records za_flag as its SMEMatrixType.
class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
    : Pseudo<(outs),
             (ins i32imm:$tile,
                  PPR3bAny:$Pn, PPR3bAny:$Pm,
                  zpr_ty:$Zn),
             []>,
      Sched<[]> {
  // Rewritten to the real instruction by the custom inserter in
  // AArch64ISelLowering.cpp.
  let usesCustomInserter = 1;
  let SMEMatrixType = za_flag;
}
469
// 32-bit-tile add-vector-to-tile: defines the real instruction (NAME), its
// pseudo (NAME_PSEUDO_S) and the pattern selecting `op` to the pseudo.
multiclass sme_add_vector_to_tile_u32<bit V, string mnemonic, SDPatternOperator op> {
  def NAME : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    // 2-bit tile number in Inst{1-0}; Inst{2} is fixed to zero.
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2}   = 0b0;
  }

  def _PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32, SMEMatrixTileS>,
                  SMEPseudo2Instr<NAME, 0>;

  // Tile immediate in [0, 3]; nxv4i1 predicates and an nxv4i32 vector.
  def : Pat<(op timm32_0_3:$tile,
                (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
                (nxv4i32 ZPR32:$zn)),
            (!cast<Instruction>(NAME # _PSEUDO_S) timm32_0_3:$tile, $pn, $pm, $zn)>;
}
483
// 64-bit-tile add-vector-to-tile: defines the real instruction (NAME), its
// pseudo (NAME_PSEUDO_D) and the pattern selecting `op` to the pseudo. Only
// the pattern is guarded by HasSMEI16I64.
multiclass sme_add_vector_to_tile_u64<bit V, string mnemonic, SDPatternOperator op> {
  def NAME : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    // 3-bit tile number fills Inst{2-0}.
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def _PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64, SMEMatrixTileD>,
                  SMEPseudo2Instr<NAME, 0>;

  // Tile immediate in [0, 7]; nxv2i1 predicates and an nxv2i64 vector.
  let Predicates = [HasSMEI16I64] in {
    def : Pat<(op timm32_0_7:$tile,
                  (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
                  (nxv2i64 ZPR64:$zn)),
              (!cast<Instruction>(NAME # _PSEUDO_D) timm32_0_7:$tile, $pn, $pm, $zn)>;
  }
}
498
499//===----------------------------------------------------------------------===//
500// SME Contiguous Loads
501//===----------------------------------------------------------------------===//
502
// Common encoding for the scalar+scalar contiguous loads into ZA.
//   Q, msz - placed at Inst{24} and Inst{23-22}.
//   V      - placed at Inst{15}.
// Rm, Rv, Pg and Rn are encoded at Inst{20-16}, Inst{14-13}, Inst{12-10} and
// Inst{9-5}. Inst{3-0} are left for derived definitions to fill in.
class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
                         string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Rn;
  let Inst{31-25} = 0b1110000;
  let Inst{24}    = Q;
  let Inst{23-22} = msz;
  let Inst{21}    = 0b0;
  let Inst{20-16} = Rm;
  let Inst{15}    = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;

  let mayLoad = 1;
}
523
// Concrete operand list and assembly string for a ZA load: the destination is
// a tile vector $ZAt, indexed by slice register $Rv (W12-W15 operand class)
// plus $imm, governed by predicate $Pg, with address [$Rn, $Rm]. `is_col` is
// forwarded as the V bit of sme_mem_ld_ss_base.
class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
                         MatrixTileVectorOperand tile_ty, bit is_col,
                         Operand imm_ty, RegisterOperand gpr_ty>
    : sme_mem_ld_ss_base<
        Q, is_col, msz, (outs tile_ty:$ZAt),
        (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
             gpr_ty:$Rm),
        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
532
// Assembly aliases for one scalar+scalar ZA memory instruction `inst`.
// pg_suffix is appended after $Pg (the load aliases in this file pass "/z").
// The final InstAlias argument is the emit priority: 1 marks the alias the
// printer may use, 0 makes it parse-only.
multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
                                   MatrixTileVectorOperand tile_ty,
                                   Operand imm_ty, RegisterOperand gpr_ty,
                                   string pg_suffix=""> {
  // Accept the tile-vector operand written without surrounding braces.
  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
  // Default XZR offset aliases
  def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}
545
// Instantiates sme_mem_ss_aliases_base for the five element sizes. `inst` is
// the record-name prefix, to which _B/_H/_S/_D/_Q is appended; the mnemonic
// gets the matching b/h/w/d/q suffix. `is_col` selects the V (true) or H
// (false) tile-vector operand class.
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
                              string pg_suffix=""> {
  defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
                                 !if(is_col, TileVectorOpV8, TileVectorOpH8),
                                 sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
                                 !if(is_col, TileVectorOpV16, TileVectorOpH16),
                                 sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
                                 !if(is_col, TileVectorOpV32, TileVectorOpH32),
                                 sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
                                 !if(is_col, TileVectorOpV64, TileVectorOpH64),
                                 sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
                                 !if(is_col, TileVectorOpV128, TileVectorOpH128),
                                 sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
}
564
// Load flavour of sme_mem_ss_aliases: mnemonic stem "ld1" and predicate
// suffix "/z".
multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
  defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
}
568
// Selection patterns from a ZA load intrinsic `Load` to `Inst`.
//   tile_ty   - immediate operand class for the tile number.
//   offset_ty - immediate operand class for the slice offset.
//   addr      - complex pattern matching a base + register-offset address.
//   tileslice - complex pattern splitting the slice index into $idx and $imm.
multiclass sme_mem_ld_ss_patterns<Instruction Inst, SDPatternOperator Load,
                                  Operand tile_ty, Operand offset_ty,
                                  ComplexPattern addr,
                                  ComplexPattern tileslice> {
  // base, tileslice
  // With a plain base address, XZR is used as the offset register.
  def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile,
                  (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
            (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>;

  // reg + reg, tileslice
  // AddedComplexity makes this form preferred over the base-only pattern when
  // both match.
  let AddedComplexity = 1 in {
    def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
                    tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                              offset_ty:$imm))),
              (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>;
  }
}
586
// Pseudo used when selecting the ZA load intrinsics. The tile and the slice
// offset are i32 immediates; the remaining operands are the slice register,
// the governing predicate and the base/offset address registers.
class sme_load_pseudo
    : Pseudo<(outs),
             (ins i32imm:$tile,
                  MatrixIndexGPR32Op12_15:$idx, i32imm:$imm,
                  PPR3bAny:$pg,
                  GPR64sp:$base, GPR64:$offset),
             []>,
      Sched<[]> {
  let mayLoad = 1;
  // Rewritten to the real instruction by the custom inserter in
  // AArch64ISelLowering.cpp.
  let usesCustomInserter = 1;
}
595
// Defines, for one direction (`is_col` = 0 or 1), the five ZA load
// instructions (_B/_H/_S/_D/_Q), their assembly aliases, one pseudo per
// element size and the patterns selecting the load intrinsics to the pseudos.
// In each instruction Inst{3-0} is shared between the tile number (ZAt) and
// the slice immediate (imm); the split moves one bit per element-size step.
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
  // Byte: no tile bits, 4-bit immediate.
  def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
                              is_col, sme_elm_idx0_15, GPR64shifted8> {
    bits<4> imm;
    let Inst{3-0} = imm;
  }
  // Halfword: 1 tile bit, 3-bit immediate.
  def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
                              is_col, sme_elm_idx0_7, GPR64shifted16> {
    bits<1> ZAt;
    bits<3> imm;
    let Inst{3}   = ZAt;
    let Inst{2-0} = imm;
  }
  // Word: 2 tile bits, 2-bit immediate.
  def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
                              is_col, sme_elm_idx0_3, GPR64shifted32> {
    bits<2> ZAt;
    bits<2> imm;
    let Inst{3-2} = ZAt;
    let Inst{1-0} = imm;
  }
  // Doubleword: 3 tile bits, 1-bit immediate.
  def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
                              is_col, sme_elm_idx0_1, GPR64shifted64> {
    bits<3> ZAt;
    bits<1> imm;
    let Inst{3-1} = ZAt;
    let Inst{0}   = imm;
  }
  // Quadword: Q bit set, 4 tile bits, no immediate bits.
  def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
                              is_col, sme_elm_idx0_0, GPR64shifted128> {
    bits<4> ZAt;
    let Inst{3-0} = ZAt;
  }

  defm : sme_mem_ld_ss_aliases<NAME, is_col>;

  // Pseudo instructions for lowering intrinsics, using immediates instead of
  // tile registers.
  def _PSEUDO_B : sme_load_pseudo;
  def _PSEUDO_H : sme_load_pseudo;
  def _PSEUDO_S : sme_load_pseudo;
  def _PSEUDO_D : sme_load_pseudo;
  def _PSEUDO_Q : sme_load_pseudo;

  // One pattern set per element size. The arguments after the intrinsic are
  // the tile-immediate class, the slice-offset class, the reg+reg addressing
  // pattern and the tile-slice pattern.
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
                                !if(is_col, int_aarch64_sme_ld1b_vert,
                                            int_aarch64_sme_ld1b_horiz),
                                sme_elm_idx0_0, timm32_0_15, am_sve_regreg_lsl0,
                                tileslice8>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                !if(is_col, int_aarch64_sme_ld1h_vert,
                                            int_aarch64_sme_ld1h_horiz),
                                timm32_0_1, timm32_0_7, am_sve_regreg_lsl1,
                                tileslice16>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
                                !if(is_col, int_aarch64_sme_ld1w_vert,
                                            int_aarch64_sme_ld1w_horiz),
                                timm32_0_3, timm32_0_3, am_sve_regreg_lsl2,
                                tileslice32>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
                                !if(is_col, int_aarch64_sme_ld1d_vert,
                                            int_aarch64_sme_ld1d_horiz),
                                timm32_0_7, timm32_0_1, am_sve_regreg_lsl3,
                                tileslice64>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                !if(is_col, int_aarch64_sme_ld1q_vert,
                                            int_aarch64_sme_ld1q_horiz),
                                timm32_0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
                                tileslice128>;
}
670
// Top-level load multiclass: instantiates sme_mem_ld_v_ss twice, once with
// is_col = 0 under the _H suffix and once with is_col = 1 under the _V suffix.
671multiclass sme_mem_ld_ss<string mnemonic> {
672  defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
673  defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
674}
675
676//===----------------------------------------------------------------------===//
677// SME Contiguous Stores
678//===----------------------------------------------------------------------===//
679
// Shared encoding for the contiguous store instructions in this section.
// The instruction has no outputs. Inst{31-25}, Inst{21} and Inst{4} are fixed
// here; Q, msz and V come from the template arguments; Rm, Rv, Pg and Rn are
// operand fields. Inst{3-0} is not assigned in this class.
//   Q, V, msz        - values placed in Inst{24}, Inst{15} and Inst{23-22}.
//   ins              - input operand dag.
//   mnemonic, argstr - assembly mnemonic and operand string.
680class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
681                         string mnemonic, string argstr>
682    : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
683  bits<5> Rm;
684  bits<2> Rv;
685  bits<3> Pg;
686  bits<5> Rn;
687  let Inst{31-25} = 0b1110000;
688  let Inst{24}    = Q;
689  let Inst{23-22} = msz;
690  let Inst{21}    = 0b1;
691  let Inst{20-16} = Rm;
692  let Inst{15}    = V;
693  let Inst{14-13} = Rv;
694  let Inst{12-10} = Pg;
695  let Inst{9-5}   = Rn;
696  let Inst{4}     = 0b0;
697
  // Flags applied to every instruction derived from this class.
698  let mayStore = 1;
699  let hasSideEffects = 1;
700}
701
// Concrete store form built on sme_mem_st_ss_base. It supplies the input
// operand list (tile slice, slice index register, slice immediate, predicate,
// base register, offset register) and the assembly operand string; is_col is
// passed through as the base class's V argument.
//   tile_ty - operand class for $ZAt.
//   imm_ty  - operand class for $imm.
//   gpr_ty  - register operand class for $Rm.
702class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
703                         MatrixTileVectorOperand tile_ty, bit is_col,
704                         Operand imm_ty, RegisterOperand gpr_ty>
705    : sme_mem_st_ss_base<
706        Q, is_col, msz,
707        (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
708             GPR64sp:$Rn, gpr_ty:$Rm),
709        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
710
// Store-side wrapper around sme_mem_ss_aliases: forwards inst and is_col and
// fixes the first argument to the string "st1".
711multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
712  defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
713}
714
// Selection patterns that map a store operator onto a store instruction.
//   Inst      - instruction to emit.
//   Store     - operator being matched.
//   offset_ty - operand class of the slice immediate produced by tileslice.
//   imm2tile  - ComplexPattern applied to the tile operand.
//   addr      - ComplexPattern matching a base + offset register address.
//   tileslice - ComplexPattern yielding the slice index register and the
//               slice immediate.
// Two patterns are emitted: one for a plain base address, which passes XZR as
// the offset register, and one for a base + register address, which is given
// AddedComplexity = 1.
715multiclass sme_mem_st_ss_patterns<Instruction Inst, SDPatternOperator Store,
716                                  Operand offset_ty,
717                                  ComplexPattern imm2tile,
718                                  ComplexPattern addr,
719                                  ComplexPattern tileslice> {
720  // base, tileslice
721  def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile),
722                   (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
723            (Inst $tile, $idx, $imm, $pg, $base, XZR)>;
724
725  // reg + reg, tileslice
726  let AddedComplexity = 1 in {
727    def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
728                     (imm2tile untyped:$tile),
729                     (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
730              (Inst $tile, $idx, $imm, $pg, $base, $offset)>;
731  }
732}
733
// Defines the five element-size variants (_B, _H, _S, _D, _Q) of a store from
// a tile slice, together with their aliases and the selection patterns for
// the st1* intrinsics. Unlike the load multiclass in this file, the patterns
// here target the real instructions (NAME # _B, ...) directly and convert the
// tile immediate with an imm_to_tile* ComplexPattern.
//   mnemonic - base assembly mnemonic; "b", "h", "w", "d" or "q" is appended.
//   is_col   - when set, the vertical (TileVectorOpV*) tile operand and the
//              *_vert intrinsic are used; otherwise the horizontal
//              (TileVectorOpH*) operand and the *_horiz intrinsic.
734multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
  // Inst{3-0} is shared between the tile number (ZAt) and the slice immediate
  // (imm). _B gives all four bits to imm; each wider element size moves one
  // more bit to ZAt until _Q gives all four bits to ZAt.
735  def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
736                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
737                              is_col, sme_elm_idx0_15, GPR64shifted8> {
738    bits<4> imm;
739    let Inst{3-0} = imm;
740  }
741  def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
742                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
743                              is_col, sme_elm_idx0_7, GPR64shifted16> {
744    bits<1> ZAt;
745    bits<3> imm;
746    let Inst{3}   = ZAt;
747    let Inst{2-0} = imm;
748  }
749  def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
750                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
751                              is_col, sme_elm_idx0_3, GPR64shifted32> {
752    bits<2> ZAt;
753    bits<2> imm;
754    let Inst{3-2} = ZAt;
755    let Inst{1-0} = imm;
756  }
757  def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
758                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
759                              is_col, sme_elm_idx0_1, GPR64shifted64> {
760    bits<3> ZAt;
761    bits<1> imm;
762    let Inst{3-1} = ZAt;
763    let Inst{0}   = imm;
764  }
  // _Q is the only variant that passes Q = 0b1; it reuses the msz value 0b11
  // of _D.
765  def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
766                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
767                              is_col, sme_elm_idx0_0, GPR64shifted128> {
768    bits<4> ZAt;
769    let Inst{3-0} = ZAt;
770  }
771
772  defm : sme_mem_st_ss_aliases<NAME, is_col>;
773
  // Pattern arguments, in order: instruction, intrinsic, slice-immediate
  // operand class, tile ComplexPattern, address ComplexPattern, tile-slice
  // ComplexPattern.
774  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
775                                !if(is_col, int_aarch64_sme_st1b_vert,
776                                            int_aarch64_sme_st1b_horiz),
777                                timm32_0_15, imm_to_tile8, am_sve_regreg_lsl0,
778                                tileslice8>;
779  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
780                                !if(is_col, int_aarch64_sme_st1h_vert,
781                                            int_aarch64_sme_st1h_horiz),
782                                timm32_0_7, imm_to_tile16, am_sve_regreg_lsl1,
783                                tileslice16>;
784  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
785                                !if(is_col, int_aarch64_sme_st1w_vert,
786                                            int_aarch64_sme_st1w_horiz),
787                                timm32_0_3, imm_to_tile32, am_sve_regreg_lsl2,
788                                tileslice32>;
789  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
790                                !if(is_col, int_aarch64_sme_st1d_vert,
791                                            int_aarch64_sme_st1d_horiz),
792                                timm32_0_1, imm_to_tile64, am_sve_regreg_lsl3,
793                                tileslice64>;
794  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
795                                !if(is_col, int_aarch64_sme_st1q_vert,
796                                            int_aarch64_sme_st1q_horiz),
797                                sme_elm_idx0_0, imm_to_tile128,
798                                am_sve_regreg_lsl4, tileslice128>;
799}
800
// Top-level store multiclass: instantiates sme_mem_st_v_ss twice, once with
// is_col = 0 under the _H suffix and once with is_col = 1 under the _V suffix.
801multiclass sme_mem_st_ss<string mnemonic> {
802  defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
803  defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
804}
805
806//===----------------------------------------------------------------------===//
807// SME Save and Restore Array
808//===----------------------------------------------------------------------===//
809
// Shared encoding for the array save/restore instructions in this section.
// isStore is placed in Inst{21}; every other bit outside Rv, Rn and imm4 is
// fixed to a constant.
//   isStore   - value of Inst{21}.
//   outs, ins - operand dags supplied by the derived class.
//   opcodestr - assembly mnemonic.
// The assembly string also prints an $offset operand, but no encoding field
// is declared for it in this class; only imm4 is written to Inst{3-0}.
810class sme_spill_fill_base<bit isStore, dag outs, dag ins, string opcodestr>
811    : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
812        []>,
813      Sched<[]> {
814  bits<2> Rv;
815  bits<5> Rn;
816  bits<4> imm4;
817  let Inst{31-22} = 0b1110000100;
818  let Inst{21}    = isStore;
819  let Inst{20-15} = 0b000000;
820  let Inst{14-13} = Rv;
821  let Inst{12-10} = 0b000;
822  let Inst{9-5}   = Rn;
823  let Inst{4}     = 0b0;
824  let Inst{3-0}   = imm4;
825}
826
// Store form of sme_spill_fill_base (isStore = 1). It has no outputs; the
// matrix operand $ZAt is an input. Marked mayStore.
827let mayStore = 1 in
828class sme_spill_inst<string opcodestr>
829    : sme_spill_fill_base<0b1, (outs),
830                          (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
831                               sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
832                               imm32_0_15:$offset),
833                          opcodestr>;
// Load form of sme_spill_fill_base (isStore = 0). The matrix operand $ZAt is
// the single output. Marked mayLoad.
834let mayLoad = 1 in
835class sme_fill_inst<string opcodestr>
836    : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt),
837                          (ins MatrixIndexGPR32Op12_15:$Rv,
838                               sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
839                               imm32_0_15:$offset),
840                          opcodestr>;
// Defines the spill instruction NAME, an assembly alias that omits the
// trailing offset (the alias passes 0 for that operand), and a selection
// pattern for AArch64SMEStr.
841multiclass sme_spill<string opcodestr> {
842  def NAME : sme_spill_inst<opcodestr>;
843  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
844                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
845                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
846
  // The matched $imm is passed twice: once as the slice immediate and once as
  // the offset operand. ZA is supplied directly as the matrix operand.
847  def : Pat<(AArch64SMEStr (i32 MatrixIndexGPR32Op12_15:$slice), (i64 GPR64sp:$base), (i32 sme_elm_idx0_15:$imm)),
848          (!cast<Instruction>(NAME) ZA, MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base, imm32_0_15:$imm)>;
849}
850
// Defines the fill instruction NAME, an assembly alias that omits the
// trailing offset (the alias passes 0 for that operand), a NAME # _PSEUDO
// pseudo-instruction, and a selection pattern that maps AArch64SMELdr onto
// that pseudo.
851multiclass sme_fill<string opcodestr> {
852  def NAME : sme_fill_inst<opcodestr>;
853  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
854                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
855                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
  // The pseudo has no outputs and takes only the slice index register, the
  // slice immediate and the base register.
856  def NAME # _PSEUDO
857      : Pseudo<(outs),
858               (ins MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm4,
859                    GPR64sp:$base), []>,
860        Sched<[]> {
861    // Translated to actual instruction in AArch64ISelLowering.cpp
862    let usesCustomInserter = 1;
863    let mayLoad = 1;
864  }
865  def : Pat<(AArch64SMELdr MatrixIndexGPR32Op12_15:$slice, GPR64sp:$base, sme_elm_idx0_15:$imm),
866          (!cast<Instruction>(NAME # _PSEUDO) MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base)>;
867}
868
869//===----------------------------------------------------------------------===//
870// Move instructions
871//===----------------------------------------------------------------------===//
872
// Shared encoding for the vector-to-tile move instructions. Inst{31-24},
// Inst{21-17} and Inst{4} are fixed; sz, Q and V come from the template
// arguments; Rv, Pg and Zn are operand fields. Inst{3-0} is not assigned in
// this class.
//   Q, V, sz         - values placed in Inst{16}, Inst{15} and Inst{23-22}.
//   outs, ins        - operand dags supplied by the derived class.
//   mnemonic, argstr - assembly mnemonic and operand string.
873class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
874                              string mnemonic, string argstr>
875    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
876  bits<2> Rv;
877  bits<3> Pg;
878  bits<5> Zn;
879  let Inst{31-24} = 0b11000000;
880  let Inst{23-22} = sz;
881  let Inst{21-17} = 0b00000;
882  let Inst{16}    = Q;
883  let Inst{15}    = V;
884  let Inst{14-13} = Rv;
885  let Inst{12-10} = Pg;
886  let Inst{9-5}   = Zn;
887  let Inst{4}     = 0b0;
888}
889
// Concrete vector-to-tile form built on sme_vector_to_tile_base. The tile
// appears both as the output $ZAd and as the input $_ZAd, and the two are
// tied together by the Constraints string. is_col is passed through as the
// base class's V argument.
//   tile_ty - operand class for the tile operands.
//   imm_ty  - operand class for $imm.
//   zpr_ty  - register operand class for $Zn.
890class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty,
891                              bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
892                              string mnemonic>
893    : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
894        (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
895        mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">{
896
897  let Constraints = "$ZAd = $_ZAd";
898}
899
900
// Adds a "mov" assembly alias for a vector-to-tile instruction, using the
// same operand order as the instruction's own assembly string.
//   inst    - instruction the alias resolves to.
//   tile_ty - operand class for $ZAd.
//   zpr_ty  - register operand class for $Zn.
//   imm_ty  - operand class for $imm.
901multiclass sme_vector_to_tile_aliases<Instruction inst,
902                                      MatrixTileVectorOperand tile_ty,
903                                      ZPRRegOp zpr_ty, Operand imm_ty> {
904  def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
905                  (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
906}
907
// Selection pattern that maps a tile-write operator onto an instruction.
//   inst      - instruction to emit.
//   zpr_vt    - value type of the source vector $zn.
//   ppr_vt    - value type of the predicate $pg.
//   imm_ty    - operand class of the tile immediate $tile.
//   offset_ty - operand class of the slice immediate produced by tileslice.
//   op        - operator being matched.
//   tileslice - ComplexPattern yielding the slice index register and the
//               slice immediate.
// The tile immediate is forwarded to inst unchanged; no imm-to-tile
// conversion is applied in this pattern.
908multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt,
909                                       ValueType ppr_vt, Operand imm_ty,
910                                       Operand offset_ty,
911                                       SDPatternOperator op,
912                                       ComplexPattern tileslice> {
913  def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
914                                              offset_ty:$imm)),
915                (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
916            (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
917}
918
// Pseudo-instruction for a vector-to-tile insert. It has no outputs and
// takes the tile and the slice offset as plain i32 immediates, alongside the
// slice index register, predicate and source vector.
//   za_flag - value stored in the record's SMEMatrixType field.
919class sme_mova_insert_pseudo<SMEMatrixTypeEnum za_flag>
920    : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
921                          i32imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
922      Sched<[]> {
923  // Translated to the actual instructions in AArch64ISelLowering.cpp
924  let SMEMatrixType = za_flag;
925  let usesCustomInserter = 1;
926}
927
// Defines the five element-size variants (_B, _H, _S, _D, _Q) of the
// vector-to-tile move, a matching pseudo per variant, the "mov" aliases, and
// the selection patterns for the write/writeq intrinsics.
//   mnemonic - assembly mnemonic, used unchanged for every variant.
//   is_col   - when set, the vertical (TileVectorOpV*) tile operand and the
//              *_vert intrinsics are used; otherwise the horizontal
//              (TileVectorOpH*) operand and the *_horiz intrinsics.
928multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
  // Inst{3-0} is shared between the tile number (ZAd) and the slice immediate
  // (imm). _B gives all four bits to imm; each wider element size moves one
  // more bit to ZAd. Each real instruction is also tagged with
  // SMEPseudo2Instr<NAME # _X, 1>; its pseudo below uses the same key with a
  // final argument of 0.
929  def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
930                                                          TileVectorOpH8),
931                                   is_col, sme_elm_idx0_15, ZPR8, mnemonic>,
932                                   SMEPseudo2Instr<NAME # _B, 1> {
933    bits<4> imm;
934    let Inst{3-0} = imm;
935  }
936  def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16,
937                                                          TileVectorOpH16),
938                                   is_col, sme_elm_idx0_7, ZPR16, mnemonic>,
939                                   SMEPseudo2Instr<NAME # _H, 1> {
940    bits<1> ZAd;
941    bits<3> imm;
942    let Inst{3}   = ZAd;
943    let Inst{2-0} = imm;
944  }
945  def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32,
946                                                          TileVectorOpH32),
947                                   is_col, sme_elm_idx0_3, ZPR32, mnemonic>,
948                                   SMEPseudo2Instr<NAME # _S, 1> {
949    bits<2> ZAd;
950    bits<2> imm;
951    let Inst{3-2} = ZAd;
952    let Inst{1-0} = imm;
953  }
954  def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64,
955                                                          TileVectorOpH64),
956                                   is_col, sme_elm_idx0_1, ZPR64, mnemonic>,
957                                   SMEPseudo2Instr<NAME # _D, 1> {
958    bits<3> ZAd;
959    bits<1> imm;
960    let Inst{3-1} = ZAd;
961    let Inst{0}   = imm;
962  }
  // In _Q all of Inst{3-0} holds ZAd. The imm field is declared but is not
  // assigned to any Inst bits in this def.
963  def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128,
964                                                          TileVectorOpH128),
965                                   is_col, sme_elm_idx0_0, ZPR128, mnemonic>,
966                                   SMEPseudo2Instr<NAME # _Q, 1> {
967    bits<4> ZAd;
968    bits<1> imm;
969    let Inst{3-0} = ZAd;
970  }
971
972  // Pseudo instructions for lowering intrinsics, using immediates instead of
973  // tile registers.
974  def _PSEUDO_B : sme_mova_insert_pseudo<SMEMatrixTileB>, SMEPseudo2Instr<NAME # _B, 0>;
975  def _PSEUDO_H : sme_mova_insert_pseudo<SMEMatrixTileH>, SMEPseudo2Instr<NAME # _H, 0>;
976  def _PSEUDO_S : sme_mova_insert_pseudo<SMEMatrixTileS>, SMEPseudo2Instr<NAME # _S, 0>;
977  def _PSEUDO_D : sme_mova_insert_pseudo<SMEMatrixTileD>, SMEPseudo2Instr<NAME # _D, 0>;
978  def _PSEUDO_Q : sme_mova_insert_pseudo<SMEMatrixTileQ>, SMEPseudo2Instr<NAME # _Q, 0>;
979
  // "mov" aliases for the real instructions, one per element size.
980  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
981                                    !if(is_col, TileVectorOpV8,
982                                                TileVectorOpH8),
983                                    ZPR8, sme_elm_idx0_15>;
984  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
985                                    !if(is_col, TileVectorOpV16,
986                                                TileVectorOpH16),
987                                    ZPR16, sme_elm_idx0_7>;
988  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
989                                    !if(is_col, TileVectorOpV32,
990                                                TileVectorOpH32),
991                                    ZPR32, sme_elm_idx0_3>;
992  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
993                                    !if(is_col, TileVectorOpV64,
994                                                TileVectorOpH64),
995                                    ZPR64, sme_elm_idx0_1>;
996  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
997                                    !if(is_col, TileVectorOpV128,
998                                                TileVectorOpH128),
999                                    ZPR128, sme_elm_idx0_0>;
1000
1001  defvar op = !if(is_col, int_aarch64_sme_write_vert,
1002                          int_aarch64_sme_write_horiz);
1003
  // Patterns for the write intrinsic target the pseudos, one per vector value
  // type. The two operand classes passed after the predicate type are the
  // tile immediate class and the slice immediate class.
1004  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
1005                                     nxv16i8, nxv16i1, sme_elm_idx0_0, sme_elm_idx0_15,
1006                                     op, tileslice8>;
1007  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
1008                                     nxv8i16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
1009                                     op, tileslice16>;
1010  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
1011                                     nxv8f16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
1012                                     op, tileslice16>;
1013  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
1014                                     nxv8bf16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
1015                                     op, tileslice16>;
1016  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
1017                                     nxv4i32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
1018                                     op, tileslice32>;
1019  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
1020                                     nxv4f32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
1021                                     op, tileslice32>;
1022  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
1023                                     nxv2i64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
1024                                     op, tileslice64>;
1025  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
1026                                     nxv2f64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
1027                                     op, tileslice64>;
1028
1029  defvar opq = !if(is_col, int_aarch64_sme_writeq_vert,
1030                           int_aarch64_sme_writeq_horiz);
1031
  // The writeq intrinsic is matched for every vector value type listed above,
  // and all of those patterns target the _PSEUDO_Q pseudo.
1032  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1033                                     nxv16i8, nxv16i1, sme_elm_idx0_15,
1034                                     sme_elm_idx0_0, opq, tileslice128>;
1035  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1036                                     nxv8i16, nxv8i1, sme_elm_idx0_15,
1037                                     sme_elm_idx0_0, opq, tileslice128>;
1038  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1039                                     nxv8f16, nxv8i1, sme_elm_idx0_15,
1040                                     sme_elm_idx0_0, opq, tileslice128>;
1041  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1042                                     nxv8bf16, nxv8i1, sme_elm_idx0_15,
1043                                     sme_elm_idx0_0, opq, tileslice128>;
1044  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1045                                     nxv4i32, nxv4i1, sme_elm_idx0_15,
1046                                     sme_elm_idx0_0, opq, tileslice128>;
1047  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1048                                     nxv4f32, nxv4i1, sme_elm_idx0_15,
1049                                     sme_elm_idx0_0, opq, tileslice128>;
1050  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1051                                     nxv2i64, nxv2i1, sme_elm_idx0_15,
1052                                     sme_elm_idx0_0, opq, tileslice128>;
1053  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
1054                                     nxv2f64, nxv2i1, sme_elm_idx0_15,
1055                                     sme_elm_idx0_0, opq, tileslice128>;
1056}
1057
// Top-level vector-to-tile multiclass: instantiates sme_vector_v_to_tile
// twice, once with is_col = 0 under the _H suffix and once with is_col = 1
// under the _V suffix.
1058multiclass sme_vector_to_tile<string mnemonic> {
1059  defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
1060  defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
1061}
1062
// Shared encoding for the tile-to-vector move instructions. Inst{31-24},
// Inst{21-17} and Inst{9} are fixed; sz, Q and V come from the template
// arguments; Rv, Pg and Zd are operand fields. Inst{8-5} is not assigned in
// this class.
//   Q, V, sz         - values placed in Inst{16}, Inst{15} and Inst{23-22}.
//   outs, ins        - operand dags supplied by the derived class.
//   mnemonic, argstr - assembly mnemonic and operand string.
1063class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
1064                              string mnemonic, string argstr>
1065    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
1066  bits<2> Rv;
1067  bits<3> Pg;
1068  bits<5> Zd;
1069  let Inst{31-24} = 0b11000000;
1070  let Inst{23-22} = sz;
1071  let Inst{21-17} = 0b00001;
1072  let Inst{16}    = Q;
1073  let Inst{15}    = V;
1074  let Inst{14-13} = Rv;
1075  let Inst{12-10} = Pg;
1076  let Inst{9}     = 0b0;
1077  let Inst{4-0}   = Zd;
1078}
1079
// Concrete tile-to-vector form built on sme_tile_to_vector_base. The vector
// register appears both as the output $Zd and as the input $_Zd, and the two
// are tied together by the Constraints string. is_col is passed through as
// the base class's V argument.
//   zpr_ty  - register operand class for $Zd and $_Zd.
//   tile_ty - operand class for $ZAn.
//   imm_ty  - operand class for $imm.
1080class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
1081                              MatrixTileVectorOperand tile_ty,
1082                              bit is_col, Operand imm_ty, string mnemonic>
1083    : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
1084        (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
1085        mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> {
1086
1087  let Constraints = "$Zd = $_Zd";
1088}
1089
// Adds a "mov" assembly alias for a tile-to-vector instruction, using the
// same operand order as the instruction's own assembly string.
//   inst    - instruction the alias resolves to.
//   zpr_ty  - register operand class for $Zd.
//   tile_ty - operand class for $ZAn.
//   imm_ty  - operand class for $imm.
1090multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
1091                                      MatrixTileVectorOperand tile_ty,
1092                                      Operand imm_ty > {
1093  def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
1094                  (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
1095}
1096
// Selection patterns that map a tile-read operator onto an instruction.
//   inst      - instruction to emit.
//   zpr_vt    - value type of the result and of the passthru vector.
//   ppr_vt    - value type of the predicate $pg.
//   offset_ty - operand class of the slice immediate produced by tileslice.
//   imm2tile  - ComplexPattern applied to the tile operand.
//   tileslice - ComplexPattern yielding the slice index register and the
//               slice immediate.
//   op        - operator being matched.
// Two patterns are emitted: one that takes the slice index register as-is
// and passes 0 as the slice immediate, and one with AddedComplexity = 1 that
// matches through tileslice and passes the immediate it produces.
1097multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt,
1098                                       ValueType ppr_vt, Operand offset_ty,
1099                                       ComplexPattern imm2tile,
1100                                       ComplexPattern tileslice,
1101                                       SDPatternOperator op> {
1102  def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
1103                        (imm2tile untyped:$tile), MatrixIndexGPR32Op12_15:$idx)),
1104            (inst $passthru, $pg, $tile, $idx, 0)>;
1105  let AddedComplexity = 1 in {
1106    def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
1107                          (imm2tile untyped:$tile),
1108                          (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
1109                                          offset_ty:$imm)))),
1110              (inst $passthru, $pg, $tile, $idx, $imm)>;
1111  }
1112}
1113
// Defines the five element-size variants (_B, _H, _S, _D, _Q) of the
// tile-to-vector move, their "mov" aliases, and the selection patterns for
// the read/readq intrinsics. No pseudos are defined here; the patterns target
// the real instructions and convert the tile immediate with an imm_to_tile*
// ComplexPattern.
//   mnemonic - assembly mnemonic, used unchanged for every variant.
//   is_col   - when set, the vertical (TileVectorOpV*) tile operand and the
//              *_vert intrinsics are used; otherwise the horizontal
//              (TileVectorOpH*) operand and the *_horiz intrinsics.
1114multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
  // Inst{8-5} is shared between the tile number (ZAn) and the slice immediate
  // (imm). _B gives all four bits to imm; each wider element size moves one
  // more bit to ZAn until _Q gives all four bits to ZAn.
1115  def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
1116                                                                TileVectorOpH8),
1117                                   is_col, sme_elm_idx0_15, mnemonic> {
1118    bits<4> imm;
1119    let Inst{8-5} = imm;
1120  }
1121  def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16,
1122                                                                 TileVectorOpH16),
1123                                   is_col, sme_elm_idx0_7, mnemonic> {
1124    bits<1> ZAn;
1125    bits<3> imm;
1126    let Inst{8}   = ZAn;
1127    let Inst{7-5} = imm;
1128  }
1129  def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32,
1130                                                                 TileVectorOpH32),
1131                                   is_col, sme_elm_idx0_3, mnemonic> {
1132    bits<2> ZAn;
1133    bits<2> imm;
1134    let Inst{8-7} = ZAn;
1135    let Inst{6-5} = imm;
1136  }
1137  def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64,
1138                                                                 TileVectorOpH64),
1139                                   is_col, sme_elm_idx0_1, mnemonic> {
1140    bits<3> ZAn;
1141    bits<1> imm;
1142    let Inst{8-6} = ZAn;
1143    let Inst{5}   = imm;
1144  }
1145  def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128,
1146                                                                  TileVectorOpH128),
1147                                   is_col, sme_elm_idx0_0, mnemonic> {
1148    bits<4> ZAn;
1149    let Inst{8-5} = ZAn;
1150  }
1151
  // "mov" aliases for the instructions above, one per element size.
1152  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
1153                                    !if(is_col, TileVectorOpV8,
1154                                                TileVectorOpH8), sme_elm_idx0_15>;
1155  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
1156                                    !if(is_col, TileVectorOpV16,
1157                                                TileVectorOpH16), sme_elm_idx0_7>;
1158  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
1159                                    !if(is_col, TileVectorOpV32,
1160                                                TileVectorOpH32), sme_elm_idx0_3>;
1161  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
1162                                    !if(is_col, TileVectorOpV64,
1163                                                TileVectorOpH64), sme_elm_idx0_1>;
1164  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
1165                                    !if(is_col, TileVectorOpV128,
1166                                                TileVectorOpH128), sme_elm_idx0_0>;
1167
1168  defvar op = !if(is_col, int_aarch64_sme_read_vert,
1169                          int_aarch64_sme_read_horiz);
1170
  // Patterns for the read intrinsic, one per vector value type. The operand
  // class passed after the predicate type is the slice immediate class.
1171  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
1172                                     nxv16i8, nxv16i1, sme_elm_idx0_15,
1173                                     imm_to_tile8, tileslice8, op>;
1174  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
1175                                     nxv8i16, nxv8i1, sme_elm_idx0_7,
1176                                     imm_to_tile16, tileslice16, op>;
1177  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
1178                                     nxv8f16, nxv8i1, sme_elm_idx0_7,
1179                                     imm_to_tile16, tileslice16, op>;
1180  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
1181                                     nxv8bf16, nxv8i1, sme_elm_idx0_7,
1182                                     imm_to_tile16, tileslice16, op>;
1183  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
1184                                     nxv4i32, nxv4i1, sme_elm_idx0_3,
1185                                     imm_to_tile32, tileslice32, op>;
1186  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
1187                                     nxv4f32, nxv4i1, sme_elm_idx0_3,
1188                                     imm_to_tile32, tileslice32, op>;
1189  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
1190                                     nxv2i64, nxv2i1, sme_elm_idx0_1,
1191                                     imm_to_tile64, tileslice64, op>;
1192  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
1193                                     nxv2f64, nxv2i1, sme_elm_idx0_1,
1194                                     imm_to_tile64, tileslice64, op>;
1195
1196  defvar opq = !if(is_col, int_aarch64_sme_readq_vert,
1197                           int_aarch64_sme_readq_horiz);
1198
  // The readq intrinsic is matched for every vector value type listed above,
  // and all of those patterns target the _Q instruction.
1199  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1200                                     nxv16i8, nxv16i1, sme_elm_idx0_0,
1201                                     imm_to_tile128, tileslice128, opq>;
1202  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1203                                     nxv8i16, nxv8i1, sme_elm_idx0_0,
1204                                     imm_to_tile128, tileslice128, opq>;
1205  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1206                                     nxv8f16, nxv8i1, sme_elm_idx0_0,
1207                                     imm_to_tile128, tileslice128, opq>;
1208  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1209                                     nxv8bf16, nxv8i1, sme_elm_idx0_0,
1210                                     imm_to_tile128, tileslice128, opq>;
1211  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1212                                     nxv4i32, nxv4i1, sme_elm_idx0_0,
1213                                     imm_to_tile128, tileslice128, opq>;
1214  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1215                                     nxv4f32, nxv4i1, sme_elm_idx0_0,
1216                                     imm_to_tile128, tileslice128, opq>;
1217  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1218                                     nxv2i64, nxv2i1, sme_elm_idx0_0,
1219                                     imm_to_tile128, tileslice128, opq>;
1220  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
1221                                     nxv2f64, nxv2i1, sme_elm_idx0_0,
1222                                     imm_to_tile128, tileslice128, opq>;
1223}
1224
// Top-level tile-to-vector multiclass: instantiates sme_tile_to_vector_v
// twice, once with is_col = 0 under the _H suffix and once with is_col = 1
// under the _V suffix.
1225multiclass sme_tile_to_vector<string mnemonic> {
1226  defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>;
1227  defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>;
1228}
1229
1230//===----------------------------------------------------------------------===//
1231// SME Zero
1232//===----------------------------------------------------------------------===//
1233
1234// NOTE: This definition isn't really correct because there are outputs, i.e.
1235// the tile registers being zeroed. We fix this up in a custom inserter that
1236// marks the appropriate registers as being implicitly defined.
//
// Encoding: the single operand is an 8-bit MatrixTileList mask stored in
// Inst{7-0}; the upper 24 bits Inst{31-8} are a fixed opcode.
1237class sme_zero_inst<string mnemonic>
1238    : I<(outs), (ins MatrixTileList:$imm),
1239        mnemonic, "\t$imm", "", []>, Sched<[]> {
1240  bits<8> imm;
1241  let Inst{31-8} = 0b110000000000100000000000;
1242  let Inst{7-0}  = imm;
1243}
1244
// Defines the zero instruction NAME, a set of assembly aliases that map
// shorthand tile lists onto fixed 8-bit mask values, a NAME # _PSEUDO
// pseudo-instruction, and a selection pattern that maps the
// int_aarch64_sme_zero intrinsic onto that pseudo.
1245multiclass sme_zero<string mnemonic> {
1246  def NAME : sme_zero_inst<mnemonic>;
1247
  // Each alias spells "zero" literally rather than using the mnemonic
  // parameter, and passes the mask as the instruction's only operand.
  // In the masks below, {za} sets all eight bits, a .h tile sets every second
  // bit and a .s tile sets every fourth bit; lists of .s tiles are the OR of
  // the individual .s masks.
1248  def : InstAlias<"zero\t\\{za\\}", (!cast<Instruction>(NAME) 0b11111111), 1>;
1249  def : InstAlias<"zero\t\\{za0.h\\}", (!cast<Instruction>(NAME) 0b01010101), 1>;
1250  def : InstAlias<"zero\t\\{za1.h\\}", (!cast<Instruction>(NAME) 0b10101010), 1>;
1251  def : InstAlias<"zero\t\\{za0.s\\}", (!cast<Instruction>(NAME) 0b00010001), 1>;
1252  def : InstAlias<"zero\t\\{za1.s\\}", (!cast<Instruction>(NAME) 0b00100010), 1>;
1253  def : InstAlias<"zero\t\\{za2.s\\}", (!cast<Instruction>(NAME) 0b01000100), 1>;
1254  def : InstAlias<"zero\t\\{za3.s\\}", (!cast<Instruction>(NAME) 0b10001000), 1>;
1255  def : InstAlias<"zero\t\\{za0.s,za1.s\\}", (!cast<Instruction>(NAME) 0b00110011), 1>;
1256  def : InstAlias<"zero\t\\{za0.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10011001), 1>;
1257  def : InstAlias<"zero\t\\{za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01100110), 1>;
1258  def : InstAlias<"zero\t\\{za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11001100), 1>;
1259  def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (!cast<Instruction>(NAME) 0b01110111), 1>;
1260  def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>;
1261  def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
1262  def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
1263
  // The pseudo takes the tile list as a plain i32 immediate.
1264  def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>,
1265      Sched<[]> {
1266    // Translated to the actual instructions in AArch64ISelLowering.cpp
1267    let usesCustomInserter = 1;
1268  }
1269
1270  def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm),
1271            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_255:$imm)>;
1272}
1273
1274//===----------------------------------------------------------------------===//
1275// SVE2 Instructions
1276//===----------------------------------------------------------------------===//
1277
// Encoding class for a predicated, destructive unary instruction on ZPR128
// registers. The assembly form is "$Zd, $Pg/m, $Zn"; the extra input $_Zd is
// tied to the output $Zd.
1278class sve2_int_perm_revd<string asm>
1279    : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
1280        asm, "\t$Zd, $Pg/m, $Zn", "", []>,
1281      Sched<[]> {
1282  bits<5> Zd;
1283  bits<3> Pg;
1284  bits<5> Zn;
1285  let Inst{31-24} = 0b00000101;
1286  let Inst{23-22} = 0b00; // size
1287  let Inst{21-13} = 0b101110100;
1288  let Inst{12-10} = Pg;
1289  let Inst{9-5}   = Zn;
1290  let Inst{4-0}   = Zd;
1291
  // $Zd is both source and destination.
1292  let Constraints = "$Zd = $_Zd";
1293  let DestructiveInstType = DestructiveUnary;
1294  let ElementSize = ZPR128.ElementSize;
1295}
1296
// Defines the instruction (NAME) and SVE_1_Op_Passthru_Pat selection
// patterns for `op` over every listed integer and floating-point vector
// type. All patterns select the same single instruction.
1297multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
1298  def NAME : sve2_int_perm_revd<asm>;
1299
  // Integer element types.
1300  def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME)>;
1301  def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1,  nxv8i16, !cast<Instruction>(NAME)>;
1302  def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1,  nxv4i32, !cast<Instruction>(NAME)>;
1303  def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1,  nxv2i64, !cast<Instruction>(NAME)>;
1304
  // Floating-point element types (including bf16).
1305  def : SVE_1_Op_Passthru_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME)>;
1306  def : SVE_1_Op_Passthru_Pat<nxv8f16,  op, nxv8i1, nxv8f16,  !cast<Instruction>(NAME)>;
1307  def : SVE_1_Op_Passthru_Pat<nxv4f32,  op, nxv4i1, nxv4f32,  !cast<Instruction>(NAME)>;
1308  def : SVE_1_Op_Passthru_Pat<nxv2f64,  op, nxv2i1, nxv2f64,  !cast<Instruction>(NAME)>;
1309
1310}
1311
// Encoding class for an unpredicated, destructive three-register
// instruction printed as "$Zd, $Zn, $Zm".
//   sz     - value placed in Inst{23-22}.
//   U      - value placed in Inst{10}.
//   zpr_ty - register operand class used for all three vector operands; its
//            ElementSize is reused for the instruction.
1312class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
1313    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
1314        asm, "\t$Zd, $Zn, $Zm", "", []>,
1315      Sched<[]> {
1316  bits<5> Zm;
1317  bits<5> Zn;
1318  bits<5> Zd;
1319  let Inst{31-24} = 0b01000100;
1320  let Inst{23-22} = sz;
1321  let Inst{21}    = 0b0;
1322  let Inst{20-16} = Zm;
1323  let Inst{15-11} = 0b11000;
1324  let Inst{10}    = U;
1325  let Inst{9-5}   = Zn;
1326  let Inst{4-0}   = Zd;
1327
  // $Zd is both an input (as $_Zd) and the result.
1328  let Constraints = "$Zd = $_Zd";
1329  let DestructiveInstType = DestructiveOther;
1330  let ElementSize = zpr_ty.ElementSize;
1331}
1332
// Defines the _B/_H/_S/_D variants (sz = 0b00..0b11 over ZPR8..ZPR64) and a
// three-operand selection pattern for `op` on the matching integer vector
// type of each variant.
1333multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
1334  def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
1335  def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
1336  def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
1337  def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
1338
1339  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
1340  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
1341  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
1342  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
1343}
1344
// Common encoding for a predicate instruction printed as
// "$Pd, $Pn, $Pm[$Rv, $imm]". This class leaves Inst{23-22} and Inst{20-18}
// unassigned; each instantiation in the multiclass of the same name fills
// them with the immediate and a per-size constant.
//   ppr_ty - register operand class of $Pm.
//   imm_ty - operand class of the immediate index.
1345class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
1346    : I<(outs PPRorPNRAny:$Pd), (ins PPRorPNRAny:$Pn, ppr_ty:$Pm,
1347                            MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
1348        asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
1349      Sched<[]> {
1350  bits<2> Rv;
1351  bits<4> Pn;
1352  bits<4> Pm;
1353  bits<4> Pd;
1354  let Inst{31-24} = 0b00100101;
1355  let Inst{21}    = 0b1;
1356  let Inst{17-16} = Rv;
1357  let Inst{15-14} = 0b01;
1358  let Inst{13-10} = Pn;
1359  let Inst{9}     = 0b0;
1360  let Inst{8-5}   = Pm;
1361  let Inst{4}     = 0b0;
1362  let Inst{3-0}   = Pd;
1363}
1364
// Defines the _B/_H/_S/_D variants and two groups of selection patterns for
// `op`: one with a bare register index (immediate fixed to 0) and one,
// with AddedComplexity = 1, that matches the index through a tileslice
// complex pattern producing a register plus immediate.
1365multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
  // The immediate narrows from 4 bits (_B) to 1 bit (_D); the bits of
  // Inst{23-22}/Inst{20-18} not used by the immediate hold a per-size constant.
1366  def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
1367    bits<4> imm;
1368    let Inst{23-22} = imm{3-2};
1369    let Inst{20-19} = imm{1-0};
1370    let Inst{18}    = 0b1;
1371  }
1372  def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
1373    bits<3> imm;
1374    let Inst{23-22} = imm{2-1};
1375    let Inst{20}    = imm{0};
1376    let Inst{19-18} = 0b10;
1377  }
1378  def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
1379    bits<2> imm;
1380    let Inst{23-22} = imm{1-0};
1381    let Inst{20-18} = 0b100;
1382  }
1383  def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
1384    bits<1> imm;
1385    let Inst{23}    = imm;
1386    let Inst{22}    = 0b1;
1387    let Inst{20-18} = 0b000;
1388  }
1389
  // Register-only index: the immediate operand is set to 0. The variant is
  // chosen by the predicate type of $Pm.
1390  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
1391             MatrixIndexGPR32Op12_15:$idx)),
1392            (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
1393  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
1394             MatrixIndexGPR32Op12_15:$idx)),
1395            (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
1396  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
1397             MatrixIndexGPR32Op12_15:$idx)),
1398            (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
1399  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
1400             MatrixIndexGPR32Op12_15:$idx)),
1401            (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;
1402
  // Register + immediate index, matched via the tileslice8/16/32/64 complex
  // patterns. AddedComplexity = 1 raises these above the patterns above.
1403  let AddedComplexity = 1 in {
1404    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
1405               (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))),
1406              (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
1407    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
1408               (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))),
1409              (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
1410    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
1411               (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))),
1412              (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
1413    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
1414               (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))),
1415              (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
1416  }
1417}
1418
1419//===----------------------------------------------------------------------===//
1420// SME2 Instructions
1421//===----------------------------------------------------------------------===//
1422
1423//===----------------------------------------------------------------------===//
1424// SME2 single-multi ternary int/fp, two/four registers
1425
// Encoding class for ZA-array instructions taking a multi-vector $Zn and a
// single vector $Zm, printed as "$ZAd[$Rv, $imm3, vgxN], $Zn, $Zm".
// The 7-bit `op` is scattered over the encoding:
//   op{6}   -> Inst{22}     (sz)
//   op{5}   -> Inst{20}     (selects "vgx4" when set, "vgx2" otherwise)
//   op{4-2} -> Inst{12-10}
//   op{1-0} -> Inst{4-3}
// The matrix operand is tied: $ZAd is both input ($_ZAd) and output.
1426class sme2_dot_mla_add_sub_array_vg24_single<bits<7> op,
1427                                         MatrixOperand matrix_ty,
1428                                         RegisterOperand multi_vector_ty,
1429                                         ZPRRegOp zpr_ty,
1430                                         string mnemonic>
1431   : I<(outs matrix_ty:$ZAd),
1432       (ins  matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
1433       sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm),
1434       mnemonic,"\t$ZAd[$Rv, $imm3, " # !if(op{5}, "vgx4", "vgx2") # "], $Zn, $Zm",
1435       "", []> , Sched<[]> {
1436  bits<4> Zm;
1437  bits<5> Zn;
1438  bits<2> Rv;
1439  bits<3> imm3;
1440  let Inst{31-23} = 0b110000010;
1441  let Inst{22}    = op{6}; //sz
1442  let Inst{21}    = 0b1;
1443  let Inst{20}    = op{5}; //vgx4
1444  let Inst{19-16} = Zm;
1445  let Inst{15}    = 0b0;
1446  let Inst{14-13} = Rv;
1447  let Inst{12-10} = op{4-2};
1448  let Inst{9-5}   = Zn;
1449  let Inst{4-3}   = op{1-0};
1450  let Inst{2-0}   = imm3;
1451  let Constraints = "$ZAd = $_ZAd";
1452}
1453
// Defines the instruction (registered via SMEPseudo2Instr<NAME, 1>) plus an
// assembler alias whose index brackets omit the vgx2/vgx4 suffix. Unlike
// the _vg2_single/_vg4_single multiclasses, no pseudo or selection pattern
// is created here.
1454multiclass sme2_dot_mla_add_sub_array_vg24_single<string mnemonic, bits<7> op,
1455                                              MatrixOperand matrix_ty,
1456                                              RegisterOperand multi_vector_ty,
1457                                              ZPRRegOp zpr_ty>{
1458  def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
1459
  // Alias form "$ZAd[$Rv, $imm3], $Zn, $Zm" (no vgx suffix).
1460  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
1461                 (!cast<Instruction>(NAME) matrix_ty:$ZAd,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
1462}
1463
// Two-register (VG2) multi/single variant: defines the instruction, the
// suffix-less assembler alias, a _PSEUDO, and a
// SME2_ZA_TwoOp_VG2_Multi_Single_Pat selection pattern for `intrinsic` on
// value type `vty`, using tileslice16 for the slice index.
1464multiclass sme2_dot_mla_add_sub_array_vg2_single<string mnemonic, bits<7> op,
1465                                              MatrixOperand matrix_ty,
1466                                              RegisterOperand multi_vector_ty,
1467                                              ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{
1468  def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
1469
  // Alias form without the vgx suffix inside the brackets.
1470  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
1471                 (!cast<Instruction>(NAME) matrix_ty:$ZAd,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
1472
1473  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, zpr_ty, SMEMatrixArray>;
1474
1475  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, vty, tileslice16>;
1476}
1477
// Four-register (VG4) multi/single variant: identical in structure to the
// _vg2_single multiclass, except that the selection pattern is
// SME2_ZA_TwoOp_VG4_Multi_Single_Pat.
1478multiclass sme2_dot_mla_add_sub_array_vg4_single<string mnemonic, bits<7> op,
1479                                              MatrixOperand matrix_ty,
1480                                              RegisterOperand multi_vector_ty,
1481                                              ZPRRegOp zpr_ty, ValueType vty, SDPatternOperator intrinsic>{
1482  def NAME: sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty, multi_vector_ty, zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
1483
  // Alias form without the vgx suffix inside the brackets.
1484  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
1485                 (!cast<Instruction>(NAME) matrix_ty:$ZAd,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
1486
1487  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, zpr_ty, SMEMatrixArray>;
1488
1489  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, vty, tileslice16>;
1490}
1491
1492//===----------------------------------------------------------------------===//
1493// SME2 multiple vectors ternary INT/FP, two and four registers
// Encoding class for the multi/multi form with "vgx2": both $Zn and $Zm are
// multi_vector_ty operands encoded in 4 bits each. `op` is split as
// op{6} -> Inst{22}, op{5-3} -> Inst{12-10}, op{2-0} -> Inst{5-3}.
// The matrix operand $ZAd is tied to the input $_ZAd.
1494class sme2_dot_mla_add_sub_array_vg2_multi<bits<7> op,
1495                                       MatrixOperand matrix_ty,
1496                                       RegisterOperand multi_vector_ty,
1497                                       string mnemonic>
1498   : I<(outs matrix_ty:$ZAd),
1499       (ins  matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
1500       sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm),
1501       mnemonic, "\t$ZAd[$Rv, $imm3, vgx2], $Zn, $Zm",
1502       "", []>, Sched<[]>{
1503  bits<4> Zm;
1504  bits<4> Zn;
1505  bits<2> Rv;
1506  bits<3> imm3;
1507  let Inst{31-23} = 0b110000011;
1508  let Inst{22}    = op{6}; //sz
1509  let Inst{21}    = 0b1;
1510  let Inst{20-17} = Zm;
1511  let Inst{16-15} = 0b00;
1512  let Inst{14-13} = Rv;
1513  let Inst{12-10} = op{5-3};
1514  let Inst{9-6}   = Zn;
1515  let Inst{5-3}   = op{2-0};
1516  let Inst{2-0}   = imm3;
1517  let Constraints = "$ZAd = $_ZAd";
1518}
1519
// Defines the vgx2 multi/multi instruction, its _PSEUDO, a
// SME2_ZA_TwoOp_VG2_Multi_Multi_Pat selection pattern for `intrinsic`, and
// an assembler alias that omits the vgx2 suffix.
// Note: despite its name, `zpr_ty` is a ValueType here and is only passed
// on to the pattern class.
1520multiclass sme2_dot_mla_add_sub_array_vg2_multi<string mnemonic, bits<7> op,
1521                                            MatrixOperand  matrix_ty,
1522                                            RegisterOperand multi_vector_ty, ValueType zpr_ty,
1523                                            SDPatternOperator intrinsic> {
1524  def NAME : sme2_dot_mla_add_sub_array_vg2_multi<op, matrix_ty, multi_vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
1525
1526  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, SMEMatrixArray>;
1527
1528  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, tileslice16>;
1529
  // Alias form without the vgx suffix inside the brackets.
1530  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
1531                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
1532}
1533
// Encoding class for the multi/multi form with "vgx4". Compared with the
// vgx2 class, $Zn and $Zm are encoded in 3 bits each, and the freed bits
// (Inst{17-15} and Inst{6}) hold constants. `op` is split as
// op{6} -> Inst{22}, op{5-3} -> Inst{12-10}, op{2-0} -> Inst{5-3}.
1534class sme2_dot_mla_add_sub_array_vg4_multi<bits<7> op,
1535                                            MatrixOperand matrix_ty,
1536                                            RegisterOperand multi_vector_ty,
1537                                            string mnemonic>
1538   : I<(outs matrix_ty:$ZAd),
1539       (ins  matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
1540        sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm),
1541        mnemonic, "\t$ZAd[$Rv, $imm3, vgx4], $Zn, $Zm",
1542        "", []>, Sched<[]>{
1543  bits<3> Zm;
1544  bits<3> Zn;
1545  bits<2> Rv;
1546  bits<3> imm3;
1547  let Inst{31-23} = 0b110000011;
1548  let Inst{22}    = op{6}; //sz
1549  let Inst{21}    = 0b1;
1550  let Inst{20-18} = Zm;
1551  let Inst{17-15} = 0b010;
1552  let Inst{14-13} = Rv;
1553  let Inst{12-10} = op{5-3};
1554  let Inst{9-7}   = Zn;
1555  let Inst{6}     = 0b0;
1556  let Inst{5-3}   = op{2-0};
1557  let Inst{2-0}   = imm3;
1558  let Constraints = "$ZAd = $_ZAd";
1559}
1560
// Defines the vgx4 multi/multi instruction, its _PSEUDO, a
// SME2_ZA_TwoOp_VG4_Multi_Multi_Pat selection pattern for `intrinsic`, and
// an assembler alias that omits the vgx4 suffix.
// Note: despite its name, `zpr_ty` is a ValueType here and is only passed
// on to the pattern class.
1561multiclass sme2_dot_mla_add_sub_array_vg4_multi<string mnemonic, bits<7> op,
1562                                            MatrixOperand  matrix_ty,
1563                                            RegisterOperand multi_vector_ty,
1564                                            ValueType zpr_ty, SDPatternOperator intrinsic>{
1565  def NAME : sme2_dot_mla_add_sub_array_vg4_multi<op, matrix_ty, multi_vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
1566
1567  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, SMEMatrixArray>;
1568
1569  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7, zpr_ty, tileslice16>;
1570
  // Alias form without the vgx suffix inside the brackets.
1571  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
1572                 (!cast<Instruction>(NAME) matrix_ty:$ZAd,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
1573}
1574
1575//===----------------------------------------------------------------------===//
1576// SME2 multiple vectors binary, two or four registers
1577
// Common encoding for instructions printed as "$ZAdn[$Rv, $imm3, vgxN], $Zm"
// that take a single multi-vector source and a tied matrix operand.
//   sz  -> Inst{22}
//   vg4 -> Inst{16}; also selects the "vgx4" (1) or "vgx2" (0) suffix
//   op  -> op{2} in Inst{18}, op{1-0} in Inst{4-3}
// Inst{9-6} ($Zm) is left for the _vg2/_vg4 subclasses to assign.
1578class sme2_multivec_accum_add_sub<string mnemonic, bit sz, bit vg4, bits<3> op,
1579                                  MatrixOperand matrix_ty,
1580                                  RegisterOperand vector_ty>
1581    : I<(outs matrix_ty:$ZAdn),
1582        (ins matrix_ty:$_ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm),
1583        mnemonic, "\t$ZAdn[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zm",
1584        "", []>, Sched<[]> {
1585  bits<2> Rv;
1586  bits<3> imm3;
1587  let Inst{31-23} = 0b110000011;
1588  let Inst{22}    = sz;
1589  let Inst{21-19} = 0b100;
1590  let Inst{18}    = op{2};
1591  let Inst{17}    = 0b0;
1592  let Inst{16}    = vg4;
1593  let Inst{15}    = 0b0;
1594  let Inst{14-13} = Rv;
1595  let Inst{12-10} = 0b111;
1596  let Inst{5}     = 0b0;
1597  let Inst{4-3}   = op{1-0};
1598  let Inst{2-0}   = imm3;
1599
1600  let Constraints = "$ZAdn = $_ZAdn";
1601}
1602
// vgx2 specialisation of sme2_multivec_accum_add_sub (vg4 = 0): $Zm is a
// 4-bit field occupying Inst{9-6}.
1603class sme2_multivec_accum_add_sub_vg2<string mnemonic, bit sz, bits<3> op,
1604                                      MatrixOperand matrix_ty,
1605                                      RegisterOperand vector_ty>
1606    : sme2_multivec_accum_add_sub<mnemonic, sz, 0b0, op, matrix_ty, vector_ty> {
1607  bits<4> Zm;
1608  let Inst{9-6} = Zm;
1609}
1610
1611
// Defines the vgx2 instruction, an alias without the vgx suffix, a _PSEUDO,
// and a SME2_ZA_VG1x2_Multi_Pat selection pattern for `intrinsic` on `vty`.
// The 4-bit `op` is split: op{3} is passed as `sz`, op{2-0} as the class's
// 3-bit `op`.
1612multiclass sme2_multivec_accum_add_sub_vg2<string mnemonic, bits<4> op,
1613                                           MatrixOperand matrix_ty,
1614                                           RegisterOperand vector_ty,
1615                                           ValueType vty,
1616                                           SDPatternOperator intrinsic> {
1617  def NAME : sme2_multivec_accum_add_sub_vg2<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>,
1618                                             SMEPseudo2Instr<NAME, 1>;
  // Alias form "$ZAdn[$Rv, $imm3], $Zm" (no vgx suffix).
1619  def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
1620  (!cast<Instruction>(NAME) matrix_ty:$ZAdn,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>;
1621
1622  def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty, SMEMatrixArray>;
1623  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7, tileslice16>;
1624}
1625
// vgx4 specialisation of sme2_multivec_accum_add_sub (vg4 = 1): $Zm is a
// 3-bit field occupying Inst{9-7}, and Inst{6} is fixed to 0.
1626class sme2_multivec_accum_add_sub_vg4<string mnemonic, bit sz, bits<3> op,
1627                                      MatrixOperand matrix_ty,
1628                                      RegisterOperand vector_ty>
1629    : sme2_multivec_accum_add_sub<mnemonic, sz, 0b1, op, matrix_ty, vector_ty> {
1630  bits<3> Zm;
1631  let Inst{9-7} = Zm;
1632  let Inst{6}   = 0b0;
1633}
1634
// Defines the vgx4 instruction, an alias without the vgx suffix, a _PSEUDO,
// and a SME2_ZA_VG1x4_Multi_Pat selection pattern for `intrinsic` on `vty`.
// The 4-bit `op` is split: op{3} is passed as `sz`, op{2-0} as the class's
// 3-bit `op`.
1635multiclass sme2_multivec_accum_add_sub_vg4<string mnemonic, bits<4> op,
1636                                           MatrixOperand matrix_ty,
1637                                           RegisterOperand vector_ty,
1638                                           ValueType vty,
1639                                           SDPatternOperator intrinsic> {
1640  def NAME : sme2_multivec_accum_add_sub_vg4<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>,
1641                                             SMEPseudo2Instr<NAME, 1>;
  // Alias form "$ZAdn[$Rv, $imm3], $Zm" (no vgx suffix).
1642  def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
1643  (!cast<Instruction>(NAME) matrix_ty:$ZAdn,  MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>;
1644
1645  def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty, SMEMatrixArray>;
1646  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7, tileslice16>;
1647}
1648
1649//===----------------------------------------------------------------------===//
1650// SME2 Multi-vector - Multiple and Single SVE Destructive
1651// Two and Four registers
1652
// Encoding class for a destructive instruction with a multi-vector
// destination/source $Zdn (4-bit field) and a single-vector $Zm (4-bit
// field). The tied operand is printed twice: "$Zdn, $_Zdn, $Zm".
//   sz -> Inst{23-22}
//   op -> op{6-1} in Inst{10-5}, op{0} in Inst{0}
1653class sme2_sve_destructive_vector_vg2_single<bits<2> sz, bits<7> op,
1654                                             RegisterOperand vector_ty,
1655                                             ZPRRegOp zpr_ty,
1656                                             string mnemonic>
1657    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm),
1658        mnemonic, "\t$Zdn, $_Zdn, $Zm",
1659        "", []>, Sched<[]> {
1660  bits<4> Zm;
1661  bits<4> Zdn;
1662  let Inst{31-24} = 0b11000001;
1663  let Inst{23-22} = sz;
1664  let Inst{21-20} = 0b10;
1665  let Inst{19-16} = Zm;
1666  let Inst{15-11} = 0b10100;
1667  let Inst{10-5}  = op{6-1};
1668  let Inst{4-1}   = Zdn;
1669  let Inst{0}     = op{0};
1670
1671  let Constraints = "$Zdn = $_Zdn";
1672}
1673
// Floating-point instantiations of the vg2 single form: _H, _S and _D with
// sz = 0b01, 0b10 and 0b11. There is no _B variant.
1674multiclass sme2_fp_sve_destructive_vector_vg2_single<string mnemonic, bits<7> op> {
1675  def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>;
1676  def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>;
1677  def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>;
1678}
1679
// Integer instantiations of the vg2 single form: _B, _H, _S and _D with
// sz = 0b00 through 0b11.
1680multiclass sme2_int_sve_destructive_vector_vg2_single<string mnemonic, bits<7> op> {
1681  def _B : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_b_mul_r, ZPR4b8, mnemonic>;
1682  def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>;
1683  def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>;
1684  def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>;
1685}
1686
1687// SME2.1 fmax/fmin instructions.
// Single _H instantiation of the vg2 single form that uses sz = 0b00 with
// the halfword register classes (the fp multiclass uses sz = 0b01 for _H).
1688multiclass sme2p1_bf_max_min_vector_vg2_single<string mnemonic, bits<7>op> {
1689  def _H : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_h_mul_r,
1690                                                  ZPR4b16, mnemonic>;
1691}
1692
// Four-register counterpart of sme2_sve_destructive_vector_vg2_single:
// $Zdn shrinks to a 3-bit field in Inst{4-2} with Inst{1} fixed to 0, and
// Inst{15-11} is 0b10101 instead of 0b10100. $Zm remains a 4-bit field.
//   sz -> Inst{23-22}
//   op -> op{6-1} in Inst{10-5}, op{0} in Inst{0}
1693class sme2_sve_destructive_vector_vg4_single<bits<2> sz, bits<7> op,
1694                                             RegisterOperand vector_ty,
1695                                             ZPRRegOp zpr_ty,
1696                                             string mnemonic>
1697    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm),
1698        mnemonic, "\t$Zdn, $_Zdn, $Zm",
1699        "", []>, Sched<[]> {
1700  bits<4> Zm;
1701  bits<3> Zdn;
1702  let Inst{31-24} = 0b11000001;
1703  let Inst{23-22} = sz;
1704  let Inst{21-20} = 0b10;
1705  let Inst{19-16} = Zm;
1706  let Inst{15-11} = 0b10101;
1707  let Inst{10-5}  = op{6-1};
1708  let Inst{4-2}   = Zdn;
1709  let Inst{1}     = 0b0;
1710  let Inst{0}     = op{0};
1711
1712  let Constraints = "$Zdn = $_Zdn";
1713}
1714
// Floating-point instantiations of the vg4 single form: _H, _S and _D with
// sz = 0b01, 0b10 and 0b11. There is no _B variant.
1715multiclass sme2_fp_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> {
1716  def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
1717  def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>;
1718  def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>;
1719}
1720
// Integer instantiations of the vg4 single form: _B, _H, _S and _D with
// sz = 0b00 through 0b11.
1721multiclass sme2_int_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> {
1722  def _B : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_b_mul_r, ZPR4b8, mnemonic>;
1723  def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
1724  def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>;
1725  def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>;
1726}
1727
1728// SME2.1 fmax/fmin instructions.
// Single _H instantiation of the vg4 single form that uses sz = 0b00 with
// the halfword register classes (the fp multiclass uses sz = 0b01 for _H).
1729multiclass sme2p1_bf_max_min_vector_vg4_single<string mnemonic, bits<7>op> {
1730  def _H : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_h_mul_r,
1731                                                  ZPR4b16, mnemonic>;
1732}
1733
// Encoding class for a destructive instruction where both $Zdn and $Zm are
// multi-vector operands of the same vector_ty, each a 4-bit field. The tied
// operand is printed twice: "$Zdn, $_Zdn, $Zm".
//   sz -> Inst{23-22}
//   op -> op{6-1} in Inst{10-5}, op{0} in Inst{0}
1734class sme2_sve_destructive_vector_vg2_multi<bits<2> sz, bits<7> op,
1735                                            RegisterOperand vector_ty,
1736                                            string mnemonic>
1737    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
1738        mnemonic, "\t$Zdn, $_Zdn, $Zm",
1739        "", []>, Sched<[]> {
1740  bits<4> Zm;
1741  bits<4> Zdn;
1742  let Inst{31-24} = 0b11000001;
1743  let Inst{23-22} = sz;
1744  let Inst{21}    = 0b1;
1745  let Inst{20-17} = Zm;
1746  let Inst{16-11} = 0b010110;
1747  let Inst{10-5}  = op{6-1};
1748  let Inst{4-1}   = Zdn;
1749  let Inst{0}     = op{0};
1750
1751  let Constraints = "$Zdn = $_Zdn";
1752}
1753
// Floating-point instantiations of the vg2 multi form: _H, _S and _D with
// sz = 0b01, 0b10 and 0b11. There is no _B variant.
1754multiclass sme2_fp_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
1755  def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>;
1756  def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>;
1757  def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>;
1758}
1759
// Integer instantiations of the vg2 multi form: _B, _H, _S and _D with
// sz = 0b00 through 0b11.
1760multiclass sme2_int_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
1761  def _B : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_b_mul_r, mnemonic>;
1762  def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>;
1763  def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>;
1764  def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>;
1765}
1766
1767// SME2.1 fmax/fmin instructions.
// Single _H instantiation of the vg2 multi form that uses sz = 0b00 with
// the halfword register class (the fp multiclass uses sz = 0b01 for _H).
1768multiclass sme2p1_bf_max_min_vector_vg2_multi<string mnemonic, bits<7>op> {
1769  def _H : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_h_mul_r,
1770                                                 mnemonic>;
1771}
1772
// Four-register counterpart of sme2_sve_destructive_vector_vg2_multi: $Zm
// and $Zdn are 3-bit fields (Inst{20-18} and Inst{4-2}), Inst{1} is fixed
// to 0, and the constant field widens to Inst{17-11} = 0b0010111.
//   sz -> Inst{23-22}
//   op -> op{6-1} in Inst{10-5}, op{0} in Inst{0}
1773class sme2_sve_destructive_vector_vg4_multi<bits<2> sz, bits<7> op,
1774                                            RegisterOperand vector_ty,
1775                                            string mnemonic>
1776    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
1777        mnemonic, "\t$Zdn, $_Zdn, $Zm",
1778        "", []>, Sched<[]> {
1779  bits<3> Zm;
1780  bits<3> Zdn;
1781  let Inst{31-24} = 0b11000001;
1782  let Inst{23-22} = sz;
1783  let Inst{21}    = 0b1;
1784  let Inst{20-18} = Zm;
1785  let Inst{17-11} = 0b0010111;
1786  let Inst{10-5}  = op{6-1};
1787  let Inst{4-2}   = Zdn;
1788  let Inst{1}     = 0b0;
1789  let Inst{0}     = op{0};
1790
1791  let Constraints = "$Zdn = $_Zdn";
1792}
1793
// Floating-point instantiations of the vg4 multi form: _H, _S and _D with
// sz = 0b01, 0b10 and 0b11. There is no _B variant.
1794multiclass sme2_fp_sve_destructive_vector_vg4_multi<string mnemonic, bits<7> op> {
1795  def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r, mnemonic>;
1796  def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r, mnemonic>;
1797  def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r, mnemonic>;
1798}
1799
// Integer instantiations of the vg4 multi form: _B, _H, _S and _D with
// sz = 0b00 through 0b11.
1800multiclass sme2_int_sve_destructive_vector_vg4_multi<string mnemonic, bits<7> op> {
1801  def _B : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_b_mul_r, mnemonic>;
1802  def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r, mnemonic>;
1803  def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r, mnemonic>;
1804  def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r, mnemonic>;
1805}
1806
1807// SME2.1 fmax/fmin instructions.
// Single _H instantiation of the vg4 multi form that uses sz = 0b00 with
// the halfword register class (the fp multiclass uses sz = 0b01 for _H).
1808multiclass sme2p1_bf_max_min_vector_vg4_multi<string mnemonic, bits<7>op> {
1809  def _H : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_h_mul_r,
1810                                                 mnemonic>;
1811}
1812
1813//===----------------------------------------------------------------------===//
1814// SME2 Multi-vector - Index/Single/Multi Array Vectors FMA sources
1815
// Shared encoding for indexed instructions on MatrixOp32, printed as
// "$ZAda[$Rv, $imm(, vgxN)], $Zn, $Zm$i3".
//   op0        -> Inst{23-22}
//   op         -> Inst{4-3}
//   vg_acronym -> when empty (the default) no suffix is printed and
//                 Inst{20} = 0; otherwise ", <vg_acronym>" is appended
//                 inside the brackets and Inst{20} = 1.
// Inst{15}, Inst{11-5} and Inst{2-0} ($i3, $Zn, $imm) are left for the
// deriving records to assign. $ZAda is tied to the input $_ZAda.
1816class sme2_mla_long_array_index_base<bits<2> op0, bits<2> op, Operand index_ty,
1817                                     RegisterOperand multi_vector_ty,
1818                                     string mnemonic, string vg_acronym="">
1819    : I<(outs MatrixOp32:$ZAda),
1820        (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm, multi_vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3),
1821        mnemonic, "\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm$i3",
1822        "", []>, Sched<[]> {
1823  bits<4> Zm;
1824  bits<2> Rv;
1825  let Inst{31-24} = 0b11000001;
1826  let Inst{23-22} = op0;
1827  let Inst{21}    = 0b0;
1828  let Inst{20}    = !if(!eq(vg_acronym, ""), 0, 1);
1829  let Inst{19-16} = Zm;
1830  let Inst{14-13} = Rv;
1831  let Inst{12}    = 0b1;
1832  let Inst{4-3}   = op;
1833
1834  let Constraints = "$ZAda = $_ZAda";
1835}
1836
// Single-vector indexed form (no vg suffix): defines _HtoS with a ZPR16
// $Zn and a uimm3s2range slice immediate, its _PSEUDO, and a
// SME2_ZA_TwoOp_Multi_Index_Pat selection pattern for `intrinsic`.
// Note: despite its name, `zpr_ty` is a ValueType passed to the pattern.
1837multiclass sme2_mla_long_array_index<string mnemonic, bits<2> op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
1838  def _HtoS : sme2_mla_long_array_index_base<op0, op, uimm3s2range, ZPR16,
1839                                          mnemonic>, SMEPseudo2Instr<NAME # _HtoS, 1> {
1840    bits<3> i3;
1841    bits<5> Zn;
1842    bits<3> imm;
    // The 3-bit lane index is split: i3{2} in Inst{15}, i3{1-0} in
    // Inst{11-10}.
1843    let Inst{15}    = i3{2};
1844    let Inst{11-10} = i3{1-0};
1845    let Inst{9-5}   = Zn;
1846    let Inst{2-0}   = imm;
1847  }
1848
1849  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm3s2range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
1850
1851  def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange3s2>;
1852}
1853
// "vgx2" indexed form over ZZ_h_mul_r with a uimm2s2range slice immediate.
// Fills in the fields left open by the base class: Inst{15} = 0, a 4-bit
// $Zn in Inst{9-6}, Inst{5} = 0, and a 2-bit $imm in Inst{1-0}. The lane
// index is split as i3{2-1} in Inst{11-10} and i3{0} in Inst{2}.
1854class sme2_mla_long_array_vg2_index<string mnemonic, bits<2> op0, bits<2> op>
1855    : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZ_h_mul_r,
1856                                     mnemonic, "vgx2"> {
1857  bits<3> i3;
1858  bits<4> Zn;
1859  bits<2> imm;
1860  let Inst{15}    = 0b0;
1861  let Inst{11-10} = i3{2-1};
1862  let Inst{9-6}   = Zn;
1863  let Inst{5}     = 0b0;
1864  let Inst{2}     = i3{0};
1865  let Inst{1-0}   = imm;
1866}
1867
// Floating-point vgx2 indexed variant (op0 = 0b10): defines _HtoS, its
// _PSEUDO, a SME2_ZA_TwoOp_VG2_Multi_Index_Pat selection pattern for
// `intrinsic` on value type `zpr_ty`, and an alias without the vgx2 suffix.
1868multiclass sme2_fp_mla_long_array_vg2_index<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
1869  def _HtoS : sme2_mla_long_array_vg2_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
1870
1871  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
1872
1873  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;
1874
  // Alias form without the vgx suffix inside the brackets.
1875  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
1876                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
1877}
1878
// Integer vgx2 indexed variant (op0 = 0b11), with the pattern value type
// fixed to nxv8i16. Defines the instruction, its _PSEUDO, the selection
// pattern and an alias without the vgx2 suffix.
// NOTE(review): the records here use the suffix _S, whereas the vg4 integer
// multiclass and both fp multiclasses use _HtoS. Renaming would change the
// generated record names, so it is left as is.
1879multiclass sme2_int_mla_long_array_vg2_index<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
1880  def _S : sme2_mla_long_array_vg2_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _S, 1>;
1881
1882  def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
1883
1884  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s2>;
1885
  // Alias form without the vgx suffix inside the brackets.
1886  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
1887                 (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
1888}
1889
// "vgx4" indexed form over ZZZZ_h_mul_r with a uimm2s2range slice
// immediate. Differs from the vgx2 class in that Inst{15} = 1, $Zn is a
// 3-bit field in Inst{9-7}, and Inst{6-5} = 0b00. The lane index is split
// as i3{2-1} in Inst{11-10} and i3{0} in Inst{2}.
1890class sme2_mla_long_array_vg4_index<string mnemonic, bits<2> op0, bits<2> op>
1891    : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZZZ_h_mul_r,
1892                                      mnemonic, "vgx4"> {
1893  bits<3> i3;
1894  bits<3> Zn;
1895  bits<2> imm;
1896  let Inst{15}    = 0b1;
1897  let Inst{11-10} = i3{2-1};
1898  let Inst{9-7}   = Zn;
1899  let Inst{6-5}   = 0b00;
1900  let Inst{2}     = i3{0};
1901  let Inst{1-0}   = imm;
1902}
1903
// Floating-point vgx4 indexed variant (op0 = 0b10): defines _HtoS, its
// _PSEUDO, a SME2_ZA_TwoOp_VG4_Multi_Index_Pat selection pattern for
// `intrinsic` on value type `zpr_ty`, and an alias without the vgx4 suffix.
1904multiclass sme2_fp_mla_long_array_vg4_index<string mnemonic, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> {
1905  def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
1906
1907  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
1908
1909  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty, VectorIndexH32b_timm, tileslicerange2s2>;
1910
  // Alias form without the vgx suffix inside the brackets.
1911  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
1912                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
1913}
1914
// Integer vgx4 indexed variant (op0 = 0b11), with the pattern value type
// fixed to nxv8i16. Defines _HtoS, its _PSEUDO, the selection pattern and
// an alias without the vgx4 suffix.
1915multiclass sme2_int_mla_long_array_vg4_index<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
1916  def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;
1917
1918  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
1919
1920  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s2>;
1921
  // Alias form without the vgx suffix inside the brackets.
1922  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
1923                 (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda,  MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
1924}
1925
// Shared encoding skeleton for the non-indexed long MLA forms in this file.
// It fixes Inst{31-21}, Inst{15-10} and Inst{4-3}; derived records fill in
// the vector registers, the slice offset and the remaining bits.
//
// vg_acronym: if non-empty, ", <vg_acronym>" is inserted after $imm inside the
// bracketed ZA index of the asm string and Inst{10} is 0; if empty, nothing is
// inserted and Inst{10} is 1.
class sme2_mla_long_array<bits<2> op0, bits<2> op,
                          MatrixOperand matrix_ty,
                          Operand index_ty,
                          RegisterOperand first_vector_ty,
                          RegisterOperand second_vector_ty,
                          string mnemonic, string vg_acronym = "">
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm,
             first_vector_ty:$Zn, second_vector_ty:$Zm),
        mnemonic,
        "\t$ZAda[$Rv, $imm"
          # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym)
          # "], $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<2> Rv;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21}    = 0b1;
  let Inst{15}    = 0b0;
  let Inst{12-11} = 0b01;

  // Bits driven by template arguments.
  let Inst{23-22} = op0;
  let Inst{10}    = !if(!eq(vg_acronym, ""), 1, 0);
  let Inst{4-3}   = op;

  // Operand field.
  let Inst{14-13} = Rv;

  // The ZA result is tied to the ZA input operand.
  let Constraints = "$ZAda = $_ZAda";
}
1949
// Single-vector form (no vg_acronym): defines the _HtoS instruction on
// MatrixOp32 with a uimm3s2range offset, its pseudo, and the selection
// pattern for `intrinsic`.
multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0,
                                      bits<2> op, ValueType zpr_ty,
                                      SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array<op0, op, MatrixOp32, uimm3s2range,
                                  ZPR16, ZPR4b16, mnemonic>,
              SMEPseudo2Instr<NAME # _HtoS, 1> {
    bits<4> Zm;
    bits<5> Zn;
    bits<3> imm;

    let Inst{20} = 0b0;

    // Operand fields: Zm is 4 bits wide, Zn 5 bits, the offset 3 bits.
    let Inst{19-16} = Zm;
    let Inst{9-5}   = Zn;
    let Inst{2-0}   = imm;
  }

  def _HtoS_PSEUDO
      : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm3s2range,
                                              ZPR16, ZPR4b16, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm3s2range,
                                       ZPR4b16, zpr_ty, tileslicerange3s2>;
}
1966
// Single-vector form on MatrixOp16 with ZPR8 / ZPR4b8 operands; op0 and op
// are both 0b00 and Inst{20} is set, which is what distinguishes it from the
// _HtoS record of sme2_mla_long_array_single (Inst{20} = 0).
class sme2_mla_long_array_single_16b<string mnemonic>
    : sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range,
                          ZPR8, ZPR4b8, mnemonic> {
  bits<4> Zm;
  bits<5> Zn;
  bits<3> imm;

  let Inst{20} = 0b1;

  let Inst{19-16} = Zm;
  let Inst{9-5}   = Zn;
  let Inst{2-0}   = imm;
}
1977
// Multi-vector x single-vector form shared by the vgx2 and vgx4 variants.
//   vg4 -> Inst{20}, o2 -> Inst{2}; the slice offset is a uimm2s2range
//   encoded in Inst{1-0}.
class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op,
                                      bit o2,
                                      MatrixOperand matrix_ty,
                                      RegisterOperand multi_vector_ty,
                                      ZPRRegOp zpr_ty,
                                      string mnemonic, string vg_acronym>
    : sme2_mla_long_array<op0, op, matrix_ty, uimm2s2range,
                          multi_vector_ty, zpr_ty, mnemonic, vg_acronym> {
  bits<4> Zm;
  bits<5> Zn;
  bits<2> imm;

  // Selector bits taken from template arguments.
  let Inst{20} = vg4;
  let Inst{2}  = o2;

  // Operand fields.
  let Inst{19-16} = Zm;
  let Inst{9-5}   = Zn;
  let Inst{1-0}   = imm;
}
1992
// "vgx2" multi x single form. The 3-bit `op` is split: op{2-1} becomes the
// base class `op`, op{0} becomes `o2`. Produces the instruction, its pseudo,
// the selection pattern and an alias that drops the vector-group suffix.
multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<3> op,
                                             MatrixOperand matrix_ty,
                                             RegisterOperand multi_vector_ty,
                                             ZPRRegOp vector_ty,
                                             ValueType zpr_ty,
                                             SDPatternOperator intrinsic> {
  def NAME : sme2_mla_long_array_vg24_single<0b00, 0b0, op{2-1}, op{0},
                                             matrix_ty, multi_vector_ty,
                                             vector_ty, mnemonic, "vgx2">,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO
      : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range,
                                              multi_vector_ty, vector_ty,
                                              SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm2s2range,
                                           vector_ty, zpr_ty,
                                           tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME)
                      matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm),
                  0>;
}
2009
// Integer "vgx2" multi x single form: op0 = 0b01, vg4 = 0, o2 = 0, operating
// on MatrixOp32 with ZZ_h / ZPR4b16 operands; the pattern uses nxv8i16.
multiclass sme2_int_mla_long_array_vg2_single<string mnemonic, bits<2> op,
                                              SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b0, op, 0b0, MatrixOp32,
                                              ZZ_h, ZPR4b16, mnemonic,
                                              "vgx2">,
              SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO
      : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range,
                                              ZZ_h, ZPR4b16, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _HtoS, intrinsic,
                                           uimm2s2range, ZPR4b16, nxv8i16,
                                           tileslicerange2s2>;

  // Alias without the ", vgx2" suffix in the ZA index.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _HtoS)
                      MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm),
                  0>;
}
2021
// "vgx4" counterpart of the vg2 multi x single multiclass: identical record
// set, but vg4 = 1, the acronym is "vgx4" and the VG4 pattern class is used.
multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op,
                                             MatrixOperand matrix_ty,
                                             RegisterOperand multi_vector_ty,
                                             ZPRRegOp vector_ty,
                                             ValueType zpr_ty,
                                             SDPatternOperator intrinsic> {
  def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0},
                                             matrix_ty, multi_vector_ty,
                                             vector_ty, mnemonic, "vgx4">,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO
      : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range,
                                              multi_vector_ty, vector_ty,
                                              SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm2s2range,
                                           vector_ty, zpr_ty,
                                           tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME)
                      matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm),
                  0>;
}
2039
// Integer "vgx4" multi x single form: op0 = 0b01, vg4 = 1, o2 = 0, operating
// on MatrixOp32 with ZZZZ_h / ZPR4b16 operands; the pattern uses nxv8i16.
multiclass sme2_int_mla_long_array_vg4_single<string mnemonic, bits<2> op,
                                              SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b1, op, 0b0, MatrixOp32,
                                              ZZZZ_h, ZPR4b16, mnemonic,
                                              "vgx4">,
              SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO
      : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range,
                                              ZZZZ_h, ZPR4b16, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _HtoS, intrinsic,
                                           uimm2s2range, ZPR4b16, nxv8i16,
                                           tileslicerange2s2>;

  // Alias without the ", vgx4" suffix in the ZA index.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _HtoS)
                      MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm),
                  0>;
}
2051
// "vgx2" multi x multi form: both vector operands use multi_vector_ty and are
// encoded in 4 bits each. The low two bits of `op` go to the base class; the
// top bit is placed in Inst{5}.
class sme2_mla_long_array_vg2_multi<string mnemonic, bits<2> op0, bits<3> op,
                                    MatrixOperand matrix_ty,
                                    RegisterOperand multi_vector_ty>
    : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range,
                          multi_vector_ty, multi_vector_ty,
                          mnemonic, "vgx2"> {
  bits<4> Zm;
  bits<4> Zn;
  bits<2> imm;

  // Constant bits.
  let Inst{16} = 0b0;
  let Inst{2}  = 0b0;

  let Inst{5} = op{2};  // fp8

  // Operand fields.
  let Inst{20-17} = Zm;
  let Inst{9-6}   = Zn;
  let Inst{1-0}   = imm;
}
2066
// "vgx2" multi x multi records with op0 = 0b10: instruction, pseudo,
// selection pattern for `intrinsic`, and an alias with a plain ZA index.
multiclass sme2_fp_mla_long_array_vg2_multi<string mnemonic, bits<3> op,
                                            MatrixOperand matrix_ty,
                                            RegisterOperand multi_vector_ty,
                                            ValueType zpr_ty,
                                            SDPatternOperator intrinsic> {
  def NAME : sme2_mla_long_array_vg2_multi<mnemonic, 0b10, op, matrix_ty,
                                           multi_vector_ty>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO
      : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range,
                                             multi_vector_ty, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range,
                                          zpr_ty, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME)
                      matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, multi_vector_ty:$Zn,
                      multi_vector_ty:$Zm),
                  0>;
}
2082
// Integer "vgx2" multi x multi records: op0 = 0b11 and the 3-bit op is built
// as {0b0, op}, so the bit routed to Inst{5} is always 0. Operands are
// MatrixOp32 and ZZ_h_mul_r; the pattern uses nxv8i16.
multiclass sme2_int_mla_long_array_vg2_multi<string mnemonic, bits<2> op,
                                             SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg2_multi<mnemonic, 0b11, {0b0, op},
                                            MatrixOp32, ZZ_h_mul_r>,
              SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO
      : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range,
                                             ZZ_h_mul_r, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _HtoS, intrinsic,
                                          uimm2s2range, nxv8i16,
                                          tileslicerange2s2>;

  // The alias names its offset operand $imm2 (the instruction calls it $imm).
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _HtoS)
                      MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm),
                  0>;
}
2094
// "vgx4" multi x multi form: both vector operands use multi_vector_ty and are
// encoded in 3 bits each, with the bits below each register field fixed.
// op{1-0} is forwarded to the base class; op{2} is placed in Inst{5}.
class sme2_mla_long_array_vg4_multi<string mnemonic, bits<2> op0, bits<3> op,
                                    MatrixOperand matrix_ty,
                                    RegisterOperand multi_vector_ty>
    : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range,
                          multi_vector_ty, multi_vector_ty,
                          mnemonic, "vgx4"> {
  bits<3> Zm;
  bits<3> Zn;
  bits<2> imm;

  // Constant bits.
  let Inst{17} = 0b0;
  let Inst{16} = 0b1;
  let Inst{6}  = 0b0;
  let Inst{2}  = 0b0;

  let Inst{5} = op{2};  // fp8

  // Operand fields.
  let Inst{20-18} = Zm;
  let Inst{9-7}   = Zn;
  let Inst{1-0}   = imm;
}
2112
// "vgx4" multi x multi records with op0 = 0b10: instruction, pseudo,
// selection pattern for `intrinsic`, and an alias with a plain ZA index.
multiclass sme2_fp_mla_long_array_vg4_multi<string mnemonic, bits<3> op,
                                            MatrixOperand matrix_ty,
                                            RegisterOperand multi_vector_ty,
                                            ValueType zpr_ty,
                                            SDPatternOperator intrinsic> {
  def NAME : sme2_mla_long_array_vg4_multi<mnemonic, 0b10, op, matrix_ty,
                                           multi_vector_ty>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO
      : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range,
                                             multi_vector_ty, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range,
                                          zpr_ty, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME)
                      matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, multi_vector_ty:$Zn,
                      multi_vector_ty:$Zm),
                  0>;
}
2127
// Integer "vgx4" multi x multi records: op0 = 0b11 and the 3-bit op is built
// as {0b0, op}, so the bit routed to Inst{5} is always 0. Operands are
// MatrixOp32 and ZZZZ_h_mul_r; the pattern uses nxv8i16.
multiclass sme2_int_mla_long_array_vg4_multi<string mnemonic, bits<2> op,
                                             SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg4_multi<mnemonic, 0b11, {0b0, op},
                                            MatrixOp32, ZZZZ_h_mul_r>,
              SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO
      : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range,
                                             ZZZZ_h_mul_r, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _HtoS, intrinsic,
                                          uimm2s2range, nxv8i16,
                                          tileslicerange2s2>;

  // The alias names its offset operand $imm2 (the instruction calls it $imm).
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
                  (!cast<Instruction>(NAME # _HtoS)
                      MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn,
                      ZZZZ_h_mul_r:$Zm),
                  0>;
}
2139
2140//===----------------------------------------------------------------------===//
// Unary "$Zd, $Zn" encoding in which source and destination are each encoded
// in 4 bits. The 5-bit `op` is split: op{4-1} -> Inst{19-16}, op{0} -> Inst{5}.
class sme2_frint_cvt_vg2_multi<bits<2> sz, bits<5> op,
                               RegisterOperand first_ty,
                               RegisterOperand second_ty,
                               string mnemonic>
    : I<(outs first_ty:$Zd),
        (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<4> Zn;
  bits<4> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-20} = 0b10;
  let Inst{15-10} = 0b111000;
  let Inst{0}     = 0b0;

  // Bits supplied by template arguments.
  let Inst{23-22} = sz;
  let Inst{19-16} = op{4-1};
  let Inst{5}     = op{0};

  // Register fields.
  let Inst{9-6} = Zn;
  let Inst{4-1} = Zd;
}
2157
2158// SME2 multi-vec FP to int convert two registers
2159// SME2 multi-vec int to FP two registers
multiclass sme2_fp_cvt_vg2_multi<string mnemonic, bits<5> op> {
  // One record, named after the defm: sz = 0b00, ZZ_s_mul_r on both sides.
  def NAME
      : sme2_frint_cvt_vg2_multi<0b00, op, ZZ_s_mul_r, ZZ_s_mul_r,
                                 mnemonic>;
}
2163
2164// SME2 multi-vec FRINT two registers
multiclass sme2_frint_vector_vg2_multi<string mnemonic, bits<5> op> {
  // Only an _S record is defined: sz = 0b10, ZZ_s_mul_r on both sides.
  def _S
      : sme2_frint_cvt_vg2_multi<0b10, op, ZZ_s_mul_r, ZZ_s_mul_r,
                                 mnemonic>;
}
2168
// Unary "$Zd, $Zn" encoding in which source and destination are each encoded
// in 3 bits. The 7-bit `op` is scattered over three places:
//   op{6-3} -> Inst{19-16}, op{2-1} -> Inst{6-5}, op{0} -> Inst{1}.
class sme2_frint_zip_cvt_vg4_multi<bits<2> sz, bits<7> op,
                                   RegisterOperand first_ty,
                                   RegisterOperand second_ty,
                                   string mnemonic>
    : I<(outs first_ty:$Zd),
        (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<3> Zn;
  bits<3> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-20} = 0b11;
  let Inst{15-10} = 0b111000;
  let Inst{0}     = 0b0;

  // Bits supplied by template arguments.
  let Inst{23-22} = sz;
  let Inst{19-16} = op{6-3};
  let Inst{6-5}   = op{2-1};
  let Inst{1}     = op{0};

  // Register fields.
  let Inst{9-7} = Zn;
  let Inst{4-2} = Zd;
}
2186
2187// SME2 multi-vec FP to int convert four registers
2188// SME2 multi-vec int to FP four registers
multiclass sme2_fp_cvt_vg4_multi<string mnemonic, bits<7> op> {
  // One record, named after the defm: sz = 0b00, ZZZZ_s_mul_r on both sides.
  def NAME
      : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
                                     mnemonic>;
}
2192
// SME2 multi-vec ZIP four registers (B/H/S/D variants; the quadword variant is defined separately below)
multiclass sme2_zip_vector_vg4<string mnemonic, bits<7> op> {
  // One record per register class; sz counts 0b00..0b11 from _B to _D and the
  // same multi-vector class is used for both destination and source.
  def _B : sme2_frint_zip_cvt_vg4_multi<0b00, op,
                                        ZZZZ_b_mul_r, ZZZZ_b_mul_r, mnemonic>;
  def _H : sme2_frint_zip_cvt_vg4_multi<0b01, op,
                                        ZZZZ_h_mul_r, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op,
                                        ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>;
  def _D : sme2_frint_zip_cvt_vg4_multi<0b11, op,
                                        ZZZZ_d_mul_r, ZZZZ_d_mul_r, mnemonic>;
}
2204
2205// SME2 multi-vec quadwords ZIP four registers
multiclass sme2_zip_vector_vg4_Q<string mnemonic, bits<7> op> {
  // Single record using ZZZZ_q_mul_r on both sides with sz = 0b00; callers
  // select this form through the `op` value they pass.
  def NAME
      : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_q_mul_r, ZZZZ_q_mul_r,
                                     mnemonic>;
}
2210
2211// SME2 multi-vec FRINT four registers
multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
  // Only an _S record is defined: sz = 0b10, ZZZZ_s_mul_r on both sides.
  def _S
      : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
                                     mnemonic>;
}
2216
// Unary "$Zd, $Zn" encoding with a 5-bit destination and a 4-bit source.
// The 5-bit `op` is scattered:
//   op{4} -> Inst{22}, op{3-1} -> Inst{18-16}, op{0} -> Inst{5}.
class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
                          RegisterOperand first_ty,
                          RegisterOperand second_ty>
    : I<(outs first_ty:$Zd),
        (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<4> Zn;
  bits<5> Zd;

  // Fixed opcode bits.
  let Inst{31-23} = 0b110000010;
  let Inst{21-19} = 0b100;
  let Inst{15-10} = 0b111000;

  // Opcode bits supplied by `op`.
  let Inst{22}    = op{4};
  let Inst{18-16} = op{3-1};
  let Inst{5}     = op{0};

  // Register fields.
  let Inst{9-6} = Zn;
  let Inst{4-0} = Zd;
}
2232
2233// SME2 multi-vec FP down convert two registers
2234// SME2 multi-vec int down convert two registers
multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op,
                               ValueType out_vt, ValueType in_vt,
                               SDPatternOperator intrinsic> {
  // Instruction: ZPR16 destination, ZZ_s_mul_r source.
  def NAME : sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;

  // Selection pattern tying `intrinsic` (in_vt -> out_vt) to the instruction.
  def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
}
2240
2241// SME2 multi-vec FP8 down convert two registers
multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op> {
  // The 5-bit opcode is {op, 0b1000}: `op` is the top bit, the rest is fixed.
  // Destination is ZPR8, source is ZZ_h_mul_r.
  def NAME
      : sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>;
}
2245
// Unary "$Zd, $Zn" encoding with a 4-bit destination and a 5-bit source.
// `op` occupies Inst{18-16} and `u` the lowest bit of the instruction word.
class sme2_cvt_unpk_vector_vg2<bits<2> sz, bits<3> op, bit u,
                               RegisterOperand first_ty,
                               RegisterOperand second_ty,
                               string mnemonic>
    : I<(outs first_ty:$Zd),
        (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<4> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-19} = 0b100;
  let Inst{15-10} = 0b111000;

  // Bits supplied by template arguments.
  let Inst{23-22} = sz;
  let Inst{18-16} = op;
  let Inst{0}     = u;

  // Register fields.
  let Inst{9-5} = Zn;
  let Inst{4-1} = Zd;
}
2261
2262// SME2 multi-vec unpack two registers
multiclass sme2_unpk_vector_vg2<string mnemonic, bit u> {
  // All variants share op = 0b101; sz is 0b01 / 0b10 / 0b11 for _H / _S / _D.
  def _H : sme2_cvt_unpk_vector_vg2<0b01, 0b101, u,
                                    ZZ_h_mul_r, ZPR8, mnemonic>;
  def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b101, u,
                                    ZZ_s_mul_r, ZPR16, mnemonic>;
  def _D : sme2_cvt_unpk_vector_vg2<0b11, 0b101, u,
                                    ZZ_d_mul_r, ZPR32, mnemonic>;
}
2268
2269// SME2.1 multi-vec convert two registers
multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
  // Only an _S record: sz = 0b10, op = 0b000, `l` in the low instruction bit;
  // destination ZZ_s_mul_r, source ZPR16.
  def _S
      : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16,
                                 mnemonic>;
}
2273
2274// SME2 multi-vec FP8 up convert two registers
multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc,
                                            bit L> {
  // op = 0b110; `opc` fills the sz field and `L` the low instruction bit.
  // NOTE(review): the record suffix is the literal "_NAME" (not the NAME
  // paste), so generated records end in "_NAME" - confirm this is intended
  // before renaming, since users of the records depend on the spelling.
  def _NAME
      : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>;
}
2278
2279
// Unary "$Zd, $Zn" encoding with a 5-bit destination and a 3-bit source.
// `op` is split (op{2} -> Inst{22}, op{1-0} -> Inst{6-5}); `op2` fills
// Inst{19-16} and `sz` is a single bit at Inst{23}.
class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4> op2,
                          RegisterOperand first_ty,
                          RegisterOperand second_ty,
                          string mnemonic>
    : I<(outs first_ty:$Zd),
        (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<3> Zn;
  bits<5> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-20} = 0b11;
  let Inst{15-10} = 0b111000;

  // Bits supplied by template arguments.
  let Inst{23}    = sz;
  let Inst{22}    = op{2};
  let Inst{19-16} = op2;
  let Inst{6-5}   = op{1-0};

  // Register fields.
  let Inst{9-7} = Zn;
  let Inst{4-0} = Zd;
}
2296
2297// SME2 multi-vec int down convert four registers
multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op,
                                   SDPatternOperator intrinsic> {
  // Two instructions sharing op2 = 0b0011; they differ in the sz bit and in
  // the destination/source register classes.
  def _StoB
      : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>;
  def _DtoH
      : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>;

  // One selection pattern per instruction: nxv4i32 -> nxv16i8 and
  // nxv2i64 -> nxv8i16.
  def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
  def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
}
2305
// SME2 multi-vec FP8 down convert four registers
multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N> {
  // sz = 0, op = {0b00, N}, op2 = 0b0100; ZPR8 destination, ZZZZ_s_mul_r
  // source.
  // NOTE(review): the record suffix is the literal "_NAME" (not the NAME
  // paste) - confirm this is intended before renaming.
  def _NAME
      : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r,
                            mnemonic>;
}
2310
// Unary "$Zd, $Zn" encoding with a 3-bit destination and a 4-bit source.
// Apart from `sz` and `u`, every opcode bit is a constant.
class sme2_unpk_vector_vg4<bits<2> sz, bit u,
                           RegisterOperand first_ty,
                           RegisterOperand second_ty,
                           string mnemonic>
    : I<(outs first_ty:$Zd),
        (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>,
      Sched<[]> {
  bits<4> Zn;
  bits<3> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-10} = 0b110101111000;
  let Inst{5}     = 0b0;
  let Inst{1}     = 0b0;

  // Bits supplied by template arguments.
  let Inst{23-22} = sz;
  let Inst{0}     = u;

  // Register fields.
  let Inst{9-6} = Zn;
  let Inst{4-2} = Zd;
}
2326
2327// SME2 multi-vec UNPK four registers
multiclass sme2_unpk_vector_vg4<string mnemonic, bit u> {
  // sz is 0b01 / 0b10 / 0b11 for _H / _S / _D; each variant pairs a ZZZZ_*
  // destination with a ZZ_* source.
  def _H : sme2_unpk_vector_vg4<0b01, u,
                                ZZZZ_h_mul_r, ZZ_b_mul_r, mnemonic>;
  def _S : sme2_unpk_vector_vg4<0b10, u,
                                ZZZZ_s_mul_r, ZZ_h_mul_r, mnemonic>;
  def _D : sme2_unpk_vector_vg4<0b11, u,
                                ZZZZ_d_mul_r, ZZ_s_mul_r, mnemonic>;
}
2333
2334//===----------------------------------------------------------------------===//
2335// SME2 multi-vec CLAMP registers
2336
// Common part of the two- and four-register clamp encodings. The
// multi-vector destination is also an input ($_Zd, tied to $Zd); Zn and Zm
// are single vectors of vector_ty. The Zd field itself is assigned by the
// vg2 / vg4 subclasses.
class sme2_clamp_vector_vg24_multi<bits<2> sz, bits<3> op1, bit u,
                                   RegisterOperand multi_vector_ty,
                                   ZPRRegOp vector_ty,
                                   string mnemonic>
    : I<(outs multi_vector_ty:$Zd),
        (ins multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm", "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<5> Zn;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21}    = 0b1;
  let Inst{15-13} = 0b110;

  // Bits supplied by template arguments.
  let Inst{23-22} = sz;
  let Inst{12-10} = op1;
  let Inst{0}     = u;

  // Register fields.
  let Inst{20-16} = Zm;
  let Inst{9-5}   = Zn;

  // Destination is tied to the first input operand.
  let Constraints = "$Zd = $_Zd";
}
2357
// Two-register clamp: adds a 4-bit Zd field at Inst{4-1} to the shared
// vg24 encoding.
class sme2_clamp_vector_vg2_multi<bits<2> sz, bits<3> op1, bit u,
                                  RegisterOperand multi_vector_ty,
                                  ZPRRegOp vector_ty,
                                  string mnemonic>
    : sme2_clamp_vector_vg24_multi<sz, op1, u,
                                   multi_vector_ty, vector_ty, mnemonic> {
  bits<4> Zd;

  let Inst{4-1} = Zd;
}
2366
multiclass sme2_fp_clamp_vector_vg2_multi<string mnemonic> {
  // op1 = 0b000 and u = 0 throughout; sz is 0b01 / 0b10 / 0b11 for
  // _H / _S / _D (no _B variant is defined here).
  def _H : sme2_clamp_vector_vg2_multi<0b01, 0b000, 0b0,
                                       ZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg2_multi<0b10, 0b000, 0b0,
                                       ZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg2_multi<0b11, 0b000, 0b0,
                                       ZZ_d_mul_r, ZPR64, mnemonic>;
}
2372
multiclass sme2_int_clamp_vector_vg2_multi<string mnemonic, bit u> {
  // op1 = 0b001 throughout; `u` is forwarded unchanged and sz runs
  // 0b00..0b11 from _B to _D.
  def _B : sme2_clamp_vector_vg2_multi<0b00, 0b001, u,
                                       ZZ_b_mul_r, ZPR8, mnemonic>;
  def _H : sme2_clamp_vector_vg2_multi<0b01, 0b001, u,
                                       ZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg2_multi<0b10, 0b001, u,
                                       ZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg2_multi<0b11, 0b001, u,
                                       ZZ_d_mul_r, ZPR64, mnemonic>;
}
2379
2380// SME2.1 multi-vec FCLAMP two registers
multiclass sme2p1_bfclamp_vector_vg2_multi<string mnemonic> {
  // Uses the halfword register classes with sz = 0b00; the _H record of
  // sme2_fp_clamp_vector_vg2_multi uses the same classes with sz = 0b01.
  def _H
      : sme2_clamp_vector_vg2_multi<0b00, 0b000, 0b0, ZZ_h_mul_r, ZPR16,
                                    mnemonic>;
}
2385
// Four-register clamp: adds a 3-bit Zd field at Inst{4-2} to the shared
// vg24 encoding and fixes Inst{1} to 0.
class sme2_clamp_vector_vg4_multi<bits<2> sz, bits<3> op1, bit u,
                                  RegisterOperand multi_vector_ty,
                                  ZPRRegOp vector_ty,
                                  string mnemonic>
    : sme2_clamp_vector_vg24_multi<sz, op1, u,
                                   multi_vector_ty, vector_ty, mnemonic> {
  bits<3> Zd;

  let Inst{4-2} = Zd;
  let Inst{1}   = 0b0;
}
2395
multiclass sme2_fp_clamp_vector_vg4_multi<string mnemonic> {
  // op1 = 0b010 and u = 0 throughout; sz is 0b01 / 0b10 / 0b11 for
  // _H / _S / _D.
  def _H : sme2_clamp_vector_vg4_multi<0b01, 0b010, 0b0,
                                       ZZZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg4_multi<0b10, 0b010, 0b0,
                                       ZZZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg4_multi<0b11, 0b010, 0b0,
                                       ZZZZ_d_mul_r, ZPR64, mnemonic>;
}
2401
multiclass sme2_int_clamp_vector_vg4_multi<string mnemonic, bit u> {
  // op1 = 0b011 throughout; `u` is forwarded unchanged and sz runs
  // 0b00..0b11 from _B to _D.
  def _B : sme2_clamp_vector_vg4_multi<0b00, 0b011, u,
                                       ZZZZ_b_mul_r, ZPR8, mnemonic>;
  def _H : sme2_clamp_vector_vg4_multi<0b01, 0b011, u,
                                       ZZZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg4_multi<0b10, 0b011, u,
                                       ZZZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg4_multi<0b11, 0b011, u,
                                       ZZZZ_d_mul_r, ZPR64, mnemonic>;
}
2408
2409// SME2.1 multi-vec FCLAMP four registers
multiclass sme2p1_bfclamp_vector_vg4_multi<string mnemonic> {
  // Uses the halfword register classes with sz = 0b00; the _H record of
  // sme2_fp_clamp_vector_vg4_multi uses the same classes with sz = 0b01.
  def _H
      : sme2_clamp_vector_vg4_multi<0b00, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16,
                                    mnemonic>;
}
2414
2415// SME2 multi-vec ZIP two registers
// Binary "$Zd, $Zn, $Zm" encoding producing a multi-vector result from two
// single-vector inputs. `q` sits at Inst{10} and `u` at Inst{0}; Zd is a
// 4-bit field, Zn and Zm are 5-bit fields.
class sme2_zip_vector_vg2<bits<2> sz, bit q, bit u,
                          RegisterOperand multi_vector_ty,
                          ZPRRegOp vector_ty,
                          string mnemonic>
    : I<(outs multi_vector_ty:$Zd),
        (ins vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm", "", []>,
      Sched<[]> {
  bits<4> Zd;
  bits<5> Zm;
  bits<5> Zn;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21}    = 0b1;
  let Inst{15-11} = 0b11010;

  // Bits supplied by template arguments.
  let Inst{23-22} = sz;
  let Inst{10}    = q;
  let Inst{0}     = u;

  // Register fields.
  let Inst{20-16} = Zm;
  let Inst{9-5}   = Zn;
  let Inst{4-1}   = Zd;
}
2435
multiclass sme2_zip_vector_vg2<string mnemonic, bit op> {
  // _B.._D: q = 0 with sz running 0b00..0b11.
  def _B : sme2_zip_vector_vg2<0b00, 0b0, op,
                               ZZ_b_mul_r, ZPR8, mnemonic>;
  def _H : sme2_zip_vector_vg2<0b01, 0b0, op,
                               ZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_zip_vector_vg2<0b10, 0b0, op,
                               ZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_zip_vector_vg2<0b11, 0b0, op,
                               ZZ_d_mul_r, ZPR64, mnemonic>;

  // _Q: q = 1 with sz = 0b00.
  def _Q : sme2_zip_vector_vg2<0b00, 0b1, op,
                               ZZ_q_mul_r, ZPR128, mnemonic>;
}
2443
2444//===----------------------------------------------------------------------===//
2445// SME2 Dot Products and MLA
// Indexed multi-vector x single-vector operation on a ZA array operand, in
// the "vgx2" asm form. The 6-bit `op` is split into op{5-3} -> Inst{12-10}
// and op{2-0} -> Inst{5-3}; callers may pass unset ('?') bits in `op` and
// then assign the element index $i to those positions themselves.
class sme2_multi_vec_array_vg2_index<bits<2> sz, bits<6> op,
                                     MatrixOperand matrix_ty,
                                     RegisterOperand multi_vector_ty,
                                     ZPRRegOp vector_ty,
                                     Operand index_ty,
                                     string mnemonic>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv,
             sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm,
             index_ty:$i),
        mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i", "", []>,
      Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  bits<4> Zn;
  bits<3> imm3;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21-20} = 0b01;
  let Inst{15}    = 0b0;

  // Bits supplied by template arguments.
  let Inst{23-22} = sz;
  let Inst{12-10} = op{5-3};
  let Inst{5-3}   = op{2-0};

  // Operand fields.
  let Inst{19-16} = Zm;
  let Inst{14-13} = Rv;
  let Inst{9-6}   = Zn;
  let Inst{2-0}   = imm3;

  // The ZA result is tied to the ZA input operand.
  let Constraints = "$ZAda = $_ZAda";
}
2472
2473// SME2 multi-vec ternary indexed two registers 32-bit
multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz,
                                              bits<4> op,
                                              RegisterOperand multi_vector_ty,
                                              ZPRRegOp vector_ty,
                                              ValueType vt,
                                              SDPatternOperator intrinsic> {
  // The two '?' positions of the 6-bit opcode map to Inst{11-10}; the body
  // fills them with the 2-bit element index.
  def NAME : sme2_multi_vec_array_vg2_index<sz, {op{3},?,?,op{2-0}},
                                            MatrixOp32, multi_vector_ty,
                                            vector_ty, VectorIndexS32b_timm,
                                            mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<2> i;
    let Inst{11-10} = i;
  }

  def _PSEUDO
      : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7,
                                             multi_vector_ty, vector_ty,
                                             VectorIndexS32b_timm,
                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          vector_ty, vt,
                                          VectorIndexS32b_timm, tileslice16>;

  // Alias spelling without the ", vgx2" suffix in the ZA index.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME)
                      MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn,
                      vector_ty:$Zm, VectorIndexS32b_timm:$i),
                  0>;
}
2491
2492// SME2.1 multi-vec ternary indexed two registers 16-bit
multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz,
                                                bits<3> op,
                                                RegisterOperand multi_vector_ty,
                                                ZPRRegOp vector_ty,
                                                ValueType vt,
                                                SDPatternOperator intrinsic> {
  // Three opcode positions are left as '?': two map to Inst{11-10} and one
  // to Inst{3}. The 3-bit element index is split across them.
  def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?},
                                            MatrixOp16, multi_vector_ty,
                                            vector_ty, VectorIndexH,
                                            mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> i;
    let Inst{11-10} = i{2-1};
    let Inst{3}     = i{0};
  }

  // NOTE(review): the pseudo takes VectorIndexH32b while the pattern below
  // uses VectorIndexH32b_timm and the instruction uses VectorIndexH - confirm
  // the mix is intentional.
  def _PSEUDO
      : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7,
                                             multi_vector_ty, vector_ty,
                                             VectorIndexH32b, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          vector_ty, vt,
                                          VectorIndexH32b_timm, tileslice16>;

  // Alias spelling without the ", vgx2" suffix in the ZA index.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME)
                      MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn,
                      vector_ty:$Zm, VectorIndexH:$i),
                  0>;
}
2512
2513// SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers
multiclass sme2p1_multi_vec_array_vg2_index_f8f16<string mnemonic,
                                                  bits<2> sz, bits<3> op,
                                                  RegisterOperand multi_vector_ty,
                                                  ZPRRegOp zpr_ty> {
  // Instruction only (no pseudo and no selection pattern are defined here).
  // The 3-bit element index fills the '?' opcode positions: i{2-1} goes to
  // Inst{11-10} and i{0} to Inst{3}.
  def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?},
                                            MatrixOp16, multi_vector_ty,
                                            zpr_ty, VectorIndexH, mnemonic> {
    bits<3> i;
    let Inst{11-10} = i{2-1};
    let Inst{3}     = i{0};
  }

  // Alias spelling without the ", vgx2" suffix in the ZA index.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME)
                      MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn,
                      zpr_ty:$Zm, VectorIndexH:$i),
                  0>;
}
2528
2529// SME2 multi-vec indexed FP8 two-way vertical dot product to single precision
2530// two registers
// Reuses the vg2 index encoding (two-register ZZ_b_mul_r source) but
// replaces the asm string so that the ZA index is printed with "vgx4"
// instead of the base class's "vgx2". The 2-bit index fills two of the '?'
// opcode positions: i{1} -> Inst{10}, i{0} -> Inst{3}.
class sme2_fp8_multi_vec_array_vg4_index<string mnemonic, bit T>
    : sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32,
                                     ZZ_b_mul_r, ZPR4b8, VectorIndexS,
                                     mnemonic> {
  bits<2> i;

  let Inst{10} = i{1};
  let Inst{3}  = i{0};

  let AsmString = mnemonic # "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}";
}
2540
2541// SME2 multi-vec ternary indexed two registers 64-bit
2542
// Indexed "vgx2" form on MatrixOp64. The element index is a single bit (i1,
// operand class VectorIndexD32b_timm) stored in Inst{10}; `op` occupies
// Inst{4-3}.
class sme2_multi_vec_array_vg2_index_64b<bits<2> op,
                                         RegisterOperand multi_vector_ty,
                                         ZPRRegOp vector_ty,
                                         string mnemonic>
    : I<(outs MatrixOp64:$ZAda),
        (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv,
             sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, vector_ty:$Zm,
             VectorIndexD32b_timm:$i1),
        mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i1", "", []>,
      Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  bits<1> i1;
  bits<4> Zn;
  bits<3> imm3;

  // Fixed opcode bits.
  let Inst{31-20} = 0b110000011101;
  let Inst{15}    = 0b0;
  let Inst{12-11} = 0b00;
  let Inst{5}     = 0b0;

  // Opcode bits supplied by `op`.
  let Inst{4-3} = op;

  // Operand fields.
  let Inst{19-16} = Zm;
  let Inst{14-13} = Rv;
  let Inst{10}    = i1;
  let Inst{9-6}   = Zn;
  let Inst{2-0}   = imm3;

  // The ZA result is tied to the ZA input operand.
  let Constraints = "$ZAda = $_ZAda";
}
2570
// Record set for the 64-bit "vgx2" indexed form: instruction, pseudo,
// selection pattern for `intrinsic`, and an alias with a plain ZA index.
multiclass sme2_multi_vec_array_vg2_index_64b<string mnemonic, bits<2> op,
                                              RegisterOperand multi_vector_ty,
                                              ZPRRegOp vector_ty,
                                              ValueType vt,
                                              SDPatternOperator intrinsic> {
  def NAME : sme2_multi_vec_array_vg2_index_64b<op, multi_vector_ty,
                                                vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO
      : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7,
                                             multi_vector_ty, vector_ty,
                                             VectorIndexD32b_timm,
                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          vector_ty, vt,
                                          VectorIndexD32b_timm, tileslice16>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1",
                  (!cast<Instruction>(NAME)
                      MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn,
                      vector_ty:$Zm, VectorIndexD32b_timm:$i1),
                  0>;
}
2586
// Shared encoding class for the "vgx4" indexed forms.
//   sz        - goes to Inst{22}.
//   op        - 7 opcode bits split across Inst{12-10} (op{6-4}) and
//               Inst{6-3} (op{3-0}); users pass '?' for the bits that they
//               overwrite with the element index in the derived def.
//   matrix_ty - ZA operand class; $ZAda is tied to $_ZAda.
//   index_ty  - operand class of the element index $i. The index field itself
//               is declared and encoded by the derived definitions.
class sme2_multi_vec_array_vg4_index<bit sz, bits<7> op,
                                     MatrixOperand matrix_ty,
                                     RegisterOperand multi_vector_ty,
                                     ZPRRegOp vector_ty, Operand index_ty,
                                     string mnemonic>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
             multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i),
        mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<3> imm3;
  bits<3> Zn;
  bits<4> Zm;

  let Inst{31-23} = 0b110000010;
  let Inst{22}    = sz;
  let Inst{21-20} = 0b01;
  let Inst{19-16} = Zm;
  let Inst{15}    = 0b1;
  let Inst{14-13} = Rv;
  let Inst{12-10} = op{6-4};
  let Inst{9-7}   = Zn;
  let Inst{6-3}   = op{3-0};
  let Inst{2-0}   = imm3;

  let Constraints = "$ZAda = $_ZAda";
}
2613
// SME2 multi-vec ternary indexed four registers 32-bit
//
// Produces the real instruction (MatrixOp32, 2-bit index in Inst{11-10}),
// its _PSEUDO, the selection pattern for `intrinsic`, and an assembler alias
// without the "vgx4" marker (emit priority 0).
multiclass sme2_multi_vec_array_vg4_index_32b<string mnemonic, bits<4> op,
                                              RegisterOperand multi_vector_ty,
                                              ZPRRegOp vector_ty, ValueType vt,
                                              SDPatternOperator intrinsic> {
  // The two '?' opcode bits are the ones overwritten by the index below.
  def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,0b0, op{2-0}},
                                            MatrixOp32, multi_vector_ty,
                                            vector_ty, VectorIndexS32b_timm,
                                            mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<2> i;
    let Inst{11-10} = i;
  }

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7,
                                                     multi_vector_ty, vector_ty,
                                                     VectorIndexS32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          vector_ty, vt, VectorIndexS32b_timm,
                                          tileslice16>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp32:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      multi_vector_ty:$Zn, vector_ty:$Zm,
                      VectorIndexS32b_timm:$i), 0>;
}
2633
// SME2.1 multi-vec ternary indexed four registers 16-bit (FP8)
//
// Defines only the real instruction and its alias without the "vgx4" marker;
// no pseudo or selection pattern is created here. The 3-bit index is split
// between Inst{11-10} (high two bits) and Inst{3} (low bit).
multiclass sme2p1_multi_vec_array_vg4_index_f8f16<string mnemonic, bits<3> op,
                                                  RegisterOperand multi_vector_ty,
                                                  ZPRRegOp zpr_ty> {
  def NAME : sme2_multi_vec_array_vg4_index<0b0, {0b1,?,?,op,?}, MatrixOp16,
                                            multi_vector_ty, zpr_ty,
                                            VectorIndexH, mnemonic> {
    bits<3> i;
    let Inst{11-10} = i{2-1};
    let Inst{3}     = i{0};
  }

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp16:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>;
}
2650
// SME2.1 multi-vec ternary indexed four registers 16-bit
//
// Produces the real instruction (MatrixOp16, 3-bit index split between
// Inst{11-10} and Inst{3}), its _PSEUDO, the selection pattern for
// `intrinsic`, and an assembler alias without the "vgx4" marker.
//
// NOTE(review): the real instruction and the alias use VectorIndexH for the
// index operand, while the pseudo and the pattern use VectorIndexH32b_timm.
// Confirm that this difference is intentional.
multiclass sme2p1_multi_vec_array_vg4_index_16b<string mnemonic, bits<3> op,
                                                RegisterOperand multi_vector_ty,
                                                ZPRRegOp vector_ty, ValueType vt,
                                                SDPatternOperator intrinsic> {
  def NAME : sme2_multi_vec_array_vg4_index<0b0, {0b1,?,?,op,?}, MatrixOp16,
                                            multi_vector_ty, vector_ty,
                                            VectorIndexH, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> i;
    let Inst{11-10} = i{2-1};
    let Inst{3}     = i{0};
  }

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7,
                                                     multi_vector_ty, vector_ty,
                                                     VectorIndexH32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          vector_ty, vt, VectorIndexH32b_timm,
                                          tileslice16>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp16:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexH:$i), 0>;
}
2672
// SME2 multi-vec ternary indexed four registers 64-bit
//
// Encoding class for the "vgx4" indexed form whose ZA operand is MatrixOp64.
//   op - 3 opcode bits: op{2} goes to Inst{11}, op{1-0} to Inst{4-3}.
// $ZAda is tied to the input $_ZAda.
class sme2_multi_vec_array_vg4_index_64b<bits<3> op,
                                         RegisterOperand multi_vector_ty,
                                         ZPRRegOp vector_ty,
                                         string mnemonic>
    : I<(outs MatrixOp64:$ZAda),
        (ins MatrixOp64:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
             multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1),
        mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i1",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<3> imm3;
  bits<3> Zn;
  bits<4> Zm;
  bits<1> i1;

  let Inst{31-20} = 0b110000011101;
  let Inst{19-16} = Zm;
  let Inst{15}    = 0b1;
  let Inst{14-13} = Rv;
  let Inst{12}    = 0b0;
  let Inst{11}    = op{2};
  let Inst{10}    = i1;      // single-bit element index
  let Inst{9-7}   = Zn;
  let Inst{6-5}   = 0b00;
  let Inst{4-3}   = op{1-0};
  let Inst{2-0}   = imm3;

  let Constraints = "$ZAda = $_ZAda";
}
2702
// Instantiates, for one vgx4 indexed 64-bit operation: the real instruction,
// its _PSEUDO, the selection pattern for `intrinsic`, and an assembler alias
// that omits the "vgx4" marker (emit priority 0).
multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op,
                                              RegisterOperand multi_vector_ty,
                                              ZPRRegOp vector_ty, ValueType vty,
                                              SDPatternOperator intrinsic> {
  def NAME : sme2_multi_vec_array_vg4_index_64b<op, multi_vector_ty, vector_ty,
                                                mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7,
                                                     multi_vector_ty, vector_ty,
                                                     VectorIndexD32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          vector_ty, vty, VectorIndexD32b_timm,
                                          tileslice16>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1",
                  (!cast<Instruction>(NAME) MatrixOp64:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      multi_vector_ty:$Zn, vector_ty:$Zm,
                      VectorIndexD32b_timm:$i1), 0>;
}
2718
// FMLAL (multiple and indexed vector, FP8 to FP16)
//
// Encoding class shared by the vgx2 and vgx4 variants.
//   sz  - goes to Inst{23-22}.
//   vg4 - goes to Inst{15} and selects the "vgx4"/"vgx2" text in the asm string.
//   op  - op{2} goes to Inst{12}, op{1-0} to Inst{5-4}.
// The 4-bit index is split: i{3-2} in Inst{11-10}, i{1-0} in Inst{3-2}.
// Inst{9-6} ($Zn) is left for the derived definitions to fill in.
class sme2_multi_vec_array_vg24_index_16b<bits<2> sz, bit vg4, bits<3> op,
                                          RegisterOperand multi_vector_ty,
                                          string mnemonic>
    : I<(outs MatrixOp16:$ZAda),
        (ins MatrixOp16:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
             multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
        mnemonic,
        "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<2> imm2;
  bits<4> Zm;
  bits<4> i;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b01;
  let Inst{19-16} = Zm;
  let Inst{15}    = vg4;
  let Inst{14-13} = Rv;
  let Inst{12}    = op{2};
  let Inst{11-10} = i{3-2};
  let Inst{5-4}   = op{1-0};
  let Inst{3-2}   = i{1-0};
  let Inst{1-0}   = imm2;

  let Constraints = "$ZAda = $_ZAda";
}
2745
// vgx2 instantiation of sme2_multi_vec_array_vg24_index_16b: $Zn is a
// ZZ_b_mul_r operand encoded in the 4-bit field Inst{9-6}. Also adds an
// assembler alias that omits the "vgx2" marker (emit priority 0).
multiclass sme2_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz,
                                              bits<3> op> {
  def NAME : sme2_multi_vec_array_vg24_index_16b<sz, 0b0, op, ZZ_b_mul_r,
                                                 mnemonic> {
    bits<4> Zn;
    let Inst{9-6} = Zn;
  }

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp16:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
                      ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
}
2755
// vgx4 instantiation of sme2_multi_vec_array_vg24_index_16b: $Zn is a
// ZZZZ_b_mul_r operand encoded in the 3-bit field Inst{9-7}, with Inst{6}
// fixed to zero. Also adds an assembler alias that omits the "vgx4" marker
// (emit priority 0).
multiclass sme2_multi_vec_array_vg4_index_16b<string mnemonic, bits<2> sz,
                                              bits<3> op> {
  def NAME : sme2_multi_vec_array_vg24_index_16b<sz, 0b1, op, ZZZZ_b_mul_r,
                                                 mnemonic> {
    bits<3> Zn;
    let Inst{9-7} = Zn;
    let Inst{6}   = 0b0;
  }

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp16:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2,
                      ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
}
2766
//===----------------------------------------------------------------------===//
// SME2 multi-vec indexed long long MLA one source 16-bit
//
//   sz - goes to Inst{23-22}.
//   op - op{1} goes to Inst{12}, op{0} to Inst{4}.
// The 4-bit index is scattered: i{3} -> Inst{15}, i{2-1} -> Inst{11-10},
// i{0} -> Inst{3}. $ZAda (MatrixOp16) is tied to $_ZAda.
class sme2_mla_ll_array_index_16b<string mnemonic, bits<2> sz, bits<2> op>
    : I<(outs MatrixOp16:$ZAda),
        (ins MatrixOp16:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3,
             ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
        mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<3> imm3;
  bits<5> Zn;
  bits<4> Zm;
  bits<4> i;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b00;
  let Inst{19-16} = Zm;
  let Inst{15}    = i{3};
  let Inst{14-13} = Rv;
  let Inst{12}    = op{1};
  let Inst{11-10} = i{2-1};
  let Inst{9-5}   = Zn;
  let Inst{4}     = op{0};
  let Inst{3}     = i{0};
  let Inst{2-0}   = imm3;

  let Constraints = "$ZAda = $_ZAda";
}
2794
// SME2 multi-vec indexed long long MLA one source 32-bit
//
//   sz - goes to Inst{23-22}.
//   op - goes to Inst{4-2}.
// The 4-bit index is split: i{3} -> Inst{15}, i{2-0} -> Inst{12-10}.
// $ZAda (MatrixOp32) is tied to $_ZAda.
class sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op>
    : I<(outs MatrixOp32:$ZAda),
        (ins MatrixOp32:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2,
             ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
        mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<2> imm2;
  bits<5> Zn;
  bits<4> Zm;
  bits<4> i;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b00;
  let Inst{19-16} = Zm;
  let Inst{15}    = i{3};
  let Inst{14-13} = Rv;
  let Inst{12-10} = i{2-0};
  let Inst{9-5}   = Zn;
  let Inst{4-2}   = op;
  let Inst{1-0}   = imm2;

  let Constraints = "$ZAda = $_ZAda";
}
2819
// Instantiates the one-source indexed 32-bit instruction, its _PSEUDO, and
// the selection pattern for `intrinsic` (nxv16i8 operands, uimm2s4range
// slice offsets).
multiclass sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz,
                                       bits<3> op,
                                       SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_index_32b<mnemonic, sz, op>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR8,
                                                     ZPR4b8,
                                                     VectorIndexB32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b8,
                                      nxv16i8, VectorIndexB32b_timm,
                                      tileslicerange2s4>;
}
2827
// SME2 multi-vec indexed long long MLA one source 64-bit
//
//   op - goes to Inst{4-3}.
// The 3-bit index is split: i{2} -> Inst{15}, i{1-0} -> Inst{11-10}.
// $ZAda (MatrixOp64) is tied to $_ZAda.
class sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op>
    : I<(outs MatrixOp64:$ZAda),
        (ins MatrixOp64:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2,
             ZPR16:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i),
        mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<2> imm2;
  bits<5> Zn;
  bits<4> Zm;
  bits<3> i;

  let Inst{31-20} = 0b110000011000;
  let Inst{19-16} = Zm;
  let Inst{15}    = i{2};
  let Inst{14-13} = Rv;
  let Inst{12}    = 0b0;
  let Inst{11-10} = i{1-0};
  let Inst{9-5}   = Zn;
  let Inst{4-3}   = op;
  let Inst{2}     = 0b0;
  let Inst{1-0}   = imm2;

  let Constraints = "$ZAda = $_ZAda";
}
2853
// Instantiates the one-source indexed 64-bit instruction, its _PSEUDO, and
// the selection pattern for `intrinsic` (nxv8i16 operands, uimm2s4range
// slice offsets).
multiclass sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op,
                                       SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_index_64b<mnemonic, op>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR16,
                                                     ZPR4b16,
                                                     VectorIndexH32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b16,
                                      nxv8i16, VectorIndexH32b_timm,
                                      tileslicerange2s4>;
}
2861
// Encoding class shared by the vgx2 and vgx4 indexed long-long MLA forms
// that accumulate into MatrixOp32.
//   sz  - goes to Inst{23-22}.
//   vg4 - goes to Inst{15} and selects the "vgx4"/"vgx2" text in the asm string.
//   op  - goes to Inst{5-3}.
// The 4-bit index is split: i{3-2} -> Inst{11-10}, i{1-0} -> Inst{2-1}.
// Inst{9-6} ($Zn, and for vgx4 an extra opcode bit) is set by derived defs.
class sme2_mla_ll_array_vg24_index_32b<bits<2> sz, bit vg4, bits<3> op,
                                       RegisterOperand vector_ty,
                                       string mnemonic>
    : I<(outs MatrixOp32:$ZAda),
        (ins MatrixOp32:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
        mnemonic,
        "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bit     imm;
  bits<4> Zm;
  bits<4> i;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b01;
  let Inst{19-16} = Zm;
  let Inst{15}    = vg4;
  let Inst{14-13} = Rv;
  let Inst{12}    = 0b0;
  let Inst{11-10} = i{3-2};
  let Inst{5-3}   = op;
  let Inst{2-1}   = i{1-0};
  let Inst{0}     = imm;

  let Constraints = "$ZAda = $_ZAda";
}
2888
// SME2 multi-vec indexed long long MLA two sources 32-bit
//
// vgx2 instantiation: $Zn is ZZ_b_mul_r in the 4-bit field Inst{9-6}.
// Also creates the _PSEUDO, the selection pattern for `intrinsic`, and an
// assembler alias that omits the "vgx2" marker (emit priority 0).
multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<2> sz,
                                           bits<3> op,
                                           SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_vg24_index_32b<sz, 0b0, op, ZZ_b_mul_r,
                                              mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<4> Zn;
    let Inst{9-6} = Zn;
  }

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range,
                                                     ZZ_b_mul_r, ZPR4b8,
                                                     VectorIndexB32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range,
                                          ZPR4b8, nxv16i8,
                                          VectorIndexB32b_timm,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp32:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
                      ZZ_b_mul_r:$Zn, ZPR4b8:$Zm,
                      VectorIndexB32b_timm:$i), 0>;
}
2904
// SME2 multi-vec indexed long long MLA four sources 32-bit
//
// vgx4 instantiation: $Zn is ZZZZ_b_mul_r in the 3-bit field Inst{9-7}.
// The multiclass takes a 4-bit op; op{2-0} is forwarded to the base class
// and op{3} is placed in Inst{6}. Also creates the _PSEUDO, the selection
// pattern for `intrinsic`, and an alias that omits the "vgx4" marker.
multiclass sme2_mla_ll_array_vg4_index_32b<string mnemonic, bits<2> sz,
                                           bits<4> op,
                                           SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_vg24_index_32b<sz, 0b1, op{2-0}, ZZZZ_b_mul_r,
                                              mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> Zn;
    let Inst{9-7} = Zn;
    let Inst{6}   = op{3};
  }

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range,
                                                     ZZZZ_b_mul_r, ZPR4b8,
                                                     VectorIndexB32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range,
                                          ZPR4b8, nxv16i8,
                                          VectorIndexB32b_timm,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp32:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
                      ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm,
                      VectorIndexB32b_timm:$i), 0>;
}
// Encoding class shared by the vgx2 and vgx4 indexed long-long MLA forms
// that accumulate into MatrixOp64.
//   vg4 - goes to Inst{15} and selects the "vgx4"/"vgx2" text in the asm string.
//   op  - goes to Inst{4-3}.
// The 3-bit index is split: i{2} -> Inst{10}, i{1-0} -> Inst{2-1}.
// Inst{9-6} ($Zn) is set by the derived definitions.
class sme2_mla_ll_array_vg24_index_64b<bit vg4, bits<2> op,
                                       RegisterOperand vector_ty,
                                       string mnemonic>
    : I<(outs MatrixOp64:$ZAda),
        (ins MatrixOp64:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i),
        mnemonic,
        "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bit     imm;
  bits<4> Zm;
  bits<3> i;

  let Inst{31-20} = 0b110000011001;
  let Inst{19-16} = Zm;
  let Inst{15}    = vg4;
  let Inst{14-13} = Rv;
  let Inst{12-11} = 0b00;
  let Inst{10}    = i{2};
  let Inst{5}     = 0b0;
  let Inst{4-3}   = op;
  let Inst{2-1}   = i{1-0};
  let Inst{0}     = imm;

  let Constraints = "$ZAda = $_ZAda";
}
2946
// SME2 multi-vec indexed long long MLA two sources 64-bit
//
// vgx2 instantiation: $Zn is ZZ_h_mul_r in the 4-bit field Inst{9-6}.
// Also creates the _PSEUDO, the selection pattern for `intrinsic`, and an
// assembler alias that omits the "vgx2" marker (emit priority 0).
multiclass sme2_mla_ll_array_vg2_index_64b<string mnemonic, bits<2> op,
                                           SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_vg24_index_64b<0b0, op, ZZ_h_mul_r, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<4> Zn;
    let Inst{9-6} = Zn;
  }

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range,
                                                     ZZ_h_mul_r, ZPR4b16,
                                                     VectorIndexH32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range,
                                          ZPR4b16, nxv8i16,
                                          VectorIndexH32b_timm,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp64:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
                      ZZ_h_mul_r:$Zn, ZPR4b16:$Zm,
                      VectorIndexH32b_timm:$i), 0>;
}
2962
// SME2 multi-vec indexed long long MLA four sources 64-bit
//
// vgx4 instantiation: $Zn is ZZZZ_h_mul_r in the 3-bit field Inst{9-7},
// with Inst{6} fixed to zero. Also creates the _PSEUDO, the selection
// pattern for `intrinsic`, and an alias that omits the "vgx4" marker.
multiclass sme2_mla_ll_array_vg4_index_64b<string mnemonic, bits<2> op,
                                           SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_vg24_index_64b<0b1, op, ZZZZ_h_mul_r,  mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> Zn;
    let Inst{9-7} = Zn;
    let Inst{6}   = 0b0;
  }

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range,
                                                     ZZZZ_h_mul_r, ZPR4b16,
                                                     VectorIndexH32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range,
                                          ZPR4b16, nxv8i16,
                                          VectorIndexH32b_timm,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
                  (!cast<Instruction>(NAME) MatrixOp64:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
                      ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm,
                      VectorIndexH32b_timm:$i), 0>;
}
2979
2980
// SME2 multiple and single vector long long FMA one source
//
// The 5-bit op is distributed over the encoding:
//   op{4}   -> Inst{22}   (sz)
//   op{3}   -> Inst{20}   (fp8)
//   op{2-0} -> Inst{4-2}
// $ZAda (matrix_ty) is tied to $_ZAda.
class sme2_mla_ll_array_single<string mnemonic, bits<5> op,
                               MatrixOperand matrix_ty, ZPRRegOp vector_ty,
                               ZPRRegOp zpr_ty>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm,
             vector_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda[$Rv, $imm], $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<2> imm;
  bits<5> Zn;
  bits<4> Zm;

  let Inst{31-23} = 0b110000010;
  let Inst{22}    = op{4}; //sz
  let Inst{21}    = 0b1;
  let Inst{20}    = op{3}; //fp8
  let Inst{19-16} = Zm;
  let Inst{15}    = 0b0;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b001;
  let Inst{9-5}   = Zn;
  let Inst{4-2}   = op{2-0};
  let Inst{1-0}   = imm;

  let Constraints = "$ZAda = $_ZAda";
}
3009
// Instantiates the single-vector long-long instruction, a NAME#_PSEUDO
// record, and the selection pattern for `intrinsic` over value type `vt`.
multiclass sme2_mla_ll_array_single<string mnemonic, bits<5> op,
                                    MatrixOperand matrix_ty,
                                    ZPRRegOp vector_ty, ZPRRegOp zpr_ty,
                                    ValueType vt,
                                    SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_single<mnemonic, op, matrix_ty, vector_ty,
                                      zpr_ty>,
             SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s4range,
                                                             vector_ty, zpr_ty,
                                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME, intrinsic, uimm2s4range, zpr_ty,
                                       vt, tileslicerange2s4>;
}
3019
// Encoding class shared by the vgx2 and vgx4 "multiple and single vector"
// long-long forms.
//
// The 6-bit op is distributed over the encoding:
//   op{5}   -> Inst{22}   (sz)
//   op{4}   -> Inst{20}   (vg4; also selects "vgx4"/"vgx2" in the asm string)
//   op{3-0} -> Inst{4-1}
// $ZAda (matrix_ty) is tied to $_ZAda.
//
// The asm string uses a single space before the vgx2/vgx4 marker so that
// printed assembly matches the "[$Rv, $imm, vgxN]" spelling used by the
// other multi-vector formats in this file.
class sme2_mla_ll_array_vg24_single<bits<6> op, MatrixOperand matrix_ty,
                                    RegisterOperand vector_ty, ZPRRegOp zpr_ty,
                                    string mnemonic>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{4}, "vgx4", "vgx2") # "], $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  bits<5> Zn;
  bit     imm;
  let Inst{31-23} = 0b110000010;
  let Inst{22}    = op{5}; //sz
  let Inst{21}    = 0b1;
  let Inst{20}    = op{4}; //vg4
  let Inst{19-16} = Zm;
  let Inst{15}    = 0b0;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b000;
  let Inst{9-5}   = Zn;
  let Inst{4-1}   = op{3-0};
  let Inst{0}     = imm;

  let Constraints = "$ZAda = $_ZAda";
}
3046
// SME2 single-multi long long MLA two and four sources
//
// Common part of the vgx2/vgx4 "single" multiclasses: the real instruction,
// a NAME#_PSEUDO record, and an assembler alias that omits the vgx marker
// (emit priority 0). Selection patterns are added by the callers.
multiclass sme2_mla_ll_array_vg24_single<string mnemonic, bits<6> op,
                                         MatrixOperand matrix_ty,
                                         RegisterOperand multi_vector_ty,
                                         ZPRRegOp zpr_ty> {
  def NAME : sme2_mla_ll_array_vg24_single<op, matrix_ty, multi_vector_ty,
                                           zpr_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm1s4range,
                                                             multi_vector_ty,
                                                             zpr_ty,
                                                             SMEMatrixArray>;

  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                      MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
                      multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
}
3061
// Two-source wrapper around sme2_mla_ll_array_vg24_single: widens the 5-bit
// op to 6 bits by appending a zero as the least-significant bit, then adds
// the VG2 selection pattern for `intrinsic`.
multiclass sme2_mla_ll_array_vg2_single<string mnemonic, bits<5> op,
                                        MatrixOperand matrix_ty,
                                        RegisterOperand multi_vector_ty,
                                        ZPRRegOp zpr_ty, ValueType vt,
                                        SDPatternOperator intrinsic> {
  defm NAME : sme2_mla_ll_array_vg24_single<mnemonic, {op, 0b0}, matrix_ty,
                                            multi_vector_ty, zpr_ty>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm1s4range,
                                           zpr_ty, vt, tileslicerange1s4>;
}
3071
// Four-source wrapper around sme2_mla_ll_array_vg24_single: widens the 5-bit
// op to 6 bits by appending a zero as the least-significant bit, then adds
// the VG4 selection pattern for `intrinsic`.
multiclass sme2_mla_ll_array_vg4_single<string mnemonic, bits<5> op,
                                        MatrixOperand matrix_ty,
                                        RegisterOperand multi_vector_ty,
                                        ZPRRegOp zpr_ty, ValueType vt,
                                        SDPatternOperator intrinsic> {
  defm NAME : sme2_mla_ll_array_vg24_single<mnemonic, {op, 0b0}, matrix_ty,
                                            multi_vector_ty, zpr_ty>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm1s4range,
                                           zpr_ty, vt, tileslicerange1s4>;
}
3080
// SME2 multiple vectors long long MLA two sources
//
// Both $Zn and $Zm use `vector_ty` and are encoded as 4-bit fields
// (Inst{9-6} and Inst{20-17}).
//   op{4}   -> Inst{22}  (sz)
//   op{3-0} -> Inst{5-2}
// $ZAda (matrix_ty) is tied to $_ZAda.
class sme2_mla_ll_array_vg2_multi<bits<5> op, MatrixOperand matrix_ty,
                                  RegisterOperand vector_ty, string mnemonic>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$ZAda[$Rv, $imm, vgx2], $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bit     imm;
  bits<4> Zn;
  bits<4> Zm;

  let Inst{31-23} = 0b110000011;
  let Inst{22}    = op{4};  // sz
  let Inst{21}    = 0b1;
  let Inst{20-17} = Zm;
  let Inst{16-15} = 0b00;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b000;
  let Inst{9-6}   = Zn;
  let Inst{5-2}   = op{3-0};
  let Inst{1}     = 0b0;
  let Inst{0}     = imm;

  let Constraints = "$ZAda = $_ZAda";
}
3108
// Instantiates the two-source multi-vector instruction, its _PSEUDO, the
// VG2 selection pattern for `intrinsic`, and an assembler alias that omits
// the "vgx2" marker (emit priority 0).
multiclass sme2_mla_ll_array_vg2_multi<string mnemonic, bits<5> op,
                                       MatrixOperand matrix_ty,
                                       RegisterOperand vector_ty,
                                       ValueType vt,
                                       SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_vg2_multi<op, matrix_ty, vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range,
                                                     vector_ty,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
                      vector_ty:$Zn, vector_ty:$Zm), 0>;
}
3122
// SME2 multiple vectors long long MLA four sources
//
// Both $Zn and $Zm use `vector_ty` and are encoded as 3-bit fields
// (Inst{9-7} and Inst{20-18}).
//   op{4}   -> Inst{22}  (sz)
//   op{3-0} -> Inst{5-2}
// $ZAda (matrix_ty) is tied to $_ZAda.
class sme2_mla_ll_array_vg4_multi<bits<5> op, MatrixOperand matrix_ty,
                                  RegisterOperand vector_ty,
                                  string mnemonic>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda,
             MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$ZAda[$Rv, $imm, vgx4], $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bit     imm;
  bits<3> Zn;
  bits<3> Zm;

  let Inst{31-23} = 0b110000011;
  let Inst{22}    = op{4}; // sz
  let Inst{21}    = 0b1;
  let Inst{20-18} = Zm;
  let Inst{17-15} = 0b010;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b000;
  let Inst{9-7}   = Zn;
  let Inst{6}     = 0b0;
  let Inst{5-2}   = op{3-0};
  let Inst{1}     = 0b0;
  let Inst{0}     = imm;

  let Constraints = "$ZAda = $_ZAda";
}
3152
// Instantiates the four-source multi-vector instruction, its _PSEUDO, the
// VG4 selection pattern for `intrinsic`, and an assembler alias that omits
// the "vgx4" marker (emit priority 0).
multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<5> op,
                                       MatrixOperand matrix_ty,
                                       RegisterOperand vector_ty,
                                       ValueType vt,
                                       SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_vg4_multi<op, matrix_ty, vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range,
                                                     vector_ty,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda,
                      MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
                      vector_ty:$Zn, vector_ty:$Zm), 0>;
}
3166
//===----------------------------------------------------------------------===//
// SME2 Outer Product and Accumulate

// Integer outer product on a TileOp32 tile with ZPR16 sources. The tile
// number is a 2-bit field in Inst{1-0}; Inst{2} is fixed to zero. Also
// creates the _PSEUDO and the two-predicate/two-vector selection pattern
// for `intrinsic` (nxv8i1 predicates, nxv8i16 vectors).
multiclass sme2_int_mopx_tile<string mnemonic, bits<3> op,
                              SDPatternOperator intrinsic> {
  def NAME : sme_int_outer_product_inst<op, 0b0, 0b1, TileOp32, ZPR16,
                                        mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{2}   = 0b0;
    let Inst{1-0} = ZAda;
  }

  def _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>,
                SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv8i1,
                                       nxv8i16>;
}
3181
// Outer product built on sme_outer_product_widening_inst with ZPR32 sources.
// Creates the real instruction, its _PSEUDO, and the two-predicate/two-vector
// selection pattern for `intrinsic` (nxv4i1 predicates, nxv4i32 vectors).
multiclass sme2_int_bmopx_tile<string mnemonic, bits<3> op,
                               SDPatternOperator intrinsic> {
  def NAME : sme_outer_product_widening_inst<op, ZPR32, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme_outer_product_pseudo<ZPR32, SMEMatrixTileS>,
                SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv4i1,
                                       nxv4i32>;
}
3189
//===----------------------------------------------------------------------===//
// SME2 Zero Lookup Table.
//
// No input operands; the only operand is the ZTR output $ZT, printed inside
// braces. All of Inst{31-4} is fixed and `opc` fills Inst{3-0}.
class sme2_zero_zt<string mnemonic, bits<4> opc>
    : I<(outs ZTR:$ZT), (ins ),
        mnemonic, "\t\\{ $ZT \\}",
        "", []>,
      Sched<[]> {
  let Inst{31-4} = 0b1100000001001000000000000000;
  let Inst{3-0}  = opc;
}
3199
// Instantiates the zero-ZT instruction, a NAME#_PSEUDO that takes the ZTR
// register as an input, and a pattern mapping int_aarch64_sme_zero_zt onto
// that pseudo.
multiclass sme2_zero_zt<string mnemonic, bits<4> opc> {
  def NAME : sme2_zero_zt<mnemonic, opc>;

  def NAME # _PSEUDO : Pseudo<(outs), (ins ZTR:$ZT), []>, Sched<[]> {
    // Translated to actual instruction in AArch64ISelLowering.cpp
    let usesCustomInserter = 1;
  }

  def : Pat<(int_aarch64_sme_zero_zt (imm_to_zt untyped:$zt)),
            (!cast<Instruction>(NAME # _PSEUDO) $zt)>;
}
3210
//===----------------------------------------------------------------------===//
// SME2 lookup table load/store
//
// opc{7} selects the direction:
//   opc{7} = 1 : $ZTt is an input, no outputs, mayStore is set.
//   opc{7} = 0 : $ZTt is an output, mayLoad is set.
// opc{7-2} goes to Inst{21-16} and opc{1-0} to Inst{1-0}.
class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
    : I<!if(opc{7}, (outs ), (outs ZTR:$ZTt)),
        !if(opc{7}, (ins ZTR:$ZTt, GPR64sp:$Rn), (ins GPR64sp:$Rn)),
        mnemonic, "\t$ZTt, [$Rn]",
        "", []>,
      Sched<[]> {
  bits<5> Rn;

  let Inst{31-22} = 0b1110000100;
  let Inst{21-16} = opc{7-2};
  let Inst{15-10} = 0b100000;
  let Inst{9-5}   = Rn;
  let Inst{4-2}   = 0b000;
  let Inst{1-0}   = opc{1-0};

  let mayStore    = opc{7};
  let mayLoad     = !not(opc{7});
}
3229
3230
// Instantiates the ZT load/store instruction, a NAME#_PSEUDO taking the ZTR
// register and a GPR64sp base, and a pattern mapping `op` onto that pseudo.
multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc,
                                  SDPatternOperator op> {
  def NAME : sme2_spill_fill_vector<mnemonic, opc>;

  def NAME # _PSEUDO : Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>,
                       Sched<[]> {
    // Translated to actual instruction in AArch64ISelLowering.cpp
    let usesCustomInserter = 1;
  }

  def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base),
            (!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>;
}
3241
//===----------------------------------------------------------------------===//
// SME2 move to/from lookup table
//
// ZT -> general-purpose register: output $Rt (GPR64), inputs $ZTt and the
// uimm3s8 offset $imm3 (3-bit field, Inst{14-12}). `opc` fills Inst{11-5}.
class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc>
    : I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3),
        mnemonic, "\t$Rt, $ZTt[$imm3]",
        "", []>,
      Sched<[]> {
  bits<5> Rt;
  bits<3> imm3;

  let Inst{31-15} = 0b11000000010011000;
  let Inst{14-12} = imm3;
  let Inst{11-5}  = opc;
  let Inst{4-0}   = Rt;
}
3255
// General-purpose register -> ZT: output $ZTt, inputs the uimm3s8 offset
// $imm3 (3-bit field, Inst{14-12}) and $Rt (GPR64). `opc` fills Inst{11-5}.
class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
    : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt),
        mnemonic, "\t$ZTt[$imm3], $Rt",
        "", []>,
      Sched<[]> {
  bits<5> Rt;
  bits<3> imm3;

  let Inst{31-15} = 0b11000000010011100;
  let Inst{14-12} = imm3;
  let Inst{11-5}  = opc;
  let Inst{4-0}   = Rt;
}
3267
// SME2 move vector to lookup table
//
// Output $ZTt; inputs the sme_elm_idx0_3 offset $off2 (2-bit field,
// Inst{13-12}, printed with "mul vl") and the vector register $Zt.
// `opc` fills Inst{11-5}.
class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
    : I<(outs ZTR:$ZTt), (ins sme_elm_idx0_3:$off2, ZPRAny:$Zt),
        mnemonic, "\t$ZTt[$off2, mul vl], $Zt",
        "", []>,
      Sched<[]> {
  bits<2> off2;
  bits<5> Zt;

  let Inst{31-14} = 0b110000000100111100;
  let Inst{13-12} = off2;
  let Inst{11-5}  = opc;
  let Inst{4-0}   = Zt;
}
3280
// Instantiates the vector-to-ZT move plus an alias for the offset-zero case
// that drops the "[$off2, mul vl]" part (emit priority 1).
multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc> {
  def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;

  def : InstAlias<mnemonic # "\t$ZTt, $Zt",
                  (!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
}
3286
//===----------------------------------------------------------------------===//
// SME2 lookup table expand one register
//
//   sz  - goes to Inst{13-12}.
//   opc - opc{6-2} goes to Inst{18-14}, opc{1-0} to Inst{11-10}. Derived
//         classes pass '?' for the bits they overwrite with the index.
// The index field for $i is declared and encoded by the derived classes.
class sme2_luti_vector_index<bits<2> sz, bits<7> opc,
                             RegisterOperand vector_ty,
                             AsmVectorIndexOpnd index_ty, string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>,
      Sched<[]> {
  bits<5> Zd;
  bits<5> Zn;

  let Inst{31-19} = 0b1100000011001;
  let Inst{18-14} = opc{6-2};
  let Inst{13-12} = sz;
  let Inst{11-10} = opc{1-0};
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;
}
3304
// LUTI2 flavour of sme2_luti_vector_index: VectorIndexB32b_timm index held
// in a 4-bit field that overwrites Inst{17-14} (the '?' bits of the opcode).
class sme2_luti2_vector_index<bits<2> sz, RegisterOperand vector_ty,
                              string mnemonic>
    : sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty,
                             VectorIndexB32b_timm, mnemonic> {
  bits<4> i;
  let Inst{17-14} = i;
}
3311
// Instantiates the _B/_H/_S LUTI2 instructions and one selection pattern per
// supported result type. The source vector is always nxv16i8; the result
// type picks the instruction: i8 -> _B, i16/f16/bf16 -> _H, i32/f32 -> _S.
multiclass sme2_luti2_vector_index<string mnemonic,
                                   SDPatternOperator intrinsic> {
  def _B : sme2_luti2_vector_index<0b00, ZPR8,  mnemonic>;
  def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>;
  def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>;

  // Integer result types.
  def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;

  // Floating-point result types.
  def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                 (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
}
3330
// Single-register format with a 3-bit index.
// The opcode passed to the base leaves opc{4-2} unset ('?'); those bits land
// in Inst{16-14}, which this class fills with the index operand.
class sme2_luti4_vector_index<bits<2> sz, RegisterOperand vector_ty,
                              string mnemonic>
    : sme2_luti_vector_index<sz, {0,1,?,?,?,0,0}, vector_ty,
                             VectorIndexH32b_timm, mnemonic> {
  bits<3> i;

  let Inst{16-14} = i;
}
3337
// Instantiates the _B/_H/_S instructions of the 3-bit-index single-register
// format and maps `intrinsic` onto them for each supported result type.
// Every pattern takes the table operand through imm_to_zt, an nxv16i8 source
// vector and an i32 VectorIndexH32b_timm index.
multiclass sme2_luti4_vector_index<string mnemonic, SDPatternOperator intrinsic> {
  def _B : sme2_luti4_vector_index<0b00, ZPR8, mnemonic>;
  def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>;
  def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>;

  // Integer result types.
  def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;

  // Floating-point result types reuse the _H and _S instructions.
  def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                 (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
}
3356
3357// SME2 lookup table expand two contiguous registers
// Common format for the two-register lookup-table forms.
//   sz        - 2-bit size field, encoded in Inst{13-12}.
//   opc       - 6-bit opcode, split across Inst{18-15} (opc{5-2}) and
//               Inst{11-10} (opc{1-0}).
//   vector_ty - register operand type of the destination $Zd.
//   index_ty  - vector index operand type of $i (printed right after $Zn).
//   mnemonic  - assembly mnemonic.
// $Zd is encoded in four bits (Inst{4-1}); Inst{0} is fixed to 0.
class sme2_luti_vector_vg2_index<bits<2> sz, bits<6> opc,
                                 RegisterOperand vector_ty,
                                 AsmVectorIndexOpnd index_ty,
                                 string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic,
        "\t$Zd, $ZTt, $Zn$i",
        "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<4> Zd;

  // Encoding, listed from the least significant field upwards.
  let Inst{0}     = 0b0;
  let Inst{4-1}   = Zd;
  let Inst{9-5}   = Zn;
  let Inst{11-10} = opc{1-0};
  let Inst{13-12} = sz;
  let Inst{14}    = 0b1;
  let Inst{18-15} = opc{5-2};
  let Inst{31-19} = 0b1100000010001;
}
3375
// Two-register format with a 3-bit index.
// The opcode passed to the base leaves opc{4-2} unset ('?'); those bits land
// in Inst{17-15}, which this class fills with the index operand.
class sme2_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg2_index<sz, {1,?,?,?,0,0}, vector_ty,
                                 VectorIndexH, mnemonic> {
  bits<3> i;

  let Inst{17-15} = i;
}
3382
// Instantiates the _B/_H/_S variants of the two-register, 3-bit-index format;
// the size field and the destination register-pair operand change per variant.
multiclass sme2_luti2_vector_vg2_index<string mnemonic> {
  def _B : sme2_luti2_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>; // sz = 0b00
  def _H : sme2_luti2_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>; // sz = 0b01
  def _S : sme2_luti2_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>; // sz = 0b10
}
3388
// Two-register format with a 2-bit index.
// The opcode passed to the base leaves opc{3-2} unset ('?'); those bits land
// in Inst{16-15}, which this class fills with the index operand.
class sme2_luti4_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg2_index<sz, {0,1,?,?,0,0}, vector_ty,
                                 VectorIndexS, mnemonic> {
  bits<2> i;

  let Inst{16-15} = i;
}
3395
// Instantiates the _B/_H/_S variants of the two-register, 2-bit-index format;
// the size field and the destination register-pair operand change per variant.
multiclass sme2_luti4_vector_vg2_index<string mnemonic> {
  def _B : sme2_luti4_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>; // sz = 0b00
  def _H : sme2_luti4_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>; // sz = 0b01
  def _S : sme2_luti4_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>; // sz = 0b10
}
3401
3402// SME2 lookup table expand four contiguous registers
// Common format for the four-register lookup-table forms.
//   sz        - 2-bit size field, encoded in Inst{13-12}.
//   opc       - 5-bit opcode, split across Inst{18-16} (opc{4-2}) and
//               Inst{11-10} (opc{1-0}).
//   vector_ty - register operand type of the destination $Zd.
//   index_ty  - vector index operand type of $i (printed right after $Zn).
//   mnemonic  - assembly mnemonic.
// $Zd is encoded in three bits (Inst{4-2}); Inst{1-0} is fixed to 0.
class sme2_luti_vector_vg4_index<bits<2> sz, bits<5> opc,
                                 RegisterOperand vector_ty,
                                 AsmVectorIndexOpnd index_ty,
                                 string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic,
        "\t$Zd, $ZTt, $Zn$i",
        "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<3> Zd;

  // Encoding, listed from the least significant field upwards.
  let Inst{1-0}   = 0b00;
  let Inst{4-2}   = Zd;
  let Inst{9-5}   = Zn;
  let Inst{11-10} = opc{1-0};
  let Inst{13-12} = sz;
  let Inst{15-14} = 0b10;
  let Inst{18-16} = opc{4-2};
  let Inst{31-19} = 0b1100000010001;
}
3420
// Four-register format with a 2-bit index.
// The opcode passed to the base leaves opc{3-2} unset ('?'); those bits land
// in Inst{17-16}, which this class fills with the index operand.
class sme2_luti2_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg4_index<sz, {1,?,?,0,0}, vector_ty,
                                 VectorIndexS, mnemonic> {
  bits<2> i;

  let Inst{17-16} = i;
}
3427
// Instantiates the _B/_H/_S variants of the four-register, 2-bit-index format;
// the size field and the destination register-quad operand change per variant.
multiclass sme2_luti2_vector_vg4_index<string mnemonic> {
  def _B : sme2_luti2_vector_vg4_index<0b00, ZZZZ_b_mul_r, mnemonic>; // sz = 0b00
  def _H : sme2_luti2_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; // sz = 0b01
  def _S : sme2_luti2_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; // sz = 0b10
}
3433
// Four-register format with a 1-bit index.
// The opcode passed to the base leaves opc{2} unset ('?'); that bit lands in
// Inst{16}, which this class fills with the index operand.
class sme2_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg4_index<sz, {0,1,?,0,0}, vector_ty,
                                 VectorIndexD, mnemonic> {
  bits<1> i;

  let Inst{16} = i;
}
3440
// Instantiates the _H and _S variants of the four-register, 1-bit-index
// format. Only these two variants are defined by this multiclass.
multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
  def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; // sz = 0b01
  def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; // sz = 0b10
}
3445
3446//===----------------------------------------------------------------------===//
3447// SME2 MOV
// Base format for the two-register vector-to-tile move.
//   sz        - 2-bit size field, encoded in Inst{23-22}.
//   v         - direction bit, encoded in Inst{15}.
//   tile_ty   - tile operand type; used for both the result $ZAd and the
//               tied input $_ZAd.
//   index_ty  - operand type of the slice immediate $imm.
//   vector_ty - source register operand type of $Zn.
//   mnemonic  - assembly mnemonic.
// Inst{2-0} is left unset here; each instantiation supplies it.
class sme2_mova_vec_to_tile_vg2_multi_base<bits<2> sz, bit v,
                                           RegisterOperand tile_ty,
                                           Operand index_ty,
                                           RegisterOperand vector_ty,
                                           string mnemonic>
    : I<(outs tile_ty:$ZAd),
        (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm,
             vector_ty:$Zn),
        mnemonic,
        "\t$ZAd[$Rs, $imm], $Zn",
        "", []>,
      Sched<[]> {
  bits<2> Rs;
  bits<4> Zn;

  // The instruction reads and writes the same tile operand.
  let Constraints = "$ZAd = $_ZAd";

  // Encoding, listed from the least significant field upwards.
  let Inst{5-3}   = 0b000;
  let Inst{9-6}   = Zn;
  let Inst{12-10} = 0b000;
  let Inst{14-13} = Rs;
  let Inst{15}    = v;
  let Inst{21-16} = 0b000100;
  let Inst{23-22} = sz;
  let Inst{31-24} = 0b11000000;
}
3470
// Emits one assembly alias "<mnemonic> $ZAd[$Rs, $imm{, <vg_acronym>}], $Zn"
// for `inst`.
//   prefer           - passed as the InstAlias emit argument.
//   inst             - instruction the alias expands to.
//   tile_or_array_ty - operand type used for $ZAd in the alias.
//   rv_ty            - operand type used for $Rs.
//   index_ty         - operand type used for $imm.
//   vector_ty        - operand type used for $Zn.
//   mnemonic         - alias mnemonic.
//   vg_acronym       - optional suffix inside the brackets; when empty, the
//                      ", <vg_acronym>" part is omitted entirely.
multiclass sme2_mova_vec_to_tile_or_array_aliases<int prefer, Instruction inst,
                                                  RegisterOperand tile_or_array_ty,
                                                  RegisterOperand rv_ty,
                                                  Operand index_ty,
                                                  RegisterOperand vector_ty,
                                                  string mnemonic,
                                                  string vg_acronym = ""> {
  def : InstAlias<mnemonic # "\t$ZAd[$Rs, $imm"
                    # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym)
                    # "], $Zn",
                  (inst tile_or_array_ty:$ZAd, rv_ty:$Rs, index_ty:$imm,
                        vector_ty:$Zn),
                  prefer>;
}
3482
3483// SME2 move vector to tile, two registers
// Defines the two-register vector-to-tile move instructions for one tile
// direction, together with their pseudos, selection patterns and aliases.
//   v         - direction bit; selects the TileVectorOpV* operand types when
//               set and the TileVectorOpH* types otherwise.
//   mnemonic  - assembly mnemonic of the real instructions.
//   intrinsic - operator matched by the selection patterns.
// In each variant Inst{2-0} is shared between the tile number (ZAd) and the
// slice-range immediate (imm); the split differs per element size.
multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic, SDPatternOperator intrinsic> {

  // _B: all three low bits hold the immediate.
  def _B : sme2_mova_vec_to_tile_vg2_multi_base<0b00, v,
                                                !if(v, TileVectorOpV8, TileVectorOpH8),
                                                uimm3s2range, ZZ_b_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _B, 1> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }

  // _H: one tile bit, two immediate bits.
  def _H : sme2_mova_vec_to_tile_vg2_multi_base<0b01, v,
                                                !if(v, TileVectorOpV16, TileVectorOpH16),
                                                uimm2s2range, ZZ_h_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _H, 1> {
    bits<1> ZAd;
    bits<2> imm;
    let Inst{2}   = ZAd;
    let Inst{1-0} = imm;
  }

  // _S: two tile bits, one immediate bit.
  def _S : sme2_mova_vec_to_tile_vg2_multi_base<0b10, v,
                                                !if(v, TileVectorOpV32, TileVectorOpH32),
                                                uimm1s2range, ZZ_s_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _S, 1> {
    bits<2> ZAd;
    bits<1> imm;
    let Inst{2-1} = ZAd;
    let Inst{0}   = imm;
  }

  // _D: all three low bits hold the tile number.
  def _D : sme2_mova_vec_to_tile_vg2_multi_base<0b11, v,
                                                !if(v, TileVectorOpV64, TileVectorOpH64),
                                                uimm0s2range, ZZ_d_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _D, 1> {
    bits<3> ZAd;
    let Inst{2-0} = ZAd;
  }

  // Pseudo instructions paired with the real ones above.
  def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
  def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
  def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
  def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;

  // Selection patterns, one per supported vector type.
  def : SME2_Tile_VG2_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8, uimm3s2range, tileslicerange3s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16, uimm2s2range, tileslicerange2s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16, uimm2s2range, tileslicerange2s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm2s2range, tileslicerange2s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32, uimm1s2range, tileslicerange1s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32, uimm1s2range, tileslicerange1s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64, uimm0s2range, tileslicerange0s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64, uimm0s2range, tileslicerange0s2>;

  // "mov" aliases (emit argument 1).
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B),
                                                !if(v, TileVectorOpV8, TileVectorOpH8),
                                                MatrixIndexGPR32Op12_15,
                                                uimm3s2range, ZZ_b_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _H),
                                                !if(v, TileVectorOpV16, TileVectorOpH16),
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s2range, ZZ_h_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _S),
                                                !if(v, TileVectorOpV32, TileVectorOpH32),
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s2range, ZZ_s_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _D),
                                                !if(v, TileVectorOpV64, TileVectorOpH64),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s2range, ZZ_d_mul_r, "mov">;

  // "mova" aliases (emit argument 0). This group used to be instantiated
  // twice with identical arguments; each alias is now declared exactly once.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B),
                                                !if(v, TileVectorOpV8, TileVectorOpH8),
                                                MatrixIndexGPR32Op12_15,
                                                uimm3s2range, ZZ_b_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H),
                                                !if(v, TileVectorOpV16, TileVectorOpH16),
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s2range, ZZ_h_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S),
                                                !if(v, TileVectorOpV32, TileVectorOpH32),
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s2range, ZZ_s_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D),
                                                !if(v, TileVectorOpV64, TileVectorOpH64),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s2range, ZZ_d_mul_r, "mova">;
}
3615
// Instantiates the two-register vector-to-tile definitions twice: the _H set
// with direction bit 0 and intrinsic int_h, the _V set with direction bit 1
// and intrinsic int_v.
multiclass sme2_mova_vec_to_tile_vg2_multi<string mnemonic,
                                           SDPatternOperator int_h,
                                           SDPatternOperator int_v> {
  defm _H : sme2_mova_vec_to_tile_vg2_multi_base<0b0, mnemonic, int_h>;
  defm _V : sme2_mova_vec_to_tile_vg2_multi_base<0b1, mnemonic, int_v>;
}
3621
// Base format for the four-register vector-to-tile move.
//   sz        - 2-bit size field, encoded in Inst{23-22}.
//   v         - direction bit, encoded in Inst{15}.
//   op        - 3-bit value for Inst{2-0}; callers pass '?' for the bits they
//               later override with tile/immediate operands.
//   tile_ty   - tile operand type; used for both the result $ZAd and the
//               tied input $_ZAd.
//   index_ty  - operand type of the slice immediate $imm.
//   vector_ty - source register operand type of $Zn.
//   mnemonic  - assembly mnemonic.
class sme2_mova_vec_to_tile_vg4_multi_base<bits<2> sz, bit v, bits<3> op,
                                           RegisterOperand tile_ty,
                                           Operand index_ty,
                                           RegisterOperand vector_ty,
                                           string mnemonic>
    : I<(outs tile_ty:$ZAd),
        (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm,
             vector_ty:$Zn),
        mnemonic,
        "\t$ZAd[$Rs, $imm], $Zn",
        "", []>,
      Sched<[]> {
  bits<2> Rs;
  bits<3> Zn;

  // The instruction reads and writes the same tile operand.
  let Constraints = "$ZAd = $_ZAd";

  // Encoding, listed from the least significant field upwards.
  let Inst{2-0}   = op;
  let Inst{6-3}   = 0b0000;
  let Inst{9-7}   = Zn;
  let Inst{12-10} = 0b001;
  let Inst{14-13} = Rs;
  let Inst{15}    = v;
  let Inst{21-16} = 0b000100;
  let Inst{23-22} = sz;
  let Inst{31-24} = 0b11000000;
}
3646
3647// SME2 move vector to tile, four registers
// Defines the four-register vector-to-tile move instructions for one tile
// direction, together with their pseudos, selection patterns and aliases.
//   v         - direction bit; selects the TileVectorOpV* operand types when
//               set and the TileVectorOpH* types otherwise.
//   mnemonic  - assembly mnemonic of the real instructions.
//   intrinsic - operator matched by the selection patterns.
multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic, SDPatternOperator intrinsic> {

  // _B: Inst{2} fixed to 0, Inst{1-0} hold the immediate.
  def _B : sme2_mova_vec_to_tile_vg4_multi_base<0b00, v, {0,?,?},
                                                !if(v, TileVectorOpV8, TileVectorOpH8),
                                                uimm2s4range, ZZZZ_b_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _B, 1> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }

  // _H: Inst{2} fixed to 0, one tile bit and one immediate bit.
  def _H : sme2_mova_vec_to_tile_vg4_multi_base<0b01, v, {0,?,?},
                                                !if(v, TileVectorOpV16, TileVectorOpH16),
                                                uimm1s4range, ZZZZ_h_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _H, 1> {
    bits<1> ZAd;
    bits<1> imm;
    let Inst{1}   = ZAd;
    let Inst{0}   = imm;
  }

  // _S: Inst{2} fixed to 0, Inst{1-0} hold the tile number.
  def _S : sme2_mova_vec_to_tile_vg4_multi_base<0b10, v, {0,?,?},
                                                !if(v, TileVectorOpV32, TileVectorOpH32),
                                                uimm0s4range, ZZZZ_s_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _S, 1> {
    bits<2> ZAd;
    let Inst{1-0} = ZAd;
  }

  // _D: all three low bits hold the tile number.
  def _D : sme2_mova_vec_to_tile_vg4_multi_base<0b11, v, {?,?,?},
                                                !if(v, TileVectorOpV64, TileVectorOpH64),
                                                uimm0s4range, ZZZZ_d_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _D, 1> {
    bits<3> ZAd;
    let Inst{2-0} = ZAd;
  }

  // Pseudo instructions paired with the real ones above.
  def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
  def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
  def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
  def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;

  // Selection patterns, one per supported vector type.
  def : SME2_Tile_VG4_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8, uimm2s4range, tileslicerange2s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16, uimm1s4range, tileslicerange1s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16, uimm1s4range, tileslicerange1s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm1s4range, tileslicerange1s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32, uimm0s4range, tileslicerange0s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32, uimm0s4range, tileslicerange0s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64, uimm0s4range, tileslicerange0s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64, uimm0s4range, tileslicerange0s4>;

  // "mov" aliases (emit argument 1).
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B),
                                                !if(v, TileVectorOpV8, TileVectorOpH8),
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s4range, ZZZZ_b_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _H),
                                                !if(v, TileVectorOpV16, TileVectorOpH16),
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s4range, ZZZZ_h_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _S),
                                                !if(v, TileVectorOpV32, TileVectorOpH32),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_s_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _D),
                                                !if(v, TileVectorOpV64, TileVectorOpH64),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_d_mul_r, "mov">;

  // "mova" aliases (emit argument 0).
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B),
                                                !if(v, TileVectorOpV8, TileVectorOpH8),
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s4range, ZZZZ_b_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H),
                                                !if(v, TileVectorOpV16, TileVectorOpH16),
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s4range, ZZZZ_h_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S),
                                                !if(v, TileVectorOpV32, TileVectorOpH32),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_s_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D),
                                                !if(v, TileVectorOpV64, TileVectorOpH64),
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_d_mul_r, "mova">;
}
3753
// Instantiates the four-register vector-to-tile definitions twice: the _H set
// with direction bit 0 and intrinsic int_h, the _V set with direction bit 1
// and intrinsic int_v.
multiclass sme2_mova_vec_to_tile_vg4_multi<string mnemonic,
                                           SDPatternOperator int_h,
                                           SDPatternOperator int_v> {
  defm _H : sme2_mova_vec_to_tile_vg4_multi_base<0b0, mnemonic, int_h>;
  defm _V : sme2_mova_vec_to_tile_vg4_multi_base<0b1, mnemonic, int_v>;
}
3759
3760// SME Move into Array
// Shared format for the vector-to-array moves (two- and four-register forms).
//   op         - 5-bit field for Inst{10-6}; callers pass '?' for the bits
//                they later override with the source register number.
//   array_ty   - array operand type; used for both the result $ZAd and the
//                tied input $_ZAd.
//   vector_ty  - source register operand type of $Zn.
//   mnemonic   - assembly mnemonic.
//   vg_acronym - text printed after the immediate inside the brackets.
//                NOTE(review): with the default "" the asm string would read
//                "[$Rs, $imm, ]"; the instantiations visible here always pass
//                a non-empty value.
class sme2_mova_vec_to_array_vg24_multi<bits<5> op, RegisterOperand array_ty,
                                        RegisterOperand vector_ty,
                                        string mnemonic,
                                        string vg_acronym = "">
    : I<(outs array_ty:$ZAd),
        (ins array_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm,
             vector_ty:$Zn),
        mnemonic,
        "\t$ZAd[$Rs, $imm, " # vg_acronym # "], $Zn",
        "", []>,
      Sched<[]> {
  bits<2> Rs;
  bits<3> imm;

  // The instruction reads and writes the same array operand.
  let Constraints = "$ZAd = $_ZAd";

  // Encoding, listed from the least significant field upwards.
  let Inst{2-0}   = imm;
  let Inst{5-3}   = 0b000;
  let Inst{10-6}  = op;
  let Inst{12-11} = 0b01;
  let Inst{14-13} = Rs;
  let Inst{31-15} = 0b11000000000001000;
}
3781
3782// MOVA (vector to array, two registers)
// Defines the two-register vector-to-array move NAME (MatrixOp64 array,
// ZZ_d_mul_r source, "vgx2" suffix), its pseudo, the selection patterns for
// every supported vector type, and the accepted assembly aliases.
multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic, SDPatternOperator intrinsic> {
  // Inst{10} is fixed to 0 by the opcode; Inst{9-6} hold the source register.
  def NAME : sme2_mova_vec_to_array_vg24_multi<{0,?,?,?,?}, MatrixOp64,
                                               ZZ_d_mul_r, mnemonic, "vgx2">,
             SMEPseudo2Instr<NAME, 1> {
    bits<4> Zn;
    let Inst{9-6} = Zn;
  }

  def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;

  // Selection patterns: every vector type maps to the single instruction.
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv16i8,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8i16,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8f16,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4i32,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4f32,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64,  sme_elm_idx0_7, tileslice16>;

  // "mova" without a vector-group suffix, for B/H/S/D operand spellings.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp8, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_b_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp16, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_h_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp32, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_s_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp64, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_d_mul_r, "mova">;

  // "mova" with the "vgx2" suffix, for B/H/S operand spellings.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp8, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_b_mul_r,
                                                "mova", "vgx2">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp16, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_h_mul_r,
                                                "mova", "vgx2">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp32, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_s_mul_r,
                                                "mova", "vgx2">;

  // "mov" without a vector-group suffix, for B/H/S/D operand spellings.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp8, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_b_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp16, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_h_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp32, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_s_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp64, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_d_mul_r, "mov">;

  // "mov" with the "vgx2" suffix; only the D spelling has emit argument 1.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp8, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_b_mul_r,
                                                "mov", "vgx2">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp16, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_h_mul_r,
                                                "mov", "vgx2">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
                                                MatrixOp32, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_s_mul_r,
                                                "mov", "vgx2">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME),
                                                MatrixOp64, MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_d_mul_r,
                                                "mov", "vgx2">;
}
3880
3881// MOVA (vector to array, four registers)
//
// For a single NAME this multiclass creates:
//   * the instruction record, built on sme2_mova_vec_to_array_vg24_multi with
//     the 64-bit operand classes (MatrixOp64 / ZZZZ_d_mul_r) and the "vgx4"
//     suffix, adding a 3-bit Zn field encoded in Inst{9-7};
//   * NAME # _PSEUDO, a sme2_move_to_za_pseudo over the same vector operand;
//   * one SME2_ZA_VG1x4_Multi_Pat per listed vector type for `intrinsic`;
//   * assembler aliases spelled "mova" and "mov", with and without the "vgx4"
//     suffix, over the b/h/s/d operand classes.
// Every alias passes 0 as its first argument except the last one
// ("mov", 64-bit operands, "vgx4"), which passes 1.
// NOTE(review): sme2_mova_vec_to_tile_or_array_aliases is defined outside this
// section; its first argument is presumably the InstAlias emit priority, as in
// sme2_mova_tile_or_array_to_vec_aliases - confirm against its definition.
3882multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic, SDPatternOperator intrinsic> {
3883  def NAME : sme2_mova_vec_to_array_vg24_multi<{1,?,?,?,0}, MatrixOp64,
3884                                               ZZZZ_d_mul_r, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> {
3885    bits<3> Zn;
3886    let Inst{9-7} = Zn;
3887  }
3888
3889  def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
3890
      // Every vector type shares the sme_elm_idx0_7 immediate and tileslice16,
      // which the top of this file declares as SelectSMETileSlice<7, 1>.
3891  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv16i8,  sme_elm_idx0_7, tileslice16>;
3892  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8i16,  sme_elm_idx0_7, tileslice16>;
3893  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8f16,  sme_elm_idx0_7, tileslice16>;
3894  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
3895  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4i32,  sme_elm_idx0_7, tileslice16>;
3896  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4f32,  sme_elm_idx0_7, tileslice16>;
3897  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64,  sme_elm_idx0_7, tileslice16>;
3898  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64,  sme_elm_idx0_7, tileslice16>;
3899
      // "mova" without a vector-group suffix: b, h, s and d operand classes.
3900  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3901                                                MatrixOp8,
3902                                                MatrixIndexGPR32Op8_11,
3903                                                sme_elm_idx0_7, ZZZZ_b_mul_r,
3904                                                "mova">;
3905  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3906                                                MatrixOp16,
3907                                                MatrixIndexGPR32Op8_11,
3908                                                sme_elm_idx0_7, ZZZZ_h_mul_r,
3909                                                "mova">;
3910  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3911                                                MatrixOp32,
3912                                                MatrixIndexGPR32Op8_11,
3913                                                sme_elm_idx0_7, ZZZZ_s_mul_r,
3914                                                "mova">;
3915  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3916                                                MatrixOp64,
3917                                                MatrixIndexGPR32Op8_11,
3918                                                sme_elm_idx0_7, ZZZZ_d_mul_r,
3919                                                "mova">;
3920
      // "mova" with an explicit "vgx4" suffix: only b, h and s are listed.
      // NOTE(review): the 64-bit form is presumably omitted because it is the
      // syntax of the instruction record itself - confirm.
3921  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3922                                                MatrixOp8,
3923                                                MatrixIndexGPR32Op8_11,
3924                                                sme_elm_idx0_7, ZZZZ_b_mul_r,
3925                                                "mova", "vgx4">;
3926  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3927                                                MatrixOp16,
3928                                                MatrixIndexGPR32Op8_11,
3929                                                sme_elm_idx0_7, ZZZZ_h_mul_r,
3930                                                "mova", "vgx4">;
3931  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3932                                                MatrixOp32,
3933                                                MatrixIndexGPR32Op8_11,
3934                                                sme_elm_idx0_7, ZZZZ_s_mul_r,
3935                                                "mova", "vgx4">;
3936
      // "mov" without a vector-group suffix: b, h, s and d operand classes.
3937  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3938                                                MatrixOp8,
3939                                                MatrixIndexGPR32Op8_11,
3940                                                sme_elm_idx0_7, ZZZZ_b_mul_r,
3941                                                "mov">;
3942  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3943                                                MatrixOp16,
3944                                                MatrixIndexGPR32Op8_11,
3945                                                sme_elm_idx0_7, ZZZZ_h_mul_r,
3946                                                "mov">;
3947  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3948                                                MatrixOp32,
3949                                                MatrixIndexGPR32Op8_11,
3950                                                sme_elm_idx0_7, ZZZZ_s_mul_r,
3951                                                "mov">;
3952  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3953                                                MatrixOp64,
3954                                                MatrixIndexGPR32Op8_11,
3955                                                sme_elm_idx0_7, ZZZZ_d_mul_r,
3956                                                "mov">;
3957
      // "mov" with an explicit "vgx4" suffix: all four operand classes. The
      // 64-bit form is the only alias in this multiclass whose first argument
      // is 1.
3958  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3959                                                MatrixOp8,
3960                                                MatrixIndexGPR32Op8_11,
3961                                                sme_elm_idx0_7, ZZZZ_b_mul_r,
3962                                                "mov", "vgx4">;
3963  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3964                                                MatrixOp16,
3965                                                MatrixIndexGPR32Op8_11,
3966                                                sme_elm_idx0_7, ZZZZ_h_mul_r,
3967                                                "mov", "vgx4">;
3968  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
3969                                                MatrixOp32,
3970                                                MatrixIndexGPR32Op8_11,
3971                                                sme_elm_idx0_7, ZZZZ_s_mul_r,
3972                                                "mov", "vgx4">;
3973  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME),
3974                                                MatrixOp64,
3975                                                MatrixIndexGPR32Op8_11,
3976                                                sme_elm_idx0_7, ZZZZ_d_mul_r,
3977                                                "mov", "vgx4">;
3978
3979}
3980
// Base encoding class shared by the two-register (vg2) tile-to-vector moves.
//   sz        - placed in Inst{23-22}.
//   v         - placed in Inst{15}.
//   op        - placed in Inst{10-8}; when op{1} is set the tile is also
//               listed as an output ($_ZAn) and tied to the $ZAn input.
//   vector_ty - operand class of the destination $Zd.
//   tile_ty   - operand class of the source tile $ZAn.
//   index_ty  - operand class of the immediate $imm.
//   mnemonic  - assembly mnemonic.
// Inst{7-5} is not assigned in this class; the records deriving from it in
// sme2_mova_tile_to_vec_vg2_multi_inst fill it with the tile number and/or
// the immediate.
3981class sme2_mova_tile_to_vec_vg2_multi_base<bits<2> sz, bit v, bits<3> op,
3982                                           RegisterOperand vector_ty,
3983                                           RegisterOperand tile_ty,
3984                                           Operand index_ty,
3985                                           string mnemonic>
3986   : I<!if(op{1}, (outs vector_ty:$Zd, tile_ty:$_ZAn), (outs vector_ty:$Zd)),
3987       (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
3988       mnemonic,
3989       "\t$Zd, $ZAn[$Rs, $imm]",
3990       "", []>, Sched<[]> {
3991  bits<4> Zd;
3992  bits<2> Rs;
3993  let Inst{31-24} = 0b11000000;
3994  let Inst{23-22} = sz;
3995  let Inst{21-16} = 0b000110;
3996  let Inst{15}    = v;
3997  let Inst{14-13} = Rs;
3998  let Inst{12-11} = 0b00;
3999  let Inst{10-8}  = op;
      // The 4-bit Zd field sits in Inst{4-1}; Inst{0} is fixed to 0.
4000  let Inst{4-1}   = Zd;
4001  let Inst{0}     = 0b0;
4002
      // Tie the extra tile output to the tile input only when op{1} added it.
4003  let Constraints = !if(op{1}, "$ZAn = $_ZAn", "");
4004}
4005
// Emits one anonymous InstAlias for `inst`.
//   op               - forwarded as the third InstAlias argument.
//                      NOTE(review): in LLVM's Target.td that argument is the
//                      emit priority (0 = accepted by the parser but never
//                      printed) - confirm.
//   inst             - instruction the alias resolves to.
//   vector_ty        - operand class used for $Zd in the alias.
//   tile_or_array_ty - operand class used for $ZAn in the alias.
//   rv_ty            - operand class used for the slice register $Rs.
//   index_ty         - operand class used for $imm.
//   mnemonic         - mnemonic the alias is spelled with.
//   vg_acronym       - optional suffix; when non-empty it is appended inside
//                      the brackets as ", <vg_acronym>".
4006multiclass sme2_mova_tile_or_array_to_vec_aliases<int op, Instruction inst,
4007                                                  RegisterOperand vector_ty,
4008                                                  RegisterOperand tile_or_array_ty,
4009                                                  RegisterOperand rv_ty,
4010                                                  Operand index_ty,
4011                                                  string mnemonic,
4012                                                  string vg_acronym=""> {
4013def : InstAlias<mnemonic # "\t$Zd, $ZAn[$Rs, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]",
4014                  (inst vector_ty:$Zd, tile_or_array_ty:$ZAn, rv_ty:$Rs, index_ty:$imm), op>;
4015
4016}
4017
// Defines the _B/_H/_S/_D records of a two-register tile-to-vector move for
// one slice direction, plus their assembler aliases.
//   v        - selects the TileVectorOpV* operand classes when set and the
//              TileVectorOpH* classes otherwise; also forwarded to the base
//              class.
//   opc      - forwarded unchanged as the base class `op`.
//   mnemonic - assembly mnemonic; "mov" aliases are only added for "mova".
// Inst{7-5} is shared between the tile number (ZAn) and the immediate: the
// wider the element, the more bits go to ZAn and the fewer to imm.
4018multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemonic> {
4019
      // Byte elements: no ZAn field is declared; all three bits hold imm.
4020  def _B : sme2_mova_tile_to_vec_vg2_multi_base<0b00, v, opc, ZZ_b_mul_r,
4021                                                !if(v, TileVectorOpV8,
4022                                                       TileVectorOpH8),
4023                                                 uimm3s2range, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
4024    bits<3> imm;
4025    let Inst{7-5} = imm;
4026  }
4027
      // Halfword elements: 1 bit of ZAn, 2 bits of imm.
4028  def _H : sme2_mova_tile_to_vec_vg2_multi_base<0b01, v, opc, ZZ_h_mul_r,
4029                                                !if(v, TileVectorOpV16,
4030                                                       TileVectorOpH16),
4031                                                 uimm2s2range, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
4032    bits<1> ZAn;
4033    bits<2> imm;
4034    let Inst{7}   = ZAn;
4035    let Inst{6-5} = imm;
4036  }
4037
      // Word elements: 2 bits of ZAn, 1 bit of imm.
4038  def _S : sme2_mova_tile_to_vec_vg2_multi_base<0b10, v, opc, ZZ_s_mul_r,
4039                                                !if(v, TileVectorOpV32,
4040                                                       TileVectorOpH32),
4041                                                 uimm1s2range, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
4042    bits<2> ZAn;
4043    bits<1> imm;
4044    let Inst{7-6} = ZAn;
4045    let Inst{5}   = imm;
4046  }
4047
      // Doubleword elements: all three bits hold ZAn; no imm bits are encoded.
4048  def _D : sme2_mova_tile_to_vec_vg2_multi_base<0b11, v, opc, ZZ_d_mul_r,
4049                                                !if(v, TileVectorOpV64,
4050                                                       TileVectorOpH64),
4051                                                uimm0s2range, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
4052    bits<3> ZAn;
4053    let Inst{7-5} = ZAn;
4054  }
4055
      // "mov" spellings exist only for "mova"; they pass 1 as the alias `op`.
4056  if !eq(mnemonic, "mova") then {
4057  defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast<Instruction>(NAME # _B),
4058                                                ZZ_b_mul_r,
4059                                               !if(v, TileVectorOpV8,
4060                                                      TileVectorOpH8),
4061                                                MatrixIndexGPR32Op12_15,
4062                                                uimm3s2range, "mov">;
4063  defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast<Instruction>(NAME # _H),
4064                                                ZZ_h_mul_r,
4065                                                !if(v, TileVectorOpV16,
4066                                                       TileVectorOpH16),
4067                                                MatrixIndexGPR32Op12_15,
4068                                                uimm2s2range, "mov">;
4069  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _S),
4070                                                ZZ_s_mul_r,
4071                                                !if(v, TileVectorOpV32,
4072                                                       TileVectorOpH32),
4073                                                MatrixIndexGPR32Op12_15,
4074                                                uimm1s2range, "mov">;
4075  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _D),
4076                                                ZZ_d_mul_r,
4077                                                !if(v, TileVectorOpV64,
4078                                                       TileVectorOpH64),
4079                                                MatrixIndexGPR32Op12_15,
4080                                                uimm0s2range, "mov">;
4081  }
4082
      // Aliases spelled with the instruction's own mnemonic and the same
      // operand classes as the records above; they pass 0 as the alias `op`.
4083  defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast<Instruction>(NAME # _B),
4084                                                ZZ_b_mul_r,
4085                                               !if(v, TileVectorOpV8,
4086                                                      TileVectorOpH8),
4087                                                MatrixIndexGPR32Op12_15,
4088                                                uimm3s2range, mnemonic>;
4089  defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast<Instruction>(NAME # _H),
4090                                                ZZ_h_mul_r,
4091                                                !if(v, TileVectorOpV16,
4092                                                       TileVectorOpH16),
4093                                                MatrixIndexGPR32Op12_15,
4094                                                uimm2s2range, mnemonic>;
4095  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _S),
4096                                                ZZ_s_mul_r,
4097                                                !if(v, TileVectorOpV32,
4098                                                       TileVectorOpH32),
4099                                                MatrixIndexGPR32Op12_15,
4100                                                uimm1s2range, mnemonic>;
4101  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _D),
4102                                                ZZ_d_mul_r,
4103                                                !if(v, TileVectorOpV64,
4104                                                       TileVectorOpH64),
4105                                                MatrixIndexGPR32Op12_15,
4106                                                uimm0s2range, mnemonic>;
4107
4108}
4109
4110// SME2 move tile to vector, two registers
// Instantiates sme2_mova_tile_to_vec_vg2_multi_inst twice with opc = 0b000:
// the _H records pass v = 0 and the _V records pass v = 1.
4111multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic>{
4112 defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b000, mnemonic>;
4113 defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b000, mnemonic>;
4114}
4115
4116
4117// SME2p1 move tile to vector and zero tile, two registers
// Instantiates sme2_mova_tile_to_vec_vg2_multi_inst with opc = 0b010 for the
// _H (v = 0) and _V (v = 1) records. Bit 1 of that opc is set, which is the
// bit sme2_mova_tile_to_vec_vg2_multi_base tests to add the tied tile output.
// One *_PSEUDO record is then defined per direction and element size.
4118multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic>{
4119 defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
4120 defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;
4121
4122
     // Each pseudo pairs a tile-index operand (sme_elm_idx0_0/1/3/7 for
     // B/H/S/D) with the slice-range operand and vector class of the matching
     // real instruction.
     // NOTE(review): sme2_movez_to_tile_pseudo is defined outside this
     // section - confirm its parameter meanings there.
4123 def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4124 def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4125 def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4126 def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4127
4128 def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
4129 def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
4130 def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
4131 def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
4132}
4133
// Base encoding class shared by the four-register (vg4) tile-to-vector moves.
//   sz        - placed in Inst{23-22}.
//   v         - placed in Inst{15}.
//   op        - six bits placed in Inst{10-5}; callers may leave low bits
//               unset (?) so that derived records can assign them. When op{4}
//               is set the tile is also listed as an output ($_ZAn) and tied
//               to the $ZAn input.
//   vector_ty - operand class of the destination $Zd.
//   tile_ty   - operand class of the source tile $ZAn.
//   index_ty  - operand class of the immediate $imm.
//   mnemonic  - assembly mnemonic.
4134class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
4135                                           RegisterOperand vector_ty,
4136                                           RegisterOperand tile_ty,
4137                                           Operand index_ty,
4138                                           string mnemonic>
4139   : I<!if(op{4}, (outs vector_ty:$Zd, tile_ty:$_ZAn), (outs vector_ty:$Zd)),
4140       (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
4141       mnemonic,
4142       "\t$Zd, $ZAn[$Rs, $imm]",
4143       "", []>, Sched<[]> {
4144  bits<3> Zd;
4145  bits<2> Rs;
4146  let Inst{31-24} = 0b11000000;
4147  let Inst{23-22} = sz;
4148  let Inst{21-16} = 0b000110;
4149  let Inst{15}    = v;
4150  let Inst{14-13} = Rs;
4151  let Inst{12-11} = 0b00;
4152  let Inst{10-5}  = op{5-0};
      // The 3-bit Zd field sits in Inst{4-2}; Inst{1-0} is fixed to 0.
4153  let Inst{4-2}   = Zd;
4154  let Inst{1-0}   = 0b00;
4155
      // Tie the extra tile output to the tile input only when op{4} added it.
4156  let Constraints = !if(op{4}, "$ZAn = $_ZAn", "");
4157}
4158
// Defines the _B/_H/_S/_D records of a four-register tile-to-vector move for
// one slice direction, plus their assembler aliases. This multiclass shares
// its name with the class it instantiates.
//   v        - selects the TileVectorOpV* operand classes when set and the
//              TileVectorOpH* classes otherwise; also forwarded to the class.
//   opc      - becomes the top three bits of the class's 6-bit `op`.
//   mnemonic - assembly mnemonic; "mov" aliases are only added for "mova".
// For _B/_H/_S the fourth `op` bit is fixed to 0 and the low two bits are
// left unset so each record can assign Inst{6-5}; for _D the low three bits
// are left unset and Inst{7-5} holds ZAn.
4159multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemonic> {
4160
      // Byte elements: no ZAn field is declared; Inst{6-5} holds imm.
4161  def _B : sme2_mova_tile_to_vec_vg4_multi_base<0b00, v, {opc,0,?,?},
4162                                                ZZZZ_b_mul_r,
4163                                                !if(v, TileVectorOpV8,
4164                                                       TileVectorOpH8),
4165                                                uimm2s4range, mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
4166    bits<2> imm;
4167    let Inst{6-5} = imm;
4168  }
4169
      // Halfword elements: 1 bit of ZAn, 1 bit of imm.
4170  def _H : sme2_mova_tile_to_vec_vg4_multi_base<0b01, v, {opc,0,?,?},
4171                                                ZZZZ_h_mul_r,
4172                                                !if(v, TileVectorOpV16,
4173                                                       TileVectorOpH16),
4174                                                uimm1s4range, mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
4175    bits<1> ZAn;
4176    bits<1> imm;
4177    let Inst{6}   = ZAn;
4178    let Inst{5}   = imm;
4179  }
4180
      // Word elements: Inst{6-5} holds ZAn; no imm bits are encoded.
4181  def _S : sme2_mova_tile_to_vec_vg4_multi_base<0b10, v, {opc,0,?,?},
4182                                                ZZZZ_s_mul_r,
4183                                                !if(v, TileVectorOpV32,
4184                                                       TileVectorOpH32),
4185                                                 uimm0s4range, mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
4186    bits<2> ZAn;
4187    let Inst{6-5} = ZAn;
4188  }
4189
      // Doubleword elements: Inst{7-5} holds ZAn; no imm bits are encoded.
4190  def _D : sme2_mova_tile_to_vec_vg4_multi_base<0b11, v, {opc,?,?,?},
4191                                                ZZZZ_d_mul_r,
4192                                                !if(v, TileVectorOpV64,
4193                                                       TileVectorOpH64),
4194                                                uimm0s4range, mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
4195    bits<3> ZAn;
4196    let Inst{7-5} = ZAn;
4197  }
4198
      // "mov" spellings exist only for "mova"; they pass 1 as the alias `op`.
4199  if !eq(mnemonic, "mova") then {
4200  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _B),
4201                                                ZZZZ_b_mul_r,
4202                                                !if(v, TileVectorOpV8,
4203                                                      TileVectorOpH8),
4204                                                MatrixIndexGPR32Op12_15,
4205                                                uimm2s4range, "mov">;
4206  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _H),
4207                                                ZZZZ_h_mul_r,
4208                                                !if(v, TileVectorOpV16,
4209                                                       TileVectorOpH16),
4210                                                MatrixIndexGPR32Op12_15,
4211                                                uimm1s4range, "mov">;
4212  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _S),
4213                                                ZZZZ_s_mul_r,
4214                                                !if(v, TileVectorOpV32,
4215                                                      TileVectorOpH32),
4216                                                MatrixIndexGPR32Op12_15,
4217                                                uimm0s4range, "mov">;
4218  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _D),
4219                                                ZZZZ_d_mul_r,
4220                                                !if(v, TileVectorOpV64,
4221                                                       TileVectorOpH64),
4222                                                MatrixIndexGPR32Op12_15,
4223                                                uimm0s4range, "mov">;
4224  }
4225
      // Aliases spelled with the instruction's own mnemonic and the same
      // operand classes as the records above; they pass 0 as the alias `op`.
4226  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _B),
4227                                                ZZZZ_b_mul_r,
4228                                                !if(v, TileVectorOpV8,
4229                                                       TileVectorOpH8),
4230                                                MatrixIndexGPR32Op12_15,
4231                                                uimm2s4range, mnemonic>;
4232  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _H),
4233                                                ZZZZ_h_mul_r,
4234                                                !if(v, TileVectorOpV16,
4235                                                       TileVectorOpH16),
4236                                                MatrixIndexGPR32Op12_15,
4237                                                uimm1s4range, mnemonic>;
4238  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _S),
4239                                                ZZZZ_s_mul_r,
4240                                                !if(v, TileVectorOpV32,
4241                                                      TileVectorOpH32),
4242                                                MatrixIndexGPR32Op12_15,
4243                                                uimm0s4range, mnemonic>;
4244  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _D),
4245                                                ZZZZ_d_mul_r,
4246                                                !if(v, TileVectorOpV64,
4247                                                       TileVectorOpH64),
4248                                                MatrixIndexGPR32Op12_15,
4249                                                uimm0s4range, mnemonic>;
4250
4251}
4252
4253// SME2 move tile to vector, four registers
// Instantiates the sme2_mova_tile_to_vec_vg4_multi_base multiclass twice with
// opc = 0b100: the _H records pass v = 0 and the _V records pass v = 1.
4254multiclass sme2_mova_tile_to_vec_vg4_multi<string mnemonic>{
4255 defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b100, mnemonic>;
4256 defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b100, mnemonic>;
4257}
4258
4259// SME2p1 move tile to vector and zero tile, four registers
// Instantiates the sme2_mova_tile_to_vec_vg4_multi_base multiclass with
// opc = 0b110 for the _H (v = 0) and _V (v = 1) records. Bit 1 of that opc
// becomes op{4} of the encoding class, which is the bit that class tests to
// add the tied tile output. One *_PSEUDO record is then defined per direction
// and element size.
4260multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic>{
4261 defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>;
4262 defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>;
4263
     // Each pseudo pairs a tile-index operand (sme_elm_idx0_0/1/3/7 for
     // B/H/S/D) with the slice-range operand and vector class of the matching
     // real instruction.
     // NOTE(review): sme2_movez_to_tile_pseudo is defined outside this
     // section - confirm its parameter meanings there.
4264 def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4265 def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4266 def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4267 def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4268
4269 def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
4270 def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
4271 def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
4272 def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
4273}
4274
4275
// Base encoding class shared by the array-to-vector moves (the vg2 and vg4
// multiclasses both derive from it).
//   op         - four bits: op{3-1} go to Inst{10-8} and op{0} to Inst{1}.
//                When op{2} is set the array is also listed as an output
//                ($_ZAn) and tied to the $ZAn input.
//   vector_ty  - operand class of the destination $Zd.
//   array_ty   - operand class of the source array $ZAn.
//   mnemonic   - assembly mnemonic.
//   vg_acronym - suffix that is always printed inside the brackets.
// The slice register is always MatrixIndexGPR32Op8_11 and the immediate is
// always sme_elm_idx0_7. The destination register bits are not assigned
// here; deriving records add their own Zd field.
4276class sme2_mova_array_to_vec_vg24_multi<bits<4>op, RegisterOperand vector_ty,
4277                                        RegisterOperand array_ty,
4278                                        string mnemonic, string vg_acronym>
4279   : I<!if(op{2}, (outs vector_ty:$Zd, array_ty:$_ZAn), (outs vector_ty:$Zd)),
4280       (ins array_ty:$ZAn, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm),
4281       mnemonic,
4282       "\t$Zd, $ZAn[$Rs, $imm, " # vg_acronym # "]",
4283       "", []>, Sched<[]> {
4284  bits<2> Rs;
4285  bits<3> imm;
4286  let Inst{31-15} = 0b11000000000001100;
4287  let Inst{14-13} = Rs;
4288  let Inst{12-11} = 0b01;
4289  let Inst{10-8}  = op{3-1};
4290  let Inst{7-5}   = imm;
4291  let Inst{1}     = op{0};
4292  let Inst{0}     = 0b0;
      // Tie the extra array output to the array input only when op{2} added it.
4293  let Constraints = !if(op{2}, "$ZAn = $_ZAn", "");
4294}
4295
4296// move array to vector, two registers.
//
// Defines the instruction record NAME on sme2_mova_array_to_vec_vg24_multi
// with the 64-bit operand classes and the "vgx2" suffix, then adds aliases.
//   opc      - becomes op{3-1} of the base class; op{0} is left unset (?).
//   mnemonic - assembly mnemonic; "mov" aliases are only added for "mova".
// The record assigns its 4-bit Zd to Inst{4-1}, which includes the Inst{1}
// position the base class assigns from the unset op{0}.
4297multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
4298  def NAME : sme2_mova_array_to_vec_vg24_multi<{opc,?}, ZZ_d_mul_r, MatrixOp64,
4299                                               mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1>{
4300    bits<4> Zd;
4301    let Inst{4-1} = Zd;
4302  }
4303
      // Own mnemonic without a vector-group suffix: b, h, s and d classes.
4304  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4305                                                ZZ_b_mul_r, MatrixOp8,
4306                                                MatrixIndexGPR32Op8_11,
4307                                                sme_elm_idx0_7, mnemonic>;
4308  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4309                                                ZZ_h_mul_r, MatrixOp16,
4310                                                MatrixIndexGPR32Op8_11,
4311                                                sme_elm_idx0_7, mnemonic>;
4312  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4313                                                ZZ_s_mul_r, MatrixOp32,
4314                                                MatrixIndexGPR32Op8_11,
4315                                                sme_elm_idx0_7, mnemonic>;
4316  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4317                                                ZZ_d_mul_r,  MatrixOp64,
4318                                                MatrixIndexGPR32Op8_11,
4319                                                sme_elm_idx0_7, mnemonic>;
4320
      // Own mnemonic with "vgx2": only b, h and s are aliased; the 64-bit
      // "vgx2" spelling is the asm string of the record defined above.
4321  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4322                                                ZZ_b_mul_r, MatrixOp8,
4323                                                MatrixIndexGPR32Op8_11,
4324                                                sme_elm_idx0_7, mnemonic, "vgx2">;
4325  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4326                                                ZZ_h_mul_r, MatrixOp16,
4327                                                MatrixIndexGPR32Op8_11,
4328                                                sme_elm_idx0_7, mnemonic, "vgx2">;
4329  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4330                                                ZZ_s_mul_r, MatrixOp32,
4331                                                MatrixIndexGPR32Op8_11,
4332                                                sme_elm_idx0_7, mnemonic, "vgx2">;
4333
      // "mov" spellings exist only for "mova". All pass 0 as the alias `op`
      // except the final 64-bit "vgx2" form, which passes 1.
4334  if !eq(mnemonic, "mova") then {
4335  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4336                                                ZZ_b_mul_r, MatrixOp8,
4337                                                MatrixIndexGPR32Op8_11,
4338                                                sme_elm_idx0_7, "mov">;
4339  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4340                                                ZZ_h_mul_r, MatrixOp16,
4341                                                MatrixIndexGPR32Op8_11,
4342                                                sme_elm_idx0_7, "mov">;
4343  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4344                                                ZZ_s_mul_r, MatrixOp32,
4345                                                MatrixIndexGPR32Op8_11,
4346                                                sme_elm_idx0_7, "mov">;
4347  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4348                                                ZZ_d_mul_r,  MatrixOp64,
4349                                                MatrixIndexGPR32Op8_11,
4350                                                sme_elm_idx0_7, "mov">;
4351
4352  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4353                                                ZZ_b_mul_r, MatrixOp8,
4354                                                MatrixIndexGPR32Op8_11,
4355                                                sme_elm_idx0_7, "mov", "vgx2">;
4356  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4357                                                ZZ_h_mul_r, MatrixOp16,
4358                                                MatrixIndexGPR32Op8_11,
4359                                                sme_elm_idx0_7, "mov", "vgx2">;
4360  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME),
4361                                                ZZ_s_mul_r, MatrixOp32,
4362                                                MatrixIndexGPR32Op8_11,
4363                                                sme_elm_idx0_7, "mov", "vgx2">;
4364  defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME),
4365                                                ZZ_d_mul_r,  MatrixOp64,
4366                                                MatrixIndexGPR32Op8_11,
4367                                                sme_elm_idx0_7, "mov", "vgx2">;
4368  }
4369}
4370
// Two-register array-to-vector move instantiated with opc = 0b010, plus a
// NAME # _PSEUDO record. Bit 1 of that opc becomes op{2} of
// sme2_mova_array_to_vec_vg24_multi, the bit that class tests to add the tied
// array output.
// NOTE(review): sme2_movaz_array_to_tile_pseudo is defined outside this
// section - confirm its parameter meanings there.
4371multiclass sme2_movaz_array_to_vec_vg2_multi<string mnemonic> {
4372  defm NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>;
4373  def NAME # _PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
4374}
4375
// Move array to vector, four registers.
//
// Defines the instruction NAME (ZZZZ_d_mul_r / MatrixOp64 operands, "vgx4"
// suffix) followed by assembler aliases that accept the other element-size
// spellings of the same operands, both without a suffix and with an explicit
// "vgx4". When the mnemonic is "mova" the same set is repeated under the
// "mov" spelling.
// NOTE(review): the leading 0/1 argument of the alias multiclass is defined
// outside this chunk; presumably it selects the preferred (printed) alias -
// confirm against sme2_mova_tile_or_array_to_vec_aliases.
multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
  def NAME
      : sme2_mova_array_to_vec_vg24_multi<opc, ZZZZ_d_mul_r, MatrixOp64,
                                          mnemonic, "vgx4">,
        SMEPseudo2Instr<NAME, 1> {
    // Three-bit register-group field.
    bits<3> Zd;
    let Inst{4-2} = Zd;
  }

  // <mnemonic>, no vector-group suffix: .b/.h/.s/.d spellings.
  defm : sme2_mova_tile_or_array_to_vec_aliases<
             0, !cast<Instruction>(NAME), ZZZZ_b_mul_r, MatrixOp8,
             MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>;
  defm : sme2_mova_tile_or_array_to_vec_aliases<
             0, !cast<Instruction>(NAME), ZZZZ_h_mul_r, MatrixOp16,
             MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>;
  defm : sme2_mova_tile_or_array_to_vec_aliases<
             0, !cast<Instruction>(NAME), ZZZZ_s_mul_r, MatrixOp32,
             MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>;
  defm : sme2_mova_tile_or_array_to_vec_aliases<
             0, !cast<Instruction>(NAME), ZZZZ_d_mul_r, MatrixOp64,
             MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic>;

  // <mnemonic> with explicit "vgx4": .b/.h/.s spellings only (the .d form
  // with "vgx4" is what the instruction definition above is built with).
  defm : sme2_mova_tile_or_array_to_vec_aliases<
             0, !cast<Instruction>(NAME), ZZZZ_b_mul_r, MatrixOp8,
             MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic, "vgx4">;
  defm : sme2_mova_tile_or_array_to_vec_aliases<
             0, !cast<Instruction>(NAME), ZZZZ_h_mul_r, MatrixOp16,
             MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic, "vgx4">;
  defm : sme2_mova_tile_or_array_to_vec_aliases<
             0, !cast<Instruction>(NAME), ZZZZ_s_mul_r, MatrixOp32,
             MatrixIndexGPR32Op8_11, sme_elm_idx0_7, mnemonic, "vgx4">;

  if !eq(mnemonic, "mova") then {
    // "mov", no vector-group suffix.
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME), ZZZZ_b_mul_r, MatrixOp8,
               MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME), ZZZZ_h_mul_r, MatrixOp16,
               MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME), ZZZZ_s_mul_r, MatrixOp32,
               MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME), ZZZZ_d_mul_r, MatrixOp64,
               MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov">;

    // "mov" with explicit "vgx4"; only the .d spelling passes 1 as the
    // first argument.
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME), ZZZZ_b_mul_r, MatrixOp8,
               MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx4">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME), ZZZZ_h_mul_r, MatrixOp16,
               MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx4">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               0, !cast<Instruction>(NAME), ZZZZ_s_mul_r, MatrixOp32,
               MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx4">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<
               1, !cast<Instruction>(NAME), ZZZZ_d_mul_r, MatrixOp64,
               MatrixIndexGPR32Op8_11, sme_elm_idx0_7, "mov", "vgx4">;
  }
}
4450
// "movaz" array-to-vector wrapper for four-register groups: instantiates the
// vg4 array-to-vector multiclass with opcode 0b1100 and adds a companion
// NAME_PSEUDO record built from sme2_movaz_array_to_tile_pseudo.
multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
  defm NAME : sme2_mova_array_to_vec_vg4_multi<0b1100, mnemonic>;

  def NAME # _PSEUDO
      : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r,
                                        SMEMatrixArray>;
}
4455
4456//===----------------------------------------------------------------------===//
4457// SME2 multi-vec saturating shift right narrow
// Encoding for the two-register form: a ZZ_s_mul_r source group and a
// tvecshiftR16 immediate produce a single ZPR16 result. `op` and `u` are
// single encoding bits (Inst{20} and Inst{5}) supplied by the instantiation.
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
    : I<(outs ZPR16:$Zd),
        (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
        mnemonic, "\t$Zd, $Zn, $imm4", "", []>,
      Sched<[]> {
  bits<4> imm4;
  bits<4> Zn;
  bits<5> Zd;

  // Fixed opcode bits.
  let Inst{31-21} = 0b11000001111;
  let Inst{15-10} = 0b110101;

  // Variant selectors taken from the template arguments.
  let Inst{20} = op;
  let Inst{5}  = u;

  // Operand fields.
  let Inst{19-16} = imm4;
  let Inst{9-6}   = Zn;
  let Inst{4-0}   = Zd;
}
4473
// Instantiates the single _H variant of the two-register saturating shift and
// attaches the selection pattern mapping `intrinsic` (nxv8i16 result,
// nxv4i32 inputs, tvecshiftR16 immediate) onto it.
multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u,
                                     SDPatternOperator intrinsic> {
  def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>;

  def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic,
                               nxv8i16, nxv4i32, tvecshiftR16>;
}
4479
// Shared encoding for the four-register form. The immediate field
// (Inst{20-16}, plus any `sz` bit left as '?') is filled in by the
// instantiating def, because its width differs per element size.
class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
                                RegisterOperand vector_ty, Operand imm_ty,
                                string mnemonic>
    : I<(outs zpr_ty:$Zd),
        (ins vector_ty:$Zn, imm_ty:$imm),
        mnemonic, "\t$Zd, $Zn, $imm", "", []>,
      Sched<[]> {
  bits<3> Zn;
  bits<5> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21}    = 0b1;
  let Inst{15-11} = 0b11011;

  // Size and operation selectors; `op` is split across Inst{10} and
  // Inst{6-5}.
  let Inst{23-22} = sz;
  let Inst{10}    = op{2};
  let Inst{6-5}   = op{1-0};

  // Operand fields.
  //  Inst{20-16} = imm5;
  let Inst{9-7}   = Zn;
  let Inst{4-0}   = Zd;
}
4498
// Instantiates the _B and _H variants of the four-register saturating shift
// and their selection patterns.
//   _B: 5-bit immediate in Inst{20-16}, sz fixed to {0,1}.
//   _H: 6-bit immediate; bit 5 goes in Inst{22} (the '?' left in sz), the
//       low five bits in Inst{20-16}.
multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op,
                                     SDPatternOperator intrinsic> {
  def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r,
                                     tvecshiftR32, mnemonic> {
    bits<5> imm;
    let Inst{20-16} = imm;
  }

  def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r,
                                     tvecshiftR64, mnemonic> {
    bits<6> imm;
    let Inst{22}    = imm{5};
    let Inst{20-16} = imm{4-0};
  }

  def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic,
                               nxv16i8, nxv4i32, tvecshiftR32>;
  def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic,
                               nxv8i16, nxv2i64, tvecshiftR64>;
}
4515
4516//===----------------------------------------------------------------------===//
4517// SME2 Multi-vector - SVE Select
// Common encoding for the two- and four-register select forms. The register
// fields (Zm, Zn, Zd) are not assigned here; derived classes place them, and
// `op` supplies the bits that differ between the two layouts
// (Inst{17-16}, Inst{6} and Inst{1}).
class sme2_sel_vector_vg24<bits<2> sz, bits<4> op, RegisterOperand vector_ty,
                           string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins PNRAny_p8to15:$PNg, vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$Zd, $PNg, $Zn, $Zm", "", []>,
      Sched<[]> {
  bits<3> PNg;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000001;
  let Inst{21}    = 0b1;
  let Inst{15-13} = 0b100;
  let Inst{5}     = 0b0;
  let Inst{0}     = 0b0;

  // Element size and layout selector bits.
  let Inst{23-22} = sz;
  let Inst{17-16} = op{3-2};
  let Inst{6}     = op{1};
  let Inst{1}     = op{0};

  // Governing predicate-as-counter operand.
  let Inst{12-10} = PNg;
}
4536
// Two-register select: 4-bit register-group fields. The '?' positions in the
// `op` argument are the bits overlapped by Zm (Inst{17}), Zn (Inst{6}) and
// Zd (Inst{1}) below.
class sme2_sel_vector_vg2<bits<2> sz, RegisterOperand vector_ty,
                          string mnemonic>
    : sme2_sel_vector_vg24<sz, {?,0,?,?}, vector_ty, mnemonic> {
  bits<4> Zm;
  bits<4> Zn;
  bits<4> Zd;

  let Inst{4-1}   = Zd;
  let Inst{9-6}   = Zn;
  let Inst{20-17} = Zm;
}
4547
// One two-register select instruction per element size (B/H/S/D), with the
// size encoded as 0b00..0b11.
multiclass sme2_sel_vector_vg2<string mnemonic> {
  def _B : sme2_sel_vector_vg2<0b00, ZZ_b_mul_r, mnemonic>;
  def _H : sme2_sel_vector_vg2<0b01, ZZ_h_mul_r, mnemonic>;
  def _S : sme2_sel_vector_vg2<0b10, ZZ_s_mul_r, mnemonic>;
  def _D : sme2_sel_vector_vg2<0b11, ZZ_d_mul_r, mnemonic>;
}

// Four-register select: 3-bit register-group fields, with the base-class
// selector bits fully fixed by op = 0b0100.
class sme2_sel_vector_vg4<bits<2> sz, RegisterOperand vector_ty,
                          string mnemonic>
    : sme2_sel_vector_vg24<sz, 0b0100, vector_ty, mnemonic> {
  bits<3> Zm;
  bits<3> Zn;
  bits<3> Zd;

  let Inst{4-2}   = Zd;
  let Inst{9-7}   = Zn;
  let Inst{20-18} = Zm;
}

// One four-register select instruction per element size (B/H/S/D), with the
// size encoded as 0b00..0b11.
multiclass sme2_sel_vector_vg4<string mnemonic> {
  def _B : sme2_sel_vector_vg4<0b00, ZZZZ_b_mul_r, mnemonic>;
  def _H : sme2_sel_vector_vg4<0b01, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_sel_vector_vg4<0b10, ZZZZ_s_mul_r, mnemonic>;
  def _D : sme2_sel_vector_vg4<0b11, ZZZZ_d_mul_r, mnemonic>;
}
4570
4571//===----------------------------------------------------------------------===//
4572// Non contiguous Load and Store
4573
// Two-register load, scalar-plus-scalar addressing ([$Rn, $Rm]).
// The 4-bit Zt field is split around the `n` bit: Zt{3} in Inst{4}, Zt{2-0}
// in Inst{2-0}. Inst{15} = 0 distinguishes this from the four-register form.
class sme2_ld_vector_vg2_multi_scalar_scalar<bits<2> msz, bit n,
                                             RegisterOperand multi_vector_ty,
                                             RegisterOperand gpr_ty,
                                             string mnemonic>
    : I<(outs multi_vector_ty:$Zt),
        (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]", "", []>,
      Sched<[]> {
  bits<5> Rm;
  bits<3> PNg;
  bits<5> Rn;
  bits<4> Zt;

  // Fixed opcode bits.
  let Inst{31-21} = 0b10100001000;
  let Inst{15}    = 0b0;

  // Selectors from the template arguments.
  let Inst{14-13} = msz;
  let Inst{3}     = n;

  // Operand fields.
  let Inst{20-16} = Rm;
  let Inst{12-10} = PNg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = Zt{3};
  let Inst{2-0}   = Zt{2-0};

  let mayLoad = 1;
}
4598
// Four-register load, scalar-plus-scalar addressing ([$Rn, $Rm]).
// The 3-bit Zt field is split: Zt{2} in Inst{4}, Zt{1-0} in Inst{1-0}, with
// Inst{2} fixed to 0. Inst{15} = 1 distinguishes this from the two-register
// form.
class sme2_ld_vector_vg4_multi_scalar_scalar<bits<2> msz, bit n,
                                             RegisterOperand multi_vector_ty,
                                             RegisterOperand gpr_ty,
                                             string mnemonic>
    : I<(outs multi_vector_ty:$Zt),
        (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]", "", []>,
      Sched<[]> {
  bits<5> Rm;
  bits<3> PNg;
  bits<5> Rn;
  bits<3> Zt;

  // Fixed opcode bits.
  let Inst{31-21} = 0b10100001000;
  let Inst{15}    = 0b1;
  let Inst{2}     = 0b0;

  // Selectors from the template arguments.
  let Inst{14-13} = msz;
  let Inst{3}     = n;

  // Operand fields.
  let Inst{20-16} = Rm;
  let Inst{12-10} = PNg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = Zt{2};
  let Inst{1-0}   = Zt{1-0};

  let mayLoad = 1;
}
4624
// Shared encoding for two- and four-register loads with scalar-plus-immediate
// addressing ([$Rn, $imm4, mul vl]). The Zt field is not assigned here; the
// instantiating def places it, and `op` supplies Inst{15} and Inst{2}.
class sme2_ld_vector_vg24_multi_scalar_immediate<bits<2> msz, bit n, bits<2> op,
                                                 RegisterOperand multi_vector_ty,
                                                 Operand index_ty,
                                                 string mnemonic>
    : I<(outs multi_vector_ty:$Zt),
        (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4),
        mnemonic, "\t$Zt, $PNg/z, [$Rn, $imm4, mul vl]", "", []>,
      Sched<[]> {
  bits<4> imm4;
  bits<3> PNg;
  bits<5> Rn;

  // Fixed opcode bits.
  let Inst{31-20} = 0b101000010100;

  // Selectors from the template arguments.
  let Inst{15}    = op{1};
  let Inst{14-13} = msz;
  let Inst{3}     = n;
  let Inst{2}     = op{0};

  // Operand fields.
  let Inst{19-16} = imm4;
  let Inst{12-10} = PNg;
  let Inst{9-5}   = Rn;

  let mayLoad = 1;
}
4647
// Two-register scalar-plus-immediate load: places the 4-bit Zt field
// (Zt{3} in Inst{4}, Zt{2-0} in Inst{2-0}, the latter covering the '?' in
// `op`) and adds an alias that omits the immediate when it is 0.
multiclass sme2_ld_vector_vg2_multi_scalar_immediate<bits<2> msz, bit n,
                                                     RegisterOperand multi_vector_ty,
                                                     Operand index_ty,
                                                     string mnemonic> {
  def NAME
      : sme2_ld_vector_vg24_multi_scalar_immediate<msz, n, {0,?},
                                                   multi_vector_ty, index_ty,
                                                   mnemonic> {
    bits<4> Zt;
    let Inst{4}   = Zt{3};
    let Inst{2-0} = Zt{2-0};
  }

  def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]",
                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt,
                                            PNRAny_p8to15:$PNg,
                                            GPR64sp:$Rn, 0),
                  1>;
}
4663
// Four-register scalar-plus-immediate load: places the 3-bit Zt field
// (Zt{2} in Inst{4}, Zt{1-0} in Inst{1-0}) with op fixed to 0b10, and adds an
// alias that omits the immediate when it is 0.
multiclass sme2_ld_vector_vg4_multi_scalar_immediate<bits<2> msz, bit n,
                                                     RegisterOperand multi_vector_ty,
                                                     Operand index_ty,
                                                     string mnemonic> {
  def NAME
      : sme2_ld_vector_vg24_multi_scalar_immediate<msz, n, 0b10,
                                                   multi_vector_ty, index_ty,
                                                   mnemonic> {
    bits<3> Zt;
    let Inst{4}   = Zt{2};
    let Inst{1-0} = Zt{1-0};
  }

  def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]",
                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt,
                                            PNRAny_p8to15:$PNg,
                                            GPR64sp:$Rn, 0),
                  1>;
}
4679
4680//===----------------------------------------------------------------------===//
4681// SME2 Non-Contiguous Store
// Two-register store, scalar-plus-scalar addressing ([$Rn, $Rm]).
// The 4-bit Zt field is split around the `n` bit: Zt{3} in Inst{4}, Zt{2-0}
// in Inst{2-0}. Inst{15} = 0 distinguishes this from the four-register form.
class sme2_st_vector_vg2_multi_scalar_scalar<bits<2> msz, bit n,
                                             RegisterOperand multi_vector_ty,
                                             RegisterOperand gpr_ty,
                                             string mnemonic>
    : I<(outs),
        (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]", "", []>,
      Sched<[]> {
  bits<5> Rm;
  bits<3> PNg;
  bits<5> Rn;
  bits<4> Zt;

  // Fixed opcode bits.
  let Inst{31-21} = 0b10100001001;
  let Inst{15}    = 0b0;

  // Selectors from the template arguments.
  let Inst{14-13} = msz;
  let Inst{3}     = n;

  // Operand fields.
  let Inst{20-16} = Rm;
  let Inst{12-10} = PNg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = Zt{3};
  let Inst{2-0}   = Zt{2-0};

  let mayStore = 1;
}
4706
// Four-register store, scalar-plus-scalar addressing ([$Rn, $Rm]).
// The 3-bit Zt field is split: Zt{2} in Inst{4}, Zt{1-0} in Inst{1-0}, with
// Inst{2} fixed to 0. Inst{15} = 1 distinguishes this from the two-register
// form.
class sme2_st_vector_vg4_multi_scalar_scalar<bits<2> msz, bit n,
                                             RegisterOperand multi_vector_ty,
                                             RegisterOperand gpr_ty,
                                             string mnemonic>
    : I<(outs),
        (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]", "", []>,
      Sched<[]> {
  bits<5> Rm;
  bits<3> PNg;
  bits<5> Rn;
  bits<3> Zt;

  // Fixed opcode bits.
  let Inst{31-21} = 0b10100001001;
  let Inst{15}    = 0b1;
  let Inst{2}     = 0b0;

  // Selectors from the template arguments.
  let Inst{14-13} = msz;
  let Inst{3}     = n;

  // Operand fields.
  let Inst{20-16} = Rm;
  let Inst{12-10} = PNg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = Zt{2};
  let Inst{1-0}   = Zt{1-0};

  let mayStore = 1;
}
4732
// Shared encoding for two- and four-register stores with
// scalar-plus-immediate addressing ([$Rn, $imm4, mul vl]). The Zt field is
// not assigned here; the instantiating def places it, and `op` supplies
// Inst{15} and Inst{2}.
class sme2_st_vector_vg24_multi_scalar_immediate<bits<2> msz, bit n, bits<2> op,
                                                 RegisterOperand multi_vector_ty,
                                                 Operand index_ty,
                                                 string mnemonic>
    : I<(outs),
        (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn,
             index_ty:$imm4),
        mnemonic, "\t$Zt, $PNg, [$Rn, $imm4, mul vl]", "", []>,
      Sched<[]> {
  bits<4> imm4;
  bits<3> PNg;
  bits<5> Rn;

  // Fixed opcode bits.
  let Inst{31-20} = 0b101000010110;

  // Selectors from the template arguments.
  let Inst{15}    = op{1};
  let Inst{14-13} = msz;
  let Inst{3}     = n;
  let Inst{2}     = op{0};

  // Operand fields.
  let Inst{19-16} = imm4;
  let Inst{12-10} = PNg;
  let Inst{9-5}   = Rn;

  let mayStore = 1;
}
4755
4756
// Two-register scalar-plus-immediate store: places the 4-bit Zt field
// (Zt{3} in Inst{4}, Zt{2-0} in Inst{2-0}, the latter covering the '?' in
// `op`) and adds an alias that omits the immediate when it is 0.
multiclass sme2_st_vector_vg2_multi_scalar_immediate<bits<2> msz, bit n,
                                                     RegisterOperand multi_vector_ty,
                                                     Operand index_ty,
                                                     string mnemonic> {
  def NAME
      : sme2_st_vector_vg24_multi_scalar_immediate<msz, n, {0,?},
                                                   multi_vector_ty, index_ty,
                                                   mnemonic> {
    bits<4> Zt;
    let Inst{4}   = Zt{3};
    let Inst{2-0} = Zt{2-0};
  }

  def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt,
                                            PNRAny_p8to15:$PNg,
                                            GPR64sp:$Rn, 0),
                  1>;
}
4772
// Four-register scalar-plus-immediate store: places the 3-bit Zt field
// (Zt{2} in Inst{4}, Zt{1-0} in Inst{1-0}) with op fixed to 0b10, and adds an
// alias that omits the immediate when it is 0.
multiclass sme2_st_vector_vg4_multi_scalar_immediate<bits<2> msz, bit n,
                                                     RegisterOperand multi_vector_ty,
                                                     Operand index_ty,
                                                     string mnemonic> {
  def NAME
      : sme2_st_vector_vg24_multi_scalar_immediate<msz, n, 0b10,
                                                   multi_vector_ty, index_ty,
                                                   mnemonic> {
    bits<3> Zt;
    let Inst{4}   = Zt{2};
    let Inst{1-0} = Zt{1-0};
  }

  def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt,
                                            PNRAny_p8to15:$PNg,
                                            GPR64sp:$Rn, 0),
                  1>;
}
4788
4789//===----------------------------------------------------------------------===//
4790// SME2.1
4791//===----------------------------------------------------------------------===//
// SME2.1 zeroing move tile to vector
// Shared encoding for the tile-slice-to-vector move that also lists the tile
// as an output (tied to the $_ZAn input through Constraints). Inst{8-5}
// (tile number and slice immediate) is left for the instantiating def,
// because the split between the two depends on the element size.
class sme2p1_movaz_tile_to_vec_base<bits<2> sz, bit q, bit v, ZPRRegOp vector_ty,
                                    RegisterOperand tile_ty, Operand index_ty,
                                    string mnemonic>
    : I<(outs vector_ty:$Zd, tile_ty:$ZAn),
        (ins tile_ty:$_ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
        mnemonic, "\t$Zd, $ZAn[$Rs, $imm]", "", []>,
      Sched<[]> {
  bits<2> Rs;
  bits<5> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000000;
  let Inst{21-17} = 0b00001;
  let Inst{12-9}  = 0b0001;

  // Selectors from the template arguments.
  let Inst{23-22} = sz;
  let Inst{16}    = q;
  let Inst{15}    = v;

  // Operand fields.
  let Inst{14-13} = Rs;
  let Inst{4-0}   = Zd;

  // The tile written is the same tile that is read.
  let Constraints = "$ZAn = $_ZAn";
}
4812
// Instantiates the B/H/S/D/Q element-size variants for one slice direction.
// `v` selects the TileVectorOpV* operand classes when set and the
// TileVectorOpH* classes otherwise. Inst{8-5} is shared between the tile
// number (ZAn) and the slice immediate:
//   _B: imm[3:0]                 _H: ZAn[0],   imm[2:0]
//   _S: ZAn[1:0], imm[1:0]       _D: ZAn[2:0], imm[0]
//   _Q: ZAn[3:0]
multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
  def _B
      : sme2p1_movaz_tile_to_vec_base<0b00, 0b0, v, ZPR8,
                                      !if(v, TileVectorOpV8, TileVectorOpH8),
                                      sme_elm_idx0_15, mnemonic>,
        SMEPseudo2Instr<NAME # _B, 1> {
    bits<4> imm;
    let Inst{8-5} = imm;
  }

  def _H
      : sme2p1_movaz_tile_to_vec_base<0b01, 0b0, v, ZPR16,
                                      !if(v, TileVectorOpV16, TileVectorOpH16),
                                      sme_elm_idx0_7, mnemonic>,
        SMEPseudo2Instr<NAME # _H, 1> {
    bits<1> ZAn;
    bits<3> imm;
    let Inst{8}   = ZAn;
    let Inst{7-5} = imm;
  }

  def _S
      : sme2p1_movaz_tile_to_vec_base<0b10, 0b0, v, ZPR32,
                                      !if(v, TileVectorOpV32, TileVectorOpH32),
                                      sme_elm_idx0_3, mnemonic>,
        SMEPseudo2Instr<NAME # _S, 1> {
    bits<2> ZAn;
    bits<2> imm;
    let Inst{8-7} = ZAn;
    let Inst{6-5} = imm;
  }

  def _D
      : sme2p1_movaz_tile_to_vec_base<0b11, 0b0, v, ZPR64,
                                      !if(v, TileVectorOpV64, TileVectorOpH64),
                                      sme_elm_idx0_1, mnemonic>,
        SMEPseudo2Instr<NAME # _D, 1> {
    bits<3> ZAn;
    bits<1> imm;
    let Inst{8-6} = ZAn;
    let Inst{5}   = imm;
  }

  // The Q form shares sz = 0b11 with the D form and is told apart by q = 1.
  def _Q
      : sme2p1_movaz_tile_to_vec_base<0b11, 0b1, v, ZPR128,
                                      !if(v, TileVectorOpV128, TileVectorOpH128),
                                      sme_elm_idx0_0, mnemonic>,
        SMEPseudo2Instr<NAME # _Q, 1> {
    bits<4> ZAn;
    let Inst{8-5} = ZAn;
  }
}
4855
// Top-level multiclass for the tile-to-vector "movaz" family. It creates:
//   * the horizontal (_H, v = 0) and vertical (_V, v = 1) instruction sets,
//   * one _PSEUDO record per direction and element size,
//   * selection patterns binding the four intrinsics to those records.
// The B/H/S/D variants use intrinsic_horiz / intrinsic_vert with the vector
// types of the matching element size; the Q variants use the *_q intrinsics
// and are offered for every listed vector type.
multiclass sme2p1_movaz_tile_to_vec<string mnemonic,
                                    SDPatternOperator intrinsic_horiz,
                                    SDPatternOperator intrinsic_vert,
                                    SDPatternOperator intrinsic_horiz_q,
                                    SDPatternOperator intrinsic_vert_q> {
  defm _H : sme2p1_movaz_tile_to_vec_base<0b0, mnemonic>;
  defm _V : sme2p1_movaz_tile_to_vec_base<0b1, mnemonic>;

  // Horizontal pseudos.
  def NAME # _H_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_B, sme_elm_idx0_0,  sme_elm_idx0_15, ZPR8,   SMEMatrixTileB>;
  def NAME # _H_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_H, sme_elm_idx0_1,  sme_elm_idx0_7,  ZPR16,  SMEMatrixTileH>;
  def NAME # _H_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_S, sme_elm_idx0_3,  sme_elm_idx0_3,  ZPR32,  SMEMatrixTileS>;
  def NAME # _H_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_D, sme_elm_idx0_7,  sme_elm_idx0_1,  ZPR64,  SMEMatrixTileD>;
  def NAME # _H_Q_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _H_Q, sme_elm_idx0_15, sme_elm_idx0_0,  ZPR128, SMEMatrixTileQ>;

  // Vertical pseudos.
  def NAME # _V_B_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_B, sme_elm_idx0_0,  sme_elm_idx0_15, ZPR8,   SMEMatrixTileB>;
  def NAME # _V_H_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_H, sme_elm_idx0_1,  sme_elm_idx0_7,  ZPR16,  SMEMatrixTileH>;
  def NAME # _V_S_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_S, sme_elm_idx0_3,  sme_elm_idx0_3,  ZPR32,  SMEMatrixTileS>;
  def NAME # _V_D_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_D, sme_elm_idx0_7,  sme_elm_idx0_1,  ZPR64,  SMEMatrixTileD>;
  def NAME # _V_Q_PSEUDO : sme2_movez_to_tile_pseudo<NAME # _V_Q, sme_elm_idx0_15, sme_elm_idx0_0,  ZPR128, SMEMatrixTileQ>;

  // Horizontal patterns: integer types first, then floating-point types.
  def : SME2_Tile_Movaz_Pat<NAME # _H_B, intrinsic_horiz, nxv16i8,  sme_elm_idx0_0, sme_elm_idx0_15, tileslice8>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8i16,  sme_elm_idx0_1, sme_elm_idx0_7,  tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_S, intrinsic_horiz, nxv4i32,  sme_elm_idx0_3, sme_elm_idx0_3,  tileslice32>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_D, intrinsic_horiz, nxv2i64,  sme_elm_idx0_7, sme_elm_idx0_1,  tileslice64>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8bf16, sme_elm_idx0_1, sme_elm_idx0_7,  tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_H, intrinsic_horiz, nxv8f16,  sme_elm_idx0_1, sme_elm_idx0_7,  tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_S, intrinsic_horiz, nxv4f32,  sme_elm_idx0_3, sme_elm_idx0_3,  tileslice32>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_D, intrinsic_horiz, nxv2f64,  sme_elm_idx0_7, sme_elm_idx0_1,  tileslice64>;

  // Vertical patterns, same type list.
  def : SME2_Tile_Movaz_Pat<NAME # _V_B, intrinsic_vert, nxv16i8,  sme_elm_idx0_0, sme_elm_idx0_15, tileslice8>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8i16,  sme_elm_idx0_1, sme_elm_idx0_7,  tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_S, intrinsic_vert, nxv4i32,  sme_elm_idx0_3, sme_elm_idx0_3,  tileslice32>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_D, intrinsic_vert, nxv2i64,  sme_elm_idx0_7, sme_elm_idx0_1,  tileslice64>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8bf16, sme_elm_idx0_1, sme_elm_idx0_7,  tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_H, intrinsic_vert, nxv8f16,  sme_elm_idx0_1, sme_elm_idx0_7,  tileslice16>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_S, intrinsic_vert, nxv4f32,  sme_elm_idx0_3, sme_elm_idx0_3,  tileslice32>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_D, intrinsic_vert, nxv2f64,  sme_elm_idx0_7, sme_elm_idx0_1,  tileslice64>;

  // H_Q: every listed vector type maps onto the 128-bit horizontal form.
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv16i8,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8i16,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv4i32,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv2i64,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8bf16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv8f16,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv4f32,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _H_Q, intrinsic_horiz_q, nxv2f64,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;

  // V_Q: every listed vector type maps onto the 128-bit vertical form.
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv16i8,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8i16,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv4i32,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv2i64,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8bf16, sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv8f16,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv4f32,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
  def : SME2_Tile_Movaz_Pat<NAME # _V_Q, intrinsic_vert_q, nxv2f64,  sme_elm_idx0_15, sme_elm_idx0_0, tileslice128>;
}
4911
4912//===----------------------------------------------------------------------===//
4913// SME2.1 multiple vectors zero array
4914
// Encoding for the array-zeroing instruction. The ZA array operand is both
// an input and an output (tied through Constraints). `opc` is split across
// Inst{17-15} and Inst{2-0}; instantiations leave the low opc bits as '?'
// and overlay the index immediate there. When `vg_acronym` is non-empty it
// is appended inside the brackets after the index (e.g. ", vgx2").
class sme2p1_zero_matrix<bits<6> opc, Operand index_ty, string mnemonic,
                         string vg_acronym="">
    : I<(outs MatrixOp64:$ZAd),
        (ins MatrixOp64:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm),
        mnemonic,
        "\t$ZAd[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]",
        "", []>,
      Sched<[]> {
  bits<2> Rv;

  // Fixed opcode bits.
  let Inst{31-18} = 0b11000000000011;
  let Inst{12-3}  = 0b0000000000;

  // Variant selector bits.
  let Inst{17-15} = opc{5-3};
  let Inst{2-0}   = opc{2-0};

  // Slice-index base register.
  let Inst{14-13} = Rv;

  let Constraints = "$ZAd = $_ZAd";
}
4929
// Instantiates the eight array-zeroing variants, one pseudo per variant, and
// the selection patterns mapping the int_aarch64_sme_zero_za64_* intrinsics
// onto those pseudos. In each instruction def the '?' bits of `opc` are the
// low Inst bits that the def overlays with its index immediate.
// NOTE(review): several pseudos/patterns use a narrower index operand than
// the real instruction (_2Z: uimm3s2range vs uimm2s2range; _VG2_2Z/_VG4_2Z:
// uimm2s2range vs uimm1s2range; _4Z: uimm2s4range vs uimm1s4range;
// _VG2_4Z/_VG4_4Z: uimm1s4range vs uimm0s4range). Kept as-is - confirm
// whether the reduced selection range is intentional.
multiclass sme2p1_zero_matrix<string mnemonic> {
  def _VG2_Z
      : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2">,
        SMEPseudo2Instr<NAME # _VG2_Z , 1> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }
  def _2Z
      : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic>,
        SMEPseudo2Instr<NAME # _2Z, 1> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }
  def _VG2_2Z
      : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2">,
        SMEPseudo2Instr<NAME # _VG2_2Z, 1> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }
  def _VG4_2Z
      : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4">,
        SMEPseudo2Instr<NAME # _VG4_2Z, 1> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }
  def _VG4_Z
      : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4">,
        SMEPseudo2Instr<NAME # _VG4_Z, 1> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }
  def _4Z
      : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic>,
        SMEPseudo2Instr<NAME # _4Z, 1> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }
  def _VG2_4Z
      : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2">,
        SMEPseudo2Instr<NAME # _VG2_4Z, 1> {
    bits<1> imm;
    let Inst{0} = imm;
  }
  def _VG4_4Z
      : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4">,
        SMEPseudo2Instr<NAME # _VG4_4Z, 1> {
    bits<1> imm;
    let Inst{0} = imm;
  }

  // Pseudos, one per instruction variant above.
  def NAME # _VG2_Z_PSEUDO  : sem2p1_zero_matrix_pseudo<NAME # _VG2_Z,  sme_elm_idx0_7, SMEMatrixArray>;
  def NAME # _VG4_Z_PSEUDO  : sem2p1_zero_matrix_pseudo<NAME # _VG4_Z,  sme_elm_idx0_7, SMEMatrixArray>;
  def NAME # _2Z_PSEUDO     : sem2p1_zero_matrix_pseudo<NAME # _2Z,     uimm2s2range,   SMEMatrixArray>;
  def NAME # _VG2_2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_2Z, uimm1s2range,   SMEMatrixArray>;
  def NAME # _VG4_2Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_2Z, uimm1s2range,   SMEMatrixArray>;
  def NAME # _4Z_PSEUDO     : sem2p1_zero_matrix_pseudo<NAME # _4Z,     uimm1s4range,   SMEMatrixArray>;
  def NAME # _VG2_4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG2_4Z, uimm0s4range,   SMEMatrixArray>;
  def NAME # _VG4_4Z_PSEUDO : sem2p1_zero_matrix_pseudo<NAME # _VG4_4Z, uimm0s4range,   SMEMatrixArray>;

  // Intrinsic-to-pseudo selection patterns.
  def : SME2_Zero_Matrix_Pat<NAME # _VG2_Z_PSEUDO,  int_aarch64_sme_zero_za64_vg1x2, sme_elm_idx0_7, tileslice16>;
  def : SME2_Zero_Matrix_Pat<NAME # _VG4_Z_PSEUDO,  int_aarch64_sme_zero_za64_vg1x4, sme_elm_idx0_7, tileslice16>;
  def : SME2_Zero_Matrix_Pat<NAME # _2Z_PSEUDO,     int_aarch64_sme_zero_za64_vg2x1, uimm2s2range,   tileslicerange2s2>;
  def : SME2_Zero_Matrix_Pat<NAME # _VG2_2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x2, uimm1s2range,   tileslicerange1s2>;
  def : SME2_Zero_Matrix_Pat<NAME # _VG4_2Z_PSEUDO, int_aarch64_sme_zero_za64_vg2x4, uimm1s2range,   tileslicerange1s2>;
  def : SME2_Zero_Matrix_Pat<NAME # _4Z_PSEUDO,     int_aarch64_sme_zero_za64_vg4x1, uimm1s4range,   tileslicerange1s4>;
  def : SME2_Zero_Matrix_Pat<NAME # _VG2_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x2, uimm0s4range,   tileslicerange0s4>;
  def : SME2_Zero_Matrix_Pat<NAME # _VG4_4Z_PSEUDO, int_aarch64_sme_zero_za64_vg4x4, uimm0s4range,   tileslicerange0s4>;
}
4982
4983//===----------------------------------------------------------------------===//
4984// SME2.1 lookup table expand two non-contiguous registers
4985
// Shared encoding for the two-register table lookup with a ZTR table operand
// and an indexed ZPRAny source. The 4-bit Zd field is split around the fixed
// Inst{3} bit: Zd{3} in Inst{4}, Zd{2-0} in Inst{2-0}. Derived classes
// overlay the index on the '?' bits they leave in `op`.
class sme2p1_luti_vector_vg2_index<bits<4> op, bits<2> sz, RegisterOperand vector_ty,
                                   AsmVectorIndexOpnd index_ty,
                                   string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i", "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<4> Zd;

  // Fixed opcode bits.
  let Inst{31-19} = 0b1100000010011;
  let Inst{14}    = 0b1;
  let Inst{11-10} = 0b00;
  let Inst{3}     = 0b0;

  // Selectors from the template arguments.
  let Inst{18-15} = op;
  let Inst{13-12} = sz;

  // Operand fields.
  let Inst{9-5}   = Zn;
  let Inst{4}     = Zd{3};
  let Inst{2-0}   = Zd{2-0};
}
5004
// luti2, two registers: op = {1,?,?,?}, with the three '?' bits
// (Inst{17-15}) carrying the 3-bit index.
class sme2p1_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                    AsmVectorIndexOpnd index_ty,
                                    string mnemonic>
    : sme2p1_luti_vector_vg2_index<{1,?,?,?}, sz, vector_ty, index_ty,
                                   mnemonic> {
  bits<3> i;
  let Inst{17-15} = i;
}
5012
// B and H element-size variants of the two-register luti2, over strided
// register pairs and a VectorIndexH index operand.
multiclass sme2p1_luti2_vector_vg2_index<string mnemonic> {
  def _B
      : sme2p1_luti2_vector_vg2_index<0b00, ZZ_b_strided, VectorIndexH,
                                      mnemonic>;
  def _H
      : sme2p1_luti2_vector_vg2_index<0b01, ZZ_h_strided, VectorIndexH,
                                      mnemonic>;
}
5019
// luti4 specialisation of the two-register form: the top two bits of 'op'
// (Inst{18-17}) are fixed to 0b01; Inst{16-15} hold the 2-bit index $i.
class sme2p1_luti4_vector_vg2_index<bits<2> sz,
                                    RegisterOperand vector_ty,
                                    AsmVectorIndexOpnd index_ty,
                                    string mnemonic>
    : sme2p1_luti_vector_vg2_index<{0b01,?,?}, sz, vector_ty, index_ty,
                                   mnemonic> {
  bits<2> i;

  let Inst{16-15} = i;
}
// Instantiates the byte (_B, sz = 0b00) and halfword (_H, sz = 0b01) variants
// of the two-register luti4 form. Both use VectorIndexS as the index operand.
multiclass sme2p1_luti4_vector_vg2_index<string mnemonic> {
  def _B
      : sme2p1_luti4_vector_vg2_index<0b00, ZZ_b_strided, VectorIndexS, mnemonic>;

  def _H
      : sme2p1_luti4_vector_vg2_index<0b01, ZZ_h_strided, VectorIndexS, mnemonic>;
}
5033
// SME2.1 lookup table expand four non-contiguous registers
//
// Shared encoding for the indexed four-register forms.
//   op        - value placed in Inst{18-16}; derived classes leave some of its
//               bits unset ('?') and fill them with the index operand.
//   sz        - value placed in Inst{13-12}.
//   vector_ty - register operand used for the destination $Zd.
//   index_ty  - operand class used for the index $i.
//   mnemonic  - assembly mnemonic.
class sme2p1_luti_vector_vg4_index<bits<3> op, bits<2> sz,
                                   RegisterOperand vector_ty,
                                   AsmVectorIndexOpnd index_ty,
                                   string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>,
      Sched<[]> {
  bits<3> Zd;
  bits<5> Zn;

  // Fields are listed from the least significant bit upwards.
  // $Zd is split around two fixed zero bits: Zd{1-0} in Inst{1-0}, Zd{2} in
  // Inst{4}.
  let Inst{1-0}   = Zd{1-0};
  let Inst{3-2}   = 0b00;
  let Inst{4}     = Zd{2};
  let Inst{9-5}   = Zn;
  let Inst{11-10} = 0b00;
  let Inst{13-12} = sz;
  let Inst{15-14} = 0b10;
  let Inst{18-16} = op;
  let Inst{31-19} = 0b1100000010011;
}
5053
// luti2 specialisation of the four-register form: only the top bit of 'op'
// (Inst{18}) is fixed to 1; Inst{17-16} hold the 2-bit index $i.
class sme2p1_luti2_vector_vg4_index<bits<2> sz,
                                    RegisterOperand vector_ty,
                                    AsmVectorIndexOpnd index_ty,
                                    string mnemonic>
    : sme2p1_luti_vector_vg4_index<{1,?,?}, sz, vector_ty, index_ty,
                                   mnemonic> {
  bits<2> i;

  let Inst{17-16} = i;
}
5061
// Instantiates the byte (_B, sz = 0b00) and halfword (_H, sz = 0b01) variants
// of the four-register luti2 form. Both use VectorIndexS as the index operand.
multiclass sme2p1_luti2_vector_vg4_index<string mnemonic> {
  def _B
      : sme2p1_luti2_vector_vg4_index<0b00, ZZZZ_b_strided, VectorIndexS,
                                      mnemonic>;

  def _H
      : sme2p1_luti2_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexS,
                                      mnemonic>;
}
5068
// luti4 specialisation of the four-register form: the top two bits of 'op'
// (Inst{18-17}) are fixed to 0b01; Inst{16} holds the single-bit index $i.
class sme2p1_luti4_vector_vg4_index<bits<2> sz,
                                    RegisterOperand vector_ty,
                                    AsmVectorIndexOpnd index_ty,
                                    string mnemonic>
    : sme2p1_luti_vector_vg4_index<{0b01,?}, sz, vector_ty, index_ty,
                                   mnemonic> {
  bit i;

  let Inst{16} = i;
}
5076
// Only a halfword (_H, sz = 0b01) variant is defined for the four-register
// luti4 form; it uses VectorIndexD as the index operand.
multiclass sme2p1_luti4_vector_vg4_index<string mnemonic> {
  def _H
      : sme2p1_luti4_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexD,
                                      mnemonic>;
}
5080
// SME2 lookup table two source registers expand to four contiguous destination
// registers.
//   sz       - value placed in Inst{13-12}.
//   op       - value placed in Inst{11-10}.
//   mnemonic - assembly mnemonic.
class sme2_luti4_vector_vg4<bits<2> sz, bits<2> op, string mnemonic>
    : I<(outs ZZZZ_b_mul_r:$Zd),
        (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
        mnemonic, "\t$Zd, $ZTt, $Zn",
        "", []>,
      Sched<[]> {
  bits<3> Zd;
  bits<4> Zn;

  // Fields are listed from the least significant bit upwards.
  // $Zd occupies Inst{4-2} above two fixed zero bits; $Zn occupies Inst{9-6}
  // above one fixed zero bit.
  let Inst{1-0}   = 0b00;
  let Inst{4-2}   = Zd;
  let Inst{5}     = 0b0;
  let Inst{9-6}   = Zn;
  let Inst{11-10} = op;
  let Inst{13-12} = sz;
  let Inst{31-14} = 0b110000001000101100;
}
5096
// SME2 lookup table two source registers expand to four non-contiguous
// destination registers.
//   sz       - value placed in Inst{13-12}.
//   op       - value placed in Inst{11-10}.
//   mnemonic - assembly mnemonic.
class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
    : I<(outs ZZZZ_b_strided:$Zd),
        (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
        mnemonic, "\t$Zd, $ZTt, $Zn",
        "", []>,
      Sched<[]> {
  bits<3> Zd;
  bits<4> Zn;

  // Fields are listed from the least significant bit upwards.
  // $Zd is split around two fixed zero bits: Zd{1-0} in Inst{1-0}, Zd{2} in
  // Inst{4}. $Zn occupies Inst{9-6} above one fixed zero bit.
  let Inst{1-0}   = Zd{1-0};
  let Inst{3-2}   = 0b00;
  let Inst{4}     = Zd{2};
  let Inst{5}     = 0b0;
  let Inst{9-6}   = Zn;
  let Inst{11-10} = op;
  let Inst{13-12} = sz;
  let Inst{31-14} = 0b110000001001101100;
}
5113