xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td (revision 577b62c2bacc7dfa228591ca3da361e1bc398301)
1//=-- SMEInstrFormats.td -  AArch64 SME Instruction classes -*- tablegen -*--=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
10//
11//===----------------------------------------------------------------------===//
12
// ComplexPatterns (one i64 operand each) selected through ImmToTile<...>,
// instantiated once per tile register base ZAB0/ZAH0/ZAS0/ZAD0/ZAQ0.
// NOTE(review): presumably converts an immediate tile number into the
// corresponding tile register -- ImmToTile itself is not visible here.
13def imm_to_tile8   : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAB0>", []>;
14def imm_to_tile16  : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAH0>", []>;
15def imm_to_tile32  : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAS0>", []>;
16def imm_to_tile64  : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAD0>", []>;
17def imm_to_tile128 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAQ0>", []>;
18
// ComplexPatterns of type i32 producing two operands, selected through
// SelectSMETileSlice<N>. In this file they are always used as
// (tileslice <index register>, <immediate>) inside the load/store/move
// patterns.
// NOTE(review): the meaning of the template argument (4..0) is defined by
// SelectSMETileSlice, which is not visible here -- confirm before changing.
19def tileslice8   : ComplexPattern<i32 , 2, "SelectSMETileSlice<4>", []>;
20def tileslice16  : ComplexPattern<i32 , 2, "SelectSMETileSlice<3>", []>;
21def tileslice32  : ComplexPattern<i32 , 2, "SelectSMETileSlice<2>", []>;
22def tileslice64  : ComplexPattern<i32 , 2, "SelectSMETileSlice<1>", []>;
23def tileslice128 : ComplexPattern<i32 , 2, "SelectSMETileSlice<0>", []>; // nop
24
// Pointer-typed ComplexPattern with two operands and the SDNPWantRoot
// property. It is used by the str/ldr patterns in sme_spill / sme_fill as
// (am_sme_indexed_b4 <base>, <imm0_15 immediate>).
// NOTE(review): <0,15> looks like the accepted immediate range -- confirm
// against SelectAddrModeIndexedSVE.
25def am_sme_indexed_b4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [], [SDNPWantRoot]>;
26
27//===----------------------------------------------------------------------===//
28// SME Outer Products
29//===----------------------------------------------------------------------===//
30
// Pseudo instruction used by the outer-product multiclasses in this section.
// It defines no outputs; the destination tile is passed as an i64 immediate
// ($tile), followed by two predicate operands and two vector operands of
// register class zpr_ty. The selection pattern list is empty; the pseudo is
// expanded by a custom inserter.
31class sme_outer_product_pseudo<ZPRRegOp zpr_ty>
32    : Pseudo<(outs), (ins i64imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
33                          zpr_ty:$zn, zpr_ty:$zm), []>,
34      Sched<[]> {
35  // Translated to the actual instructions in AArch64ISelLowering.cpp
36  let usesCustomInserter = 1;
37}
38
// Base encoding class for the floating-point outer-product instructions.
//   S        - placed in Inst{4}
//   sz       - placed in Inst{22}
//   za_ty    - tile operand class used for both $ZAda (out) and $_ZAda (in)
//   zpr_ty   - vector operand class for $Zn and $Zm
//   mnemonic - assembly mnemonic
// Inst{2-0} are not assigned here; the instantiating defs supply the ZAda
// field (and any remaining zero bits).
39class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
40                                ZPRRegOp zpr_ty, string mnemonic>
41    : I<(outs za_ty:$ZAda),
42      (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
43        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
44        "", []>,
45      Sched<[]> {
46  bits<5> Zm;
47  bits<3> Pm;
48  bits<3> Pn;
49  bits<5> Zn;
50  let Inst{31-23} = 0b100000001;
51  let Inst{22}    = sz;
52  let Inst{21}    = 0b0;
53  let Inst{20-16} = Zm;
54  let Inst{15-13} = Pm;
55  let Inst{12-10} = Pn;
56  let Inst{9-5}   = Zn;
57  let Inst{4}     = S;
58  let Inst{3}     = 0b0;
59
  // The tile is both read and written: tie the output to the input operand.
60  let Constraints = "$ZAda = $_ZAda";
61}
62
// FP outer product on TileOp32 / ZPR32 operands (sz = 0).
// Produces three records:
//   NAME         - the real instruction; ZAda is 2 bits in Inst{1-0} and
//                  Inst{2} is fixed to 0.
//   NAME_PSEUDO  - sme_outer_product_pseudo taking the tile as an immediate.
//   (anonymous)  - pattern selecting `op` (tile imm0_3, nxv4i1 predicates,
//                  nxv4f32 vectors) to NAME_PSEUDO.
63multiclass sme_outer_product_fp32<bit S, string mnemonic, SDPatternOperator op> {
64  def NAME : sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
65    bits<2> ZAda;
66    let Inst{1-0} = ZAda;
67    let Inst{2}   = 0b0;
68  }
69
70  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32>;
71
72  def : Pat<(op imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
73                (nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)),
74            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
75}
76
// FP outer product on TileOp64 / ZPR64 operands (sz = 1).
// Produces three records:
//   NAME         - the real instruction; ZAda is 3 bits in Inst{2-0}.
//   NAME_PSEUDO  - sme_outer_product_pseudo taking the tile as an immediate.
//   (anonymous)  - pattern selecting `op` (tile imm0_7, nxv2i1 predicates,
//                  nxv2f64 vectors) to NAME_PSEUDO.
77multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
78  def NAME : sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
79    bits<3> ZAda;
80    let Inst{2-0} = ZAda;
81  }
82
83  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64>;
84
85  def : Pat<(op imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
86                (nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)),
87            (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
88}
89
// Base encoding class for the integer outer-product instructions.
//   u0       - placed in Inst{24}
//   u1       - placed in Inst{21}
//   S        - placed in Inst{4}
//   sz       - placed in Inst{22}
//   za_ty    - tile operand class used for both $ZAda (out) and $_ZAda (in)
//   zpr_ty   - vector operand class for $Zn and $Zm
//   mnemonic - assembly mnemonic
// Inst{2-0} are not assigned here; the instantiating defs supply them.
90class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
91                                 MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
92                                 string mnemonic>
93    : I<(outs za_ty:$ZAda),
94        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
95        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
96        "", []>,
97      Sched<[]> {
98  bits<5> Zm;
99  bits<3> Pm;
100  bits<3> Pn;
101  bits<5> Zn;
102  let Inst{31-25} = 0b1010000;
103  let Inst{24}    = u0;
104  let Inst{23}    = 0b1;
105  let Inst{22}    = sz;
106  let Inst{21}    = u1;
107  let Inst{20-16} = Zm;
108  let Inst{15-13} = Pm;
109  let Inst{12-10} = Pn;
110  let Inst{9-5}   = Zn;
111  let Inst{4}     = S;
112  let Inst{3}     = 0b0;
113
  // The tile is both read and written: tie the output to the input operand.
114  let Constraints = "$ZAda = $_ZAda";
115}
116
// Integer outer product accumulating into TileOp32 from ZPR8 sources (sz = 0).
// The 3-bit opc is split as opc{2} -> u0, opc{1} -> u1, opc{0} -> S.
// Produces:
//   NAME         - the real instruction; ZAda is 2 bits in Inst{1-0} and
//                  Inst{2} is fixed to 0.
//   NAME_PSEUDO  - sme_outer_product_pseudo taking the tile as an immediate.
//   (anonymous)  - pattern selecting `op` (tile imm0_3, nxv16i1 predicates,
//                  nxv16i8 vectors) to NAME_PSEUDO.
117multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
118                                     SDPatternOperator op> {
119  def NAME : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32,
120                                        ZPR8, mnemonic> {
121    bits<2> ZAda;
122    let Inst{1-0} = ZAda;
123    let Inst{2}   = 0b0;
124  }
125
126  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8>;
127
128  def : Pat<(op imm0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
129                (nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)),
130            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
131}
132
// Integer outer product accumulating into TileOp64 from ZPR16 sources (sz = 1).
// The 3-bit opc is split as opc{2} -> u0, opc{1} -> u1, opc{0} -> S.
// Produces:
//   NAME         - the real instruction; ZAda is 3 bits in Inst{2-0}.
//   NAME_PSEUDO  - sme_outer_product_pseudo taking the tile as an immediate.
//   (anonymous)  - pattern selecting `op` (tile imm0_7, nxv8i1 predicates,
//                  nxv8i16 vectors) to NAME_PSEUDO.
133multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
134                                     SDPatternOperator op> {
135  def NAME : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64,
136                                        ZPR16, mnemonic> {
137    bits<3> ZAda;
138    let Inst{2-0} = ZAda;
139  }
140
141  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
142
143  def : Pat<(op imm0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
144                (nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)),
145            (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
146}
147
// Encoding class for the widening outer products: ZPR16 sources accumulating
// into a TileOp32 tile. Unlike the other outer-product base classes, the
// operand classes are fixed and the whole encoding (including the 2-bit ZAda
// field in Inst{1-0}) is assigned here.
//   op       - placed in Inst{21}
//   S        - placed in Inst{4}
//   mnemonic - assembly mnemonic
148class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
149    : I<(outs TileOp32:$ZAda),
150        (ins  TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm),
151        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
152        "", []>,
153      Sched<[]> {
154  bits<5> Zm;
155  bits<3> Pm;
156  bits<3> Pn;
157  bits<5> Zn;
158  bits<2> ZAda;
159  let Inst{31-22} = 0b1000000110;
160  let Inst{21}    = op;
161  let Inst{20-16} = Zm;
162  let Inst{15-13} = Pm;
163  let Inst{12-10} = Pn;
164  let Inst{9-5}   = Zn;
165  let Inst{4}     = S;
166  let Inst{3-2}   = 0b00;
167  let Inst{1-0}   = ZAda;
168
  // The tile is both read and written: tie the output to the input operand.
169  let Constraints = "$ZAda = $_ZAda";
170}
171
// Widening outer product with op = 0 and nxv8bf16 sources.
// Produces the real instruction NAME, the immediate-tile pseudo NAME_PSEUDO,
// and a pattern selecting `op` (tile imm0_3, nxv8i1 predicates, nxv8bf16
// vectors) to NAME_PSEUDO.
172multiclass sme_bf16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
173  def NAME : sme_outer_product_widening_inst<0b0, S, mnemonic>;
174
175  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
176
177  def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
178                (nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)),
179            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
180}
181
// Widening outer product with op = 1 and nxv8f16 sources.
// Produces the real instruction NAME, the immediate-tile pseudo NAME_PSEUDO,
// and a pattern selecting `op` (tile imm0_3, nxv8i1 predicates, nxv8f16
// vectors) to NAME_PSEUDO.
182multiclass sme_f16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
183  def NAME : sme_outer_product_widening_inst<0b1, S, mnemonic>;
184
185  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
186
187  def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
188                (nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)),
189            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
190}
191
192//===----------------------------------------------------------------------===//
193// SME Add Vector to Tile
194//===----------------------------------------------------------------------===//
195
// Base encoding class for the add-vector-to-tile instructions.
//   op       - placed in Inst{22}
//   V        - placed in Inst{16}
//   tile_ty  - tile operand class used for both $ZAda (out) and $_ZAda (in)
//   zpr_ty   - vector operand class for $Zn
//   mnemonic - assembly mnemonic
// Inst{2-0} are not assigned here; subclasses supply the ZAda field.
196class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
197                                  ZPRRegOp zpr_ty, string mnemonic>
198    : I<(outs tile_ty:$ZAda),
199        (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
200        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
201        "", []>, Sched<[]> {
202  bits<3> Pm;
203  bits<3> Pn;
204  bits<5> Zn;
205  let Inst{31-23} = 0b110000001;
206  let Inst{22}    = op;
207  let Inst{21-17} = 0b01000;
208  let Inst{16}    = V;
209  let Inst{15-13} = Pm;
210  let Inst{12-10} = Pn;
211  let Inst{9-5}   = Zn;
212  let Inst{4-3}   = 0b00;
213
  // The tile is both read and written: tie the output to the input operand.
214  let Constraints = "$ZAda = $_ZAda";
215}
216
// Add-vector-to-tile on TileOp32 / ZPR32 (op = 0). ZAda is 2 bits in
// Inst{1-0}; Inst{2} is fixed to 0.
217class sme_add_vector_to_tile_u32<bit V, string mnemonic>
218    : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
219  bits<2> ZAda;
220  let Inst{2}   = 0b0;
221  let Inst{1-0} = ZAda;
222}
223
// Add-vector-to-tile on TileOp64 / ZPR64 (op = 1). ZAda is 3 bits in
// Inst{2-0}.
224class sme_add_vector_to_tile_u64<bit V, string mnemonic>
225    : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
226  bits<3> ZAda;
227  let Inst{2-0} = ZAda;
228}
229
// Pseudo instruction for the add-vector-to-tile intrinsics. It defines no
// outputs; the tile is passed as an i64 immediate ($tile), followed by two
// predicates and one vector operand of register class zpr_ty. Expanded by a
// custom inserter.
230class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty>
231    : Pseudo<(outs),
232             (ins i64imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
233      Sched<[]> {
234  // Translated to the actual instructions in AArch64ISelLowering.cpp
235  let usesCustomInserter = 1;
236}
237
// 32-bit element pseudos for addha / addva, plus the patterns that select
// int_aarch64_sme_addha / int_aarch64_sme_addva (tile imm0_3, nxv4i1
// predicates, nxv4i32 vector) to them.
238def ADDHA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
239def ADDVA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
240
241def : Pat<(int_aarch64_sme_addha
242            imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
243            (nxv4i32 ZPR32:$zn)),
244          (ADDHA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
245def : Pat<(int_aarch64_sme_addva
246            imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
247            (nxv4i32 ZPR32:$zn)),
248          (ADDVA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
249
// 64-bit element pseudos for addha / addva and their selection patterns
// (tile imm0_7, nxv2i1 predicates, nxv2i64 vector). Everything in this scope
// is guarded by the HasSMEI64 predicate.
250let Predicates = [HasSMEI64] in {
251def ADDHA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
252def ADDVA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
253
254def : Pat<(int_aarch64_sme_addha
255            imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
256            (nxv2i64 ZPR64:$zn)),
257          (ADDHA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
258def : Pat<(int_aarch64_sme_addva
259            imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
260            (nxv2i64 ZPR64:$zn)),
261          (ADDVA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
262}
263
264//===----------------------------------------------------------------------===//
265// SME Contiguous Loads
266//===----------------------------------------------------------------------===//
267
// Base encoding class for the contiguous tile-slice loads.
//   Q        - placed in Inst{24}
//   V        - placed in Inst{15}
//   msz      - placed in Inst{23-22}
//   outs/ins - operand dags supplied by the subclass
//   mnemonic, argstr - assembly mnemonic and operand string
// Inst{21} is fixed to 0 (the store base class in this file uses 1).
// Inst{3-0} are not assigned here; the instantiating defs fill them with the
// tile / immediate fields. Marked mayLoad.
268class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
269                         string mnemonic, string argstr>
270    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
271  bits<5> Rm;
272  bits<2> Rv;
273  bits<3> Pg;
274  bits<5> Rn;
275  let Inst{31-25} = 0b1110000;
276  let Inst{24}    = Q;
277  let Inst{23-22} = msz;
278  let Inst{21}    = 0b0;
279  let Inst{20-16} = Rm;
280  let Inst{15}    = V;
281  let Inst{14-13} = Rv;
282  let Inst{12-10} = Pg;
283  let Inst{9-5}   = Rn;
284  let Inst{4}     = 0b0;
285
286  let mayLoad = 1;
287}
288
// Concrete operand list and asm string for a contiguous tile-slice load.
// The tile slice $ZAt is the only output; inputs are the slice index register
// $Rv, the immediate $imm, the governing predicate $Pg, the base $Rn and the
// offset register $Rm. is_col is forwarded as the V bit of the base class.
// The asm operand string is the braced form: {$ZAt[$Rv, $imm]}, $Pg/z, [$Rn, $Rm].
289class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
290                         MatrixTileVectorOperand tile_ty, bit is_col,
291                         Operand imm_ty, RegisterOperand gpr_ty>
292    : sme_mem_ld_ss_base<
293        Q, is_col, msz, (outs tile_ty:$ZAt),
294        (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
295             gpr_ty:$Rm),
296        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
297
// Assembly aliases shared by the tile-slice loads and stores for one element
// size. pg_suffix is appended after $Pg (the load aliases pass "/z", the
// store aliases use the empty default).
// Three aliases are defined; the trailing integer is the InstAlias emit
// argument:
//   1. un-braced tile slice with [$Rn, $Rm]                     (emit 0)
//   2. braced tile slice with [$Rn], offset register set to XZR (emit 1)
//   3. un-braced tile slice with [$Rn], offset register XZR     (emit 0)
298multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
299                                   MatrixTileVectorOperand tile_ty,
300                                   Operand imm_ty, RegisterOperand gpr_ty,
301                                   string pg_suffix=""> {
302  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
303                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
304  // Default XZR offset aliases
305  def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
306                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
307  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
308                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
309}
310
// Instantiates sme_mem_ss_aliases_base for each of the five element sizes.
// `inst` is the record-name prefix; the _B/_H/_S/_D/_Q suffix is appended and
// the result is cast to an Instruction. The mnemonic gets the matching
// "b"/"h"/"w"/"d"/"q" suffix. is_col picks the V (when set) or H tile-vector
// operand class; the immediate operand class narrows from sme_elm_idx0_15 to
// sme_elm_idx0_0 and the offset register class goes from GPR64shifted8 to
// GPR64shifted128 as the element size grows.
311multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
312                              string pg_suffix=""> {
313  defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
314                                 !if(is_col, TileVectorOpV8, TileVectorOpH8),
315                                 sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
316  defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
317                                 !if(is_col, TileVectorOpV16, TileVectorOpH16),
318                                 sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
319  defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
320                                 !if(is_col, TileVectorOpV32, TileVectorOpH32),
321                                 sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
322  defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
323                                 !if(is_col, TileVectorOpV64, TileVectorOpH64),
324                                 sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
325  defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
326                                 !if(is_col, TileVectorOpV128, TileVectorOpH128),
327                                 sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
328}
329
// Load flavour of the tile-slice aliases: base mnemonic "ld1" and the "/z"
// suffix after the predicate.
330multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
331  defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
332}
333
// Selection patterns for one tile-slice load.
//   Inst      - instruction to select to (instantiated in this file with the
//               _PSEUDO_* load records)
//   Load      - the load operator being matched
//   tile_ty   - operand class of the tile operand
//   offset_ty - operand class of the slice immediate
//   addr      - ComplexPattern matching base + offset register
//   tileslice - ComplexPattern matching (slice index register, immediate)
// Two patterns are emitted: a base-only form that uses XZR as the offset
// register, and a reg+reg form which is given AddedComplexity = 1 so that it
// is preferred when `addr` matches.
334multiclass sme_mem_ld_ss_patterns<Instruction Inst, SDPatternOperator Load,
335                                  Operand tile_ty, Operand offset_ty,
336                                  ComplexPattern addr,
337                                  ComplexPattern tileslice> {
338  // base, tileslice
339  def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile,
340                  (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
341            (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>;
342
343  // reg + reg, tileslice
344  let AddedComplexity = 1 in {
345    def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
346                    tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
347                                              offset_ty:$imm))),
348              (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>;
349  }
350}
351
// Pseudo instruction for the tile-slice load intrinsics. It defines no
// outputs; the tile and the slice immediate are both i64 immediates, followed
// by the slice index register, predicate, base and offset registers. Marked
// mayLoad and expanded by a custom inserter.
352class sme_load_pseudo
353    : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx,
354                          i64imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
355      Sched<[]> {
356  // Translated to the actual instructions in AArch64ISelLowering.cpp
357  let usesCustomInserter = 1;
358  let mayLoad = 1;
359}
360
// Defines the full set of tile-slice loads for one direction (is_col selects
// the V tile-vector operand classes when set, the H ones otherwise).
//
// Real instructions and how Inst{3-0} is split between tile and immediate:
//   _B : imm  = Inst{3-0}                     (no ZAt field)
//   _H : ZAt  = Inst{3},   imm = Inst{2-0}
//   _S : ZAt  = Inst{3-2}, imm = Inst{1-0}
//   _D : ZAt  = Inst{3-1}, imm = Inst{0}
//   _Q : ZAt  = Inst{3-0}                     (no imm field; Q bit set)
//
// Also emits the "ld1" assembly aliases, one _PSEUDO_* record per element
// size, and the selection patterns mapping the int_aarch64_sme_ld1*_vert /
// _horiz intrinsics onto those pseudos.
361multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
362  def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
363                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
364                              is_col, sme_elm_idx0_15, GPR64shifted8> {
365    bits<4> imm;
366    let Inst{3-0} = imm;
367  }
368  def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
369                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
370                              is_col, sme_elm_idx0_7, GPR64shifted16> {
371    bits<1> ZAt;
372    bits<3> imm;
373    let Inst{3}   = ZAt;
374    let Inst{2-0} = imm;
375  }
376  def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
377                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
378                              is_col, sme_elm_idx0_3, GPR64shifted32> {
379    bits<2> ZAt;
380    bits<2> imm;
381    let Inst{3-2} = ZAt;
382    let Inst{1-0} = imm;
383  }
384  def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
385                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
386                              is_col, sme_elm_idx0_1, GPR64shifted64> {
387    bits<3> ZAt;
388    bits<1> imm;
389    let Inst{3-1} = ZAt;
390    let Inst{0}   = imm;
391  }
392  def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
393                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
394                              is_col, sme_elm_idx0_0, GPR64shifted128> {
395    bits<4> ZAt;
396    let Inst{3-0} = ZAt;
397  }
398
399  defm : sme_mem_ld_ss_aliases<NAME, is_col>;
400
401  // Pseudo instructions for lowering intrinsics, using immediates instead of
402  // tile registers.
403  def _PSEUDO_B : sme_load_pseudo;
404  def _PSEUDO_H : sme_load_pseudo;
405  def _PSEUDO_S : sme_load_pseudo;
406  def _PSEUDO_D : sme_load_pseudo;
407  def _PSEUDO_Q : sme_load_pseudo;
408
  // Pattern arguments are (pseudo, intrinsic, tile operand class, slice
  // immediate operand class, reg+reg addressing pattern, tileslice pattern).
409  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
410                                !if(is_col, int_aarch64_sme_ld1b_vert,
411                                            int_aarch64_sme_ld1b_horiz),
412                                sme_elm_idx0_0, imm0_15, am_sve_regreg_lsl0,
413                                tileslice8>;
414  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
415                                !if(is_col, int_aarch64_sme_ld1h_vert,
416                                            int_aarch64_sme_ld1h_horiz),
417                                imm0_1, imm0_7, am_sve_regreg_lsl1,
418                                tileslice16>;
419  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
420                                !if(is_col, int_aarch64_sme_ld1w_vert,
421                                            int_aarch64_sme_ld1w_horiz),
422                                imm0_3, imm0_3, am_sve_regreg_lsl2,
423                                tileslice32>;
424  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
425                                !if(is_col, int_aarch64_sme_ld1d_vert,
426                                            int_aarch64_sme_ld1d_horiz),
427                                imm0_7, imm0_1, am_sve_regreg_lsl3,
428                                tileslice64>;
429  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
430                                !if(is_col, int_aarch64_sme_ld1q_vert,
431                                            int_aarch64_sme_ld1q_horiz),
432                                imm0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
433                                tileslice128>;
434}
435
// Top-level load multiclass: instantiates sme_mem_ld_v_ss twice, with the _H
// suffix for is_col = 0 and the _V suffix for is_col = 1.
436multiclass sme_mem_ld_ss<string mnemonic> {
437  defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
438  defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
439}
440
441//===----------------------------------------------------------------------===//
442// SME Contiguous Stores
443//===----------------------------------------------------------------------===//
444
// Base encoding class for the contiguous tile-slice stores. There are no
// outputs.
//   Q        - placed in Inst{24}
//   V        - placed in Inst{15}
//   msz      - placed in Inst{23-22}
//   ins      - input operand dag supplied by the subclass
//   mnemonic, argstr - assembly mnemonic and operand string
// Inst{21} is fixed to 1 (the load base class in this file uses 0).
// Inst{3-0} are not assigned here; the instantiating defs fill them.
// Marked mayStore and hasSideEffects.
445class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
446                         string mnemonic, string argstr>
447    : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
448  bits<5> Rm;
449  bits<2> Rv;
450  bits<3> Pg;
451  bits<5> Rn;
452  let Inst{31-25} = 0b1110000;
453  let Inst{24}    = Q;
454  let Inst{23-22} = msz;
455  let Inst{21}    = 0b1;
456  let Inst{20-16} = Rm;
457  let Inst{15}    = V;
458  let Inst{14-13} = Rv;
459  let Inst{12-10} = Pg;
460  let Inst{9-5}   = Rn;
461  let Inst{4}     = 0b0;
462
463  let mayStore = 1;
464  let hasSideEffects = 1;
465}
466
// Concrete operand list and asm string for a contiguous tile-slice store.
// All operands are inputs: the tile slice $ZAt, the slice index register $Rv,
// the immediate $imm, the predicate $Pg, the base $Rn and the offset register
// $Rm. is_col is forwarded as the V bit of the base class. The asm operand
// string is the braced form with no suffix on $Pg.
467class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
468                         MatrixTileVectorOperand tile_ty, bit is_col,
469                         Operand imm_ty, RegisterOperand gpr_ty>
470    : sme_mem_st_ss_base<
471        Q, is_col, msz,
472        (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
473             GPR64sp:$Rn, gpr_ty:$Rm),
474        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
475
// Store flavour of the tile-slice aliases: base mnemonic "st1" with the
// default (empty) predicate suffix.
476multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
477  defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
478}
479
// Selection patterns for one tile-slice store.
//   Inst      - instruction to select to (instantiated in this file with the
//               real _B/_H/_S/_D/_Q store records, not pseudos)
//   Store     - the store operator being matched
//   offset_ty - operand class of the slice immediate
//   imm2tile  - ComplexPattern applied to the tile operand
//   addr      - ComplexPattern matching base + offset register
//   tileslice - ComplexPattern matching (slice index register, immediate)
// Two patterns are emitted: a base-only form that uses XZR as the offset
// register, and a reg+reg form which is given AddedComplexity = 1 so that it
// is preferred when `addr` matches.
480multiclass sme_mem_st_ss_patterns<Instruction Inst, SDPatternOperator Store,
481                                  Operand offset_ty,
482                                  ComplexPattern imm2tile,
483                                  ComplexPattern addr,
484                                  ComplexPattern tileslice> {
485  // base, tileslice
486  def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile),
487                   (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
488            (Inst $tile, $idx, $imm, $pg, $base, XZR)>;
489
490  // reg + reg, tileslice
491  let AddedComplexity = 1 in {
492    def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
493                     (imm2tile untyped:$tile),
494                     (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
495              (Inst $tile, $idx, $imm, $pg, $base, $offset)>;
496  }
497}
498
// Defines the full set of tile-slice stores for one direction (is_col selects
// the V tile-vector operand classes when set, the H ones otherwise).
//
// Real instructions and how Inst{3-0} is split between tile and immediate:
//   _B : imm  = Inst{3-0}                     (no ZAt field)
//   _H : ZAt  = Inst{3},   imm = Inst{2-0}
//   _S : ZAt  = Inst{3-2}, imm = Inst{1-0}
//   _D : ZAt  = Inst{3-1}, imm = Inst{0}
//   _Q : ZAt  = Inst{3-0}                     (no imm field; Q bit set)
//
// Also emits the "st1" assembly aliases and the selection patterns mapping
// the int_aarch64_sme_st1*_vert / _horiz intrinsics directly onto the real
// instructions (no pseudos are defined for stores here).
499multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
500  def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
501                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
502                              is_col, sme_elm_idx0_15, GPR64shifted8> {
503    bits<4> imm;
504    let Inst{3-0} = imm;
505  }
506  def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
507                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
508                              is_col, sme_elm_idx0_7, GPR64shifted16> {
509    bits<1> ZAt;
510    bits<3> imm;
511    let Inst{3}   = ZAt;
512    let Inst{2-0} = imm;
513  }
514  def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
515                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
516                              is_col, sme_elm_idx0_3, GPR64shifted32> {
517    bits<2> ZAt;
518    bits<2> imm;
519    let Inst{3-2} = ZAt;
520    let Inst{1-0} = imm;
521  }
522  def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
523                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
524                              is_col, sme_elm_idx0_1, GPR64shifted64> {
525    bits<3> ZAt;
526    bits<1> imm;
527    let Inst{3-1} = ZAt;
528    let Inst{0}   = imm;
529  }
530  def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
531                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
532                              is_col, sme_elm_idx0_0, GPR64shifted128> {
533    bits<4> ZAt;
534    let Inst{3-0} = ZAt;
535  }
536
537  defm : sme_mem_st_ss_aliases<NAME, is_col>;
538
  // Pattern arguments are (instruction, intrinsic, slice immediate operand
  // class, imm-to-tile pattern, reg+reg addressing pattern, tileslice pattern).
539  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
540                                !if(is_col, int_aarch64_sme_st1b_vert,
541                                            int_aarch64_sme_st1b_horiz),
542                                imm0_15, imm_to_tile8, am_sve_regreg_lsl0,
543                                tileslice8>;
544  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
545                                !if(is_col, int_aarch64_sme_st1h_vert,
546                                            int_aarch64_sme_st1h_horiz),
547                                imm0_7, imm_to_tile16, am_sve_regreg_lsl1,
548                                tileslice16>;
549  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
550                                !if(is_col, int_aarch64_sme_st1w_vert,
551                                            int_aarch64_sme_st1w_horiz),
552                                imm0_3, imm_to_tile32, am_sve_regreg_lsl2,
553                                tileslice32>;
554  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
555                                !if(is_col, int_aarch64_sme_st1d_vert,
556                                            int_aarch64_sme_st1d_horiz),
557                                imm0_1, imm_to_tile64, am_sve_regreg_lsl3,
558                                tileslice64>;
559  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
560                                !if(is_col, int_aarch64_sme_st1q_vert,
561                                            int_aarch64_sme_st1q_horiz),
562                                sme_elm_idx0_0, imm_to_tile128,
563                                am_sve_regreg_lsl4, tileslice128>;
564}
565
// Top-level store multiclass: instantiates sme_mem_st_v_ss twice, with the _H
// suffix for is_col = 0 and the _V suffix for is_col = 1.
566multiclass sme_mem_st_ss<string mnemonic> {
567  defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
568  defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
569}
570
571//===----------------------------------------------------------------------===//
572// SME Save and Restore Array
573//===----------------------------------------------------------------------===//
574
// Shared encoding class for the ZA array save (store) and restore (load)
// instructions.
//   isStore   - placed in Inst{21}
//   outs/ins  - operand dags supplied by the subclass
//   opcodestr - assembly mnemonic
// Encoded fields: Rv in Inst{14-13}, Rn in Inst{9-5}, imm4 in Inst{3-0}.
// NOTE(review): the asm string also prints $offset, but this class declares
// no encoding field for it -- presumably it is required to equal $imm4 by
// code outside this file; confirm before changing either operand.
575class sme_spill_fill_base<bit isStore, dag outs, dag ins, string opcodestr>
576    : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
577        []>,
578      Sched<[]> {
579  bits<2> Rv;
580  bits<5> Rn;
581  bits<4> imm4;
582  let Inst{31-22} = 0b1110000100;
583  let Inst{21}    = isStore;
584  let Inst{20-15} = 0b000000;
585  let Inst{14-13} = Rv;
586  let Inst{12-10} = 0b000;
587  let Inst{9-5}   = Rn;
588  let Inst{4}     = 0b0;
589  let Inst{3-0}   = imm4;
590}
591
// Store form of sme_spill_fill_base (isStore = 1, marked mayStore). No
// outputs; $ZAt is an input operand of class MatrixOp.
592let mayStore = 1 in
593class sme_spill_inst<string opcodestr>
594    : sme_spill_fill_base<0b1, (outs),
595                          (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
596                               sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
597                               imm0_15:$offset),
598                          opcodestr>;
// Load form of sme_spill_fill_base (isStore = 0, marked mayLoad). $ZAt is the
// output operand of class MatrixOp.
599let mayLoad = 1 in
600class sme_fill_inst<string opcodestr>
601    : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt),
602                          (ins MatrixIndexGPR32Op12_15:$Rv,
603                               sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
604                               imm0_15:$offset),
605                          opcodestr>;
// Defines the ZA store instruction NAME together with:
//   - an assembly alias that omits the offset (offset operand set to 0);
//   - a pattern selecting int_aarch64_sme_str with a plain base address to
//     NAME with slice immediate 0 and offset 0;
//   - a higher-priority pattern (AddedComplexity = 2) for a base + immediate
//     address matched by am_sme_indexed_b4, which passes the matched
//     immediate as the offset operand while the slice immediate stays 0.
// Both patterns select the real instruction directly with ZA as the matrix
// operand.
606multiclass sme_spill<string opcodestr> {
607  def NAME : sme_spill_inst<opcodestr>;
608  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
609                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
610                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
611  // base
612  def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
613            (!cast<Instruction>(NAME) ZA, $idx, 0, $base, 0)>;
614  // scalar + immediate (mul vl)
615  let AddedComplexity = 2 in {
616    def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx,
617                                   (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)),
618              (!cast<Instruction>(NAME) ZA, $idx, 0, $base, $imm4)>;
619  }
620}
621
// Defines the ZA load instruction NAME together with:
//   - an assembly alias that omits the offset (offset operand set to 0);
//   - NAME_PSEUDO, a mayLoad pseudo with no outputs taking (index register,
//     imm0_15 immediate, base) and expanded by a custom inserter;
//   - a pattern selecting int_aarch64_sme_ldr with a plain base address to
//     NAME_PSEUDO with immediate 0;
//   - a higher-priority pattern (AddedComplexity = 2) for a base + immediate
//     address matched by am_sme_indexed_b4, forwarding the matched immediate.
// Unlike sme_spill, the patterns here target the pseudo rather than the real
// instruction.
622multiclass sme_fill<string opcodestr> {
623  def NAME : sme_fill_inst<opcodestr>;
624  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
625                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
626                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
627  def NAME # _PSEUDO
628      : Pseudo<(outs),
629               (ins MatrixIndexGPR32Op12_15:$idx, imm0_15:$imm4,
630                    GPR64sp:$base), []>,
631        Sched<[]> {
632    // Translated to actual instruction in AArch64ISelLowering.cpp
633    let usesCustomInserter = 1;
634    let mayLoad = 1;
635  }
636  // base
637  def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
638            (!cast<Instruction>(NAME # _PSEUDO) $idx, 0, $base)>;
639  // scalar + immediate (mul vl)
640  let AddedComplexity = 2 in {
641    def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx,
642                                   (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)),
643              (!cast<Instruction>(NAME # _PSEUDO) $idx, $imm4, $base)>;
644  }
645}
646
647//===----------------------------------------------------------------------===//
648// Move instructions
649//===----------------------------------------------------------------------===//
650
// Base encoding class for the vector-to-tile-slice move instructions.
//   Q        - placed in Inst{16}
//   V        - placed in Inst{15}
//   sz       - placed in Inst{23-22}
//   outs/ins - operand dags supplied by the subclass
//   mnemonic, argstr - assembly mnemonic and operand string
// Encoded fields: Rv in Inst{14-13}, Pg in Inst{12-10}, Zn in Inst{9-5}.
// Inst{3-0} are not assigned here; the instantiating defs fill them with the
// tile / immediate fields.
651class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
652                              string mnemonic, string argstr>
653    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
654  bits<2> Rv;
655  bits<3> Pg;
656  bits<5> Zn;
657  let Inst{31-24} = 0b11000000;
658  let Inst{23-22} = sz;
659  let Inst{21-17} = 0b00000;
660  let Inst{16}    = Q;
661  let Inst{15}    = V;
662  let Inst{14-13} = Rv;
663  let Inst{12-10} = Pg;
664  let Inst{9-5}   = Zn;
665  let Inst{4}     = 0b0;
666}
667
// Concrete operand list and asm string for a vector-to-tile-slice move.
// The tile slice appears as output $ZAd and as input $_ZAd, tied together by
// the constraint below. The remaining inputs are the slice index register
// $Rv, the immediate $imm, the predicate $Pg (printed with /m) and the source
// vector $Zn. is_col is forwarded as the V bit of the base class.
668class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty,
669                              bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
670                              string mnemonic>
671    : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
672        (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
673        mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">{
674
675  let Constraints = "$ZAd = $_ZAd";
676}
677
678
// Adds a "mov" assembly alias for the given vector-to-tile instruction, with
// the same operand order as the instruction itself. The InstAlias emit
// argument is 1.
679multiclass sme_vector_to_tile_aliases<Instruction inst,
680                                      MatrixTileVectorOperand tile_ty,
681                                      ZPRRegOp zpr_ty, Operand imm_ty> {
682  def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
683                  (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
684}
685
// Selection patterns for one vector-to-tile-slice move.
//   inst      - instruction to select to
//   zpr_vt    - value type of the source vector
//   ppr_vt    - value type of the governing predicate
//   imm_ty    - operand class of the tile immediate
//   offset_ty - operand class of the slice immediate
//   op        - operator being matched
//   tileslice - ComplexPattern matching (slice index register, immediate)
// Two patterns are emitted: one where the slice index is a plain register
// (slice immediate 0), and one where `tileslice` splits the index into a
// register plus immediate; the latter has AddedComplexity = 1 so that it is
// preferred when it matches.
686multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt,
687                                       ValueType ppr_vt, Operand imm_ty,
688                                       Operand offset_ty,
689                                       SDPatternOperator op,
690                                       ComplexPattern tileslice> {
691  def : Pat<(op imm_ty:$tile, MatrixIndexGPR32Op12_15:$idx,
692                (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
693            (inst imm_ty:$tile, $idx, 0, $pg, $zn)>;
694  let AddedComplexity = 1 in {
695    def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
696                                                offset_ty:$imm)),
697                  (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
698              (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
699  }
700}
701
// Pseudo instruction for the vector-to-tile move intrinsics. It defines no
// outputs; the tile and the slice immediate are both i64 immediates, followed
// by the slice index register, the predicate and a source vector of class
// ZPRAny. Expanded by a custom inserter.
702class sme_mova_insert_pseudo
703    : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx,
704                          i64imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
705      Sched<[]> {
706  // Translated to the actual instructions in AArch64ISelLowering.cpp
707  let usesCustomInserter = 1;
708}
709
// Vector-to-tile definitions for a single slice direction.
//
// Produces, for each element size suffix (_B, _H, _S, _D, _Q):
//   * the encoded instruction,
//   * a _PSEUDO_<size> record that takes the tile as an immediate,
//   * an instantiation of sme_vector_to_tile_aliases, and
//   * selection patterns for the write / writeq intrinsics.
//
// When is_col is set the TileVectorOpV* operands and the *_vert intrinsics
// are used; otherwise the TileVectorOpH* operands and *_horiz intrinsics.
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
  // Tile-vector operand per element size for the requested direction.
  defvar tile_b = !if(is_col, TileVectorOpV8,   TileVectorOpH8);
  defvar tile_h = !if(is_col, TileVectorOpV16,  TileVectorOpH16);
  defvar tile_s = !if(is_col, TileVectorOpV32,  TileVectorOpH32);
  defvar tile_d = !if(is_col, TileVectorOpV64,  TileVectorOpH64);
  defvar tile_q = !if(is_col, TileVectorOpV128, TileVectorOpH128);

  // Encoded instructions. Inst{3-0} is shared between the tile number (ZAd)
  // and the slice immediate; the split moves by one bit per element size.
  def _B : sme_vector_to_tile_inst<0b0, 0b00, tile_b, is_col,
                                   sme_elm_idx0_15, ZPR8, mnemonic> {
    bits<4> imm;
    let Inst{3-0} = imm;
  }
  def _H : sme_vector_to_tile_inst<0b0, 0b01, tile_h, is_col,
                                   sme_elm_idx0_7, ZPR16, mnemonic> {
    bits<1> ZAd;
    bits<3> imm;
    let Inst{3}   = ZAd;
    let Inst{2-0} = imm;
  }
  def _S : sme_vector_to_tile_inst<0b0, 0b10, tile_s, is_col,
                                   sme_elm_idx0_3, ZPR32, mnemonic> {
    bits<2> ZAd;
    bits<2> imm;
    let Inst{3-2} = ZAd;
    let Inst{1-0} = imm;
  }
  def _D : sme_vector_to_tile_inst<0b0, 0b11, tile_d, is_col,
                                   sme_elm_idx0_1, ZPR64, mnemonic> {
    bits<3> ZAd;
    bits<1> imm;
    let Inst{3-1} = ZAd;
    let Inst{0}   = imm;
  }
  def _Q : sme_vector_to_tile_inst<0b1, 0b11, tile_q, is_col,
                                   sme_elm_idx0_0, ZPR128, mnemonic> {
    bits<4> ZAd;
    // Declared but not placed in the encoding: all four bits carry ZAd.
    bits<1> imm;
    let Inst{3-0} = ZAd;
  }

  // Pseudo instructions for lowering intrinsics; they take immediates in
  // place of tile registers.
  def _PSEUDO_B : sme_mova_insert_pseudo;
  def _PSEUDO_H : sme_mova_insert_pseudo;
  def _PSEUDO_S : sme_mova_insert_pseudo;
  def _PSEUDO_D : sme_mova_insert_pseudo;
  def _PSEUDO_Q : sme_mova_insert_pseudo;

  // Alias instantiations, one per encoded instruction.
  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B), tile_b,
                                    ZPR8, sme_elm_idx0_15>;
  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H), tile_h,
                                    ZPR16, sme_elm_idx0_7>;
  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S), tile_s,
                                    ZPR32, sme_elm_idx0_3>;
  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D), tile_d,
                                    ZPR64, sme_elm_idx0_1>;
  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q), tile_q,
                                    ZPR128, sme_elm_idx0_0>;

  // Patterns for the 8/16/32/64-bit write intrinsic, selecting the pseudo of
  // the matching element size for every supported vector type.
  defvar write_op = !if(is_col, int_aarch64_sme_write_vert,
                                int_aarch64_sme_write_horiz);

  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
                                     nxv16i8, nxv16i1, sme_elm_idx0_0, imm0_15,
                                     write_op, tileslice8>;
  foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in {
    defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                       vt, nxv8i1, sme_elm_idx0_1, imm0_7,
                                       write_op, tileslice16>;
  }
  foreach vt = [nxv4i32, nxv4f32] in {
    defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
                                       vt, nxv4i1, sme_elm_idx0_3, imm0_3,
                                       write_op, tileslice32>;
  }
  foreach vt = [nxv2i64, nxv2f64] in {
    defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
                                       vt, nxv2i1, sme_elm_idx0_7, imm0_1,
                                       write_op, tileslice64>;
  }

  // Patterns for the 128-bit writeq intrinsic: every vector type maps onto
  // the _PSEUDO_Q record, paired with the predicate type of the same lane
  // count.
  defvar writeq_op = !if(is_col, int_aarch64_sme_writeq_vert,
                                 int_aarch64_sme_writeq_horiz);

  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     nxv16i8, nxv16i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, writeq_op, tileslice128>;
  foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in {
    defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                       vt, nxv8i1, sme_elm_idx0_15,
                                       sme_elm_idx0_0, writeq_op, tileslice128>;
  }
  foreach vt = [nxv4i32, nxv4f32] in {
    defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                       vt, nxv4i1, sme_elm_idx0_15,
                                       sme_elm_idx0_0, writeq_op, tileslice128>;
  }
  foreach vt = [nxv2i64, nxv2f64] in {
    defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                       vt, nxv2i1, sme_elm_idx0_15,
                                       sme_elm_idx0_0, writeq_op, tileslice128>;
  }
}
834
// Instantiates sme_vector_v_to_tile once per slice direction: the _H records
// are built with is_col = 0 and the _V records with is_col = 1.
multiclass sme_vector_to_tile<string mnemonic> {
  defm _H : sme_vector_v_to_tile<mnemonic,
                                 /*is_col=*/0b0>;
  defm _V : sme_vector_v_to_tile<mnemonic,
                                 /*is_col=*/0b1>;
}
839
// Common encoding for the tile-to-vector instructions.
//
// Q, V and sz are placed directly into the encoding; outs/ins/mnemonic/argstr
// are forwarded to the I base class. Inst{8-5} is not assigned here.
class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                              string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  // Operand fields encoded by this class.
  bits<5> Zd;
  bits<3> Pg;
  bits<2> Rv;

  // Fixed opcode bits.
  let Inst{31-24} = 0b11000000;
  let Inst{21-17} = 0b00001;
  let Inst{9}     = 0b0;

  // Bits taken from the template parameters.
  let Inst{23-22} = sz;
  let Inst{16}    = Q;
  let Inst{15}    = V;

  // Bits taken from the operands.
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{4-0}   = Zd;
}
856
// Concrete tile-to-vector instruction: supplies the operand lists and the
// assembly string on top of sme_tile_to_vector_base. is_col is forwarded as
// the base class's V bit.
class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
                              MatrixTileVectorOperand tile_ty,
                              bit is_col, Operand imm_ty, string mnemonic>
    : sme_tile_to_vector_base<Q, is_col, sz,
        (outs zpr_ty:$Zd),
        (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn,
             MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> {
  // The destination is tied to the $_Zd input.
  let Constraints = "$Zd = $_Zd";
}
866
// Adds a "mov" assembler alias for the tile-to-vector instruction `inst`,
// using the same operand list as the instruction itself.
multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
                                      MatrixTileVectorOperand tile_ty,
                                      Operand imm_ty > {
  def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
                  (inst zpr_ty:$Zd,
                        PPR3bAny:$Pg,
                        tile_ty:$ZAn,
                        MatrixIndexGPR32Op12_15:$Rv,
                        imm_ty:$imm),
                  1>;
}
873
// Selection patterns mapping the intrinsic `op` onto the tile-to-vector
// instruction `inst` for one vector/predicate type pair. The tile operand is
// matched through `imm2tile`.
multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt,
                                       ValueType ppr_vt, Operand offset_ty,
                                       ComplexPattern imm2tile,
                                       ComplexPattern tileslice,
                                       SDPatternOperator op> {
  // Plain slice index: emit the instruction with an immediate of 0.
  def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru),
                        (ppr_vt PPR3bAny:$pg),
                        (imm2tile untyped:$tile),
                        MatrixIndexGPR32Op12_15:$idx)),
            (inst $passthru, $pg, $tile, $idx, 0)>;

  // Slice index matched by `tileslice` into a register plus an offset_ty
  // immediate; the added complexity ranks this pattern above the plain form.
  let AddedComplexity = 1 in
  def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru),
                        (ppr_vt PPR3bAny:$pg),
                        (imm2tile untyped:$tile),
                        (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                        offset_ty:$imm)))),
            (inst $passthru, $pg, $tile, $idx, $imm)>;
}
890
// Tile-to-vector definitions for a single slice direction.
//
// Produces, for each element size suffix (_B, _H, _S, _D, _Q), the encoded
// instruction, its "mov" alias and the selection patterns for the read /
// readq intrinsics. When is_col is set the TileVectorOpV* operands and the
// *_vert intrinsics are used; otherwise the TileVectorOpH* operands and the
// *_horiz intrinsics.
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
  // Tile-vector operand per element size for the requested direction.
  defvar tile_b = !if(is_col, TileVectorOpV8,   TileVectorOpH8);
  defvar tile_h = !if(is_col, TileVectorOpV16,  TileVectorOpH16);
  defvar tile_s = !if(is_col, TileVectorOpV32,  TileVectorOpH32);
  defvar tile_d = !if(is_col, TileVectorOpV64,  TileVectorOpH64);
  defvar tile_q = !if(is_col, TileVectorOpV128, TileVectorOpH128);

  // Encoded instructions. Inst{8-5} is shared between the tile number (ZAn)
  // and the slice immediate; the split moves by one bit per element size.
  def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, tile_b, is_col,
                                   sme_elm_idx0_15, mnemonic> {
    bits<4> imm;
    let Inst{8-5} = imm;
  }
  def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, tile_h, is_col,
                                   sme_elm_idx0_7, mnemonic> {
    bits<1> ZAn;
    bits<3> imm;
    let Inst{8}   = ZAn;
    let Inst{7-5} = imm;
  }
  def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, tile_s, is_col,
                                   sme_elm_idx0_3, mnemonic> {
    bits<2> ZAn;
    bits<2> imm;
    let Inst{8-7} = ZAn;
    let Inst{6-5} = imm;
  }
  def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, tile_d, is_col,
                                   sme_elm_idx0_1, mnemonic> {
    bits<3> ZAn;
    bits<1> imm;
    let Inst{8-6} = ZAn;
    let Inst{5}   = imm;
  }
  def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, tile_q, is_col,
                                   sme_elm_idx0_0, mnemonic> {
    bits<4> ZAn;
    let Inst{8-5} = ZAn;
  }

  // "mov" aliases, one per encoded instruction.
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
                                    tile_b, sme_elm_idx0_15>;
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
                                    tile_h, sme_elm_idx0_7>;
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
                                    tile_s, sme_elm_idx0_3>;
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
                                    tile_d, sme_elm_idx0_1>;
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
                                    tile_q, sme_elm_idx0_0>;

  // Patterns for the 8/16/32/64-bit read intrinsic, selecting the
  // instruction of the matching element size for every supported vector type.
  defvar read_op = !if(is_col, int_aarch64_sme_read_vert,
                               int_aarch64_sme_read_horiz);

  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
                                     nxv16i8, nxv16i1, imm0_15,
                                     imm_to_tile8, tileslice8, read_op>;
  foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in {
    defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
                                       vt, nxv8i1, imm0_7,
                                       imm_to_tile16, tileslice16, read_op>;
  }
  foreach vt = [nxv4i32, nxv4f32] in {
    defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
                                       vt, nxv4i1, imm0_3,
                                       imm_to_tile32, tileslice32, read_op>;
  }
  foreach vt = [nxv2i64, nxv2f64] in {
    defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
                                       vt, nxv2i1, imm0_1,
                                       imm_to_tile64, tileslice64, read_op>;
  }

  // Patterns for the 128-bit readq intrinsic: every vector type maps onto the
  // _Q instruction, paired with the predicate type of the same lane count.
  defvar readq_op = !if(is_col, int_aarch64_sme_readq_vert,
                                int_aarch64_sme_readq_horiz);

  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     nxv16i8, nxv16i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, readq_op>;
  foreach vt = [nxv8i16, nxv8f16, nxv8bf16] in {
    defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                       vt, nxv8i1, sme_elm_idx0_0,
                                       imm_to_tile128, tileslice128, readq_op>;
  }
  foreach vt = [nxv4i32, nxv4f32] in {
    defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                       vt, nxv4i1, sme_elm_idx0_0,
                                       imm_to_tile128, tileslice128, readq_op>;
  }
  foreach vt = [nxv2i64, nxv2f64] in {
    defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                       vt, nxv2i1, sme_elm_idx0_0,
                                       imm_to_tile128, tileslice128, readq_op>;
  }
}
1001
// Instantiates sme_tile_to_vector_v once per slice direction: the _H records
// are built with is_col = 0 and the _V records with is_col = 1.
multiclass sme_tile_to_vector<string mnemonic> {
  defm _H : sme_tile_to_vector_v<mnemonic,
                                 /*is_col=*/0b0>;
  defm _V : sme_tile_to_vector_v<mnemonic,
                                 /*is_col=*/0b1>;
}
1006
1007//===----------------------------------------------------------------------===//
1008// SME Zero
1009//===----------------------------------------------------------------------===//
1010
// NOTE: As written this definition is not strictly accurate: the instruction
// does have outputs, namely the tile registers it zeroes. A custom inserter
// compensates by marking the affected registers as implicitly defined.
class sme_zero_inst<string mnemonic>
    : I<(outs),
        (ins MatrixTileList:$imm),
        mnemonic, "\t$imm", "", []>,
      Sched<[]> {
  // 8-bit tile list, encoded verbatim in the low byte.
  bits<8> imm;
  let Inst{7-0}  = imm;
  // Fixed opcode bits.
  let Inst{31-8} = 0b110000000000100000000000;
}
1021
// The zero instruction, its assembler aliases for named tile lists, a pseudo
// that takes the tile list as an immediate, and the pattern selecting that
// pseudo for int_aarch64_sme_zero.
multiclass sme_zero<string mnemonic> {
  def NAME : sme_zero_inst<mnemonic>;

  // Whole array.
  def : InstAlias<"zero\t\\{za\\}",
                  (!cast<Instruction>(NAME) 0b11111111), 1>;

  // Single 16-bit tiles.
  def : InstAlias<"zero\t\\{za0.h\\}",
                  (!cast<Instruction>(NAME) 0b01010101), 1>;
  def : InstAlias<"zero\t\\{za1.h\\}",
                  (!cast<Instruction>(NAME) 0b10101010), 1>;

  // Single 32-bit tiles.
  def : InstAlias<"zero\t\\{za0.s\\}",
                  (!cast<Instruction>(NAME) 0b00010001), 1>;
  def : InstAlias<"zero\t\\{za1.s\\}",
                  (!cast<Instruction>(NAME) 0b00100010), 1>;
  def : InstAlias<"zero\t\\{za2.s\\}",
                  (!cast<Instruction>(NAME) 0b01000100), 1>;
  def : InstAlias<"zero\t\\{za3.s\\}",
                  (!cast<Instruction>(NAME) 0b10001000), 1>;

  // Pairs of 32-bit tiles.
  def : InstAlias<"zero\t\\{za0.s,za1.s\\}",
                  (!cast<Instruction>(NAME) 0b00110011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b10011001), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s\\}",
                  (!cast<Instruction>(NAME) 0b01100110), 1>;
  def : InstAlias<"zero\t\\{za2.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b11001100), 1>;

  // Triples of 32-bit tiles.
  def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}",
                  (!cast<Instruction>(NAME) 0b01110111), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b10111011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b11011101), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b11101110), 1>;

  // Intrinsic-lowering pseudo; the custom inserter in AArch64ISelLowering.cpp
  // turns it into the actual instruction.
  def NAME # _PSEUDO
      : Pseudo<(outs), (ins i64imm:$tilelist), []>,
        Sched<[]> {
    let usesCustomInserter = 1;
  }

  def : Pat<(int_aarch64_sme_zero imm:$imm),
            (!cast<Instruction>(NAME # _PSEUDO) imm:$imm)>;
}
1050
1051//===----------------------------------------------------------------------===//
1052// SVE2 Instructions
1053//===----------------------------------------------------------------------===//
1054
// Encoding of the predicated, merging unary permute on ZPR128 operands. The
// destination is tied to $_Zd, and $Pg is printed with the "/m" qualifier.
class sve2_int_perm_revd<string asm>
    : I<(outs ZPR128:$Zd),
        (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
        asm, "\t$Zd, $Pg/m, $Zn", "", []>,
      Sched<[]> {
  // Operand fields.
  bits<3> Pg;
  bits<5> Zn;
  bits<5> Zd;

  // Fixed opcode bits.
  let Inst{31-24} = 0b00000101;
  let Inst{23-22} = 0b00; // size
  let Inst{21-13} = 0b101110100;

  // Operand bits.
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints         = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveUnary;
  let ElementSize         = ZPR128.ElementSize;
}
1073
// Defines the instruction record and selects it for `op` on each of the
// integer scalable vector types listed below, with the predicate type of the
// matching lane count.
multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
  def NAME : sve2_int_perm_revd<asm>;

  def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64,
                              !cast<Instruction>(NAME)>;
}
1082
// Encoding of the three-register clamp instruction for one element size.
// `sz` and `U` are placed directly into the encoding; the destination is tied
// to the $_Zd input.
//
// NOTE(review): $_Zd is the LAST entry of `ins`, after $Zn and $Zm. Any
// selection pattern that binds its sources positionally will therefore feed
// its third source into the tied destination - confirm this matches the
// operand order of the intrinsic being selected.
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
    : I<(outs zpr_ty:$Zd),
        (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
        asm, "\t$Zd, $Zn, $Zm", "", []>,
      Sched<[]> {
  // Operand fields.
  bits<5> Zd;
  bits<5> Zn;
  bits<5> Zm;

  // Fixed opcode bits.
  let Inst{31-24} = 0b01000100;
  let Inst{21}    = 0b0;
  let Inst{15-11} = 0b11000;

  // Bits taken from the template parameters.
  let Inst{23-22} = sz;
  let Inst{10}    = U;

  // Operand bits.
  let Inst{20-16} = Zm;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints         = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveOther;
  let ElementSize         = zpr_ty.ElementSize;
}
1103
// One clamp instruction per element size (_B/_H/_S/_D), each selected for
// `op` on the integer scalable vector type of that element size.
multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
  def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
  def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
  def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
  def _D : sve2_clamp<asm, 0b11, U, ZPR64>;

  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8,
                     !cast<Instruction>(NAME # _B)>;
  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16,
                     !cast<Instruction>(NAME # _H)>;
  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32,
                     !cast<Instruction>(NAME # _S)>;
  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64,
                     !cast<Instruction>(NAME # _D)>;
}
1115
// Common encoding of the predicate select indexed by $Rv plus an immediate.
// Inst{23-22} and Inst{20-18} are not assigned here; the per-size records
// fill them with the immediate and the size-specific fixed bits.
class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
    : I<(outs PPRAny:$Pd),
        (ins PPRAny:$Pn, ppr_ty:$Pm,
             MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
      Sched<[]> {
  // Operand fields.
  bits<4> Pd;
  bits<4> Pn;
  bits<4> Pm;
  bits<2> Rv;

  // Fixed opcode bits.
  let Inst{31-24} = 0b00100101;
  let Inst{21}    = 0b1;
  let Inst{15-14} = 0b01;
  let Inst{9}     = 0b0;
  let Inst{4}     = 0b0;

  // Operand bits.
  let Inst{17-16} = Rv;
  let Inst{13-10} = Pn;
  let Inst{8-5}   = Pm;
  let Inst{3-0}   = Pd;
}
1135
// Per-element-size predicate select instructions (_B/_H/_S/_D) and their
// selection patterns for `op`.
//
// Each record spreads its immediate over Inst{23-22} and the top of
// Inst{20-18}, and fills the remaining bits of Inst{20-18} with a fixed
// size marker.
multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
  def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
    bits<4> imm;
    let Inst{23-22} = imm{3-2};
    let Inst{20-19} = imm{1-0};
    let Inst{18}    = 0b1;
  }
  def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
    bits<3> imm;
    let Inst{23-22} = imm{2-1};
    let Inst{20}    = imm{0};
    let Inst{19-18} = 0b10;
  }
  def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
    bits<2> imm;
    let Inst{23-22} = imm{1-0};
    let Inst{20-18} = 0b100;
  }
  def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
    bits<1> imm;
    let Inst{23}    = imm;
    let Inst{22}    = 0b1;
    let Inst{20-18} = 0b000;
  }

  // For each predicate type there are two patterns: a plain index selects the
  // instruction with an immediate of 0, and an index matched by the
  // tileslice complex pattern supplies register and immediate separately.
  // AddedComplexity ranks the second form above the first.

  // nxv16i1 -> _B
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
                         MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
  let AddedComplexity = 1 in
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
                         (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx,
                                          sme_elm_idx0_15:$imm)))),
            (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;

  // nxv8i1 -> _H
  def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
                        MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
  let AddedComplexity = 1 in
  def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
                        (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx,
                                          sme_elm_idx0_7:$imm)))),
            (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;

  // nxv4i1 -> _S
  def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
                        MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
  let AddedComplexity = 1 in
  def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
                        (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx,
                                          sme_elm_idx0_3:$imm)))),
            (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;

  // nxv2i1 -> _D
  def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
                        MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;
  let AddedComplexity = 1 in
  def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
                        (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx,
                                          sme_elm_idx0_1:$imm)))),
            (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
}
1189