xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1//=- AArch64SMEInstrInfo.td -  AArch64 SME Instructions -*- tablegen -*-----=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
10//
11//===----------------------------------------------------------------------===//
12
13def AArch64_smstart : SDNode<"AArch64ISD::SMSTART", SDTypeProfile<0, 2,
14                             [SDTCisInt<0>, SDTCisInt<0>]>,
15                             [SDNPHasChain, SDNPSideEffect, SDNPVariadic,
16                              SDNPOptInGlue, SDNPOutGlue]>;
17def AArch64_smstop  : SDNode<"AArch64ISD::SMSTOP", SDTypeProfile<0, 2,
18                             [SDTCisInt<0>, SDTCisInt<0>]>,
19                             [SDNPHasChain, SDNPSideEffect, SDNPVariadic,
20                              SDNPOptInGlue, SDNPOutGlue]>;
21def AArch64_restore_za : SDNode<"AArch64ISD::RESTORE_ZA", SDTypeProfile<0, 3,
22                             [SDTCisInt<0>, SDTCisPtrTy<1>]>,
23                             [SDNPHasChain, SDNPSideEffect, SDNPVariadic,
24                              SDNPOptInGlue]>;
25def AArch64_restore_zt : SDNode<"AArch64ISD::RESTORE_ZT", SDTypeProfile<0, 2,
26                                [SDTCisInt<0>, SDTCisPtrTy<1>]>,
27                                [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
28def AArch64_save_zt : SDNode<"AArch64ISD::SAVE_ZT", SDTypeProfile<0, 2,
29                             [SDTCisInt<0>, SDTCisPtrTy<1>]>,
30                             [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
31def AArch64CoalescerBarrier
32    : SDNode<"AArch64ISD::COALESCER_BARRIER", SDTypeProfile<1, 1, []>, [SDNPOptInGlue, SDNPOutGlue]>;
33
34def AArch64VGSave : SDNode<"AArch64ISD::VG_SAVE", SDTypeProfile<0, 0, []>,
35                           [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
36
37def AArch64VGRestore : SDNode<"AArch64ISD::VG_RESTORE", SDTypeProfile<0, 0, []>,
38                              [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
39
40def AArch64AllocateZABuffer : SDNode<"AArch64ISD::ALLOCATE_ZA_BUFFER", SDTypeProfile<1, 1,
41                              [SDTCisInt<0>, SDTCisInt<1>]>,
42                              [SDNPHasChain, SDNPSideEffect]>;
43let usesCustomInserter = 1, Defs = [SP], Uses = [SP] in {
44  def AllocateZABuffer : Pseudo<(outs GPR64sp:$dst), (ins GPR64:$size), []>, Sched<[WriteI]> {}
45}
46def : Pat<(i64 (AArch64AllocateZABuffer GPR64:$size)),
47          (AllocateZABuffer $size)>;
48
49def AArch64InitTPIDR2Obj  : SDNode<"AArch64ISD::INIT_TPIDR2OBJ", SDTypeProfile<0, 1,
50                              [SDTCisInt<0>]>, [SDNPHasChain, SDNPMayStore]>;
51let usesCustomInserter = 1 in {
52  def InitTPIDR2Obj : Pseudo<(outs), (ins GPR64:$buffer), [(AArch64InitTPIDR2Obj GPR64:$buffer)]>, Sched<[WriteI]> {}
53}
54
55//===----------------------------------------------------------------------===//
56// Instruction naming conventions.
57//===----------------------------------------------------------------------===//
58
59// M = SME array register (ZA)
60// P = Predicate register
61// C = Predicate-as-counter register
62// I = immediate
63// Z = SVE vector register
64// T = ZT0 register
65//
66
67//===----------------------------------------------------------------------===//
68// Add vector elements horizontally or vertically to ZA tile.
69//===----------------------------------------------------------------------===//
70
71def SDT_AArch64RDSVL  : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
72def AArch64rdsvl : SDNode<"AArch64ISD::RDSVL", SDT_AArch64RDSVL>;
73
74let Predicates = [HasSMEandIsNonStreamingSafe] in {
75def RDSVLI_XI  : sve_int_read_vl_a<0b0, 0b11111, "rdsvl", /*streaming_sve=*/0b1>;
76def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>;
77def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>;
78
79def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>;
80}
81
82let Predicates = [HasSME] in {
83defm ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha", int_aarch64_sme_addha>;
84defm ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva", int_aarch64_sme_addva>;
85}
86
87let Predicates = [HasSMEI16I64] in {
88defm ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha", int_aarch64_sme_addha>;
89defm ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva", int_aarch64_sme_addva>;
90}
91
92let Predicates = [HasSME] in {
93//===----------------------------------------------------------------------===//
94// Outer products
95//===----------------------------------------------------------------------===//
96
97defm BFMOPA_MPPZZ  : sme_bf16_outer_product<0b000, "bfmopa", int_aarch64_sme_mopa_wide>;
98defm BFMOPS_MPPZZ  : sme_bf16_outer_product<0b001, "bfmops", int_aarch64_sme_mops_wide>;
99
100defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, 0b00, ZPR32, "fmopa", int_aarch64_sme_mopa>;
101defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, 0b00, ZPR32, "fmops", int_aarch64_sme_mops>;
102}
103
104let Predicates = [HasSMEF64F64] in {
105defm FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa", int_aarch64_sme_mopa>;
106defm FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops", int_aarch64_sme_mops>;
107}
108
109let Predicates = [HasSME] in {
110defm FMOPAL_MPPZZ  : sme_f16_outer_product<0b010, "fmopa", int_aarch64_sme_mopa_wide>;
111defm FMOPSL_MPPZZ  : sme_f16_outer_product<0b011, "fmops", int_aarch64_sme_mops_wide>;
112
113defm SMOPA_MPPZZ_S  : sme_int_outer_product_i32<0b000, "smopa",  int_aarch64_sme_smopa_wide>;
114defm SMOPS_MPPZZ_S  : sme_int_outer_product_i32<0b001, "smops",  int_aarch64_sme_smops_wide>;
115defm UMOPA_MPPZZ_S  : sme_int_outer_product_i32<0b110, "umopa",  int_aarch64_sme_umopa_wide>;
116defm UMOPS_MPPZZ_S  : sme_int_outer_product_i32<0b111, "umops",  int_aarch64_sme_umops_wide>;
117defm SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa", int_aarch64_sme_sumopa_wide>;
118defm SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops", int_aarch64_sme_sumops_wide>;
119defm USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa", int_aarch64_sme_usmopa_wide>;
120defm USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops", int_aarch64_sme_usmops_wide>;
121}
122
123let Predicates = [HasSMEI16I64] in {
124defm SMOPA_MPPZZ_D  : sme_int_outer_product_i64<0b000, "smopa",  int_aarch64_sme_smopa_wide>;
125defm SMOPS_MPPZZ_D  : sme_int_outer_product_i64<0b001, "smops",  int_aarch64_sme_smops_wide>;
126defm UMOPA_MPPZZ_D  : sme_int_outer_product_i64<0b110, "umopa",  int_aarch64_sme_umopa_wide>;
127defm UMOPS_MPPZZ_D  : sme_int_outer_product_i64<0b111, "umops",  int_aarch64_sme_umops_wide>;
128defm SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa", int_aarch64_sme_sumopa_wide>;
129defm SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops", int_aarch64_sme_sumops_wide>;
130defm USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa", int_aarch64_sme_usmopa_wide>;
131defm USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops", int_aarch64_sme_usmops_wide>;
132}
133
134let Predicates = [HasSME] in {
135//===----------------------------------------------------------------------===//
136// Loads and stores
137//===----------------------------------------------------------------------===//
138
139defm LD1_MXIPXX : sme_mem_ld_ss<"ld1">;
140defm ST1_MXIPXX : sme_mem_st_ss<"st1">;
141
142//===----------------------------------------------------------------------===//
143// Move instructions
144//===----------------------------------------------------------------------===//
145
146defm INSERT_MXIPZ  : sme_vector_to_tile<"mova">;
147defm EXTRACT_ZPMXI : sme_tile_to_vector<"mova">;
148} // End let Predicates = [HasSME]
149
150let Predicates = [HasSMEandIsNonStreamingSafe] in {
151//===----------------------------------------------------------------------===//
152// Spill + fill
153//===----------------------------------------------------------------------===//
154
155defm LDR_ZA : sme_fill<"ldr">;
156defm STR_ZA : sme_spill<"str">;
157
158//===----------------------------------------------------------------------===//
159// Zero instruction
160//===----------------------------------------------------------------------===//
161
162defm ZERO_M : sme_zero<"zero">;
163
164//===----------------------------------------------------------------------===//
165// Mode selection and state access instructions
166//===----------------------------------------------------------------------===//
167
168// Pseudo to conditionally restore ZA state. This expands:
169//
170//   pseudonode tpidr2_el0, tpidr2obj, restore_routine
171//
172// Into:
173//
174//   if (tpidr2_el0 == 0)
175//     BL restore_routine, implicit-use tpidr2obj
176//
177def RestoreZAPseudo :
178  Pseudo<(outs),
179         (ins GPR64:$tpidr2_el0, GPR64sp:$tpidr2obj, i64imm:$restore_routine, variable_ops), []>,
180         Sched<[]>;
181
182def : Pat<(AArch64_restore_za
183            (i64 GPR64:$tpidr2_el0), (i64 GPR64sp:$tpidr2obj), (i64 texternalsym:$restore_routine)),
184          (RestoreZAPseudo GPR64:$tpidr2_el0, GPR64sp:$tpidr2obj, texternalsym:$restore_routine)>;
185
186// Read and write TPIDR2_EL0
187def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val),
188          (MSR 0xde85, GPR64:$val)>;
189def : Pat<(i64 (int_aarch64_sme_get_tpidr2)),
190          (MRS 0xde85)>;
191
192} // End let Predicates = [HasSMEandIsNonStreamingSafe]
193
194multiclass CoalescerBarrierPseudo<RegisterClass rc, list<ValueType> vts> {
195  def NAME : Pseudo<(outs rc:$dst), (ins rc:$src), []>, Sched<[]> {
196    let Constraints = "$dst = $src";
197  }
198  foreach vt = vts in {
199    def : Pat<(vt (AArch64CoalescerBarrier (vt rc:$src))),
200              (!cast<Instruction>(NAME) rc:$src)>;
201  }
202}
203
204multiclass CoalescerBarriers {
205  defm _FPR16  : CoalescerBarrierPseudo<FPR16, [bf16, f16]>;
206  defm _FPR32  : CoalescerBarrierPseudo<FPR32, [f32]>;
207  defm _FPR64  : CoalescerBarrierPseudo<FPR64, [f64, v8i8, v4i16, v2i32, v1i64, v4f16, v2f32, v1f64, v4bf16]>;
208  defm _FPR128 : CoalescerBarrierPseudo<FPR128, [f128, v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64, v8bf16]>;
209}
210
211defm COALESCER_BARRIER : CoalescerBarriers;
212
213// Pseudo to match to smstart/smstop. This expands:
214//
215//  pseudonode (pstate_za|pstate_sm), before_call, expected_value
216//
217// Into:
218//
219//   if (before_call != expected_value)
220//     node (pstate_za|pstate_sm)
221//
222// where node can be either 'smstart' or 'smstop'.
223//
224// This pseudo and corresponding patterns don't need to be predicated by SME,
225// because when they're emitted for streaming-compatible functions and run
226// in a non-SME context the generated code-paths will never execute any
227// SME instructions.
228def MSRpstatePseudo :
229  Pseudo<(outs),
230           (ins svcr_op:$pstatefield, timm0_1:$imm, timm0_31:$condition, variable_ops), []>,
231    Sched<[WriteSys]> {
232  let hasPostISelHook = 1;
233  let Uses = [VG];
234  let Defs = [VG];
235}
236
237def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 timm0_31:$condition)),
238          (MSRpstatePseudo svcr_op:$pstate, 0b1, timm0_31:$condition)>;
239def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 timm0_31:$condition)),
240          (MSRpstatePseudo svcr_op:$pstate, 0b0, timm0_31:$condition)>;
241
242// Unconditional start/stop
243def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 /*AArch64SME::Always*/0)),
244          (MSRpstatesvcrImm1 svcr_op:$pstate, 0b1)>;
245def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 /*AArch64SME::Always*/0)),
246          (MSRpstatesvcrImm1 svcr_op:$pstate, 0b0)>;
247
248
249// Pseudo to insert cfi_offset/cfi_restore instructions. Used to save or restore
250// the streaming value of VG around streaming-mode changes in locally-streaming
251// functions.
252def VGSavePseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
253def : Pat<(AArch64VGSave), (VGSavePseudo)>;
254
255def VGRestorePseudo : Pseudo<(outs), (ins), []>, Sched<[]>;
256def : Pat<(AArch64VGRestore), (VGRestorePseudo)>;
257
258//===----------------------------------------------------------------------===//
259// SME2 Instructions
260//===----------------------------------------------------------------------===//
261let Predicates = [HasSME2] in {
262defm ADD_VG2_M2ZZ_S  : sme2_dot_mla_add_sub_array_vg2_single<"add", 0b0011010, MatrixOp32, ZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_add_write_single_za_vg1x2>;
263defm ADD_VG4_M4ZZ_S  : sme2_dot_mla_add_sub_array_vg4_single<"add", 0b0111010, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_add_write_single_za_vg1x4>;
264defm ADD_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b0110010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x2>;
265defm ADD_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b0110010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x4>;
266
267defm ADD_VG2_2ZZ  : sme2_int_sve_destructive_vector_vg2_single<"add", 0b0110000>;
268defm ADD_VG4_4ZZ  : sme2_int_sve_destructive_vector_vg4_single<"add", 0b0110000>;
269
270defm SUB_VG2_M2ZZ_S  : sme2_dot_mla_add_sub_array_vg2_single<"sub", 0b0011011, MatrixOp32, ZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_sub_write_single_za_vg1x2>;
271defm SUB_VG4_M4ZZ_S  : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b0111011, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_sub_write_single_za_vg1x4>;
272defm SUB_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b0110011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x2>;
273defm SUB_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b0110011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x4>;
274
275defm FMLA_VG2_M2ZZ_S  : sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b0011000, MatrixOp32, ZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_single_vg1x2>;
276defm FMLA_VG4_M4ZZ_S  : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b0111000, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_single_vg1x4>;
277defm FMLA_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b0110000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x2>;
278defm FMLA_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b0110000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x4>;
279defm FMLA_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmla", 0b01, 0b0000, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x2>;
280defm FMLA_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmla", 0b0000, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x4>;
281
282defm FMLS_VG2_M2ZZ_S  : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b0011001, MatrixOp32, ZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_single_vg1x2>;
283defm FMLS_VG4_M4ZZ_S  : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b0111001, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_single_vg1x4>;
284defm FMLS_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b0110001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x2>;
285defm FMLS_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0110001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x4>;
286defm FMLS_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmls", 0b01, 0b0010, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x2>;
287defm FMLS_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmls", 0b0010, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x4>;
288
289defm ADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"add", 0b0010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_za32_vg1x2>;
290defm ADD_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"add", 0b0010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_za32_vg1x4>;
291
292defm SUB_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"sub", 0b0011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_za32_vg1x2>;
293defm SUB_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"sub", 0b0011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_za32_vg1x4>;
294
295defm FADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_add_za32_vg1x2>;
296defm FADD_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_add_za32_vg1x4>;
297
298defm FSUB_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_sub_za32_vg1x2>;
299defm FSUB_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_sub_za32_vg1x4>;
300
301defm SQDMULH_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"sqdmulh", 0b1000000>;
302defm SQDMULH_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"sqdmulh", 0b1000000>;
303defm SQDMULH_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"sqdmulh", 0b1000000>;
304defm SQDMULH_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"sqdmulh", 0b1000000>;
305
306defm FMLAL_MZZI      : sme2_mla_long_array_index<"fmlal",  0b10,   0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x1>;
307defm FMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlal",   0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x2>;
308defm FMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlal",   0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x4>;
309defm FMLAL_MZZ       : sme2_mla_long_array_single<"fmlal", 0b00,   0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x1>;
310defm FMLAL_VG2_M2ZZ_HtoS  : sme2_fp_mla_long_array_vg2_single<"fmlal",  0b000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlal_single_vg2x2>;
311defm FMLAL_VG4_M4ZZ_HtoS  : sme2_fp_mla_long_array_vg4_single<"fmlal",  0b000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlal_single_vg2x4>;
312defm FMLAL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"fmlal",   0b000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlal_vg2x2>;
313defm FMLAL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"fmlal",   0b000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlal_vg2x4>;
314
315defm FMLSL_MZZI      : sme2_mla_long_array_index<"fmlsl",  0b10,   0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x1>;
316defm FMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlsl",   0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x2>;
317defm FMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlsl",   0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x4>;
318defm FMLSL_MZZ       : sme2_mla_long_array_single<"fmlsl", 0b00,   0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x1>;
319defm FMLSL_VG2_M2ZZ_HtoS  : sme2_fp_mla_long_array_vg2_single<"fmlsl",  0b010,  MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x2>;
320defm FMLSL_VG4_M4ZZ_HtoS  : sme2_fp_mla_long_array_vg4_single<"fmlsl",  0b010, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x4>;
321defm FMLSL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"fmlsl",   0b001, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlsl_vg2x2>;
322defm FMLSL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"fmlsl",   0b001, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlsl_vg2x4>;
323
324defm BFMLAL_MZZI      : sme2_mla_long_array_index<"bfmlal",  0b10,   0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x1>;
325defm BFMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlal",   0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x2>;
326defm BFMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlal",   0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x4>;
327defm BFMLAL_MZZ       : sme2_mla_long_array_single<"bfmlal", 0b00,   0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x1>;
328defm BFMLAL_VG2_M2ZZ_HtoS  : sme2_fp_mla_long_array_vg2_single<"bfmlal",  0b100, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x2>;
329defm BFMLAL_VG4_M4ZZ_HtoS  : sme2_fp_mla_long_array_vg4_single<"bfmlal",  0b100, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x4>;
330defm BFMLAL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"bfmlal",   0b010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlal_vg2x2>;
331defm BFMLAL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"bfmlal",   0b010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlal_vg2x4>;
332
333defm BFMLSL_MZZI      : sme2_mla_long_array_index<"bfmlsl",  0b10,   0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x1>;
334defm BFMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlsl",   0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x2>;
335defm BFMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlsl",   0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x4>;
336defm BFMLSL_MZZ       : sme2_mla_long_array_single<"bfmlsl", 0b00,   0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x1>;
337defm BFMLSL_VG2_M2ZZ_HtoS  : sme2_fp_mla_long_array_vg2_single<"bfmlsl",  0b110, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x2>;
338defm BFMLSL_VG4_M4ZZ_HtoS  : sme2_fp_mla_long_array_vg4_single<"bfmlsl",  0b110, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x4>;
339defm BFMLSL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"bfmlsl",   0b011, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlsl_vg2x2>;
340defm BFMLSL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"bfmlsl",   0b011, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlsl_vg2x4>;
341
342defm SMLAL_MZZI      : sme2_mla_long_array_index<"smlal", 0b11,    0b00, nxv8i16, int_aarch64_sme_smlal_lane_vg2x1>;
343defm SMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlal",  0b00, int_aarch64_sme_smlal_lane_vg2x2>;
344defm SMLAL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"smlal",  0b00, int_aarch64_sme_smlal_lane_vg2x4>;
345defm SMLAL_MZZ       : sme2_mla_long_array_single<"smlal",0b01,    0b00, nxv8i16, int_aarch64_sme_smlal_single_vg2x1>;
346defm SMLAL_VG2_M2ZZ  : sme2_int_mla_long_array_vg2_single<"smlal", 0b00, int_aarch64_sme_smlal_single_vg2x2>;
347defm SMLAL_VG4_M4ZZ  : sme2_int_mla_long_array_vg4_single<"smlal", 0b00, int_aarch64_sme_smlal_single_vg2x4>;
348defm SMLAL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"smlal",  0b00, int_aarch64_sme_smlal_vg2x2>;
349defm SMLAL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"smlal",  0b00, int_aarch64_sme_smlal_vg2x4>;
350
351defm SMLSL_MZZI      : sme2_mla_long_array_index<"smlsl", 0b11,    0b01, nxv8i16, int_aarch64_sme_smlsl_lane_vg2x1>;
352defm SMLSL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlsl",  0b01, int_aarch64_sme_smlsl_lane_vg2x2>;
353defm SMLSL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"smlsl",  0b01, int_aarch64_sme_smlsl_lane_vg2x4>;
354defm SMLSL_MZZ       : sme2_mla_long_array_single<"smlsl",0b01,    0b01, nxv8i16, int_aarch64_sme_smlsl_single_vg2x1>;
355defm SMLSL_VG2_M2ZZ  : sme2_int_mla_long_array_vg2_single<"smlsl", 0b01, int_aarch64_sme_smlsl_single_vg2x2>;
356defm SMLSL_VG4_M4ZZ  : sme2_int_mla_long_array_vg4_single<"smlsl", 0b01, int_aarch64_sme_smlsl_single_vg2x4>;
357defm SMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"smlsl",  0b01, int_aarch64_sme_smlsl_vg2x2>;
358defm SMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"smlsl",  0b01, int_aarch64_sme_smlsl_vg2x4>;
359
360defm UMLAL_MZZI      : sme2_mla_long_array_index<"umlal", 0b11,    0b10, nxv8i16, int_aarch64_sme_umlal_lane_vg2x1>;
361defm UMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"umlal",  0b10, int_aarch64_sme_umlal_lane_vg2x2>;
362defm UMLAL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"umlal",  0b10, int_aarch64_sme_umlal_lane_vg2x4>;
363defm UMLAL_MZZ       : sme2_mla_long_array_single<"umlal",0b01,    0b10, nxv8i16, int_aarch64_sme_umlal_single_vg2x1>;
364defm UMLAL_VG2_M2ZZ  : sme2_int_mla_long_array_vg2_single<"umlal", 0b10, int_aarch64_sme_umlal_single_vg2x2>;
365defm UMLAL_VG4_M4ZZ  : sme2_int_mla_long_array_vg4_single<"umlal", 0b10, int_aarch64_sme_umlal_single_vg2x4>;
366defm UMLAL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlal",  0b10, int_aarch64_sme_umlal_vg2x2>;
367defm UMLAL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlal",  0b10, int_aarch64_sme_umlal_vg2x4>;
368
369defm UMLSL_MZZI      : sme2_mla_long_array_index<"umlsl", 0b11,    0b11, nxv8i16, int_aarch64_sme_umlsl_lane_vg2x1>;
370defm UMLSL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"umlsl",  0b11, int_aarch64_sme_umlsl_lane_vg2x2>;
371defm UMLSL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"umlsl",  0b11, int_aarch64_sme_umlsl_lane_vg2x4>;
372defm UMLSL_MZZ       : sme2_mla_long_array_single<"umlsl",0b01,    0b11, nxv8i16, int_aarch64_sme_umlsl_single_vg2x1>;
373defm UMLSL_VG2_M2ZZ  : sme2_int_mla_long_array_vg2_single<"umlsl", 0b11, int_aarch64_sme_umlsl_single_vg2x2>;
374defm UMLSL_VG4_M4ZZ  : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aarch64_sme_umlsl_single_vg2x4>;
375defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl",  0b11, int_aarch64_sme_umlsl_vg2x2>;
376defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl",  0b11, int_aarch64_sme_umlsl_vg2x4>;
377
378defm FCVT_Z2Z_StoH   : sme2_cvt_vg2_single<"fcvt",   0b00000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
379defm FCVTN_Z2Z_StoH  : sme2_cvt_vg2_single<"fcvtn",  0b00001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
380defm BFCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"bfcvt",  0b10000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
381defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b10001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
382
383defm SQCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"sqcvt",  0b00110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>;
384defm UQCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"uqcvt",  0b00111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>;
385defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>;
386defm SQCVT_Z4Z      : sme2_int_cvt_vg4_single<"sqcvt", 0b000, int_aarch64_sve_sqcvt_x4>;
387defm UQCVT_Z4Z      : sme2_int_cvt_vg4_single<"uqcvt", 0b001, int_aarch64_sve_uqcvt_x4>;
388defm SQCVTU_Z4Z     : sme2_int_cvt_vg4_single<"sqcvtu", 0b100, int_aarch64_sve_sqcvtu_x4>;
389defm SQCVTN_Z4Z     : sme2_int_cvt_vg4_single<"sqcvtn", 0b010, int_aarch64_sve_sqcvtn_x4>;
390defm SQCVTUN_Z4Z    : sme2_int_cvt_vg4_single<"sqcvtun", 0b110, int_aarch64_sve_sqcvtun_x4>;
391defm UQCVTN_Z4Z     : sme2_int_cvt_vg4_single<"uqcvtn", 0b011, int_aarch64_sve_uqcvtn_x4>;
392
393defm FCVTZS_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzs", 0b00010>;
394defm FCVTZS_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzs", 0b0001000>;
395defm FCVTZU_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzu", 0b00011>;
396defm FCVTZU_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzu", 0b0001010>;
397defm SCVTF_2Z2Z_StoS  : sme2_fp_cvt_vg2_multi<"scvtf", 0b00100>;
398defm SCVTF_4Z4Z_StoS  : sme2_fp_cvt_vg4_multi<"scvtf", 0b0010000>;
399defm UCVTF_2Z2Z_StoS  : sme2_fp_cvt_vg2_multi<"ucvtf", 0b00101>;
400defm UCVTF_4Z4Z_StoS  : sme2_fp_cvt_vg4_multi<"ucvtf", 0b0010010>;
401
402defm SMAX_VG2_2ZZ  : sme2_int_sve_destructive_vector_vg2_single<"smax", 0b0000000>;
403defm SMAX_VG4_4ZZ  : sme2_int_sve_destructive_vector_vg4_single<"smax", 0b0000000>;
404defm SMAX_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"smax",  0b0000000>;
405defm SMAX_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"smax",  0b0000000>;
406
407defm UMAX_VG2_2ZZ  : sme2_int_sve_destructive_vector_vg2_single<"umax", 0b0000001>;
408defm UMAX_VG4_4ZZ  : sme2_int_sve_destructive_vector_vg4_single<"umax", 0b0000001>;
409defm UMAX_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"umax",  0b0000001>;
410defm UMAX_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"umax",  0b0000001>;
411
412defm SMIN_VG2_2ZZ  : sme2_int_sve_destructive_vector_vg2_single<"smin", 0b0000010>;
413defm SMIN_VG4_4ZZ  : sme2_int_sve_destructive_vector_vg4_single<"smin", 0b0000010>;
414defm SMIN_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"smin",  0b0000010>;
415defm SMIN_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"smin",  0b0000010>;
416
417defm UMIN_VG2_2ZZ  : sme2_int_sve_destructive_vector_vg2_single<"umin", 0b0000011>;
418defm UMIN_VG4_4ZZ  : sme2_int_sve_destructive_vector_vg4_single<"umin", 0b0000011>;
419defm UMIN_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"umin",  0b0000011>;
420defm UMIN_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"umin",  0b0000011>;
421
422defm FMAX_VG2_2ZZ  : sme2_fp_sve_destructive_vector_vg2_single<"fmax", 0b0010000>;
423defm FMAX_VG4_4ZZ  : sme2_fp_sve_destructive_vector_vg4_single<"fmax", 0b0010000>;
424defm FMAX_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fmax",  0b0010000>;
425defm FMAX_VG4_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fmax",  0b0010000>;
426
427defm FMIN_VG2_2ZZ  : sme2_fp_sve_destructive_vector_vg2_single<"fmin", 0b0010001>;
428defm FMIN_VG4_4ZZ  : sme2_fp_sve_destructive_vector_vg4_single<"fmin", 0b0010001>;
429defm FMIN_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fmin",  0b0010001>;
430defm FMIN_VG4_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fmin",  0b0010001>;
431
432defm FMAXNM_VG2_2ZZ  : sme2_fp_sve_destructive_vector_vg2_single<"fmaxnm", 0b0010010>;
433defm FMAXNM_VG4_4ZZ  : sme2_fp_sve_destructive_vector_vg4_single<"fmaxnm", 0b0010010>;
434defm FMAXNM_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fmaxnm",  0b0010010>;
435defm FMAXNM_VG4_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fmaxnm",  0b0010010>;
436
437defm FMINNM_VG2_2ZZ  : sme2_fp_sve_destructive_vector_vg2_single<"fminnm", 0b0010011>;
438defm FMINNM_VG4_4ZZ  : sme2_fp_sve_destructive_vector_vg4_single<"fminnm", 0b0010011>;
439defm FMINNM_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fminnm",  0b0010011>;
440defm FMINNM_VG4_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fminnm",  0b0010011>;
441
442defm SRSHL_VG2_2ZZ  :  sme2_int_sve_destructive_vector_vg2_single<"srshl", 0b0100010>;
443defm SRSHL_VG4_4ZZ  :  sme2_int_sve_destructive_vector_vg4_single<"srshl", 0b0100010>;
444defm SRSHL_VG2_2Z2Z :  sme2_int_sve_destructive_vector_vg2_multi<"srshl",  0b0100010>;
445defm SRSHL_VG4_4Z4Z :  sme2_int_sve_destructive_vector_vg4_multi<"srshl",  0b0100010>;
446
447defm URSHL_VG2_2ZZ  :  sme2_int_sve_destructive_vector_vg2_single<"urshl", 0b0100011>;
448defm URSHL_VG4_4ZZ  :  sme2_int_sve_destructive_vector_vg4_single<"urshl", 0b0100011>;
449defm URSHL_VG2_2Z2Z :  sme2_int_sve_destructive_vector_vg2_multi<"urshl",  0b0100011>;
450defm URSHL_VG4_4Z4Z :  sme2_int_sve_destructive_vector_vg4_multi<"urshl",  0b0100011>;
451
452defm FCLAMP_VG2_2Z2Z : sme2_fp_clamp_vector_vg2_multi<"fclamp">;
453defm FCLAMP_VG4_4Z4Z : sme2_fp_clamp_vector_vg4_multi<"fclamp">;
454
455defm SCLAMP_VG2_2Z2Z : sme2_int_clamp_vector_vg2_multi<"sclamp", 0b0>;
456defm SCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"sclamp", 0b0>;
457
458defm UCLAMP_VG2_2Z2Z : sme2_int_clamp_vector_vg2_multi<"uclamp", 0b1>;
459defm UCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"uclamp", 0b1>;
460
461defm FDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b1001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x2>;
462defm FDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b1001, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x4>;
463defm FDOT_VG2_M2ZZ_HtoS  : sme2_dot_mla_add_sub_array_vg2_single<"fdot", 0b0010000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x2>;
464defm FDOT_VG4_M4ZZ_HtoS  : sme2_dot_mla_add_sub_array_vg4_single<"fdot", 0b0110000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x4>;
465defm FDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot",  0b0100000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x2>;
466defm FDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot",  0b0100000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x4>;
467
468defm BFDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfdot", 0b01, 0b1011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x2>;
469defm BFDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"bfdot", 0b1011, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x4>;
470defm BFDOT_VG2_M2ZZ_HtoS  : sme2_dot_mla_add_sub_array_vg2_single<"bfdot", 0b0010010, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x2>;
471defm BFDOT_VG4_M4ZZ_HtoS  : sme2_dot_mla_add_sub_array_vg4_single<"bfdot", 0b0110010, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x4>;
472defm BFDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"bfdot",  0b0100010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x2>;
473defm BFDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"bfdot",  0b0100010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x4>;
474
475defm BFVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfvdot", 0b01,  0b0011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fvdot_lane_za32_vg1x2>;
476
477defm FVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fvdot", 0b01, 0b0001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fvdot_lane_za32_vg1x2>;
478
479defm SDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b01,  0b1000, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x2>;
480defm SDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b01, 0b1100, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x2>;
481defm SDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1000, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x4>;
482defm SDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x4>;
483defm SDOT_VG2_M2ZZ_HtoS  : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010101, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x2>;
484defm SDOT_VG4_M4ZZ_HtoS  : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110101, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x4>;
485defm SDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b1101001, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x2>;
486defm SDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b1101001, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x4>;
487defm SDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b0010100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x2>;
488defm SDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b0110100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x4>;
489defm SDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b0101000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x2>;
490defm SDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b0101000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x4>;
491
492defm SUDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sudot", 0b01, 0b1111, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x2>;
493defm SUDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sudot", 0b1111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x4>;
494defm SUDOT_VG2_M2ZZ_BToS  : sme2_dot_mla_add_sub_array_vg2_single<"sudot", 0b0010111, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x2>;
495defm SUDOT_VG4_M4ZZ_BToS  : sme2_dot_mla_add_sub_array_vg4_single<"sudot", 0b0110111, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x4>;
496
497defm SVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"svdot", 0b01, 0b0100, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za32_vg1x2>;
498defm SVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"svdot", 0b0100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_svdot_lane_za32_vg1x4>;
499
500defm SUVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"suvdot", 0b0111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_suvdot_lane_za32_vg1x4>;
501
502defm UDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b01, 0b1010, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x2>;
503defm UDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b01, 0b1110, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x2>;
504defm UDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x4>;
505defm UDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1010, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x4>;
506defm UDOT_VG2_M2ZZ_HtoS  : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010111, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x2>;
507defm UDOT_VG4_M4ZZ_HtoS  : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110111, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x4>;
508defm UDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b1101011, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x2>;
509defm UDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b1101011, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x4>;
510defm UDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b0010110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x2>;
511defm UDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b0110110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x4>;
512defm UDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b0101010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x2>;
513defm UDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b0101010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x4>;
514
515defm USDOT_VG2_M2ZZI_BToS: sme2_multi_vec_array_vg2_index_32b<"usdot", 0b01, 0b1101, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x2>;
516defm USDOT_VG4_M4ZZI_BToS: sme2_multi_vec_array_vg4_index_32b<"usdot", 0b1101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x4>;
517defm USDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"usdot",  0b0010101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x2>;
518defm USDOT_VG4_M4ZZ_BToS  : sme2_dot_mla_add_sub_array_vg4_single<"usdot", 0b0110101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x4>;
519defm USDOT_VG2_M2Z2Z_BToS : sme2_dot_mla_add_sub_array_vg2_multi<"usdot", 0b0101001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x2>;
520defm USDOT_VG4_M4Z4Z_BToS : sme2_dot_mla_add_sub_array_vg4_multi<"usdot", 0b0101001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x4>;
521
522defm USVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"usvdot", 0b0101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usvdot_lane_za32_vg1x4>;
523
524defm UVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"uvdot", 0b01, 0b0110, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za32_vg1x2>;
525defm UVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"uvdot", 0b0110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_uvdot_lane_za32_vg1x4>;
526
527defm SMLALL_MZZI_BtoS      : sme2_mla_ll_array_index_32b<"smlall", 0b00, 0b000, int_aarch64_sme_smla_za32_lane_vg4x1>;
528defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b00, 0b000, int_aarch64_sme_smla_za32_lane_vg4x2>;
529defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b00, 0b0000, int_aarch64_sme_smla_za32_lane_vg4x4>;
530defm SMLALL_MZZ_BtoS       : sme2_mla_ll_array_single<"smlall", 0b00000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x1>;
531defm SMLALL_VG2_M2ZZ_BtoS  : sme2_mla_ll_array_vg2_single<"smlall", 0b00000, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x2>;
532defm SMLALL_VG4_M4ZZ_BtoS  : sme2_mla_ll_array_vg4_single<"smlall", 0b01000, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x4>;
533defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b00000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x2>;
534defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b00000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x4>;
535
536defm USMLALL_MZZI_BtoS      : sme2_mla_ll_array_index_32b<"usmlall", 0b00, 0b001, int_aarch64_sme_usmla_za32_lane_vg4x1>;
537defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b00, 0b100, int_aarch64_sme_usmla_za32_lane_vg4x2>;
538defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b00, 0b0100, int_aarch64_sme_usmla_za32_lane_vg4x4>;
539defm USMLALL_MZZ_BtoS       : sme2_mla_ll_array_single<"usmlall", 0b00001, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x1>;
540defm USMLALL_VG2_M2ZZ_BtoS  : sme2_mla_ll_array_vg2_single<"usmlall", 0b00001, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x2>;
541defm USMLALL_VG4_M4ZZ_BtoS  : sme2_mla_ll_array_vg4_single<"usmlall", 0b01001, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x4>;
542defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b00001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x2>;
543defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b00001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x4>;
544
545defm SMLSLL_MZZI_BtoS      : sme2_mla_ll_array_index_32b<"smlsll", 0b00, 0b010, int_aarch64_sme_smls_za32_lane_vg4x1>;
546defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b00, 0b001, int_aarch64_sme_smls_za32_lane_vg4x2>;
547defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b00, 0b0001, int_aarch64_sme_smls_za32_lane_vg4x4>;
548defm SMLSLL_MZZ_BtoS       : sme2_mla_ll_array_single<"smlsll", 0b00010, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x1>;
549defm SMLSLL_VG2_M2ZZ_BtoS  : sme2_mla_ll_array_vg2_single<"smlsll", 0b00010, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x2>;
550defm SMLSLL_VG4_M4ZZ_BtoS  : sme2_mla_ll_array_vg4_single<"smlsll", 0b01010, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x4>;
551defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b00010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x2>;
552defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b00010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x4>;
553
554defm UMLALL_MZZI_BtoS      : sme2_mla_ll_array_index_32b<"umlall", 0b00, 0b100, int_aarch64_sme_umla_za32_lane_vg4x1>;
555defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b00, 0b010, int_aarch64_sme_umla_za32_lane_vg4x2>;
556defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b00, 0b0010, int_aarch64_sme_umla_za32_lane_vg4x4>;
557defm UMLALL_MZZ_BtoS       : sme2_mla_ll_array_single<"umlall", 0b00100, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x1>;
558defm UMLALL_VG2_M2ZZ_BtoS  : sme2_mla_ll_array_vg2_single<"umlall", 0b00100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x2>;
559defm UMLALL_VG4_M4ZZ_BtoS  : sme2_mla_ll_array_vg4_single<"umlall", 0b01100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x4>;
560defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b00100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x2>;
561defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b00100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x4>;
562
563defm SUMLALL_MZZI_BtoS      : sme2_mla_ll_array_index_32b<"sumlall", 0b00, 0b101, int_aarch64_sme_sumla_za32_lane_vg4x1>;
564defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b00, 0b110, int_aarch64_sme_sumla_za32_lane_vg4x2>;
565defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b00, 0b0110, int_aarch64_sme_sumla_za32_lane_vg4x4>;
566defm SUMLALL_VG2_M2ZZ_BtoS  : sme2_mla_ll_array_vg2_single<"sumlall", 0b00101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x2>;
567defm SUMLALL_VG4_M4ZZ_BtoS  : sme2_mla_ll_array_vg4_single<"sumlall", 0b01101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x4>;
568
569defm UMLSLL_MZZI_BtoS      : sme2_mla_ll_array_index_32b<"umlsll", 0b00, 0b110, int_aarch64_sme_umls_za32_lane_vg4x1>;
570defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b00, 0b011, int_aarch64_sme_umls_za32_lane_vg4x2>;
571defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b00, 0b0011, int_aarch64_sme_umls_za32_lane_vg4x4>;
572defm UMLSLL_MZZ_BtoS       : sme2_mla_ll_array_single<"umlsll", 0b00110, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x1>;
573defm UMLSLL_VG2_M2ZZ_BtoS  : sme2_mla_ll_array_vg2_single<"umlsll", 0b00110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x2>;
574defm UMLSLL_VG4_M4ZZ_BtoS  : sme2_mla_ll_array_vg4_single<"umlsll", 0b01110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x4>;
575defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b00110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x2>;
576defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b00110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x4>;
577
578defm BMOPA_MPPZZ_S : sme2_int_bmopx_tile<"bmopa", 0b100, int_aarch64_sme_bmopa_za32>;
579defm BMOPS_MPPZZ_S : sme2_int_bmopx_tile<"bmops", 0b101, int_aarch64_sme_bmops_za32>;
580
581defm SMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"smopa", 0b000, int_aarch64_sme_smopa_za32>;
582defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001, int_aarch64_sme_smops_za32>;
583
584defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100, int_aarch64_sme_umopa_za32>;
585defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops_za32>;
586
587def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>;
588def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>;
589
590defm LUTI2_ZTZI  : sme2_luti2_vector_index<"luti2", int_aarch64_sme_luti2_lane_zt>;
591defm LUTI2_2ZTZI : sme2_luti2_vector_vg2_index<"luti2">;
592defm LUTI2_4ZTZI : sme2_luti2_vector_vg4_index<"luti2">;
593
594defm LUTI4_ZTZI  : sme2_luti4_vector_index<"luti4", int_aarch64_sme_luti4_lane_zt>;
595defm LUTI4_2ZTZI : sme2_luti4_vector_vg2_index<"luti4">;
596defm LUTI4_4ZTZI : sme2_luti4_vector_vg4_index<"luti4">;
597
598defm SUNPK_VG2_2ZZ  : sme2_unpk_vector_vg2<"sunpk", 0b0>;
599defm SUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"sunpk", 0b0>;
600defm UUNPK_VG2_2ZZ  : sme2_unpk_vector_vg2<"uunpk", 0b1>;
601defm UUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"uunpk", 0b1>;
602
603defm ZIP_VG2_2ZZZ : sme2_zip_vector_vg2<"zip", 0b0>;
604defm UZP_VG2_2ZZZ : sme2_zip_vector_vg2<"uzp", 0b1>;
605defm ZIP_VG4_4Z4Z : sme2_zip_vector_vg4<"zip", 0b0110000>;
606defm UZP_VG4_4Z4Z : sme2_zip_vector_vg4<"uzp", 0b0110001>;
607defm ZIP_VG4_4Z4Z_Q : sme2_zip_vector_vg4_Q<"zip", 0b0111000>;
608defm UZP_VG4_4Z4Z_Q : sme2_zip_vector_vg4_Q<"uzp", 0b0111001>;
609
610defm FRINTA_2Z2Z: sme2_frint_vector_vg2_multi<"frinta", 0b11000>;
611defm FRINTA_4Z4Z: sme2_frint_vector_vg4_multi<"frinta", 0b1100000>;
612defm FRINTM_2Z2Z: sme2_frint_vector_vg2_multi<"frintm", 0b10100>;
613defm FRINTM_4Z4Z: sme2_frint_vector_vg4_multi<"frintm", 0b1010000>;
614defm FRINTN_2Z2Z: sme2_frint_vector_vg2_multi<"frintn", 0b10000>;
615defm FRINTN_4Z4Z: sme2_frint_vector_vg4_multi<"frintn", 0b1000000>;
616defm FRINTP_2Z2Z: sme2_frint_vector_vg2_multi<"frintp", 0b10010>;
617defm FRINTP_4Z4Z: sme2_frint_vector_vg4_multi<"frintp", 0b1001000>;
618
619defm MOVA_MXI2Z   : sme2_mova_vec_to_tile_vg2_multi<"mova", int_aarch64_sme_write_hor_vg2, int_aarch64_sme_write_ver_vg2>;
620defm MOVA_MXI4Z   : sme2_mova_vec_to_tile_vg4_multi<"mova", int_aarch64_sme_write_hor_vg4, int_aarch64_sme_write_ver_vg4>;
621defm MOVA_2ZMXI  : sme2_mova_tile_to_vec_vg2_multi<"mova">;
622defm MOVA_4ZMXI : sme2_mova_tile_to_vec_vg4_multi<"mova">;
623
624defm MOVA_VG2_MXI2Z  : sme2_mova_vec_to_array_vg2_multi<"mova", int_aarch64_sme_write_vg1x2>;
625defm MOVA_VG4_MXI4Z  : sme2_mova_vec_to_array_vg4_multi<"mova", int_aarch64_sme_write_vg1x4>;
626defm MOVA_VG2_2ZMXI : sme2_mova_array_to_vec_vg2_multi<0b000, "mova">;
627defm MOVA_VG4_4ZMXI : sme2_mova_array_to_vec_vg4_multi<0b1000, "mova">;
628
629defm SQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshr", 0b0, 0b0, int_aarch64_sve_sqrshr_x2>;
630defm SQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshr", 0b000, int_aarch64_sve_sqrshr_x4>;
631
632defm UQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"uqrshr", 0b0, 0b1, int_aarch64_sve_uqrshr_x2>;
633defm UQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshr", 0b001, int_aarch64_sve_uqrshr_x4>;
634
635defm SQRSHRU_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshru", 0b1, 0b0, int_aarch64_sve_sqrshru_x2>;
636defm SQRSHRU_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshru", 0b010, int_aarch64_sve_sqrshru_x4>;
637
638defm SQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrn", 0b100, int_aarch64_sve_sqrshrn_x4>;
639defm UQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshrn", 0b101, int_aarch64_sve_uqrshrn_x4>;
640defm SQRSHRUN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrun", 0b110, int_aarch64_sve_sqrshrun_x4>;
641
642defm SEL_VG2_2ZC2Z2Z: sme2_sel_vector_vg2<"sel">;
643defm SEL_VG4_4ZC4Z4Z: sme2_sel_vector_vg4<"sel">;
644
645def  LD1B_2Z_STRIDED     : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b0,    ZZ_b_strided,    GPR64shifted8, "ld1b">;
646def  LD1B_4Z_STRIDED     : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b0,    ZZZZ_b_strided,  GPR64shifted8, "ld1b">;
647defm LD1B_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b00, 0b0, ZZ_b_strided,    simm4s2, "ld1b">;
648defm LD1B_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b00, 0b0, ZZZZ_b_strided,  simm4s4, "ld1b">;
649def  LD1H_2Z_STRIDED     : sme2_ld_vector_vg2_multi_scalar_scalar<0b01, 0b0,    ZZ_h_strided,    GPR64shifted16, "ld1h">;
650def  LD1H_4Z_STRIDED     : sme2_ld_vector_vg4_multi_scalar_scalar<0b01, 0b0,    ZZZZ_h_strided,  GPR64shifted16, "ld1h">;
651defm LD1H_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b01, 0b0, ZZ_h_strided,    simm4s2, "ld1h">;
652defm LD1H_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b01, 0b0, ZZZZ_h_strided,  simm4s4, "ld1h">;
653def  LD1W_2Z_STRIDED     : sme2_ld_vector_vg2_multi_scalar_scalar<0b10, 0b0,    ZZ_s_strided,    GPR64shifted32, "ld1w">;
654def  LD1W_4Z_STRIDED     : sme2_ld_vector_vg4_multi_scalar_scalar<0b10, 0b0,    ZZZZ_s_strided,  GPR64shifted32, "ld1w">;
655defm LD1W_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b10, 0b0, ZZ_s_strided,    simm4s2, "ld1w">;
656defm LD1W_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b10, 0b0, ZZZZ_s_strided,  simm4s4, "ld1w">;
657def  LD1D_2Z_STRIDED     : sme2_ld_vector_vg2_multi_scalar_scalar<0b11, 0b0,    ZZ_d_strided,    GPR64shifted64, "ld1d">;
658def  LD1D_4Z_STRIDED     : sme2_ld_vector_vg4_multi_scalar_scalar<0b11, 0b0,    ZZZZ_d_strided,  GPR64shifted64, "ld1d">;
659defm LD1D_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b11, 0b0, ZZ_d_strided,    simm4s2, "ld1d">;
660defm LD1D_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b11, 0b0, ZZZZ_d_strided,  simm4s4, "ld1d">;
661
662def  LDNT1B_2Z_STRIDED     : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b1,    ZZ_b_strided,   GPR64shifted8, "ldnt1b">;
663def  LDNT1B_4Z_STRIDED     : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b1,    ZZZZ_b_strided, GPR64shifted8, "ldnt1b">;
664defm LDNT1B_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b00, 0b1, ZZ_b_strided,   simm4s2, "ldnt1b">;
665defm LDNT1B_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b00, 0b1, ZZZZ_b_strided, simm4s4, "ldnt1b">;
666def  LDNT1H_2Z_STRIDED     : sme2_ld_vector_vg2_multi_scalar_scalar<0b01, 0b1,    ZZ_h_strided,   GPR64shifted16, "ldnt1h">;
667def  LDNT1H_4Z_STRIDED     : sme2_ld_vector_vg4_multi_scalar_scalar<0b01, 0b1,    ZZZZ_h_strided, GPR64shifted16, "ldnt1h">;
668defm LDNT1H_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b01, 0b1, ZZ_h_strided,   simm4s2, "ldnt1h">;
669defm LDNT1H_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b01, 0b1, ZZZZ_h_strided, simm4s4, "ldnt1h">;
670def  LDNT1W_2Z_STRIDED     : sme2_ld_vector_vg2_multi_scalar_scalar<0b10, 0b1,    ZZ_s_strided,   GPR64shifted32, "ldnt1w">;
671def  LDNT1W_4Z_STRIDED     : sme2_ld_vector_vg4_multi_scalar_scalar<0b10, 0b1,    ZZZZ_s_strided, GPR64shifted32, "ldnt1w">;
672defm LDNT1W_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b10, 0b1, ZZ_s_strided,   simm4s2, "ldnt1w">;
673defm LDNT1W_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b10, 0b1, ZZZZ_s_strided, simm4s4, "ldnt1w">;
674def  LDNT1D_2Z_STRIDED     : sme2_ld_vector_vg2_multi_scalar_scalar<0b11, 0b1,    ZZ_d_strided,   GPR64shifted64, "ldnt1d">;
675def  LDNT1D_4Z_STRIDED     : sme2_ld_vector_vg4_multi_scalar_scalar<0b11, 0b1,    ZZZZ_d_strided, GPR64shifted64, "ldnt1d">;
676defm LDNT1D_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b11, 0b1, ZZ_d_strided,   simm4s2, "ldnt1d">;
677defm LDNT1D_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b11, 0b1, ZZZZ_d_strided, simm4s4, "ldnt1d">;
678
679def  ST1B_2Z_STRIDED     : sme2_st_vector_vg2_multi_scalar_scalar<0b00, 0b0,     ZZ_b_strided,   GPR64shifted8, "st1b">;
680def  ST1B_4Z_STRIDED     : sme2_st_vector_vg4_multi_scalar_scalar<0b00, 0b0,     ZZZZ_b_strided, GPR64shifted8, "st1b">;
681defm ST1B_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b00, 0b0,  ZZ_b_strided,   simm4s2, "st1b">;
682defm ST1B_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b00, 0b0,  ZZZZ_b_strided, simm4s4, "st1b">;
683def  ST1H_2Z_STRIDED     : sme2_st_vector_vg2_multi_scalar_scalar<0b01, 0b0,     ZZ_h_strided,   GPR64shifted16, "st1h">;
684def  ST1H_4Z_STRIDED     : sme2_st_vector_vg4_multi_scalar_scalar<0b01, 0b0,     ZZZZ_h_strided, GPR64shifted16, "st1h">;
685defm ST1H_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b01, 0b0,  ZZ_h_strided,   simm4s2, "st1h">;
686defm ST1H_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b01, 0b0,  ZZZZ_h_strided, simm4s4, "st1h">;
687def  ST1W_2Z_STRIDED     : sme2_st_vector_vg2_multi_scalar_scalar<0b10, 0b0,     ZZ_s_strided,   GPR64shifted32, "st1w">;
688def  ST1W_4Z_STRIDED     : sme2_st_vector_vg4_multi_scalar_scalar<0b10, 0b0,     ZZZZ_s_strided, GPR64shifted32, "st1w">;
689defm ST1W_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b10, 0b0,  ZZ_s_strided,   simm4s2, "st1w">;
690defm ST1W_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b10, 0b0,  ZZZZ_s_strided, simm4s4, "st1w">;
691def  ST1D_2Z_STRIDED     : sme2_st_vector_vg2_multi_scalar_scalar<0b11, 0b0,     ZZ_d_strided,   GPR64shifted64, "st1d">;
692def  ST1D_4Z_STRIDED     : sme2_st_vector_vg4_multi_scalar_scalar<0b11, 0b0,     ZZZZ_d_strided, GPR64shifted64, "st1d">;
693defm ST1D_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b11, 0b0,  ZZ_d_strided,   simm4s2, "st1d">;
694defm ST1D_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b0,  ZZZZ_d_strided, simm4s4, "st1d">;
695
696def  STNT1B_2Z_STRIDED     : sme2_st_vector_vg2_multi_scalar_scalar<0b00, 0b1,    ZZ_b_strided,   GPR64shifted8, "stnt1b">;
697def  STNT1B_4Z_STRIDED     : sme2_st_vector_vg4_multi_scalar_scalar<0b00, 0b1,    ZZZZ_b_strided, GPR64shifted8, "stnt1b">;
698defm STNT1B_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b00, 0b1, ZZ_b_strided,   simm4s2, "stnt1b">;
699defm STNT1B_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b00, 0b1, ZZZZ_b_strided, simm4s4, "stnt1b">;
700def  STNT1H_2Z_STRIDED     : sme2_st_vector_vg2_multi_scalar_scalar<0b01, 0b1,    ZZ_h_strided,   GPR64shifted16, "stnt1h">;
701def  STNT1H_4Z_STRIDED     : sme2_st_vector_vg4_multi_scalar_scalar<0b01, 0b1,    ZZZZ_h_strided, GPR64shifted16, "stnt1h">;
702defm STNT1H_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b01, 0b1, ZZ_h_strided,   simm4s2, "stnt1h">;
703defm STNT1H_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b01, 0b1, ZZZZ_h_strided, simm4s4, "stnt1h">;
704def  STNT1W_2Z_STRIDED     : sme2_st_vector_vg2_multi_scalar_scalar<0b10, 0b1,    ZZ_s_strided,   GPR64shifted32, "stnt1w">;
705def  STNT1W_4Z_STRIDED     : sme2_st_vector_vg4_multi_scalar_scalar<0b10, 0b1,    ZZZZ_s_strided, GPR64shifted32, "stnt1w">;
706defm STNT1W_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b10, 0b1, ZZ_s_strided,   simm4s2, "stnt1w">;
707defm STNT1W_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b10, 0b1, ZZZZ_s_strided, simm4s4, "stnt1w">;
708def  STNT1D_2Z_STRIDED     : sme2_st_vector_vg2_multi_scalar_scalar<0b11, 0b1,    ZZ_d_strided,   GPR64shifted64, "stnt1d">;
709def  STNT1D_4Z_STRIDED     : sme2_st_vector_vg4_multi_scalar_scalar<0b11, 0b1,    ZZZZ_d_strided, GPR64shifted64, "stnt1d">;
710defm STNT1D_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b11, 0b1, ZZ_d_strided,   simm4s2, "stnt1d">;
711defm STNT1D_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b1, ZZZZ_d_strided, simm4s4, "stnt1d">;
712} // End let Predicates = [HasSME2]
713
714
715let Predicates = [HasSME2andIsNonStreamingSafe] in {
716defm ZERO_T : sme2_zero_zt<"zero", 0b0001>;
717
718defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, AArch64_restore_zt>;
719defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, AArch64_save_zt>;
720} // End let Predicates = [HasSME2andIsNonStreamingSafe]
721
722let Predicates = [HasSME2, HasSMEI16I64] in {
723defm ADD_VG2_M2ZZ_D  : sme2_dot_mla_add_sub_array_vg2_single<"add", 0b1011010, MatrixOp64, ZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_add_write_single_za_vg1x2>;
724defm ADD_VG4_M4ZZ_D  : sme2_dot_mla_add_sub_array_vg4_single<"add", 0b1111010, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_add_write_single_za_vg1x4>;
725defm ADD_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b1110010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x2>;
726defm ADD_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b1110010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x4>;
727
728defm SUB_VG2_M2ZZ_D  : sme2_dot_mla_add_sub_array_vg2_single<"sub", 0b1011011, MatrixOp64, ZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_sub_write_single_za_vg1x2>;
729defm SUB_VG4_M4ZZ_D  : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b1111011, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_sub_write_single_za_vg1x4>;
730defm SUB_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b1110011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x2>;
731defm SUB_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b1110011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x4>;
732
733defm ADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"add", 0b1010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x2>;
734defm ADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"add", 0b1010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x4>;
735
736defm SUB_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"sub", 0b1011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_za64_vg1x2>;
737defm SUB_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"sub", 0b1011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_za64_vg1x4>;
738
739defm SDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"sdot", 0b01, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za64_vg1x2>;
740defm SDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"sdot", 0b001, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za64_vg1x4>;
741defm SDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x2>;
742defm SDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x4>;
743defm SDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b1101000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x2>;
744defm SDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b1101000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x4>;
745
746defm SVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"svdot", 0b101, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za64_vg1x4>;
747
748defm UDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"udot", 0b11, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za64_vg1x2>;
749defm UDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"udot", 0b011, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za64_vg1x4>;
750defm UDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x2>;
751defm UDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x4>;
752defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b1101010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x2>;
753defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b1101010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x4>;
754
755defm UVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"uvdot", 0b111, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za64_vg1x4>;
756
757defm SMLALL_MZZI_HtoD      : sme2_mla_ll_array_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x1>;
758defm SMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x2>;
759defm SMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x4>;
760defm SMLALL_MZZ_HtoD       : sme2_mla_ll_array_single<"smlall", 0b10000, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x1>;
761defm SMLALL_VG2_M2ZZ_HtoD  : sme2_mla_ll_array_vg2_single<"smlall", 0b10000, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x2>;
762defm SMLALL_VG4_M4ZZ_HtoD  : sme2_mla_ll_array_vg4_single<"smlall", 0b11000, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x4>;
763defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall",  0b10000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x2>;
764defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall",  0b10000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x4>;
765
766defm SMLSLL_MZZI_HtoD      : sme2_mla_ll_array_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x1>;
767defm SMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x2>;
768defm SMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x4>;
769defm SMLSLL_MZZ_HtoD       : sme2_mla_ll_array_single<"smlsll", 0b10010, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x1>;
770defm SMLSLL_VG2_M2ZZ_HtoD  : sme2_mla_ll_array_vg2_single<"smlsll", 0b10010, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x2>;
771defm SMLSLL_VG4_M4ZZ_HtoD  : sme2_mla_ll_array_vg4_single<"smlsll", 0b11010, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x4>;
772defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll",  0b10010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x2>;
773defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlsll",  0b10010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x4>;
774
775defm UMLALL_MZZI_HtoD      : sme2_mla_ll_array_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x1>;
776defm UMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x2>;
777defm UMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x4>;
778defm UMLALL_MZZ_HtoD       : sme2_mla_ll_array_single<"umlall", 0b10100, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x1>;
779defm UMLALL_VG2_M2ZZ_HtoD  : sme2_mla_ll_array_vg2_single<"umlall", 0b10100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x2>;
780defm UMLALL_VG4_M4ZZ_HtoD  : sme2_mla_ll_array_vg4_single<"umlall", 0b11100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x4>;
781defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall",  0b10100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x2>;
782defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall",  0b10100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x4>;
783
784defm UMLSLL_MZZI_HtoD      : sme2_mla_ll_array_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x1>;
785defm UMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x2>;
786defm UMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x4>;
787defm UMLSLL_MZZ_HtoD       : sme2_mla_ll_array_single<"umlsll", 0b10110, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x1>;
788defm UMLSLL_VG2_M2ZZ_HtoD  : sme2_mla_ll_array_vg2_single<"umlsll", 0b10110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x2>;
789defm UMLSLL_VG4_M4ZZ_HtoD  : sme2_mla_ll_array_vg4_single<"umlsll", 0b11110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x4>;
790defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll",  0b10110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x2>;
791defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll",  0b10110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x4>;
792}
793
794let Predicates = [HasSME2, HasSMEF64F64] in {
795defm FMLA_VG2_M2ZZI_D : sme2_multi_vec_array_vg2_index_64b<"fmla", 0b00, ZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_lane_vg1x2>;
796defm FMLA_VG4_M4ZZI_D : sme2_multi_vec_array_vg4_index_64b<"fmla", 0b000, ZZZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_lane_vg1x4>;
797defm FMLA_VG2_M2ZZ_D  : sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b1011000, MatrixOp64, ZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_single_vg1x2>;
798defm FMLA_VG4_M4ZZ_D  : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b1111000, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_single_vg1x4>;
799defm FMLA_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b1110000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x2>;
800defm FMLA_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b1110000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x4>;
801
802defm FMLS_VG2_M2ZZI_D : sme2_multi_vec_array_vg2_index_64b<"fmls", 0b10, ZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_lane_vg1x2>;
803defm FMLS_VG4_M4ZZI_D : sme2_multi_vec_array_vg4_index_64b<"fmls", 0b010, ZZZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_lane_vg1x4>;
804defm FMLS_VG2_M2ZZ_D  : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b1011001, MatrixOp64, ZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_single_vg1x2>;
805defm FMLS_VG4_M4ZZ_D  : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b1111001, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_single_vg1x4>;
806defm FMLS_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b1110001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x2>;
807defm FMLS_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b1110001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x4>;
808
809defm FADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fadd", 0b1000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x2>;
810defm FADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fadd", 0b1000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x4>;
811
812defm FSUB_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fsub", 0b1001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_sub_za64_vg1x2>;
813defm FSUB_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fsub", 0b1001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_sub_za64_vg1x4>;
814}
815
816let Predicates = [HasSME2p1] in {
817defm MOVAZ_ZMI  : sme2p1_movaz_tile_to_vec<"movaz", int_aarch64_sme_readz_horiz, int_aarch64_sme_readz_vert,
818                                           int_aarch64_sme_readz_q_horiz, int_aarch64_sme_readz_q_vert>;
819defm MOVAZ_2ZMI  : sme2p1_movaz_tile_to_vec_vg2<"movaz">;
820defm MOVAZ_4ZMI  : sme2p1_movaz_tile_to_vec_vg4<"movaz">;
821defm MOVAZ_VG2_2ZMXI : sme2_movaz_array_to_vec_vg2_multi<"movaz">;
822defm MOVAZ_VG4_4ZMXI : sme2_movaz_array_to_vec_vg4_multi<"movaz">;
823
824defm ZERO_MXI :  sme2p1_zero_matrix<"zero">;
825
826defm LUTI2_S_2ZTZI : sme2p1_luti2_vector_vg2_index<"luti2">;
827defm LUTI2_S_4ZTZI : sme2p1_luti2_vector_vg4_index<"luti2">;
828
829defm LUTI4_S_2ZTZI : sme2p1_luti4_vector_vg2_index<"luti4">;
830defm LUTI4_S_4ZTZI : sme2p1_luti4_vector_vg4_index<"luti4">;
831}
832
833let Predicates = [HasSMEF16F16orSMEF8F16] in {
834defm FADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0100, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_add_za16_vg1x2>;
835defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16, ZZZZ_h_mul_r, nxv8f16,  int_aarch64_sme_add_za16_vg1x4>;
836defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r, nxv8f16,  int_aarch64_sme_sub_za16_vg1x2>;
837defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r, nxv8f16,  int_aarch64_sme_sub_za16_vg1x4>;
838
839defm FMLA_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmla", 0b00, 0b100, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fmla_lane_vg1x2>;
840defm FMLA_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmla", 0b000, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fmla_lane_vg1x4>;
841defm FMLA_VG2_M2ZZ_H :  sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b0011100, MatrixOp16, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmla_single_vg1x2>;
842defm FMLA_VG4_M4ZZ_H :  sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b0111100, MatrixOp16, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmla_single_vg1x4>;
843defm FMLA_VG2_M2Z4Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b0100001, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmla_vg1x2>;
844defm FMLA_VG4_M4Z4Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b0100001, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmla_vg1x4>;
845
846defm FMLS_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmls", 0b00, 0b101, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fmls_lane_vg1x2>;
847defm FMLS_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmls", 0b001, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fmls_lane_vg1x4>;
848defm FMLS_VG2_M2ZZ_H :  sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b0011101, MatrixOp16, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmls_single_vg1x2>;
849defm FMLS_VG4_M4ZZ_H :  sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b0111101, MatrixOp16, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmls_single_vg1x4>;
850defm FMLS_VG2_M2Z2Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b0100011, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmls_vg1x2>;
851defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmls_vg1x4>;
852
853defm FCVT_2ZZ_H  : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>;
854defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>;
855
856defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, nxv8f16, int_aarch64_sme_mopa>;
857defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, nxv8f16, int_aarch64_sme_mops>;
858}
859
860let Predicates = [HasSME2, HasB16B16] in {
861defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16,    int_aarch64_sme_add_za16_vg1x2>;
862defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16,  int_aarch64_sme_add_za16_vg1x4>;
863defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r,  nxv8bf16,   int_aarch64_sme_sub_za16_vg1x2>;
864defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r,  nxv8bf16, int_aarch64_sme_sub_za16_vg1x4>;
865
866defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b00, 0b110, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fmla_lane_vg1x2>;
867defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b010, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fmla_lane_vg1x4>;
868defm BFMLA_VG2_M2ZZ  : sme2_dot_mla_add_sub_array_vg2_single<"bfmla", 0b1011100, MatrixOp16, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmla_single_vg1x2>;
869defm BFMLA_VG4_M4ZZ  : sme2_dot_mla_add_sub_array_vg4_single<"bfmla", 0b1111100, MatrixOp16, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmla_single_vg1x4>;
870defm BFMLA_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmla", 0b1100001, MatrixOp16, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmla_vg1x2>;
871defm BFMLA_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmla", 0b1100001, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmla_vg1x4>;
872
873defm BFMLS_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmls", 0b00, 0b111, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fmls_lane_vg1x2>;
874defm BFMLS_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmls", 0b011, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fmls_lane_vg1x4>;
875defm BFMLS_VG2_M2ZZ  : sme2_dot_mla_add_sub_array_vg2_single<"bfmls", 0b1011101, MatrixOp16, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmls_single_vg1x2>;
876defm BFMLS_VG4_M4ZZ  : sme2_dot_mla_add_sub_array_vg4_single<"bfmls", 0b1111101, MatrixOp16, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmls_single_vg1x4>;
877defm BFMLS_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmls", 0b1100011, MatrixOp16, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmls_vg1x2>;
878defm BFMLS_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmls", 0b1100011, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmls_vg1x4>;
879
880defm BFMAX_VG2_2ZZ  : sme2p1_bf_max_min_vector_vg2_single<"bfmax", 0b0010000>;
881defm BFMAX_VG4_4ZZ  : sme2p1_bf_max_min_vector_vg4_single<"bfmax", 0b0010000>;
882defm BFMAX_VG2_2Z2Z : sme2p1_bf_max_min_vector_vg2_multi<"bfmax",  0b0010000>;
883defm BFMAX_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfmax",  0b0010000>;
884
885defm BFMIN_VG2_2ZZ  : sme2p1_bf_max_min_vector_vg2_single<"bfmin", 0b0010001>;
886defm BFMIN_VG4_4ZZ  : sme2p1_bf_max_min_vector_vg4_single<"bfmin", 0b0010001>;
887defm BFMIN_VG2_2Z2Z : sme2p1_bf_max_min_vector_vg2_multi<"bfmin",  0b0010001>;
888defm BFMIN_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfmin",  0b0010001>;
889
890defm BFMAXNM_VG2_2ZZ  : sme2p1_bf_max_min_vector_vg2_single<"bfmaxnm", 0b0010010>;
891defm BFMAXNM_VG4_4ZZ  : sme2p1_bf_max_min_vector_vg4_single<"bfmaxnm", 0b0010010>;
892defm BFMAXNM_VG2_2Z2Z : sme2p1_bf_max_min_vector_vg2_multi<"bfmaxnm",  0b0010010>;
893defm BFMAXNM_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfmaxnm",  0b0010010>;
894
895defm BFMINNM_VG2_2ZZ  : sme2p1_bf_max_min_vector_vg2_single<"bfminnm", 0b0010011>;
896defm BFMINNM_VG4_4ZZ  : sme2p1_bf_max_min_vector_vg4_single<"bfminnm", 0b0010011>;
897defm BFMINNM_VG2_2Z2Z : sme2p1_bf_max_min_vector_vg2_multi<"bfminnm",  0b0010011>;
898defm BFMINNM_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfminnm",  0b0010011>;
899
900defm BFCLAMP_VG2_2ZZZ: sme2p1_bfclamp_vector_vg2_multi<"bfclamp">;
901defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">;
902
903defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, nxv8bf16, int_aarch64_sme_mopa>;
904defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, nxv8bf16, int_aarch64_sme_mops>;
905}
906
907let Predicates = [HasSME2, HasFP8] in {
908defm F1CVT_2ZZ_BtoH    : sme2p1_fp8_cvt_vector_vg2_single<"f1cvt",   0b00, 0b0>;
909defm F1CVTL_2ZZ_BtoH   : sme2p1_fp8_cvt_vector_vg2_single<"f1cvtl",  0b00, 0b1>;
910defm BF1CVT_2ZZ_BtoH   : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvt",  0b01, 0b0>;
911defm BF1CVTL_2ZZ_BtoH  : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvtl", 0b01, 0b1>;
912defm F2CVT_2ZZ_BtoH    : sme2p1_fp8_cvt_vector_vg2_single<"f2cvt",   0b10, 0b0>;
913defm F2CVTL_2ZZ_BtoH   : sme2p1_fp8_cvt_vector_vg2_single<"f2cvtl",  0b10, 0b1>;
914defm BF2CVT_2ZZ_BtoH   : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvt",  0b11, 0b0>;
915defm BF2CVTL_2ZZ_BtoH  : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvtl", 0b11, 0b1>;
916
917defm FCVT_Z2Z_HtoB  : sme2_fp8_cvt_vg2_single<"fcvt",   0b0>;
918defm BFCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"bfcvt",  0b1>;
919defm FCVT_Z4Z_StoB  : sme2_fp8_cvt_vg4_single<"fcvt",   0b0>;
920defm FCVTN_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvtn",  0b1>;
921
922defm FSCALE_2ZZ   : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b0011000>;
923defm FSCALE_4ZZ   : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>;
924defm FSCALE_2Z2Z  : sme2_fp_sve_destructive_vector_vg2_multi<"fscale",  0b0011000>;
925defm FSCALE_4Z4Z  : sme2_fp_sve_destructive_vector_vg4_multi<"fscale",  0b0011000>;
926
927} // [HasSME2, HasFP8]
928
929let Predicates = [HasSME2, HasFAMINMAX] in {
930defm FAMAX_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famax", 0b0010100>;
931defm FAMIN_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famin", 0b0010101>;
932
933defm FAMAX_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famax", 0b0010100>;
934defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
935} //[HasSME2, HasFAMINMAX]
936
937let Predicates = [HasSME2, HasSME_LUTv2] in {
938defm MOVT : sme2_movt_zt_to_zt<"movt",  0b0011111>;
939def LUTI4_4ZZT2Z    : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
940} //[HasSME2, HasSME_LUTv2]
941
942let Predicates = [HasSME2p1, HasSME_LUTv2] in {
943def LUTI4_S_4ZZT2Z  : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;
944} //[HasSME2p1, HasSME_LUTv2]
945
946let Predicates = [HasSMEF8F16] in {
947defm FVDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_f8f16<"fvdot", 0b11, 0b110, ZZ_b_mul_r, ZPR4b8>;
948defm FDOT_VG2_M2ZZI_BtoH  : sme2p1_multi_vec_array_vg2_index_f8f16<"fdot",  0b11, 0b010, ZZ_b_mul_r, ZPR4b8>;
949defm FDOT_VG4_M4ZZI_BtoH  : sme2p1_multi_vec_array_vg4_index_f8f16<"fdot",    0b100, ZZZZ_b_mul_r, ZPR4b8>;
950defm FDOT_VG2_M2ZZ_BtoH   :  sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010001, MatrixOp16, ZZ_b, ZPR4b8>;
951defm FDOT_VG4_M4ZZ_BtoH   :  sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110001, MatrixOp16, ZZZZ_b, ZPR4b8>;
952// TODO: Replace nxv16i8 by nxv16f8
953defm FDOT_VG2_M2Z2Z_BtoH  : sme2_dot_mla_add_sub_array_vg2_multi<"fdot",    0b0100100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
954defm FDOT_VG4_M4Z4Z_BtoH  : sme2_dot_mla_add_sub_array_vg4_multi<"fdot",    0b0100100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;
955
956def  FMLAL_MZZI_BtoH      : sme2_mla_ll_array_index_16b<"fmlal", 0b11, 0b00>;
957defm FMLAL_VG2_M2ZZI_BtoH : sme2_multi_vec_array_vg2_index_16b<"fmlal", 0b10, 0b111>;
958defm FMLAL_VG4_M4ZZI_BtoH : sme2_multi_vec_array_vg4_index_16b<"fmlal", 0b10, 0b110>;
959def  FMLAL_VG2_MZZ_BtoH   : sme2_mla_long_array_single_16b<"fmlal">;
960// TODO: Replace nxv16i8 by nxv16f8
961defm FMLAL_VG2_M2ZZ_BtoH  : sme2_fp_mla_long_array_vg2_single<"fmlal",  0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, null_frag>;
962defm FMLAL_VG4_M4ZZ_BtoH  :  sme2_fp_mla_long_array_vg4_single<"fmlal", 0b001, MatrixOp16, ZZZZ_b, ZPR4b8, nxv16i8, null_frag>;
963defm FMLAL_VG2_M2Z2Z_BtoH : sme2_fp_mla_long_array_vg2_multi<"fmlal",   0b100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
964defm FMLAL_VG4_M4Z4Z_BtoH : sme2_fp_mla_long_array_vg4_multi<"fmlal",   0b100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;
965
966defm FMOPA_MPPZZ_BtoH     : sme2p1_fmop_tile_f8f16<"fmopa", 0b1, 0b0, 0b01>;
967
968} //[HasSMEF8F16]
969
970let Predicates = [HasSMEF8F32] in {
971// TODO : Replace nxv16i8 by nxv16f8
972defm FDOT_VG2_M2ZZI_BtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b0111, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
973defm FDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b0001, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
974defm FDOT_VG2_M2ZZ_BtoS  : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010011, MatrixOp32, ZZ_b, ZPR4b8>;
975defm FDOT_VG4_M4ZZ_BtoS  : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110011, MatrixOp32, ZZZZ_b, ZPR4b8>;
976// TODO : Replace nxv16i8 by nxv16f8
977defm FDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot",   0b0100110, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
978defm FDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot",   0b0100110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
979
980def FVDOTB_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdotb", 0b0>;
981def FVDOTT_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdott", 0b1>;
982
983defm FMLALL_MZZI_BtoS      : sme2_mla_ll_array_index_32b<"fmlall",     0b01, 0b000, null_frag>;
984defm FMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"fmlall", 0b10, 0b100, null_frag>;
985defm FMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"fmlall", 0b00, 0b1000, null_frag>;
986// TODO: Replace nxv16i8 by nxv16f8
987defm FMLALL_MZZ_BtoS       : sme2_mla_ll_array_single<"fmlall",      0b01000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, null_frag>;
988defm FMLALL_VG2_M2ZZ_BtoS  : sme2_mla_ll_array_vg24_single<"fmlall", 0b000001, MatrixOp32, ZZ_b, ZPR4b8>;
989defm FMLALL_VG4_M4ZZ_BtoS  : sme2_mla_ll_array_vg24_single<"fmlall", 0b010001, MatrixOp32, ZZZZ_b, ZPR4b8>;
990defm FMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"fmlall",   0b01000, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
991defm FMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"fmlall",   0b01000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
992
993
994defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_frag>;
995
996} //[HasSMEF8F32]
997
998