//=- AArch64SMEInstrInfo.td - AArch64 SME Instructions -*- tablegen -*-----=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Scalable Matrix Extension (SME) Instruction definitions.
//
//===----------------------------------------------------------------------===//

// SMSTART / SMSTOP nodes. Each has no results and two fixed integer operands,
// which the selection patterns in this file match as
// (i32 svcr_op:$pstate) and (i64 timm0_31:$condition). SDNPVariadic permits
// additional operands after the fixed ones.
//
// Operand 1 is constrained with SDTCisInt<1>. The profile used to repeat
// SDTCisInt<0>, which left the second operand without any type constraint.
def AArch64_smstart : SDNode<"AArch64ISD::SMSTART", SDTypeProfile<0, 2,
                             [SDTCisInt<0>, SDTCisInt<1>]>,
                             [SDNPHasChain, SDNPSideEffect, SDNPVariadic,
                              SDNPOptInGlue, SDNPOutGlue]>;
def AArch64_smstop  : SDNode<"AArch64ISD::SMSTOP", SDTypeProfile<0, 2,
                             [SDTCisInt<0>, SDTCisInt<1>]>,
                             [SDNPHasChain, SDNPSideEffect, SDNPVariadic,
                              SDNPOptInGlue, SDNPOutGlue]>;

// RESTORE_ZA: no results, three fixed operands. Only the first two are
// constrained here (operand 0 is an integer, operand 1 is pointer-typed);
// the node is variadic and accepts optional input glue.
def AArch64_restore_za : SDNode<"AArch64ISD::RESTORE_ZA", SDTypeProfile<0, 3,
                             [SDTCisInt<0>, SDTCisPtrTy<1>]>,
                             [SDNPHasChain, SDNPSideEffect, SDNPVariadic,
                              SDNPOptInGlue]>;

// RESTORE_ZT / SAVE_ZT: no results, an integer operand followed by a
// pointer-typed operand. The restore is flagged as may-load and the save as
// may-store.
def AArch64_restore_zt : SDNode<"AArch64ISD::RESTORE_ZT", SDTypeProfile<0, 2,
                             [SDTCisInt<0>, SDTCisPtrTy<1>]>,
                             [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
def AArch64_save_zt    : SDNode<"AArch64ISD::SAVE_ZT", SDTypeProfile<0, 2,
                             [SDTCisInt<0>, SDTCisPtrTy<1>]>,
                             [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;

// COALESCER_BARRIER: one result and one operand with no type constraints.
// It carries glue in both directions but has no chain.
def AArch64CoalescerBarrier
  : SDNode<"AArch64ISD::COALESCER_BARRIER", SDTypeProfile<1, 1, []>, [SDNPOptInGlue, SDNPOutGlue]>;

// VG_SAVE / VG_RESTORE: chained, side-effecting nodes with no results and no
// operands.
def AArch64VGSave : SDNode<"AArch64ISD::VG_SAVE", SDTypeProfile<0, 0, []>,
                           [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;

def AArch64VGRestore : SDNode<"AArch64ISD::VG_RESTORE", SDTypeProfile<0, 0, []>,
                              [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;

// ALLOCATE_ZA_BUFFER: one integer result, one integer operand ($size in the
// pattern below). It is selected to the AllocateZABuffer pseudo, which is
// expanded by a custom inserter and is declared as both reading and writing SP.
def AArch64AllocateZABuffer : SDNode<"AArch64ISD::ALLOCATE_ZA_BUFFER", SDTypeProfile<1, 1,
                              [SDTCisInt<0>, SDTCisInt<1>]>,
                              [SDNPHasChain, SDNPSideEffect]>;
let usesCustomInserter = 1, Defs = [SP], Uses = [SP] in {
  def AllocateZABuffer : Pseudo<(outs GPR64sp:$dst), (ins GPR64:$size), []>, Sched<[WriteI]> {}
}
def : Pat<(i64 (AArch64AllocateZABuffer GPR64:$size)),
          (AllocateZABuffer $size)>;

// INIT_TPIDR2OBJ: chained, may-store node with a single integer operand. The
// InitTPIDR2Obj pseudo matches it through its inline pattern and is expanded
// by a custom inserter.
def AArch64InitTPIDR2Obj : SDNode<"AArch64ISD::INIT_TPIDR2OBJ", SDTypeProfile<0, 1,
                              [SDTCisInt<0>]>, [SDNPHasChain, SDNPMayStore]>;
let usesCustomInserter = 1 in {
  def InitTPIDR2Obj : Pseudo<(outs), (ins GPR64:$buffer), [(AArch64InitTPIDR2Obj GPR64:$buffer)]>, Sched<[WriteI]> {}
}

//===----------------------------------------------------------------------===//
// Instruction naming conventions.
//===----------------------------------------------------------------------===//

// M = SME array register (ZA)
// P = Predicate register
// C = Predicate-as-counter register
// I = immediate
// Z = SVE vector register
// T = ZT0 register
//

//===----------------------------------------------------------------------===//
// Add vector elements horizontally or vertically to ZA tile.
//===----------------------------------------------------------------------===//

// RDSVL node: one integer result computed from one integer operand.
def SDT_AArch64RDSVL : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
def AArch64rdsvl     : SDNode<"AArch64ISD::RDSVL", SDT_AArch64RDSVL>;

let Predicates = [HasSMEandIsNonStreamingSafe] in {
def RDSVLI_XI  : sve_int_read_vl_a<0b0, 0b11111, "rdsvl", /*streaming_sve=*/0b1>;
def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>;
def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>;

// An RDSVL node whose operand is a simm6_32b immediate selects RDSVLI_XI.
def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)),
          (RDSVLI_XI simm6_32b:$imm)>;
}

// 32-bit ADDHA/ADDVA need only base SME.
let Predicates = [HasSME] in {
defm ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha", int_aarch64_sme_addha>;
defm ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva", int_aarch64_sme_addva>;
}

// The 64-bit forms are additionally gated on HasSMEI16I64.
let Predicates = [HasSMEI16I64] in {
defm ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha", int_aarch64_sme_addha>;
defm ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva", int_aarch64_sme_addva>;
}

let Predicates = [HasSME] in {
//===----------------------------------------------------------------------===//
// Outer products
//===----------------------------------------------------------------------===//

defm BFMOPA_MPPZZ  : sme_bf16_outer_product<0b000, "bfmopa", int_aarch64_sme_mopa_wide>;
defm BFMOPS_MPPZZ  : sme_bf16_outer_product<0b001, "bfmops", int_aarch64_sme_mops_wide>;

defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, 0b00, ZPR32, "fmopa", int_aarch64_sme_mopa>;
defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, 0b00, ZPR32, "fmops", int_aarch64_sme_mops>;
}

let Predicates = [HasSMEF64F64] in {
defm FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa", int_aarch64_sme_mopa>;
defm FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops", int_aarch64_sme_mops>;
}

let Predicates = [HasSME] in {
defm FMOPAL_MPPZZ   : sme_f16_outer_product<0b010, "fmopa", int_aarch64_sme_mopa_wide>;
defm FMOPSL_MPPZZ   : sme_f16_outer_product<0b011, "fmops", int_aarch64_sme_mops_wide>;

defm SMOPA_MPPZZ_S  : sme_int_outer_product_i32<0b000, "smopa", int_aarch64_sme_smopa_wide>;
defm SMOPS_MPPZZ_S  : sme_int_outer_product_i32<0b001, "smops", int_aarch64_sme_smops_wide>;
defm UMOPA_MPPZZ_S  : sme_int_outer_product_i32<0b110, "umopa", int_aarch64_sme_umopa_wide>;
defm UMOPS_MPPZZ_S  : sme_int_outer_product_i32<0b111, "umops", int_aarch64_sme_umops_wide>;
defm SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa", int_aarch64_sme_sumopa_wide>;
defm SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops", int_aarch64_sme_sumops_wide>;
defm USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa", int_aarch64_sme_usmopa_wide>;
defm USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops", int_aarch64_sme_usmops_wide>;
}

let Predicates = [HasSMEI16I64] in {
defm SMOPA_MPPZZ_D  : sme_int_outer_product_i64<0b000, "smopa", int_aarch64_sme_smopa_wide>;
defm SMOPS_MPPZZ_D  : sme_int_outer_product_i64<0b001, "smops", int_aarch64_sme_smops_wide>;
defm UMOPA_MPPZZ_D  : sme_int_outer_product_i64<0b110, "umopa", int_aarch64_sme_umopa_wide>;
defm UMOPS_MPPZZ_D  : sme_int_outer_product_i64<0b111, "umops", int_aarch64_sme_umops_wide>;
defm SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa", int_aarch64_sme_sumopa_wide>;
defm SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops", int_aarch64_sme_sumops_wide>;
defm USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa", int_aarch64_sme_usmopa_wide>;
defm USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops", int_aarch64_sme_usmops_wide>;
}

let Predicates = [HasSME] in {
//===----------------------------------------------------------------------===//
// Loads and stores
//===----------------------------------------------------------------------===//

defm LD1_MXIPXX : sme_mem_ld_ss<"ld1">;
defm ST1_MXIPXX : sme_mem_st_ss<"st1">;

//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//

defm INSERT_MXIPZ  : sme_vector_to_tile<"mova">;
defm EXTRACT_ZPMXI : sme_tile_to_vector<"mova">;
} // End let Predicates = [HasSME]

let Predicates = [HasSMEandIsNonStreamingSafe] in {
//===----------------------------------------------------------------------===//
// Spill + fill
//===----------------------------------------------------------------------===//

defm LDR_ZA : sme_fill<"ldr">;
defm STR_ZA : sme_spill<"str">;

//===----------------------------------------------------------------------===//
// Zero instruction
//===----------------------------------------------------------------------===//

defm ZERO_M : sme_zero<"zero">;

//===----------------------------------------------------------------------===//
// Mode selection and state access instructions
//===----------------------------------------------------------------------===//

// Pseudo that restores ZA state conditionally. The pseudo
//
//   pseudonode tpidr2_el0, tpidr2obj, restore_routine
//
// is expanded into
//
//   if (tpidr2_el0 == 0)
//     BL restore_routine, implicit-use tpidr2obj
//
def RestoreZAPseudo
    : Pseudo<(outs),
             (ins GPR64:$tpidr2_el0, GPR64sp:$tpidr2obj, i64imm:$restore_routine, variable_ops),
             []>,
      Sched<[]>;

// The restore routine reaches the pseudo as a target external symbol.
def : Pat<(AArch64_restore_za (i64 GPR64:$tpidr2_el0),
                              (i64 GPR64sp:$tpidr2obj),
                              (i64 texternalsym:$restore_routine)),
          (RestoreZAPseudo GPR64:$tpidr2_el0, GPR64sp:$tpidr2obj, texternalsym:$restore_routine)>;

// Read and write TPIDR2_EL0: both intrinsics select MSR/MRS with the
// system-register operand 0xde85.
def : Pat<(int_aarch64_sme_set_tpidr2 i64:$val), (MSR 0xde85, GPR64:$val)>;
def : Pat<(i64 (int_aarch64_sme_get_tpidr2)), (MRS 0xde85)>;

} // End let Predicates = [HasSMEandIsNonStreamingSafe]

// Defines one coalescer-barrier pseudo for register class `rc`, with its
// result tied to its source, plus one selection pattern per value type in
// `vts` mapping AArch64CoalescerBarrier onto that pseudo.
multiclass CoalescerBarrierPseudo<RegisterClass rc, list<ValueType> vts> {
  let Constraints = "$dst = $src" in
  def NAME : Pseudo<(outs rc:$dst), (ins rc:$src), []>, Sched<[]>;

  foreach vt = vts in
    def : Pat<(vt (AArch64CoalescerBarrier (vt rc:$src))),
              (!cast<Instruction>(NAME) rc:$src)>;
}

// Instantiates the barrier pseudo for each FPR register class, listing the
// value types handled for that class.
multiclass CoalescerBarriers {
  defm _FPR16  : CoalescerBarrierPseudo<FPR16, [bf16, f16]>;
  defm _FPR32  : CoalescerBarrierPseudo<FPR32, [f32]>;
  defm _FPR64  : CoalescerBarrierPseudo<FPR64, [f64, v8i8, v4i16, v2i32, v1i64, v4f16, v2f32, v1f64, v4bf16]>;
  defm _FPR128 : CoalescerBarrierPseudo<FPR128, [f128, v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64, v8bf16]>;
}

defm COALESCER_BARRIER : CoalescerBarriers;

// Pseudo to match to smstart/smstop. This expands:
//
//   pseudonode (pstate_za|pstate_sm), before_call, expected_value
//
// Into:
//
//   if (before_call != expected_value)
//     node (pstate_za|pstate_sm)
//
// where node can be either 'smstart' or 'smstop'.
//
// Neither this pseudo nor its patterns are predicated on SME: when they are
// emitted for streaming-compatible functions and run in a non-SME context,
// the generated code-paths will never execute any SME instructions.
//
// NOTE(review): the expansion sketch preceding this paragraph names operands
// 'before_call' and 'expected_value', but the operand list below is
// ($pstatefield, $imm, $condition, variable_ops) - confirm that the sketch
// still describes the current expansion.
let hasPostISelHook = 1, Uses = [VG], Defs = [VG] in
def MSRpstatePseudo
    : Pseudo<(outs),
             (ins svcr_op:$pstatefield, timm0_1:$imm, timm0_31:$condition, variable_ops),
             []>,
      Sched<[WriteSys]>;

// Conditional start/stop: $imm is 0b1 for smstart and 0b0 for smstop, and the
// condition immediate is passed through unchanged.
def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 timm0_31:$condition)),
          (MSRpstatePseudo svcr_op:$pstate, 0b1, timm0_31:$condition)>;
def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 timm0_31:$condition)),
          (MSRpstatePseudo svcr_op:$pstate, 0b0, timm0_31:$condition)>;

// Unconditional start/stop
def : Pat<(AArch64_smstart (i32 svcr_op:$pstate), (i64 /*AArch64SME::Always*/0)),
          (MSRpstatesvcrImm1 svcr_op:$pstate, 0b1)>;
def : Pat<(AArch64_smstop (i32 svcr_op:$pstate), (i64 /*AArch64SME::Always*/0)),
          (MSRpstatesvcrImm1 svcr_op:$pstate, 0b0)>;


// Pseudo to insert cfi_offset/cfi_restore instructions. Used to save or restore
// the streaming value of VG around streaming-mode changes in locally-streaming
// functions.
252def VGSavePseudo : Pseudo<(outs), (ins), []>, Sched<[]>; 253def : Pat<(AArch64VGSave), (VGSavePseudo)>; 254 255def VGRestorePseudo : Pseudo<(outs), (ins), []>, Sched<[]>; 256def : Pat<(AArch64VGRestore), (VGRestorePseudo)>; 257 258//===----------------------------------------------------------------------===// 259// SME2 Instructions 260//===----------------------------------------------------------------------===// 261let Predicates = [HasSME2] in { 262defm ADD_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"add", 0b0011010, MatrixOp32, ZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_add_write_single_za_vg1x2>; 263defm ADD_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"add", 0b0111010, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_add_write_single_za_vg1x4>; 264defm ADD_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b0110010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x2>; 265defm ADD_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b0110010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_write_za_vg1x4>; 266 267defm ADD_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"add", 0b0110000>; 268defm ADD_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"add", 0b0110000>; 269 270defm SUB_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"sub", 0b0011011, MatrixOp32, ZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_sub_write_single_za_vg1x2>; 271defm SUB_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b0111011, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4i32, int_aarch64_sme_sub_write_single_za_vg1x4>; 272defm SUB_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b0110011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x2>; 273defm SUB_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b0110011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_write_za_vg1x4>; 274 275defm FMLA_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b0011000, MatrixOp32, ZZ_s, 
ZPR4b32, nxv4f32, int_aarch64_sme_fmla_single_vg1x2>; 276defm FMLA_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b0111000, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_single_vg1x4>; 277defm FMLA_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b0110000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x2>; 278defm FMLA_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b0110000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmla_vg1x4>; 279defm FMLA_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmla", 0b01, 0b0000, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x2>; 280defm FMLA_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmla", 0b0000, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmla_lane_vg1x4>; 281 282defm FMLS_VG2_M2ZZ_S : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b0011001, MatrixOp32, ZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_single_vg1x2>; 283defm FMLS_VG4_M4ZZ_S : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b0111001, MatrixOp32, ZZZZ_s, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_single_vg1x4>; 284defm FMLS_VG2_M2Z2Z_S : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b0110001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x2>; 285defm FMLS_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0110001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_fmls_vg1x4>; 286defm FMLS_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmls", 0b01, 0b0010, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x2>; 287defm FMLS_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmls", 0b0010, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x4>; 288 289defm ADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"add", 0b0010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_za32_vg1x2>; 290defm ADD_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"add", 0b0010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_za32_vg1x4>; 291 292defm 
SUB_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"sub", 0b0011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_za32_vg1x2>; 293defm SUB_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"sub", 0b0011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_za32_vg1x4>; 294 295defm FADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_add_za32_vg1x2>; 296defm FADD_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_add_za32_vg1x4>; 297 298defm FSUB_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_sub_za32_vg1x2>; 299defm FSUB_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_sub_za32_vg1x4>; 300 301defm SQDMULH_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"sqdmulh", 0b1000000>; 302defm SQDMULH_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"sqdmulh", 0b1000000>; 303defm SQDMULH_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"sqdmulh", 0b1000000>; 304defm SQDMULH_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"sqdmulh", 0b1000000>; 305 306defm FMLAL_MZZI : sme2_mla_long_array_index<"fmlal", 0b10, 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x1>; 307defm FMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x2>; 308defm FMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x4>; 309defm FMLAL_MZZ : sme2_mla_long_array_single<"fmlal", 0b00, 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x1>; 310defm FMLAL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlal_single_vg2x2>; 311defm FMLAL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlal_single_vg2x4>; 312defm 
FMLAL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlal_vg2x2>; 313defm FMLAL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlal_vg2x4>; 314 315defm FMLSL_MZZI : sme2_mla_long_array_index<"fmlsl", 0b10, 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x1>; 316defm FMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x2>; 317defm FMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x4>; 318defm FMLSL_MZZ : sme2_mla_long_array_single<"fmlsl", 0b00, 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x1>; 319defm FMLSL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"fmlsl", 0b010, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x2>; 320defm FMLSL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"fmlsl", 0b010, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x4>; 321defm FMLSL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"fmlsl", 0b001, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlsl_vg2x2>; 322defm FMLSL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"fmlsl", 0b001, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmlsl_vg2x4>; 323 324defm BFMLAL_MZZI : sme2_mla_long_array_index<"bfmlal", 0b10, 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x1>; 325defm BFMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x2>; 326defm BFMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x4>; 327defm BFMLAL_MZZ : sme2_mla_long_array_single<"bfmlal", 0b00, 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x1>; 328defm BFMLAL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"bfmlal", 0b100, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x2>; 329defm 
BFMLAL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"bfmlal", 0b100, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x4>; 330defm BFMLAL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"bfmlal", 0b010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlal_vg2x2>; 331defm BFMLAL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"bfmlal", 0b010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlal_vg2x4>; 332 333defm BFMLSL_MZZI : sme2_mla_long_array_index<"bfmlsl", 0b10, 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x1>; 334defm BFMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x2>; 335defm BFMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x4>; 336defm BFMLSL_MZZ : sme2_mla_long_array_single<"bfmlsl", 0b00, 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x1>; 337defm BFMLSL_VG2_M2ZZ_HtoS : sme2_fp_mla_long_array_vg2_single<"bfmlsl", 0b110, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x2>; 338defm BFMLSL_VG4_M4ZZ_HtoS : sme2_fp_mla_long_array_vg4_single<"bfmlsl", 0b110, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x4>; 339defm BFMLSL_VG2_M2Z2Z_HtoS : sme2_fp_mla_long_array_vg2_multi<"bfmlsl", 0b011, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlsl_vg2x2>; 340defm BFMLSL_VG4_M4Z4Z_HtoS : sme2_fp_mla_long_array_vg4_multi<"bfmlsl", 0b011, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmlsl_vg2x4>; 341 342defm SMLAL_MZZI : sme2_mla_long_array_index<"smlal", 0b11, 0b00, nxv8i16, int_aarch64_sme_smlal_lane_vg2x1>; 343defm SMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlal", 0b00, int_aarch64_sme_smlal_lane_vg2x2>; 344defm SMLAL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"smlal", 0b00, int_aarch64_sme_smlal_lane_vg2x4>; 345defm SMLAL_MZZ : sme2_mla_long_array_single<"smlal",0b01, 0b00, nxv8i16, int_aarch64_sme_smlal_single_vg2x1>; 346defm 
SMLAL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"smlal", 0b00, int_aarch64_sme_smlal_single_vg2x2>; 347defm SMLAL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"smlal", 0b00, int_aarch64_sme_smlal_single_vg2x4>; 348defm SMLAL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"smlal", 0b00, int_aarch64_sme_smlal_vg2x2>; 349defm SMLAL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"smlal", 0b00, int_aarch64_sme_smlal_vg2x4>; 350 351defm SMLSL_MZZI : sme2_mla_long_array_index<"smlsl", 0b11, 0b01, nxv8i16, int_aarch64_sme_smlsl_lane_vg2x1>; 352defm SMLSL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlsl", 0b01, int_aarch64_sme_smlsl_lane_vg2x2>; 353defm SMLSL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"smlsl", 0b01, int_aarch64_sme_smlsl_lane_vg2x4>; 354defm SMLSL_MZZ : sme2_mla_long_array_single<"smlsl",0b01, 0b01, nxv8i16, int_aarch64_sme_smlsl_single_vg2x1>; 355defm SMLSL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"smlsl", 0b01, int_aarch64_sme_smlsl_single_vg2x2>; 356defm SMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"smlsl", 0b01, int_aarch64_sme_smlsl_single_vg2x4>; 357defm SMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"smlsl", 0b01, int_aarch64_sme_smlsl_vg2x2>; 358defm SMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"smlsl", 0b01, int_aarch64_sme_smlsl_vg2x4>; 359 360defm UMLAL_MZZI : sme2_mla_long_array_index<"umlal", 0b11, 0b10, nxv8i16, int_aarch64_sme_umlal_lane_vg2x1>; 361defm UMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"umlal", 0b10, int_aarch64_sme_umlal_lane_vg2x2>; 362defm UMLAL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"umlal", 0b10, int_aarch64_sme_umlal_lane_vg2x4>; 363defm UMLAL_MZZ : sme2_mla_long_array_single<"umlal",0b01, 0b10, nxv8i16, int_aarch64_sme_umlal_single_vg2x1>; 364defm UMLAL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"umlal", 0b10, int_aarch64_sme_umlal_single_vg2x2>; 365defm UMLAL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlal", 0b10, int_aarch64_sme_umlal_single_vg2x4>; 366defm 
UMLAL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlal", 0b10, int_aarch64_sme_umlal_vg2x2>; 367defm UMLAL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlal", 0b10, int_aarch64_sme_umlal_vg2x4>; 368 369defm UMLSL_MZZI : sme2_mla_long_array_index<"umlsl", 0b11, 0b11, nxv8i16, int_aarch64_sme_umlsl_lane_vg2x1>; 370defm UMLSL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"umlsl", 0b11, int_aarch64_sme_umlsl_lane_vg2x2>; 371defm UMLSL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"umlsl", 0b11, int_aarch64_sme_umlsl_lane_vg2x4>; 372defm UMLSL_MZZ : sme2_mla_long_array_single<"umlsl",0b01, 0b11, nxv8i16, int_aarch64_sme_umlsl_single_vg2x1>; 373defm UMLSL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"umlsl", 0b11, int_aarch64_sme_umlsl_single_vg2x2>; 374defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aarch64_sme_umlsl_single_vg2x4>; 375defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x2>; 376defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x4>; 377 378defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b00000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>; 379defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b00001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>; 380defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b10000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>; 381defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b10001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>; 382 383defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b00110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvt_x2>; 384defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b00111, nxv8i16, nxv4i32, int_aarch64_sve_uqcvt_x2>; 385defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10110, nxv8i16, nxv4i32, int_aarch64_sve_sqcvtu_x2>; 386defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000, int_aarch64_sve_sqcvt_x4>; 387defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 
0b001, int_aarch64_sve_uqcvt_x4>; 388defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100, int_aarch64_sve_sqcvtu_x4>; 389defm SQCVTN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtn", 0b010, int_aarch64_sve_sqcvtn_x4>; 390defm SQCVTUN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtun", 0b110, int_aarch64_sve_sqcvtun_x4>; 391defm UQCVTN_Z4Z : sme2_int_cvt_vg4_single<"uqcvtn", 0b011, int_aarch64_sve_uqcvtn_x4>; 392 393defm FCVTZS_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzs", 0b00010>; 394defm FCVTZS_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzs", 0b0001000>; 395defm FCVTZU_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzu", 0b00011>; 396defm FCVTZU_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzu", 0b0001010>; 397defm SCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"scvtf", 0b00100>; 398defm SCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"scvtf", 0b0010000>; 399defm UCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"ucvtf", 0b00101>; 400defm UCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"ucvtf", 0b0010010>; 401 402defm SMAX_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"smax", 0b0000000>; 403defm SMAX_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"smax", 0b0000000>; 404defm SMAX_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"smax", 0b0000000>; 405defm SMAX_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"smax", 0b0000000>; 406 407defm UMAX_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"umax", 0b0000001>; 408defm UMAX_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"umax", 0b0000001>; 409defm UMAX_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"umax", 0b0000001>; 410defm UMAX_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"umax", 0b0000001>; 411 412defm SMIN_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"smin", 0b0000010>; 413defm SMIN_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"smin", 0b0000010>; 414defm SMIN_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"smin", 0b0000010>; 415defm SMIN_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"smin", 0b0000010>; 
416 417defm UMIN_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"umin", 0b0000011>; 418defm UMIN_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"umin", 0b0000011>; 419defm UMIN_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"umin", 0b0000011>; 420defm UMIN_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"umin", 0b0000011>; 421 422defm FMAX_VG2_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fmax", 0b0010000>; 423defm FMAX_VG4_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fmax", 0b0010000>; 424defm FMAX_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fmax", 0b0010000>; 425defm FMAX_VG4_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fmax", 0b0010000>; 426 427defm FMIN_VG2_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fmin", 0b0010001>; 428defm FMIN_VG4_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fmin", 0b0010001>; 429defm FMIN_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fmin", 0b0010001>; 430defm FMIN_VG4_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fmin", 0b0010001>; 431 432defm FMAXNM_VG2_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fmaxnm", 0b0010010>; 433defm FMAXNM_VG4_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fmaxnm", 0b0010010>; 434defm FMAXNM_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fmaxnm", 0b0010010>; 435defm FMAXNM_VG4_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fmaxnm", 0b0010010>; 436 437defm FMINNM_VG2_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fminnm", 0b0010011>; 438defm FMINNM_VG4_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fminnm", 0b0010011>; 439defm FMINNM_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fminnm", 0b0010011>; 440defm FMINNM_VG4_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fminnm", 0b0010011>; 441 442defm SRSHL_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"srshl", 0b0100010>; 443defm SRSHL_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"srshl", 0b0100010>; 444defm SRSHL_VG2_2Z2Z : 
sme2_int_sve_destructive_vector_vg2_multi<"srshl", 0b0100010>;
defm SRSHL_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"srshl", 0b0100010>;

// "urshl": single-vector (ZZ) and multi-vector (Z2Z/Z4Z) destructive forms,
// for both the VG2 and VG4 multiclasses.
defm URSHL_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"urshl", 0b0100011>;
defm URSHL_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"urshl", 0b0100011>;
defm URSHL_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"urshl", 0b0100011>;
defm URSHL_VG4_4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"urshl", 0b0100011>;

// Clamp forms. The integer variants share one multiclass; the trailing bit
// argument is 0b0 for "sclamp" and 0b1 for "uclamp".
defm FCLAMP_VG2_2Z2Z : sme2_fp_clamp_vector_vg2_multi<"fclamp">;
defm FCLAMP_VG4_4Z4Z : sme2_fp_clamp_vector_vg4_multi<"fclamp">;

defm SCLAMP_VG2_2Z2Z : sme2_int_clamp_vector_vg2_multi<"sclamp", 0b0>;
defm SCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"sclamp", 0b0>;

defm UCLAMP_VG2_2Z2Z : sme2_int_clamp_vector_vg2_multi<"uclamp", 0b1>;
defm UCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"uclamp", 0b1>;

// Dot-product definitions for 32-bit ZA (the *_index_32b multiclasses and the
// MatrixOp32 operand). One group per mnemonic; within a group the record name
// suffix gives the operand shape (M = ZA array, Z = SVE vector, I = immediate),
// and each record is bound to the matching int_aarch64_sme_* intrinsic.
defm FDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b1001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x2>;
defm FDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b1001, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x4>;
defm FDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"fdot", 0b0010000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x2>;
defm FDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"fdot", 0b0110000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x4>;
defm FDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x2>;
defm FDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x4>;

// The "bfdot" records reuse the fdot intrinsics with the nxv8bf16 value type.
defm BFDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfdot", 0b01, 0b1011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x2>;
defm BFDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"bfdot", 0b1011, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x4>;
defm BFDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"bfdot", 0b0010010, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x2>;
defm BFDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"bfdot", 0b0110010, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x4>;
defm BFDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"bfdot", 0b0100010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x2>;
defm BFDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"bfdot", 0b0100010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x4>;

defm BFVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfvdot", 0b01, 0b0011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fvdot_lane_za32_vg1x2>;

defm FVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fvdot", 0b01, 0b0001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fvdot_lane_za32_vg1x2>;

defm SDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b01, 0b1000, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x2>;
defm SDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b01, 0b1100, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x2>;
defm SDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1000, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x4>;
defm SDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x4>;
defm SDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010101, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x2>;
defm SDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110101, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x4>;
defm SDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b1101001, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x2>;
defm SDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b1101001, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x4>;
defm SDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b0010100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x2>;
defm SDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b0110100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x4>;
defm SDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b0101000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x2>;
defm SDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b0101000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x4>;

defm SUDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sudot", 0b01, 0b1111, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x2>;
defm SUDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sudot", 0b1111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x4>;
defm SUDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"sudot", 0b0010111, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x2>;
defm SUDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg4_single<"sudot", 0b0110111, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x4>;

defm SVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"svdot", 0b01, 0b0100, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za32_vg1x2>;
defm SVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"svdot", 0b0100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_svdot_lane_za32_vg1x4>;

defm SUVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"suvdot", 0b0111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_suvdot_lane_za32_vg1x4>;

defm UDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b01, 0b1010, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x2>;
defm UDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b01, 0b1110, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x2>;
defm UDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x4>;
defm UDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1010, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x4>;
defm UDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010111, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x2>;
defm UDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110111, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x4>;
defm UDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b1101011, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x2>;
defm UDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b1101011, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x4>;
defm UDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b0010110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x2>;
defm UDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b0110110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x4>;
defm UDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b0101010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x2>;
defm UDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b0101010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x4>;

defm USDOT_VG2_M2ZZI_BToS: sme2_multi_vec_array_vg2_index_32b<"usdot", 0b01, 0b1101, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x2>;
defm USDOT_VG4_M4ZZI_BToS: sme2_multi_vec_array_vg4_index_32b<"usdot", 0b1101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x4>;
defm USDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"usdot", 0b0010101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x2>;
defm USDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg4_single<"usdot", 0b0110101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x4>;
defm USDOT_VG2_M2Z2Z_BToS : sme2_dot_mla_add_sub_array_vg2_multi<"usdot", 0b0101001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x2>;
defm USDOT_VG4_M4Z4Z_BToS : sme2_dot_mla_add_sub_array_vg4_multi<"usdot", 0b0101001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x4>;

defm USVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"usvdot", 0b0101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usvdot_lane_za32_vg1x4>;

defm UVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"uvdot", 0b01, 0b0110, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za32_vg1x2>;
defm UVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"uvdot", 0b0110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_uvdot_lane_za32_vg1x4>;

// sme2_mla_ll_* definitions with a MatrixOp32 operand and nxv16i8 sources.
// Each mnemonic gets indexed (…ZZI), single (…ZZ) and multi (…Z2Z/…Z4Z)
// records; "sumlall" below defines only the indexed and VG2/VG4 single forms.
defm SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b00, 0b000, int_aarch64_sme_smla_za32_lane_vg4x1>;
defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b00, 0b000, int_aarch64_sme_smla_za32_lane_vg4x2>;
defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b00, 0b0000, int_aarch64_sme_smla_za32_lane_vg4x4>;
defm SMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"smlall", 0b00000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x1>;
defm SMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"smlall", 0b00000, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x2>;
defm SMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"smlall", 0b01000, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x4>;
defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b00000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x2>;
defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b00000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x4>;

defm USMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"usmlall", 0b00, 0b001, int_aarch64_sme_usmla_za32_lane_vg4x1>;
defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b00, 0b100, int_aarch64_sme_usmla_za32_lane_vg4x2>;
defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b00, 0b0100, int_aarch64_sme_usmla_za32_lane_vg4x4>;
defm USMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"usmlall", 0b00001, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x1>;
defm USMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"usmlall", 0b00001, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x2>;
defm USMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"usmlall", 0b01001, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x4>;
defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b00001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x2>;
defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b00001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x4>;

defm SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b00, 0b010, int_aarch64_sme_smls_za32_lane_vg4x1>;
defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b00, 0b001, int_aarch64_sme_smls_za32_lane_vg4x2>;
defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b00, 0b0001, int_aarch64_sme_smls_za32_lane_vg4x4>;
defm SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"smlsll", 0b00010, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x1>;
defm SMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"smlsll", 0b00010, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x2>;
defm SMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"smlsll", 0b01010, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x4>;
defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b00010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x2>;
defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b00010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x4>;

defm UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b00, 0b100, int_aarch64_sme_umla_za32_lane_vg4x1>;
defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b00, 0b010, int_aarch64_sme_umla_za32_lane_vg4x2>;
defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b00, 0b0010, int_aarch64_sme_umla_za32_lane_vg4x4>;
defm UMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"umlall", 0b00100, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x1>;
defm UMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"umlall", 0b00100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x2>;
defm UMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"umlall", 0b01100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x4>;
defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b00100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x2>;
defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b00100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x4>;

defm SUMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"sumlall", 0b00, 0b101, int_aarch64_sme_sumla_za32_lane_vg4x1>;
defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b00, 0b110, int_aarch64_sme_sumla_za32_lane_vg4x2>;
defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b00, 0b0110, int_aarch64_sme_sumla_za32_lane_vg4x4>;
defm SUMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"sumlall", 0b00101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x2>;
defm SUMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"sumlall", 0b01101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x4>;

defm UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b00, 0b110, int_aarch64_sme_umls_za32_lane_vg4x1>;
defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b00, 0b011, int_aarch64_sme_umls_za32_lane_vg4x2>;
defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b00, 0b0011, int_aarch64_sme_umls_za32_lane_vg4x4>;
defm UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"umlsll", 0b00110, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x1>;
defm UMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"umlsll", 0b00110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x2>;
defm UMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"umlsll", 0b01110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x4>;
defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b00110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x2>;
defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b00110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x4>;

// Tile-based "…mopa"/"…mops" records bound to the *_za32 intrinsics.
defm BMOPA_MPPZZ_S : sme2_int_bmopx_tile<"bmopa", 0b100, int_aarch64_sme_bmopa_za32>;
defm BMOPS_MPPZZ_S : sme2_int_bmopx_tile<"bmops", 0b101, int_aarch64_sme_bmops_za32>;

defm SMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"smopa", 0b000, int_aarch64_sme_smopa_za32>;
defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001, int_aarch64_sme_smops_za32>;

defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100, int_aarch64_sme_umopa_za32>;
defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops_za32>;

// "movt" in both directions (ZT to scalar, scalar to ZT); same opcode bits,
// different instruction classes.
def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>;
def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>;

// Indexed "luti2"/"luti4" records; only the single-vector forms pass an
// intrinsic here.
defm LUTI2_ZTZI : sme2_luti2_vector_index<"luti2", int_aarch64_sme_luti2_lane_zt>;
defm LUTI2_2ZTZI : sme2_luti2_vector_vg2_index<"luti2">;
defm LUTI2_4ZTZI : sme2_luti2_vector_vg4_index<"luti2">;

defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4", int_aarch64_sme_luti4_lane_zt>;
defm LUTI4_2ZTZI : sme2_luti4_vector_vg2_index<"luti4">;
defm LUTI4_4ZTZI : sme2_luti4_vector_vg4_index<"luti4">;

defm SUNPK_VG2_2ZZ : sme2_unpk_vector_vg2<"sunpk", 0b0>;
defm SUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"sunpk", 0b0>;
defm UUNPK_VG2_2ZZ : sme2_unpk_vector_vg2<"uunpk", 0b1>;
defm UUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"uunpk", 0b1>;

defm ZIP_VG2_2ZZZ : sme2_zip_vector_vg2<"zip", 0b0>;
defm UZP_VG2_2ZZZ : sme2_zip_vector_vg2<"uzp", 0b1>;
defm ZIP_VG4_4Z4Z : sme2_zip_vector_vg4<"zip", 0b0110000>;
defm UZP_VG4_4Z4Z : sme2_zip_vector_vg4<"uzp", 0b0110001>;
defm ZIP_VG4_4Z4Z_Q : sme2_zip_vector_vg4_Q<"zip", 0b0111000>;
defm UZP_VG4_4Z4Z_Q : sme2_zip_vector_vg4_Q<"uzp", 0b0111001>;

defm FRINTA_2Z2Z: sme2_frint_vector_vg2_multi<"frinta", 0b11000>;
defm FRINTA_4Z4Z: sme2_frint_vector_vg4_multi<"frinta", 0b1100000>;
defm FRINTM_2Z2Z: sme2_frint_vector_vg2_multi<"frintm", 0b10100>;
defm FRINTM_4Z4Z: sme2_frint_vector_vg4_multi<"frintm", 0b1010000>;
defm FRINTN_2Z2Z: sme2_frint_vector_vg2_multi<"frintn", 0b10000>;
defm FRINTN_4Z4Z: sme2_frint_vector_vg4_multi<"frintn", 0b1000000>;
defm FRINTP_2Z2Z: sme2_frint_vector_vg2_multi<"frintp", 0b10010>;
defm FRINTP_4Z4Z: sme2_frint_vector_vg4_multi<"frintp", 0b1001000>;

// "mova" between vector groups and ZA: tile forms first, then array forms.
defm MOVA_MXI2Z : sme2_mova_vec_to_tile_vg2_multi<"mova", int_aarch64_sme_write_hor_vg2, int_aarch64_sme_write_ver_vg2>;
defm MOVA_MXI4Z : sme2_mova_vec_to_tile_vg4_multi<"mova", int_aarch64_sme_write_hor_vg4, int_aarch64_sme_write_ver_vg4>;
defm MOVA_2ZMXI : sme2_mova_tile_to_vec_vg2_multi<"mova">;
defm MOVA_4ZMXI : sme2_mova_tile_to_vec_vg4_multi<"mova">;

defm MOVA_VG2_MXI2Z : sme2_mova_vec_to_array_vg2_multi<"mova", int_aarch64_sme_write_vg1x2>;
defm MOVA_VG4_MXI4Z : sme2_mova_vec_to_array_vg4_multi<"mova", int_aarch64_sme_write_vg1x4>;
defm MOVA_VG2_2ZMXI : sme2_mova_array_to_vec_vg2_multi<0b000, "mova">;
defm MOVA_VG4_4ZMXI : sme2_mova_array_to_vec_vg4_multi<0b1000, "mova">;

// sme2_sat_shift_vector_* records, bound to the int_aarch64_sve_*_x2/_x4
// intrinsics.
defm SQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshr", 0b0, 0b0, int_aarch64_sve_sqrshr_x2>;
defm SQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshr", 0b000, int_aarch64_sve_sqrshr_x4>;

defm UQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"uqrshr", 0b0, 0b1, int_aarch64_sve_uqrshr_x2>;
defm UQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshr", 0b001, int_aarch64_sve_uqrshr_x4>;

defm SQRSHRU_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshru", 0b1, 0b0, int_aarch64_sve_sqrshru_x2>;
defm SQRSHRU_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshru", 0b010, int_aarch64_sve_sqrshru_x4>;

defm SQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrn", 0b100, int_aarch64_sve_sqrshrn_x4>;
defm UQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshrn", 0b101, int_aarch64_sve_uqrshrn_x4>;
defm SQRSHRUN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrun", 0b110, int_aarch64_sve_sqrshrun_x4>;

defm SEL_VG2_2ZC2Z2Z: sme2_sel_vector_vg2<"sel">;
defm SEL_VG4_4ZC4Z4Z: sme2_sel_vector_vg4<"sel">;

// Multi-vector loads into the *_strided register classes. Each element size
// has scalar+scalar (def) and scalar+immediate (defm, simm4s2 for VG2 /
// simm4s4 for VG4) records. The second template bit is 0b0 for "ld1*".
def LD1B_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b0, ZZ_b_strided, GPR64shifted8, "ld1b">;
def LD1B_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b0, ZZZZ_b_strided, GPR64shifted8, "ld1b">;
defm LD1B_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b00, 0b0, ZZ_b_strided, simm4s2, "ld1b">;
defm LD1B_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b00, 0b0, ZZZZ_b_strided, simm4s4, "ld1b">;
def LD1H_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b01, 0b0, ZZ_h_strided, GPR64shifted16, "ld1h">;
def LD1H_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b01, 0b0, ZZZZ_h_strided, GPR64shifted16, "ld1h">;
defm LD1H_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b01, 0b0, ZZ_h_strided, simm4s2, "ld1h">;
defm LD1H_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b01, 0b0, ZZZZ_h_strided, simm4s4, "ld1h">;
def LD1W_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b10, 0b0, ZZ_s_strided, GPR64shifted32, "ld1w">;
def LD1W_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b10, 0b0, ZZZZ_s_strided, GPR64shifted32, "ld1w">;
defm LD1W_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b10, 0b0, ZZ_s_strided, simm4s2, "ld1w">;
defm LD1W_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b10, 0b0, ZZZZ_s_strided, simm4s4, "ld1w">;
def LD1D_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b11, 0b0, ZZ_d_strided, GPR64shifted64, "ld1d">;
def LD1D_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b11, 0b0, ZZZZ_d_strided, GPR64shifted64, "ld1d">;
defm LD1D_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b11, 0b0, ZZ_d_strided, simm4s2, "ld1d">;
defm LD1D_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b11, 0b0, ZZZZ_d_strided, simm4s4, "ld1d">;

def LDNT1B_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b1, ZZ_b_strided, GPR64shifted8, "ldnt1b">;
// "ldnt1*" multi-vector loads into the *_strided register classes. These use
// the same instruction classes as "ld1*" with the second template bit set to
// 0b1; scalar+scalar records are plain defs, scalar+immediate records are
// defms (simm4s2 for VG2, simm4s4 for VG4).
def LDNT1B_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b1, ZZZZ_b_strided, GPR64shifted8, "ldnt1b">;
defm LDNT1B_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b00, 0b1, ZZ_b_strided, simm4s2, "ldnt1b">;
defm LDNT1B_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b00, 0b1, ZZZZ_b_strided, simm4s4, "ldnt1b">;
def LDNT1H_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b01, 0b1, ZZ_h_strided, GPR64shifted16, "ldnt1h">;
def LDNT1H_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b01, 0b1, ZZZZ_h_strided, GPR64shifted16, "ldnt1h">;
defm LDNT1H_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b01, 0b1, ZZ_h_strided, simm4s2, "ldnt1h">;
defm LDNT1H_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b01, 0b1, ZZZZ_h_strided, simm4s4, "ldnt1h">;
def LDNT1W_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b10, 0b1, ZZ_s_strided, GPR64shifted32, "ldnt1w">;
def LDNT1W_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b10, 0b1, ZZZZ_s_strided, GPR64shifted32, "ldnt1w">;
defm LDNT1W_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b10, 0b1, ZZ_s_strided, simm4s2, "ldnt1w">;
defm LDNT1W_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b10, 0b1, ZZZZ_s_strided, simm4s4, "ldnt1w">;
def LDNT1D_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b11, 0b1, ZZ_d_strided, GPR64shifted64, "ldnt1d">;
def LDNT1D_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b11, 0b1, ZZZZ_d_strided, GPR64shifted64, "ldnt1d">;
defm LDNT1D_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b11, 0b1, ZZ_d_strided, simm4s2, "ldnt1d">;
defm LDNT1D_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b11, 0b1, ZZZZ_d_strided, simm4s4, "ldnt1d">;

// Multi-vector stores from the *_strided register classes, laid out like the
// loads: first template argument per element size (0b00..0b11 for b/h/w/d),
// second bit 0b0 for "st1*" and 0b1 for "stnt1*".
def ST1B_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b00, 0b0, ZZ_b_strided, GPR64shifted8, "st1b">;
def ST1B_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b00, 0b0, ZZZZ_b_strided, GPR64shifted8, "st1b">;
defm ST1B_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b00, 0b0, ZZ_b_strided, simm4s2, "st1b">;
defm ST1B_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b00, 0b0, ZZZZ_b_strided, simm4s4, "st1b">;
def ST1H_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b01, 0b0, ZZ_h_strided, GPR64shifted16, "st1h">;
def ST1H_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b01, 0b0, ZZZZ_h_strided, GPR64shifted16, "st1h">;
defm ST1H_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b01, 0b0, ZZ_h_strided, simm4s2, "st1h">;
defm ST1H_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b01, 0b0, ZZZZ_h_strided, simm4s4, "st1h">;
def ST1W_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b10, 0b0, ZZ_s_strided, GPR64shifted32, "st1w">;
def ST1W_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b10, 0b0, ZZZZ_s_strided, GPR64shifted32, "st1w">;
defm ST1W_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b10, 0b0, ZZ_s_strided, simm4s2, "st1w">;
defm ST1W_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b10, 0b0, ZZZZ_s_strided, simm4s4, "st1w">;
def ST1D_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b11, 0b0, ZZ_d_strided, GPR64shifted64, "st1d">;
def ST1D_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b11, 0b0, ZZZZ_d_strided, GPR64shifted64, "st1d">;
defm ST1D_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b11, 0b0, ZZ_d_strided, simm4s2, "st1d">;
defm ST1D_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b0, ZZZZ_d_strided, simm4s4, "st1d">;

def STNT1B_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b00, 0b1, ZZ_b_strided, GPR64shifted8, "stnt1b">;
def STNT1B_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b00, 0b1, ZZZZ_b_strided, GPR64shifted8, "stnt1b">;
defm STNT1B_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b00, 0b1, ZZ_b_strided, simm4s2, "stnt1b">;
defm STNT1B_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b00, 0b1, ZZZZ_b_strided, simm4s4, "stnt1b">;
def STNT1H_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b01, 0b1, ZZ_h_strided, GPR64shifted16, "stnt1h">;
def STNT1H_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b01, 0b1, ZZZZ_h_strided, GPR64shifted16, "stnt1h">;
defm STNT1H_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b01, 0b1, ZZ_h_strided, simm4s2, "stnt1h">;
defm STNT1H_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b01, 0b1, ZZZZ_h_strided, simm4s4, "stnt1h">;
def STNT1W_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b10, 0b1, ZZ_s_strided, GPR64shifted32, "stnt1w">;
def STNT1W_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b10, 0b1, ZZZZ_s_strided, GPR64shifted32, "stnt1w">;
defm STNT1W_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b10, 0b1, ZZ_s_strided, simm4s2, "stnt1w">;
defm STNT1W_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b10, 0b1, ZZZZ_s_strided, simm4s4, "stnt1w">;
def STNT1D_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b11, 0b1, ZZ_d_strided, GPR64shifted64, "stnt1d">;
def STNT1D_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b11, 0b1, ZZZZ_d_strided, GPR64shifted64, "stnt1d">;
defm STNT1D_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b11, 0b1, ZZ_d_strided, simm4s2, "stnt1d">;
defm STNT1D_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b1, ZZZZ_d_strided, simm4s4, "stnt1d">;
} // End let Predicates = [HasSME2]


// ZT0 records ("zero", "ldr", "str") under their own predicate. The ldr/str
// records are bound to the AArch64_restore_zt / AArch64_save_zt SDNodes
// declared at the top of this file.
let Predicates = [HasSME2andIsNonStreamingSafe] in {
defm ZERO_T : sme2_zero_zt<"zero", 0b0001>;

defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, AArch64_restore_zt>;
defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, AArch64_save_zt>;
} // End let Predicates = [HasSME2andIsNonStreamingSafe]

let Predicates = [HasSME2, HasSMEI16I64] in {
// 64-bit ZA (MatrixOp64) integer records: "add"/"sub" on nxv2i64 sources,
// then dot-product and sme2_mla_ll_* records on nxv8i16 sources. Each record
// is bound to the matching int_aarch64_sme_* intrinsic.
defm ADD_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"add", 0b1011010, MatrixOp64, ZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_add_write_single_za_vg1x2>;
defm ADD_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"add", 0b1111010, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_add_write_single_za_vg1x4>;
defm ADD_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"add", 0b1110010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x2>;
defm ADD_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"add", 0b1110010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_write_za_vg1x4>;

defm SUB_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"sub", 0b1011011, MatrixOp64, ZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_sub_write_single_za_vg1x2>;
defm SUB_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b1111011, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2i64, int_aarch64_sme_sub_write_single_za_vg1x4>;
defm SUB_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b1110011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x2>;
defm SUB_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b1110011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x4>;

defm ADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"add", 0b1010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x2>;
defm ADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"add", 0b1010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x4>;

defm SUB_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"sub", 0b1011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_za64_vg1x2>;
defm SUB_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"sub", 0b1011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_za64_vg1x4>;

defm SDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"sdot", 0b01, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za64_vg1x2>;
defm SDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"sdot", 0b001, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za64_vg1x4>;
defm SDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x2>;
defm SDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x4>;
defm SDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b1101000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x2>;
defm SDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b1101000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x4>;

defm SVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"svdot", 0b101, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za64_vg1x4>;

defm UDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"udot", 0b11, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za64_vg1x2>;
defm UDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"udot", 0b011, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za64_vg1x4>;
defm UDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x2>;
defm UDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x4>;
defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b1101010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x2>;
defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b1101010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x4>;

defm UVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"uvdot", 0b111, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za64_vg1x4>;

defm SMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x1>;
defm SMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x2>;
defm SMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x4>;
defm SMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"smlall", 0b10000, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x1>;
defm SMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"smlall", 0b10000, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x2>;
defm SMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"smlall", 0b11000, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x4>;
defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall", 0b10000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x2>;
defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall", 0b10000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x4>;

defm SMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x1>;
defm SMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x2>;
defm SMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x4>;
defm SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"smlsll", 0b10010, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x1>;
defm SMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"smlsll", 0b10010, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x2>;
defm SMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"smlsll", 0b11010, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x4>;
defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll", 0b10010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x2>;
defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlsll", 0b10010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x4>;

defm UMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x1>;
defm UMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x2>;
defm UMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x4>;
defm UMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"umlall", 0b10100, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x1>;
defm UMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"umlall", 0b10100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x2>;
defm UMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"umlall", 0b11100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x4>;
defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall", 0b10100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x2>;
defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall", 0b10100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x4>;

defm UMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x1>;
defm UMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x2>;
defm UMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x4>;
defm UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"umlsll", 0b10110, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x1>;
defm UMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"umlsll", 0b10110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x2>;
defm UMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"umlsll", 0b11110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x4>;
defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll", 0b10110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x2>;
defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll", 0b10110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x4>;
} // End let Predicates = [HasSME2, HasSMEI16I64]

// 64-bit ZA (MatrixOp64) floating-point records on nxv2f64 sources. The
// "fadd"/"fsub" records are bound to the same int_aarch64_sme_add_za64_* /
// int_aarch64_sme_sub_za64_* intrinsics that the integer "add"/"sub" records
// use, with the nxv2f64 value type.
let Predicates = [HasSME2, HasSMEF64F64] in {
defm FMLA_VG2_M2ZZI_D : sme2_multi_vec_array_vg2_index_64b<"fmla", 0b00, ZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_lane_vg1x2>;
defm FMLA_VG4_M4ZZI_D : sme2_multi_vec_array_vg4_index_64b<"fmla", 0b000, ZZZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_lane_vg1x4>;
defm FMLA_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b1011000, MatrixOp64, ZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_single_vg1x2>;
defm FMLA_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b1111000, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmla_single_vg1x4>;
defm FMLA_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b1110000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x2>;
defm FMLA_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b1110000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmla_vg1x4>;

defm FMLS_VG2_M2ZZI_D : sme2_multi_vec_array_vg2_index_64b<"fmls", 0b10, ZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_lane_vg1x2>;
defm FMLS_VG4_M4ZZI_D : sme2_multi_vec_array_vg4_index_64b<"fmls", 0b010, ZZZZ_d_mul_r, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_lane_vg1x4>;
defm FMLS_VG2_M2ZZ_D : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b1011001, MatrixOp64, ZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_single_vg1x2>;
defm FMLS_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b1111001, MatrixOp64, ZZZZ_d, ZPR4b64, nxv2f64, int_aarch64_sme_fmls_single_vg1x4>;
defm FMLS_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b1110001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x2>;
defm FMLS_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b1110001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x4>;

defm FADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fadd", 0b1000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x2>;
defm FADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fadd", 0b1000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x4>;

defm FSUB_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fsub", 0b1001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_sub_za64_vg1x2>;
defm FSUB_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fsub", 0b1001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_sub_za64_vg1x4>;
} // End let Predicates = [HasSME2, HasSMEF64F64]

// Records gated on HasSME2p1: "movaz" tile/array to vector forms, "zero",
// and the sme2p1 "luti2"/"luti4" VG2/VG4 indexed forms.
let Predicates = [HasSME2p1] in {
defm MOVAZ_ZMI : sme2p1_movaz_tile_to_vec<"movaz", int_aarch64_sme_readz_horiz, int_aarch64_sme_readz_vert,
                                          int_aarch64_sme_readz_q_horiz, int_aarch64_sme_readz_q_vert>;
defm MOVAZ_2ZMI : sme2p1_movaz_tile_to_vec_vg2<"movaz">;
defm MOVAZ_4ZMI : sme2p1_movaz_tile_to_vec_vg4<"movaz">;
defm MOVAZ_VG2_2ZMXI : sme2_movaz_array_to_vec_vg2_multi<"movaz">;
defm MOVAZ_VG4_4ZMXI : sme2_movaz_array_to_vec_vg4_multi<"movaz">;

defm ZERO_MXI : sme2p1_zero_matrix<"zero">;

defm LUTI2_S_2ZTZI : sme2p1_luti2_vector_vg2_index<"luti2">;
defm LUTI2_S_4ZTZI : sme2p1_luti2_vector_vg4_index<"luti2">;

defm LUTI4_S_2ZTZI : sme2p1_luti4_vector_vg2_index<"luti4">;
defm LUTI4_S_4ZTZI : sme2p1_luti4_vector_vg4_index<"luti4">;
} // End let Predicates = [HasSME2p1]

let Predicates = [HasSMEF16F16orSMEF8F16] in {
defm FADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0100, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_add_za16_vg1x2>;
defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16, ZZZZ_h_mul_r,
nxv8f16, int_aarch64_sme_add_za16_vg1x4>; 836defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_sub_za16_vg1x2>; 837defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_sub_za16_vg1x4>; 838 839defm FMLA_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmla", 0b00, 0b100, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fmla_lane_vg1x2>; 840defm FMLA_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmla", 0b000, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fmla_lane_vg1x4>; 841defm FMLA_VG2_M2ZZ_H : sme2_dot_mla_add_sub_array_vg2_single<"fmla", 0b0011100, MatrixOp16, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmla_single_vg1x2>; 842defm FMLA_VG4_M4ZZ_H : sme2_dot_mla_add_sub_array_vg4_single<"fmla", 0b0111100, MatrixOp16, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmla_single_vg1x4>; 843defm FMLA_VG2_M2Z4Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmla", 0b0100001, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmla_vg1x2>; 844defm FMLA_VG4_M4Z4Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmla", 0b0100001, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmla_vg1x4>; 845 846defm FMLS_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmls", 0b00, 0b101, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fmls_lane_vg1x2>; 847defm FMLS_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmls", 0b001, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fmls_lane_vg1x4>; 848defm FMLS_VG2_M2ZZ_H : sme2_dot_mla_add_sub_array_vg2_single<"fmls", 0b0011101, MatrixOp16, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmls_single_vg1x2>; 849defm FMLS_VG4_M4ZZ_H : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b0111101, MatrixOp16, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fmls_single_vg1x4>; 850defm FMLS_VG2_M2Z2Z_H : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b0100011, MatrixOp16, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmls_vg1x2>; 851defm 
FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fmls_vg1x4>; 852 853defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>; 854defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>; 855 856defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, nxv8f16, int_aarch64_sme_mopa>; 857defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, nxv8f16, int_aarch64_sme_mops>; 858} 859 860let Predicates = [HasSME2, HasB16B16] in { 861defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_add_za16_vg1x2>; 862defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_add_za16_vg1x4>; 863defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_sub_za16_vg1x2>; 864defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_sub_za16_vg1x4>; 865 866defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b00, 0b110, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fmla_lane_vg1x2>; 867defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b010, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fmla_lane_vg1x4>; 868defm BFMLA_VG2_M2ZZ : sme2_dot_mla_add_sub_array_vg2_single<"bfmla", 0b1011100, MatrixOp16, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmla_single_vg1x2>; 869defm BFMLA_VG4_M4ZZ : sme2_dot_mla_add_sub_array_vg4_single<"bfmla", 0b1111100, MatrixOp16, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmla_single_vg1x4>; 870defm BFMLA_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmla", 0b1100001, MatrixOp16, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmla_vg1x2>; 871defm BFMLA_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmla", 0b1100001, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, 
int_aarch64_sme_fmla_vg1x4>; 872 873defm BFMLS_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmls", 0b00, 0b111, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fmls_lane_vg1x2>; 874defm BFMLS_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmls", 0b011, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fmls_lane_vg1x4>; 875defm BFMLS_VG2_M2ZZ : sme2_dot_mla_add_sub_array_vg2_single<"bfmls", 0b1011101, MatrixOp16, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmls_single_vg1x2>; 876defm BFMLS_VG4_M4ZZ : sme2_dot_mla_add_sub_array_vg4_single<"bfmls", 0b1111101, MatrixOp16, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fmls_single_vg1x4>; 877defm BFMLS_VG2_M2Z2Z : sme2_dot_mla_add_sub_array_vg2_multi<"bfmls", 0b1100011, MatrixOp16, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmls_vg1x2>; 878defm BFMLS_VG4_M4Z4Z : sme2_dot_mla_add_sub_array_vg4_multi<"bfmls", 0b1100011, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fmls_vg1x4>; 879 880defm BFMAX_VG2_2ZZ : sme2p1_bf_max_min_vector_vg2_single<"bfmax", 0b0010000>; 881defm BFMAX_VG4_4ZZ : sme2p1_bf_max_min_vector_vg4_single<"bfmax", 0b0010000>; 882defm BFMAX_VG2_2Z2Z : sme2p1_bf_max_min_vector_vg2_multi<"bfmax", 0b0010000>; 883defm BFMAX_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfmax", 0b0010000>; 884 885defm BFMIN_VG2_2ZZ : sme2p1_bf_max_min_vector_vg2_single<"bfmin", 0b0010001>; 886defm BFMIN_VG4_4ZZ : sme2p1_bf_max_min_vector_vg4_single<"bfmin", 0b0010001>; 887defm BFMIN_VG2_2Z2Z : sme2p1_bf_max_min_vector_vg2_multi<"bfmin", 0b0010001>; 888defm BFMIN_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfmin", 0b0010001>; 889 890defm BFMAXNM_VG2_2ZZ : sme2p1_bf_max_min_vector_vg2_single<"bfmaxnm", 0b0010010>; 891defm BFMAXNM_VG4_4ZZ : sme2p1_bf_max_min_vector_vg4_single<"bfmaxnm", 0b0010010>; 892defm BFMAXNM_VG2_2Z2Z : sme2p1_bf_max_min_vector_vg2_multi<"bfmaxnm", 0b0010010>; 893defm BFMAXNM_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfmaxnm", 0b0010010>; 894 895defm BFMINNM_VG2_2ZZ : 
sme2p1_bf_max_min_vector_vg2_single<"bfminnm", 0b0010011>; 896defm BFMINNM_VG4_4ZZ : sme2p1_bf_max_min_vector_vg4_single<"bfminnm", 0b0010011>; 897defm BFMINNM_VG2_2Z2Z : sme2p1_bf_max_min_vector_vg2_multi<"bfminnm", 0b0010011>; 898defm BFMINNM_VG4_4Z2Z : sme2p1_bf_max_min_vector_vg4_multi<"bfminnm", 0b0010011>; 899 900defm BFCLAMP_VG2_2ZZZ: sme2p1_bfclamp_vector_vg2_multi<"bfclamp">; 901defm BFCLAMP_VG4_4ZZZ: sme2p1_bfclamp_vector_vg4_multi<"bfclamp">; 902 903defm BFMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmopa", 0b1, 0b0, nxv8bf16, int_aarch64_sme_mopa>; 904defm BFMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"bfmops", 0b1, 0b1, nxv8bf16, int_aarch64_sme_mops>; 905} 906 907let Predicates = [HasSME2, HasFP8] in { 908defm F1CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f1cvt", 0b00, 0b0>; 909defm F1CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f1cvtl", 0b00, 0b1>; 910defm BF1CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvt", 0b01, 0b0>; 911defm BF1CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf1cvtl", 0b01, 0b1>; 912defm F2CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f2cvt", 0b10, 0b0>; 913defm F2CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f2cvtl", 0b10, 0b1>; 914defm BF2CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvt", 0b11, 0b0>; 915defm BF2CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvtl", 0b11, 0b1>; 916 917defm FCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"fcvt", 0b0>; 918defm BFCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"bfcvt", 0b1>; 919defm FCVT_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvt", 0b0>; 920defm FCVTN_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvtn", 0b1>; 921 922defm FSCALE_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b0011000>; 923defm FSCALE_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>; 924defm FSCALE_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fscale", 0b0011000>; 925defm FSCALE_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"fscale", 0b0011000>; 926 927} // [HasSME2, HasFP8] 928 
// Records gated on both HasSME2 and HasFAMINMAX: "famax" / "famin", using the
// same sme2_fp_sve_destructive_vector_vg{2,4}_multi multiclasses as "fscale".
let Predicates = [HasSME2, HasFAMINMAX] in {
defm FAMAX_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famax", 0b0010100>;
defm FAMIN_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"famin", 0b0010101>;

defm FAMAX_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famax", 0b0010100>;
defm FAMIN_4Z4Z : sme2_fp_sve_destructive_vector_vg4_multi<"famin", 0b0010101>;
} // [HasSME2, HasFAMINMAX]

// Records gated on both HasSME2 and HasSME_LUTv2.
// LUTI4_4ZZT2Z is a plain `def` (a single record from a class) whereas MOVT is
// a `defm` (multiclass instantiation).
let Predicates = [HasSME2, HasSME_LUTv2] in {
defm MOVT : sme2_movt_zt_to_zt<"movt", 0b0011111>;
def LUTI4_4ZZT2Z : sme2_luti4_vector_vg4<0b00, 0b00,"luti4">;
} // [HasSME2, HasSME_LUTv2]

// Strided variant of LUTI4_4ZZT2Z; gated on HasSME2p1 instead of HasSME2.
let Predicates = [HasSME2p1, HasSME_LUTv2] in {
def LUTI4_S_4ZZT2Z : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;
} // [HasSME2p1, HasSME_LUTv2]

// Records gated on HasSMEF8F16: "fvdot", "fdot", "fmlal" and "fmopa" forms with
// byte-element vector operands (ZZ_b*, ZPR4b8) and MatrixOp16.
// Where a multiclass takes a value type and an intrinsic, nxv16i8 and null_frag
// are passed, i.e. no intrinsic is attached to these records here.
let Predicates = [HasSMEF8F16] in {
defm FVDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_f8f16<"fvdot", 0b11, 0b110, ZZ_b_mul_r, ZPR4b8>;
defm FDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_f8f16<"fdot", 0b11, 0b010, ZZ_b_mul_r, ZPR4b8>;
defm FDOT_VG4_M4ZZI_BtoH : sme2p1_multi_vec_array_vg4_index_f8f16<"fdot", 0b100, ZZZZ_b_mul_r, ZPR4b8>;
defm FDOT_VG2_M2ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010001, MatrixOp16, ZZ_b, ZPR4b8>;
defm FDOT_VG4_M4ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110001, MatrixOp16, ZZZZ_b, ZPR4b8>;
// TODO: Replace nxv16i8 by nxv16f8
defm FDOT_VG2_M2Z2Z_BtoH : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
defm FDOT_VG4_M4Z4Z_BtoH : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;

def FMLAL_MZZI_BtoH : sme2_mla_ll_array_index_16b<"fmlal", 0b11, 0b00>;
defm FMLAL_VG2_M2ZZI_BtoH : sme2_multi_vec_array_vg2_index_16b<"fmlal", 0b10, 0b111>;
defm FMLAL_VG4_M4ZZI_BtoH : sme2_multi_vec_array_vg4_index_16b<"fmlal", 0b10, 0b110>;
// NOTE(review): named "VG2_MZZ" but built from sme2_mla_long_array_single_16b,
// and FMLAL_VG2_M2ZZ_BtoH below is the vg2 single form -- presumably this is
// the non-grouped single record and the "VG2" in its name is historical;
// confirm before renaming.
def FMLAL_VG2_MZZ_BtoH : sme2_mla_long_array_single_16b<"fmlal">;
// TODO: Replace nxv16i8 by nxv16f8
defm FMLAL_VG2_M2ZZ_BtoH : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b001, MatrixOp16, ZZ_b, ZPR4b8, nxv16i8, null_frag>;
defm FMLAL_VG4_M4ZZ_BtoH : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b001, MatrixOp16, ZZZZ_b, ZPR4b8, nxv16i8, null_frag>;
defm FMLAL_VG2_M2Z2Z_BtoH : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b100, MatrixOp16, ZZ_b_mul_r, nxv16i8, null_frag>;
defm FMLAL_VG4_M4Z4Z_BtoH : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b100, MatrixOp16, ZZZZ_b_mul_r, nxv16i8, null_frag>;

defm FMOPA_MPPZZ_BtoH : sme2p1_fmop_tile_f8f16<"fmopa", 0b1, 0b0, 0b01>;

} // [HasSMEF8F16]

// Records gated on HasSMEF8F32: "fdot", "fvdotb", "fvdott", "fmlall" and
// "fmopa" forms with byte-element vector operands and MatrixOp32.
// As in the HasSMEF8F16 group, every intrinsic slot is filled with null_frag.
let Predicates = [HasSMEF8F32] in {
// TODO: Replace nxv16i8 by nxv16f8
defm FDOT_VG2_M2ZZI_BtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b01, 0b0111, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
defm FDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b0001, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
defm FDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010011, MatrixOp32, ZZ_b, ZPR4b8>;
defm FDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110011, MatrixOp32, ZZZZ_b, ZPR4b8>;
// TODO: Replace nxv16i8 by nxv16f8
defm FDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100110, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
defm FDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;

// "fvdotb" / "fvdott" share one class and differ only in the final bit.
def FVDOTB_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdotb", 0b0>;
def FVDOTT_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdott", 0b1>;

defm FMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"fmlall", 0b01, 0b000, null_frag>;
defm FMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"fmlall", 0b10, 0b100, null_frag>;
defm FMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"fmlall", 0b00, 0b1000, null_frag>;
// TODO: Replace nxv16i8 by nxv16f8
defm FMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"fmlall", 0b01000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, null_frag>;
defm FMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"fmlall", 0b000001, MatrixOp32, ZZ_b, ZPR4b8>;
defm FMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"fmlall", 0b010001, MatrixOp32, ZZZZ_b, ZPR4b8>;
defm FMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"fmlall", 0b01000, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
defm FMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"fmlall", 0b01000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;


defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_frag>;

} // [HasSMEF8F32]
