//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
//
//===----------------------------------------------------------------------===//

// Single-operand i32 complex patterns. Each one is matched by the C++ selector
// named in its string argument.
// NOTE(review): ImmToReg<Base, Max> presumably converts a tile-number
// immediate into a register relative to Base - confirm against the selector.
def imm_to_tile8
    : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAB0, 0>", []>;
def imm_to_tile16
    : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAH0, 1>", []>;
def imm_to_tile32
    : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAS0, 3>", []>;
def imm_to_tile64
    : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAD0, 7>", []>;
def imm_to_tile128
    : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZAQ0, 15>", []>;
def imm_to_zt
    : ComplexPattern<i32, 1, "ImmToReg<AArch64::ZT0, 0>", []>;

// Two-operand tile-slice patterns (one per element size), all matched by
// SelectSMETileSlice with a per-size first template argument and a second
// argument of 1.
def tileslice8
    : ComplexPattern<i32, 2, "SelectSMETileSlice<15, 1>", []>;
def tileslice16
    : ComplexPattern<i32, 2, "SelectSMETileSlice<7, 1>", []>;
def tileslice32
    : ComplexPattern<i32, 2, "SelectSMETileSlice<3, 1>", []>;
def tileslice64
    : ComplexPattern<i32, 2, "SelectSMETileSlice<1, 1>", []>;
def tileslice128
    : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 1>", []>; // nop

// Tile-slice range patterns whose second selector argument is 2.
def tileslicerange3s2
    : ComplexPattern<i32, 2, "SelectSMETileSlice<14, 2>", []>;
def tileslicerange2s2
    : ComplexPattern<i32, 2, "SelectSMETileSlice<6, 2>", []>;
def tileslicerange1s2
    : ComplexPattern<i32, 2, "SelectSMETileSlice<2, 2>", []>;
def tileslicerange0s2
    : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 2>", []>;

// Tile-slice range patterns whose second selector argument is 4.
def tileslicerange2s4
    : ComplexPattern<i32, 2, "SelectSMETileSlice<12, 4>", []>;
def tileslicerange1s4
    : ComplexPattern<i32, 2, "SelectSMETileSlice<4, 4>", []>;
def tileslicerange0s4
    : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 4>", []>;

// Address-mode pattern yielding two operands, matched by
// SelectAddrModeIndexedSVE<0,15>.
def am_sme_indexed_b4
    : ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [],
                     [SDNPWantRoot]>;

// ZA load/store nodes: no results, three operands (int, pointer, int).
def SDTZALoadStore
    : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
def AArch64SMELdr
    : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore,
             [SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;
def AArch64SMEStr
    : SDNode<"AArch64ISD::SME_ZA_STR", SDTZALoadStore,
             [SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;

//===----------------------------------------------------------------------===//
// SME Pseudo Classes
//===----------------------------------------------------------------------===//

// Instruction mapping over records deriving from SMEPseudo2Instr: rows are
// grouped by PseudoName, the key column is IsInstr == 0 and the value column
// is IsInstr == 1.
def getSMEPseudoMap : InstrMapping {
  let FilterClass = "SMEPseudo2Instr";
  let RowFields   = ["PseudoName"];
  let ColFields   = ["IsInstr"];
  let KeyCol      = ["0"];
  let ValueCols   = [["1"]];
}

// Tag class consumed by getSMEPseudoMap. `name` is the shared row key and
// `instr` selects the column (0 or 1).
class SMEPseudo2Instr<string name, bit instr> {
  string PseudoName = name;
  bit    IsInstr    = instr;
}

// Outer-product pseudo: the tile is carried as an i32 immediate, followed by
// two predicates and two vectors.
class sme_outer_product_pseudo<ZPRRegOp zpr_ty, SMEMatrixTypeEnum za_flag>
    : Pseudo<(outs),
             (ins i32imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
                  zpr_ty:$zn, zpr_ty:$zm),
             []>,
      Sched<[]> {
  // Translated to the actual instructions in AArch64ISelLowering.cpp
  let SMEMatrixType      = za_flag;
  let usesCustomInserter = 1;
}

// ZA-array pseudo: multi-vector $Zn with a single vector $Zm.
class sme2_za_array_2op_multi_single_pseudo<string name, Operand index_ty,
                                            RegisterOperand multi_vector_ty,
                                            ZPRRegOp zpr_ty,
                                            SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3,
                  multi_vector_ty:$Zn, zpr_ty:$Zm),
             []> {
  let SMEMatrixType      = za_flag;
  let usesCustomInserter = 1;
}

// ZA-array pseudo: multi-vector $Zn with multi-vector $Zm.
class sme2_za_array_2op_multi_multi_pseudo<string name, Operand index_ty,
                                           RegisterOperand multi_vector_ty,
                                           SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3,
                  multi_vector_ty:$Zn, multi_vector_ty:$Zm),
             []> {
  let SMEMatrixType      = za_flag;
  let usesCustomInserter = 1;
}

// ZA-array pseudo: multi-vector $Zn, single vector $Zm and an immediate $i.
class sme2_za_array_2op_multi_index_pseudo<string name, Operand index_ty,
                                           RegisterOperand multi_vector_ty,
                                           ZPRRegOp zpr_ty, Operand imm_ty,
                                           SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3,
                  multi_vector_ty:$Zn, zpr_ty:$Zm, imm_ty:$i),
             []> {
  let SMEMatrixType      = za_flag;
  let usesCustomInserter = 1;
}

// Move-to-ZA pseudo: slice register from w8-w11 plus an immediate.
class sme2_move_to_za_pseudo<string name, Operand imm_ty,
                             RegisterOperand multi_vector_ty,
                             SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins MatrixIndexGPR32Op8_11:$Rs, imm_ty:$imm,
                  multi_vector_ty:$Zn),
             []> {
  let SMEMatrixType      = za_flag;
  let usesCustomInserter = 1;
}

// Move-to-tile pseudo: tile immediate, slice register from w12-w15 plus an
// immediate.
class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty,
                               RegisterOperand multi_vector_ty,
                               SMEMatrixTypeEnum za_flag>
    : SMEPseudo2Instr<name, 0>,
      Pseudo<(outs),
             (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm,
                  multi_vector_ty:$Zn),
             []> {
  let SMEMatrixType      = za_flag;
  let usesCustomInserter = 1;
}

//===----------------------------------------------------------------------===//
// SME pattern match helpers.
109//===----------------------------------------------------------------------===// 110 111class SME2_ZA_TwoOp_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, 112 ValueType vt, ComplexPattern tileslice> 113 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm), 114 (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm)>; 115 116 117class SME2_ZA_TwoOp_VG2_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, 118 ValueType vt, ComplexPattern tileslice> 119 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm), 120 (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), 121 zpr_ty:$Zm)>; 122class SME2_ZA_TwoOp_VG4_Multi_Single_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, 123 ValueType vt, ComplexPattern tileslice> 124 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), 125 vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm), 126 (!cast<Instruction>(name # _PSEUDO) $base, $offset, 127 (REG_SEQUENCE ZPR4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), 128 zpr_ty:$Zm)>; 129 130class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice> 131 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2), 132 (!cast<Instruction>(name # _PSEUDO) $base, $offset, 133 (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), 134 (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>; 135 136class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ValueType vt, ComplexPattern tileslice> 137 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), 138 
vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4), 139 (!cast<Instruction>(name # _PSEUDO) $base, $offset, 140 (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), 141 (REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1, vt:$Zm3, zsub2, vt:$Zm4, zsub3))>; 142 143class SME2_ZA_TwoOp_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt, 144 Operand imm_ty, ComplexPattern tileslice> 145 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm, (i32 imm_ty:$i)), 146 (!cast<Instruction>(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm, (i32 imm_ty:$i))>; 147 148 149class SME2_ZA_TwoOp_VG2_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt, 150 Operand imm_ty, ComplexPattern tileslice> 151 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)), 152 (!cast<Instruction>(name # _PSEUDO) $base, $offset, 153 (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), zpr_ty:$Zm, imm_ty:$i)>; 154 155class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic, Operand index_ty, ZPRRegOp zpr_ty, ValueType vt, 156 Operand imm_ty, ComplexPattern tileslice> 157 : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), 158 vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)), 159 (!cast<Instruction>(name # _PSEUDO) $base, $offset, 160 (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), 161 zpr_ty:$Zm, imm_ty:$i)>; 162 163class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty> 164 : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))), 165 (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>; 166 
// Four-vector input plus immediate; selects `name` directly (no pseudo).
class SME2_Sat_Shift_VG4_Pat<string name, SDPatternOperator intrinsic,
                             ValueType out_vt, ValueType in_vt,
                             Operand imm_ty>
    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4,
                             (i32 imm_ty:$i))),
          (!cast<Instruction>(name)
              (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1,
                                      in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3),
              imm_ty:$i)>;

// Four-vector input without an immediate; selects `name` directly.
class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic,
                       ValueType out_vt, ValueType in_vt>
    : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
          (!cast<Instruction>(name)
              (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1,
                                      in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;

// ZA-array slice (w8-w11 base) with two vectors -> name#_PSEUDO.
class SME2_ZA_VG1x2_Multi_Pat<string name, SDPatternOperator intrinsic,
                              ValueType vt, Operand index_ty,
                              ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset,
              (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;

// ZA-array slice (w8-w11 base) with four vectors -> name#_PSEUDO.
class SME2_ZA_VG1x4_Multi_Pat<string name, SDPatternOperator intrinsic,
                              ValueType vt, Operand index_ty,
                              ComplexPattern tileslice>
    : Pat<(intrinsic
              (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
          (!cast<Instruction>(name # _PSEUDO)
              $base, $offset,
              (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1,
                                      vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;

// Tile immediate + slice (w12-w15 base) with two vectors -> name#_PSEUDO.
class SME2_Tile_VG2_Multi_Pat<string name, SDPatternOperator intrinsic,
                              Operand tile_imm, ValueType vt,
                              Operand index_ty, ComplexPattern tileslice>
    : Pat<(intrinsic
              tile_imm:$tile,
              (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2),
          (!cast<Instruction>(name # _PSEUDO)
              $tile, $base, $offset,
              (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;

// Tile immediate + slice (w12-w15 base) with four vectors -> name#_PSEUDO.
class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic,
                              Operand tile_imm, ValueType vt,
                              Operand index_ty, ComplexPattern tileslice>
    : Pat<(intrinsic
              tile_imm:$tile,
              (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)),
              vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
          (!cast<Instruction>(name # _PSEUDO)
              $tile, $base, $offset,
              (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1,
                                      vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;

//===----------------------------------------------------------------------===//
// SME pattern match helpers.
//===----------------------------------------------------------------------===//

// Tile immediate, two predicates and two vectors -> name#_PSEUDO.
class SME_ZA_Tile_TwoPred_TwoVec_Pat<string name, SDPatternOperator intrinsic,
                                     Operand imm_ty, ValueType pg_ty,
                                     ValueType vt>
    : Pat<(intrinsic imm_ty:$tile, (pg_ty PPR3bAny:$Pn), (pg_ty PPR3bAny:$Pm),
                     vt:$Zn, vt:$Zm),
          (!cast<Instruction>(name # _PSEUDO) $tile, $Pn, $Pm, $Zn, $Zm)>;


//===----------------------------------------------------------------------===//
// SME smstart/smstop
//===----------------------------------------------------------------------===//

// SME defines three pstate fields to set or clear PSTATE.SM, PSTATE.ZA, or
// both fields:
//
//   MSR SVCRSM, #<imm1>
//   MSR SVCRZA, #<imm1>
//   MSR SVCRSMZA, #<imm1>
//
// It is tricky to use the existing pstate operand defined in
// AArch64SystemOperands.td since it only encodes 5 bits including op1;op2,
// when these fields are also encoded in CRm[3:1].
// MSR to an SVCR pstate field with a 1-bit immediate. The 3-bit field selector
// goes in Inst{11-9} and the immediate in Inst{8}.
def MSRpstatesvcrImm1
    : PstateWriteSimple<(ins svcr_op:$pstatefield, timm0_1:$imm), "msr",
                        "\t$pstatefield, $imm">,
      Sched<[WriteSys]> {
  bits<3> pstatefield;
  bit     imm;
  let Inst{18-16} = 0b011; // op1
  let Inst{11-9}  = pstatefield;
  let Inst{8}     = imm;
  let Inst{7-5}   = 0b011; // op2
  let hasPostISelHook = 1;
}

// smstart/smstop aliases: the first operand picks the field (0b001 = sm,
// 0b010 = za, 0b011 = both), the second is the value written.
def : InstAlias<"smstart",    (MSRpstatesvcrImm1 0b011, 0b1)>;
def : InstAlias<"smstart sm", (MSRpstatesvcrImm1 0b001, 0b1)>;
def : InstAlias<"smstart za", (MSRpstatesvcrImm1 0b010, 0b1)>;

def : InstAlias<"smstop",     (MSRpstatesvcrImm1 0b011, 0b0)>;
def : InstAlias<"smstop sm",  (MSRpstatesvcrImm1 0b001, 0b0)>;
def : InstAlias<"smstop za",  (MSRpstatesvcrImm1 0b010, 0b0)>;


//===----------------------------------------------------------------------===//
// SME Outer Products
//===----------------------------------------------------------------------===//

// Floating-point outer product. Inst{2-0} is left for the deriving record to
// fill in; the tile is both read ($_ZAda) and written ($ZAda).
class sme_fp_outer_product_inst<bit S, bits<2> sz, bits<2> op,
                                MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
                                string mnemonic>
    : I<(outs za_ty:$ZAda),
        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-25} = 0b1000000;
  let Inst{24}    = op{1};
  let Inst{23}    = 0b1;
  let Inst{22-21} = sz;
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = S;
  let Inst{3}     = op{0};

  let Constraints = "$ZAda = $_ZAda";
}

// 32-bit tile variant: 2-bit tile number, real instruction + pseudo + pattern.
multiclass sme_outer_product_fp32<bit S, bits<2> sz, ZPRRegOp zpr_ty,
                                  string mnemonic, SDPatternOperator op> {
  def NAME : sme_fp_outer_product_inst<S, sz, 0b00, TileOp32, zpr_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2}   = 0b0;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<zpr_ty, SMEMatrixTileS>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv4i1, nxv4f32>;
}

// 64-bit tile variant: 3-bit tile number.
multiclass sme_outer_product_fp64<bit S, string mnemonic,
                                  SDPatternOperator op> {
  def NAME : sme_fp_outer_product_inst<S, 0b10, 0b00, TileOp64, ZPR64,
                                       mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64, SMEMatrixTileD>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv2i1, nxv2f64>;
}

// 16-bit tile variant: 1-bit tile number, instruction only.
multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s, bits<2> op,
                                 ZPRRegOp zpr_ty> {
  def NAME : sme_fp_outer_product_inst<s, {0,bf}, op, TileOp16, zpr_ty,
                                       mnemonic> {
    bits<1> ZAda;
    let Inst{2-1} = 0b00;
    let Inst{0}   = ZAda;
  }
}

// Integer outer product. Inst{2-0} is left for the deriving record.
class sme_int_outer_product_inst<bits<3> opc, bit sz, bit sme2,
                                 MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
                                 string mnemonic>
    : I<(outs za_ty:$ZAda),
        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-25} = 0b1010000;
  let Inst{24}    = opc{2}; // u0
  let Inst{23}    = 0b1;
  let Inst{22}    = sz;
  let Inst{21}    = opc{1}; // u1
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = opc{0}; //S;
  let Inst{3}     = sme2;

  let Constraints = "$ZAda = $_ZAda";
}

// 8-bit sources accumulating into a 32-bit tile.
multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
                                     SDPatternOperator op> {
  def NAME : sme_int_outer_product_inst<opc, 0b0, 0b0, TileOp32,
                                        ZPR8, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2}   = 0b0;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileS>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv16i1, nxv16i8>;
}

// 16-bit sources accumulating into a 64-bit tile.
multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
                                     SDPatternOperator op> {
  def NAME : sme_int_outer_product_inst<opc, 0b1, 0b0, TileOp64,
                                        ZPR16, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileD>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv8i1, nxv8i16>;
}

// Widening outer product into a 32-bit tile. Inst{24} is the inverse of
// opc{2}, which is also placed in Inst{3}.
class sme_outer_product_widening_inst<bits<3> opc, ZPRRegOp zpr_ty,
                                      string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm,
             zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  bits<2> ZAda;
  let Inst{31-25} = 0b1000000;
  let Inst{24}    = !if(opc{2}, 0, 1);
  let Inst{23-22} = 0b10;
  let Inst{21}    = opc{1};
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = opc{0};
  let Inst{3}     = opc{2};
  let Inst{2}     = 0b0;
  let Inst{1-0}   = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

multiclass sme_bf16_outer_product<bits<3> opc, string mnemonic,
                                  SDPatternOperator op> {
  def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8bf16>;
}

multiclass sme_f16_outer_product<bits<3> opc, string mnemonic,
                                 SDPatternOperator op> {
  def NAME : sme_outer_product_widening_inst<opc, ZPR16, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>,
                       SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8f16>;
}

//===----------------------------------------------------------------------===//
// SME Add Vector to Tile
//===----------------------------------------------------------------------===//

// Inst{2-0} is left for the deriving record to fill in.
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
                                  ZPRRegOp zpr_ty, string mnemonic>
    : I<(outs tile_ty:$ZAda),
        (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
        "", []>,
      Sched<[]> {
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-23} = 0b110000001;
  let Inst{22}    = op;
  let Inst{21-17} = 0b01000;
  let Inst{16}    = V;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4-3}   = 0b00;

  let Constraints = "$ZAda = $_ZAda";
}

class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty,
                                    SMEMatrixTypeEnum za_flag>
    : Pseudo<(outs),
             (ins i32imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
      Sched<[]> {
  // Translated to the actual instructions in AArch64ISelLowering.cpp
  let SMEMatrixType      = za_flag;
  let usesCustomInserter = 1;
}

multiclass sme_add_vector_to_tile_u32<bit V, string mnemonic,
                                      SDPatternOperator op> {
  def NAME : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<2> ZAda;
    let Inst{2}   = 0b0;
    let Inst{1-0} = ZAda;
  }

  def _PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32, SMEMatrixTileS>,
                  SMEPseudo2Instr<NAME, 0>;

  def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
                (nxv4i32 ZPR32:$zn)),
            (!cast<Instruction>(NAME # _PSEUDO_S)
                timm32_0_3:$tile, $pn, $pm, $zn)>;
}

multiclass sme_add_vector_to_tile_u64<bit V, string mnemonic,
                                      SDPatternOperator op> {
  def NAME : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def _PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64, SMEMatrixTileD>,
                  SMEPseudo2Instr<NAME, 0>;

  // The 64-bit pattern is only enabled under HasSMEI16I64.
  let Predicates = [HasSMEI16I64] in {
  def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
                (nxv2i64 ZPR64:$zn)),
            (!cast<Instruction>(NAME # _PSEUDO_D)
                timm32_0_7:$tile, $pn, $pm, $zn)>;
  }
}

//===----------------------------------------------------------------------===//
// SME Contiguous Loads
//===----------------------------------------------------------------------===//

// Common load encoding. Inst{3-0} is left for the deriving record.
class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
                         string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Rn;
  let Inst{31-25} = 0b1110000;
  let Inst{24}    = Q;
  let Inst{23-22} = msz;
  let Inst{21}    = 0b0;
  let Inst{20-16} = Rm;
  let Inst{15}    = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;

  let mayLoad = 1;
}

// Scalar+scalar load into a tile slice; `is_col` is forwarded as the V bit.
class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
                         MatrixTileVectorOperand tile_ty, bit is_col,
                         Operand imm_ty, RegisterOperand gpr_ty>
    : sme_mem_ld_ss_base<
          Q, is_col, msz, (outs tile_ty:$ZAt),
          (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
               GPR64sp:$Rn, gpr_ty:$Rm),
          mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;

// Assembly aliases shared by loads and stores: a brace-less spelling, plus
// forms that omit the offset register (encoded as XZR). Only the braced
// XZR form has its final InstAlias argument set to 1.
multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
                                   MatrixTileVectorOperand tile_ty,
                                   Operand imm_ty, RegisterOperand gpr_ty,
                                   string pg_suffix=""> {
  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix
                           # ", [$Rn, $Rm]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm,
                        PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
  // Default XZR offset aliases
  def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix
                           # ", [$Rn]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm,
                        PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix
                           # ", [$Rn]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm,
                        PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}

// Instantiates the alias set once per element size (b/h/w/d/q).
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
                              string pg_suffix=""> {
  defm : sme_mem_ss_aliases_base<mnemonic # "b",
                                 !cast<Instruction>(inst # _B),
                                 !if(is_col, TileVectorOpV8, TileVectorOpH8),
                                 sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "h",
                                 !cast<Instruction>(inst # _H),
                                 !if(is_col, TileVectorOpV16, TileVectorOpH16),
                                 sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "w",
                                 !cast<Instruction>(inst # _S),
                                 !if(is_col, TileVectorOpV32, TileVectorOpH32),
                                 sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "d",
                                 !cast<Instruction>(inst # _D),
                                 !if(is_col, TileVectorOpV64, TileVectorOpH64),
                                 sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "q",
                                 !cast<Instruction>(inst # _Q),
                                 !if(is_col, TileVectorOpV128,
                                             TileVectorOpH128),
                                 sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
}

multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
  defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
}

// Load selection patterns: a base-only form using XZR as the offset, and a
// reg+reg form that is given AddedComplexity = 1.
multiclass sme_mem_ld_ss_patterns<Instruction Inst, SDPatternOperator Load,
                                  Operand tile_ty, Operand offset_ty,
                                  ComplexPattern addr,
                                  ComplexPattern tileslice> {
  // base, tileslice
  def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile,
                  (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                  offset_ty:$imm))),
            (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>;

  // reg + reg, tileslice
  let AddedComplexity = 1 in {
    def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
                    tile_ty:$tile,
                    (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                    offset_ty:$imm))),
              (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>;
  }
}

class sme_load_pseudo
    : Pseudo<(outs),
             (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx, i32imm:$imm,
                  PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset),
             []>,
      Sched<[]> {
  // Translated to the actual instructions in AArch64ISelLowering.cpp
  let usesCustomInserter = 1;
  let mayLoad = 1;
}

// One load per element size. Inst{3-0} is split between the tile number (ZAt)
// and the slice immediate; the _B form has no ZAt bits and the _Q form has no
// immediate bits.
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
  def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
                              is_col, sme_elm_idx0_15, GPR64shifted8> {
    bits<4> imm;
    let Inst{3-0} = imm;
  }
  def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
                              is_col, sme_elm_idx0_7, GPR64shifted16> {
    bits<1> ZAt;
    bits<3> imm;
    let Inst{3}   = ZAt;
    let Inst{2-0} = imm;
  }
  def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
                              is_col, sme_elm_idx0_3, GPR64shifted32> {
    bits<2> ZAt;
    bits<2> imm;
    let Inst{3-2} = ZAt;
    let Inst{1-0} = imm;
  }
  def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
                              is_col, sme_elm_idx0_1, GPR64shifted64> {
    bits<3> ZAt;
    bits<1> imm;
    let Inst{3-1} = ZAt;
    let Inst{0}   = imm;
  }
  def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
                              is_col, sme_elm_idx0_0, GPR64shifted128> {
    bits<4> ZAt;
    let Inst{3-0} = ZAt;
  }

  defm : sme_mem_ld_ss_aliases<NAME, is_col>;

  // Pseudo instructions for lowering intrinsics, using immediates instead of
  // tile registers.
  def _PSEUDO_B : sme_load_pseudo;
  def _PSEUDO_H : sme_load_pseudo;
  def _PSEUDO_S : sme_load_pseudo;
  def _PSEUDO_D : sme_load_pseudo;
  def _PSEUDO_Q : sme_load_pseudo;

  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
                                !if(is_col, int_aarch64_sme_ld1b_vert,
                                            int_aarch64_sme_ld1b_horiz),
                                sme_elm_idx0_0, timm32_0_15,
                                am_sve_regreg_lsl0, tileslice8>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                !if(is_col, int_aarch64_sme_ld1h_vert,
                                            int_aarch64_sme_ld1h_horiz),
                                timm32_0_1, timm32_0_7,
                                am_sve_regreg_lsl1, tileslice16>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
                                !if(is_col, int_aarch64_sme_ld1w_vert,
                                            int_aarch64_sme_ld1w_horiz),
                                timm32_0_3, timm32_0_3,
                                am_sve_regreg_lsl2, tileslice32>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
                                !if(is_col, int_aarch64_sme_ld1d_vert,
                                            int_aarch64_sme_ld1d_horiz),
                                timm32_0_7, timm32_0_1,
                                am_sve_regreg_lsl3, tileslice64>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                !if(is_col, int_aarch64_sme_ld1q_vert,
                                            int_aarch64_sme_ld1q_horiz),
                                timm32_0_15, sme_elm_idx0_0,
                                am_sve_regreg_lsl4, tileslice128>;
}

// Instantiates the _H (is_col = 0) and _V (is_col = 1) load families.
multiclass sme_mem_ld_ss<string mnemonic> {
  defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
}

//===----------------------------------------------------------------------===//
// SME Contiguous Stores
//===----------------------------------------------------------------------===//

// Common store encoding; same layout as the loads except Inst{21} = 1.
class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
                         string mnemonic, string argstr>
    : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Rn;
  let Inst{31-25} = 0b1110000;
  let Inst{24}    = Q;
  let Inst{23-22} = msz;
  let Inst{21}    = 0b1;
  let Inst{20-16} = Rm;
  let Inst{15}    = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;

  let mayStore = 1;
  let hasSideEffects = 1;
}

// Scalar+scalar store from a tile slice; `is_col` is forwarded as the V bit.
class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
                         MatrixTileVectorOperand tile_ty, bit is_col,
                         Operand imm_ty, RegisterOperand gpr_ty>
    : sme_mem_st_ss_base<
          Q, is_col, msz,
          (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm,
               PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm),
          mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;

multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
  defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
}

// Store selection patterns. The tile operand is matched through `imm2tile`;
// as with loads there is a base-only form (XZR offset) and a reg+reg form
// with AddedComplexity = 1.
multiclass sme_mem_st_ss_patterns<Instruction Inst, SDPatternOperator Store,
                                  Operand offset_ty,
                                  ComplexPattern imm2tile,
                                  ComplexPattern addr,
                                  ComplexPattern tileslice> {
  // base, tileslice
  def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile),
                   (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                   offset_ty:$imm))),
            (Inst $tile, $idx, $imm, $pg, $base, XZR)>;

  // reg + reg, tileslice
  let AddedComplexity = 1 in {
    def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
                     (imm2tile untyped:$tile),
                     (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                     offset_ty:$imm))),
              (Inst $tile, $idx, $imm, $pg, $base, $offset)>;
  }
}

// One store per element size; Inst{3-0} is split the same way as for loads.
multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
  def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
                              is_col, sme_elm_idx0_15, GPR64shifted8> {
    bits<4> imm;
    let Inst{3-0} = imm;
  }
  def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
                              is_col, sme_elm_idx0_7, GPR64shifted16> {
    bits<1> ZAt;
    bits<3> imm;
    let Inst{3}   = ZAt;
    let Inst{2-0} = imm;
  }
  def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
                              is_col, sme_elm_idx0_3, GPR64shifted32> {
    bits<2> ZAt;
    bits<2> imm;
    let Inst{3-2} = ZAt;
    let Inst{1-0} = imm;
  }
  def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
                              is_col, sme_elm_idx0_1, GPR64shifted64> {
    bits<3> ZAt;
    bits<1> imm;
    let Inst{3-1} = ZAt;
    let Inst{0}   = imm;
  }
  def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
                              is_col, sme_elm_idx0_0, GPR64shifted128> {
    bits<4> ZAt;
    let Inst{3-0} = ZAt;
  }

  defm : sme_mem_st_ss_aliases<NAME, is_col>;

  // Unlike the loads, the store patterns select the real instructions.
  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
                                !if(is_col, int_aarch64_sme_st1b_vert,
                                            int_aarch64_sme_st1b_horiz),
                                timm32_0_15, imm_to_tile8,
                                am_sve_regreg_lsl0, tileslice8>;
  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
                                !if(is_col, int_aarch64_sme_st1h_vert,
                                            int_aarch64_sme_st1h_horiz),
                                timm32_0_7, imm_to_tile16,
                                am_sve_regreg_lsl1, tileslice16>;
  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
                                !if(is_col, int_aarch64_sme_st1w_vert,
                                            int_aarch64_sme_st1w_horiz),
                                timm32_0_3, imm_to_tile32,
                                am_sve_regreg_lsl2, tileslice32>;
  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
                                !if(is_col, int_aarch64_sme_st1d_vert,
                                            int_aarch64_sme_st1d_horiz),
                                timm32_0_1, imm_to_tile64,
                                am_sve_regreg_lsl3, tileslice64>;
  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
                                !if(is_col, int_aarch64_sme_st1q_vert,
                                            int_aarch64_sme_st1q_horiz),
                                sme_elm_idx0_0, imm_to_tile128,
                                am_sve_regreg_lsl4, tileslice128>;
}

// Instantiates the _H (is_col = 0) and _V (is_col = 1) store families.
multiclass sme_mem_st_ss<string mnemonic> {
  defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
}

//===----------------------------------------------------------------------===//
// SME Save and
Restore Array 766//===----------------------------------------------------------------------===// 767 768class sme_spill_fill_base<bit isStore, dag outs, dag ins, string opcodestr> 769 : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "", 770 []>, 771 Sched<[]> { 772 bits<2> Rv; 773 bits<5> Rn; 774 bits<4> imm4; 775 let Inst{31-22} = 0b1110000100; 776 let Inst{21} = isStore; 777 let Inst{20-15} = 0b000000; 778 let Inst{14-13} = Rv; 779 let Inst{12-10} = 0b000; 780 let Inst{9-5} = Rn; 781 let Inst{4} = 0b0; 782 let Inst{3-0} = imm4; 783} 784 785let mayStore = 1 in 786class sme_spill_inst<string opcodestr> 787 : sme_spill_fill_base<0b1, (outs), 788 (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv, 789 sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 790 imm32_0_15:$offset), 791 opcodestr>; 792let mayLoad = 1 in 793class sme_fill_inst<string opcodestr> 794 : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt), 795 (ins MatrixIndexGPR32Op12_15:$Rv, 796 sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 797 imm32_0_15:$offset), 798 opcodestr>; 799multiclass sme_spill<string opcodestr> { 800 def NAME : sme_spill_inst<opcodestr>; 801 def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]", 802 (!cast<Instruction>(NAME) MatrixOp:$ZAt, 803 MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; 804 805 def : Pat<(AArch64SMEStr (i32 MatrixIndexGPR32Op12_15:$slice), (i64 GPR64sp:$base), (i32 sme_elm_idx0_15:$imm)), 806 (!cast<Instruction>(NAME) ZA, MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base, imm32_0_15:$imm)>; 807} 808 809multiclass sme_fill<string opcodestr> { 810 def NAME : sme_fill_inst<opcodestr>; 811 def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]", 812 (!cast<Instruction>(NAME) MatrixOp:$ZAt, 813 MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>; 814 def NAME # _PSEUDO 815 : Pseudo<(outs), 816 (ins MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm4, 817 GPR64sp:$base), []>, 818 Sched<[]> { 819 // Translated to 
actual instruction in AArch64ISelLowering.cpp 820 let usesCustomInserter = 1; 821 let mayLoad = 1; 822 } 823 def : Pat<(AArch64SMELdr MatrixIndexGPR32Op12_15:$slice, GPR64sp:$base, sme_elm_idx0_15:$imm), 824 (!cast<Instruction>(NAME # _PSEUDO) MatrixIndexGPR32Op12_15:$slice, sme_elm_idx0_15:$imm, GPR64sp:$base)>; 825} 826 827//===----------------------------------------------------------------------===// 828// Move instructions 829//===----------------------------------------------------------------------===// 830 831class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins, 832 string mnemonic, string argstr> 833 : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> { 834 bits<2> Rv; 835 bits<3> Pg; 836 bits<5> Zn; 837 let Inst{31-24} = 0b11000000; 838 let Inst{23-22} = sz; 839 let Inst{21-17} = 0b00000; 840 let Inst{16} = Q; 841 let Inst{15} = V; 842 let Inst{14-13} = Rv; 843 let Inst{12-10} = Pg; 844 let Inst{9-5} = Zn; 845 let Inst{4} = 0b0; 846} 847 848class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty, 849 bit is_col, Operand imm_ty, ZPRRegOp zpr_ty, 850 string mnemonic> 851 : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd), 852 (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 853 mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">{ 854 855 let Constraints = "$ZAd = $_ZAd"; 856} 857 858 859multiclass sme_vector_to_tile_aliases<Instruction inst, 860 MatrixTileVectorOperand tile_ty, 861 ZPRRegOp zpr_ty, Operand imm_ty> { 862 def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn", 863 (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>; 864} 865 866multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt, 867 ValueType ppr_vt, Operand imm_ty, 868 Operand offset_ty, 869 SDPatternOperator op, 870 ComplexPattern tileslice> { 871 def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, 872 offset_ty:$imm)), 873 
(ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)), 874 (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>; 875} 876 877class sme_mova_insert_pseudo<SMEMatrixTypeEnum za_flag> 878 : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx, 879 i32imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>, 880 Sched<[]> { 881 // Translated to the actual instructions in AArch64ISelLowering.cpp 882 let SMEMatrixType = za_flag; 883 let usesCustomInserter = 1; 884} 885 886multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> { 887 def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8, 888 TileVectorOpH8), 889 is_col, sme_elm_idx0_15, ZPR8, mnemonic>, 890 SMEPseudo2Instr<NAME # _B, 1> { 891 bits<4> imm; 892 let Inst{3-0} = imm; 893 } 894 def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16, 895 TileVectorOpH16), 896 is_col, sme_elm_idx0_7, ZPR16, mnemonic>, 897 SMEPseudo2Instr<NAME # _H, 1> { 898 bits<1> ZAd; 899 bits<3> imm; 900 let Inst{3} = ZAd; 901 let Inst{2-0} = imm; 902 } 903 def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32, 904 TileVectorOpH32), 905 is_col, sme_elm_idx0_3, ZPR32, mnemonic>, 906 SMEPseudo2Instr<NAME # _S, 1> { 907 bits<2> ZAd; 908 bits<2> imm; 909 let Inst{3-2} = ZAd; 910 let Inst{1-0} = imm; 911 } 912 def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64, 913 TileVectorOpH64), 914 is_col, sme_elm_idx0_1, ZPR64, mnemonic>, 915 SMEPseudo2Instr<NAME # _D, 1> { 916 bits<3> ZAd; 917 bits<1> imm; 918 let Inst{3-1} = ZAd; 919 let Inst{0} = imm; 920 } 921 def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128, 922 TileVectorOpH128), 923 is_col, sme_elm_idx0_0, ZPR128, mnemonic>, 924 SMEPseudo2Instr<NAME # _Q, 1> { 925 bits<4> ZAd; 926 bits<1> imm; 927 let Inst{3-0} = ZAd; 928 } 929 930 // Pseudo instructions for lowering intrinsics, using immediates instead of 931 // tile registers. 
932 def _PSEUDO_B : sme_mova_insert_pseudo<SMEMatrixTileB>, SMEPseudo2Instr<NAME # _B, 0>; 933 def _PSEUDO_H : sme_mova_insert_pseudo<SMEMatrixTileH>, SMEPseudo2Instr<NAME # _H, 0>; 934 def _PSEUDO_S : sme_mova_insert_pseudo<SMEMatrixTileS>, SMEPseudo2Instr<NAME # _S, 0>; 935 def _PSEUDO_D : sme_mova_insert_pseudo<SMEMatrixTileD>, SMEPseudo2Instr<NAME # _D, 0>; 936 def _PSEUDO_Q : sme_mova_insert_pseudo<SMEMatrixTileQ>, SMEPseudo2Instr<NAME # _Q, 0>; 937 938 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B), 939 !if(is_col, TileVectorOpV8, 940 TileVectorOpH8), 941 ZPR8, sme_elm_idx0_15>; 942 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H), 943 !if(is_col, TileVectorOpV16, 944 TileVectorOpH16), 945 ZPR16, sme_elm_idx0_7>; 946 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S), 947 !if(is_col, TileVectorOpV32, 948 TileVectorOpH32), 949 ZPR32, sme_elm_idx0_3>; 950 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D), 951 !if(is_col, TileVectorOpV64, 952 TileVectorOpH64), 953 ZPR64, sme_elm_idx0_1>; 954 defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q), 955 !if(is_col, TileVectorOpV128, 956 TileVectorOpH128), 957 ZPR128, sme_elm_idx0_0>; 958 959 defvar op = !if(is_col, int_aarch64_sme_write_vert, 960 int_aarch64_sme_write_horiz); 961 962 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B), 963 nxv16i8, nxv16i1, sme_elm_idx0_0, sme_elm_idx0_15, 964 op, tileslice8>; 965 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H), 966 nxv8i16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, 967 op, tileslice16>; 968 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H), 969 nxv8f16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, 970 op, tileslice16>; 971 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H), 972 nxv8bf16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, 973 op, tileslice16>; 974 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # 
_PSEUDO_S), 975 nxv4i32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3, 976 op, tileslice32>; 977 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S), 978 nxv4f32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3, 979 op, tileslice32>; 980 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D), 981 nxv2i64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1, 982 op, tileslice64>; 983 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D), 984 nxv2f64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1, 985 op, tileslice64>; 986 987 defvar opq = !if(is_col, int_aarch64_sme_writeq_vert, 988 int_aarch64_sme_writeq_horiz); 989 990 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 991 nxv16i8, nxv16i1, sme_elm_idx0_15, 992 sme_elm_idx0_0, opq, tileslice128>; 993 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 994 nxv8i16, nxv8i1, sme_elm_idx0_15, 995 sme_elm_idx0_0, opq, tileslice128>; 996 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 997 nxv8f16, nxv8i1, sme_elm_idx0_15, 998 sme_elm_idx0_0, opq, tileslice128>; 999 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1000 nxv8bf16, nxv8i1, sme_elm_idx0_15, 1001 sme_elm_idx0_0, opq, tileslice128>; 1002 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1003 nxv4i32, nxv4i1, sme_elm_idx0_15, 1004 sme_elm_idx0_0, opq, tileslice128>; 1005 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1006 nxv4f32, nxv4i1, sme_elm_idx0_15, 1007 sme_elm_idx0_0, opq, tileslice128>; 1008 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1009 nxv2i64, nxv2i1, sme_elm_idx0_15, 1010 sme_elm_idx0_0, opq, tileslice128>; 1011 defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q), 1012 nxv2f64, nxv2i1, sme_elm_idx0_15, 1013 sme_elm_idx0_0, opq, tileslice128>; 1014} 1015 1016multiclass sme_vector_to_tile<string mnemonic> { 1017 defm _H : 
sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>; 1018 defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>; 1019} 1020 1021class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins, 1022 string mnemonic, string argstr> 1023 : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> { 1024 bits<2> Rv; 1025 bits<3> Pg; 1026 bits<5> Zd; 1027 let Inst{31-24} = 0b11000000; 1028 let Inst{23-22} = sz; 1029 let Inst{21-17} = 0b00001; 1030 let Inst{16} = Q; 1031 let Inst{15} = V; 1032 let Inst{14-13} = Rv; 1033 let Inst{12-10} = Pg; 1034 let Inst{9} = 0b0; 1035 let Inst{4-0} = Zd; 1036} 1037 1038class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty, 1039 MatrixTileVectorOperand tile_ty, 1040 bit is_col, Operand imm_ty, string mnemonic> 1041 : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd), 1042 (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1043 mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> { 1044 1045 let Constraints = "$Zd = $_Zd"; 1046} 1047 1048multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty, 1049 MatrixTileVectorOperand tile_ty, 1050 Operand imm_ty > { 1051 def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]", 1052 (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>; 1053} 1054 1055multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt, 1056 ValueType ppr_vt, Operand offset_ty, 1057 ComplexPattern imm2tile, 1058 ComplexPattern tileslice, 1059 SDPatternOperator op> { 1060 def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg), 1061 (imm2tile untyped:$tile), MatrixIndexGPR32Op12_15:$idx)), 1062 (inst $passthru, $pg, $tile, $idx, 0)>; 1063 let AddedComplexity = 1 in { 1064 def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg), 1065 (imm2tile untyped:$tile), 1066 (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, 1067 offset_ty:$imm)))), 1068 (inst $passthru, $pg, $tile, $idx, $imm)>; 1069 } 
1070} 1071 1072multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> { 1073 def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8, 1074 TileVectorOpH8), 1075 is_col, sme_elm_idx0_15, mnemonic> { 1076 bits<4> imm; 1077 let Inst{8-5} = imm; 1078 } 1079 def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16, 1080 TileVectorOpH16), 1081 is_col, sme_elm_idx0_7, mnemonic> { 1082 bits<1> ZAn; 1083 bits<3> imm; 1084 let Inst{8} = ZAn; 1085 let Inst{7-5} = imm; 1086 } 1087 def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32, 1088 TileVectorOpH32), 1089 is_col, sme_elm_idx0_3, mnemonic> { 1090 bits<2> ZAn; 1091 bits<2> imm; 1092 let Inst{8-7} = ZAn; 1093 let Inst{6-5} = imm; 1094 } 1095 def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64, 1096 TileVectorOpH64), 1097 is_col, sme_elm_idx0_1, mnemonic> { 1098 bits<3> ZAn; 1099 bits<1> imm; 1100 let Inst{8-6} = ZAn; 1101 let Inst{5} = imm; 1102 } 1103 def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128, 1104 TileVectorOpH128), 1105 is_col, sme_elm_idx0_0, mnemonic> { 1106 bits<4> ZAn; 1107 let Inst{8-5} = ZAn; 1108 } 1109 1110 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8, 1111 !if(is_col, TileVectorOpV8, 1112 TileVectorOpH8), sme_elm_idx0_15>; 1113 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16, 1114 !if(is_col, TileVectorOpV16, 1115 TileVectorOpH16), sme_elm_idx0_7>; 1116 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32, 1117 !if(is_col, TileVectorOpV32, 1118 TileVectorOpH32), sme_elm_idx0_3>; 1119 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64, 1120 !if(is_col, TileVectorOpV64, 1121 TileVectorOpH64), sme_elm_idx0_1>; 1122 defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128, 1123 !if(is_col, TileVectorOpV128, 1124 TileVectorOpH128), sme_elm_idx0_0>; 1125 1126 defvar op = 
!if(is_col, int_aarch64_sme_read_vert, 1127 int_aarch64_sme_read_horiz); 1128 1129 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B), 1130 nxv16i8, nxv16i1, sme_elm_idx0_15, 1131 imm_to_tile8, tileslice8, op>; 1132 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H), 1133 nxv8i16, nxv8i1, sme_elm_idx0_7, 1134 imm_to_tile16, tileslice16, op>; 1135 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H), 1136 nxv8f16, nxv8i1, sme_elm_idx0_7, 1137 imm_to_tile16, tileslice16, op>; 1138 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H), 1139 nxv8bf16, nxv8i1, sme_elm_idx0_7, 1140 imm_to_tile16, tileslice16, op>; 1141 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S), 1142 nxv4i32, nxv4i1, sme_elm_idx0_3, 1143 imm_to_tile32, tileslice32, op>; 1144 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S), 1145 nxv4f32, nxv4i1, sme_elm_idx0_3, 1146 imm_to_tile32, tileslice32, op>; 1147 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D), 1148 nxv2i64, nxv2i1, sme_elm_idx0_1, 1149 imm_to_tile64, tileslice64, op>; 1150 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D), 1151 nxv2f64, nxv2i1, sme_elm_idx0_1, 1152 imm_to_tile64, tileslice64, op>; 1153 1154 defvar opq = !if(is_col, int_aarch64_sme_readq_vert, 1155 int_aarch64_sme_readq_horiz); 1156 1157 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1158 nxv16i8, nxv16i1, sme_elm_idx0_0, 1159 imm_to_tile128, tileslice128, opq>; 1160 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1161 nxv8i16, nxv8i1, sme_elm_idx0_0, 1162 imm_to_tile128, tileslice128, opq>; 1163 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1164 nxv8f16, nxv8i1, sme_elm_idx0_0, 1165 imm_to_tile128, tileslice128, opq>; 1166 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1167 nxv8bf16, nxv8i1, sme_elm_idx0_0, 1168 imm_to_tile128, tileslice128, opq>; 1169 defm : 
sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1170 nxv4i32, nxv4i1, sme_elm_idx0_0, 1171 imm_to_tile128, tileslice128, opq>; 1172 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1173 nxv4f32, nxv4i1, sme_elm_idx0_0, 1174 imm_to_tile128, tileslice128, opq>; 1175 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1176 nxv2i64, nxv2i1, sme_elm_idx0_0, 1177 imm_to_tile128, tileslice128, opq>; 1178 defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q), 1179 nxv2f64, nxv2i1, sme_elm_idx0_0, 1180 imm_to_tile128, tileslice128, opq>; 1181} 1182 1183multiclass sme_tile_to_vector<string mnemonic> { 1184 defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>; 1185 defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>; 1186} 1187 1188//===----------------------------------------------------------------------===// 1189// SME Zero 1190//===----------------------------------------------------------------------===// 1191 1192// NOTE: This definition isn't really correct because there are outputs, i.e. 1193// the tile registers being zeroed. We fix this up in a custom inserter that 1194// marks the appropriate registers as being implicitly defined. 
// ZERO instruction format: the MatrixTileList operand is encoded as an 8-bit
// value in Inst{7-0}; the upper 24 bits are fixed.
class sme_zero_inst<string mnemonic>
    : I<(outs), (ins MatrixTileList:$imm),
        mnemonic, "\t$imm", "", []>,
      Sched<[]> {
  bits<8> imm;
  let Inst{31-8} = 0b110000000000100000000000;
  let Inst{7-0}  = imm;
}

// ZERO instruction, assembly aliases that spell an 8-bit list value with
// wider tile names, a custom-inserted pseudo, and the int_aarch64_sme_zero
// pattern that selects the pseudo.
multiclass sme_zero<string mnemonic> {
  def NAME : sme_zero_inst<mnemonic>;

  // Whole array and 16-bit tiles.
  def : InstAlias<"zero\t\\{za\\}",
                  (!cast<Instruction>(NAME) 0b11111111), 1>;
  def : InstAlias<"zero\t\\{za0.h\\}",
                  (!cast<Instruction>(NAME) 0b01010101), 1>;
  def : InstAlias<"zero\t\\{za1.h\\}",
                  (!cast<Instruction>(NAME) 0b10101010), 1>;

  // Single 32-bit tiles.
  def : InstAlias<"zero\t\\{za0.s\\}",
                  (!cast<Instruction>(NAME) 0b00010001), 1>;
  def : InstAlias<"zero\t\\{za1.s\\}",
                  (!cast<Instruction>(NAME) 0b00100010), 1>;
  def : InstAlias<"zero\t\\{za2.s\\}",
                  (!cast<Instruction>(NAME) 0b01000100), 1>;
  def : InstAlias<"zero\t\\{za3.s\\}",
                  (!cast<Instruction>(NAME) 0b10001000), 1>;

  // Pairs of 32-bit tiles.
  def : InstAlias<"zero\t\\{za0.s,za1.s\\}",
                  (!cast<Instruction>(NAME) 0b00110011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b10011001), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s\\}",
                  (!cast<Instruction>(NAME) 0b01100110), 1>;
  def : InstAlias<"zero\t\\{za2.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b11001100), 1>;

  // Triples of 32-bit tiles.
  def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}",
                  (!cast<Instruction>(NAME) 0b01110111), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b10111011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b11011101), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}",
                  (!cast<Instruction>(NAME) 0b11101110), 1>;

  def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>,
                       Sched<[]> {
    // Translated to the actual instructions in AArch64ISelLowering.cpp
    let usesCustomInserter = 1;
  }

  def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm),
            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_255:$imm)>;
}

//===----------------------------------------------------------------------===//
// SVE2 Instructions
//===----------------------------------------------------------------------===//

// Predicated, destructive unary format on ZPR128 operands. $Zd is tied to the
// extra input $_Zd.
class sve2_int_perm_revd<string asm>
    : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
        asm, "\t$Zd, $Pg/m, $Zn", "", []>,
      Sched<[]> {
  bits<5> Zd;
  bits<3> Pg;
  bits<5> Zn;
  let Inst{31-24} = 0b00000101;
  let Inst{23-22} = 0b00; // size
  let Inst{21-13} = 0b101110100;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveUnary;
  let ElementSize = ZPR128.ElementSize;
}

// The instruction plus passthru patterns mapping `op` onto it for each
// integer and floating-point scalable vector type listed below.
multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
  def NAME : sve2_int_perm_revd<asm>;

  // Integer element types.
  def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64,
                              !cast<Instruction>(NAME)>;

  // Floating-point element types.
  def : SVE_1_Op_Passthru_Pat<nxv8bf16, op, nxv8i1, nxv8bf16,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32,
                              !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64,
                              !cast<Instruction>(NAME)>;
}

// Three-operand destructive clamp format: $Zd is tied to the input $_Zd,
// `sz` is the element size field and `U` is placed in Inst{10}.
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
        asm, "\t$Zd, $Zn, $Zm", "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<5> Zn;
  bits<5> Zd;
  let Inst{31-24} = 0b01000100;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b0;
  let Inst{20-16} = Zm;
  let Inst{15-11} = 0b11000;
  let Inst{10}    = U;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveOther;
  let ElementSize = zpr_ty.ElementSize;
}

// One clamp instruction per integer element size plus the matching
// three-operand selection patterns for `op`.
multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
  def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
  def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
  def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
  def _D : sve2_clamp<asm, 0b11, U, ZPR64>;

  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8,
                     !cast<Instruction>(NAME # _B)>;
  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16,
                     !cast<Instruction>(NAME # _H)>;
  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32,
                     !cast<Instruction>(NAME # _S)>;
  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64,
                     !cast<Instruction>(NAME # _D)>;
}

// Predicate select indexed by a register plus immediate. Inst{23-22} and
// Inst{20-18} are left for the per-size definitions, which pack the immediate
// and a size marker into them.
class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
    : I<(outs PPRAny:$Pd),
        (ins PPRAny:$Pn, ppr_ty:$Pm,
             MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<4> Pn;
  bits<4> Pm;
  bits<4> Pd;
  let Inst{31-24} = 0b00100101;
  let Inst{21}    = 0b1;
  let Inst{17-16} = Rv;
  let Inst{15-14} = 0b01;
  let Inst{13-10} = Pn;
  let Inst{9}     = 0b0;
  let Inst{8-5}   = Pm;
  let Inst{4}     = 0b0;
  let Inst{3-0}   = Pd;
}

// Per-element-size predicate select instructions, aliases accepting
// PNRasPPRAny operands for $Pd/$Pn, and selection patterns for `op` with and
// without a folded slice immediate.
multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
  def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
    bits<4> imm;
    let Inst{23-22} = imm{3-2};
    let Inst{20-19} = imm{1-0};
    let Inst{18}    = 0b1;
  }
  def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
    bits<3> imm;
    let Inst{23-22} = imm{2-1};
    let Inst{20}    = imm{0};
    let Inst{19-18} = 0b10;
  }
  def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
    bits<2> imm;
    let Inst{23-22} = imm{1-0};
    let Inst{20-18} = 0b100;
  }
  def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
    bits<1> imm;
    let Inst{23}    = imm;
    let Inst{22}    = 0b1;
    let Inst{20-18} = 0b000;
  }

  // Aliases using PNRasPPRAny for the destination and first source.
  def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
                  (!cast<Instruction>(NAME # _B) PNRasPPRAny:$Pd,
                      PNRasPPRAny:$Pn, PPR8:$Pm,
                      MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm), 0>;
  def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
                  (!cast<Instruction>(NAME # _H) PNRasPPRAny:$Pd,
                      PNRasPPRAny:$Pn, PPR16:$Pm,
                      MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_7:$imm), 0>;
  def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
                  (!cast<Instruction>(NAME # _S) PNRasPPRAny:$Pd,
                      PNRasPPRAny:$Pn, PPR32:$Pm,
                      MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_3:$imm), 0>;
  def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
                  (!cast<Instruction>(NAME # _D) PNRasPPRAny:$Pd,
                      PNRasPPRAny:$Pn, PPR64:$Pm,
                      MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>;

  // Index register only: the immediate operand is 0.
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
                         MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
                         MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
                         MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
                         MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;

  // Index register + immediate, split by the tileslice ComplexPatterns.
  let AddedComplexity = 1 in {
    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
                           (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx,
                                            sme_elm_idx0_15:$imm)))),
              (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
                           (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx,
                                             sme_elm_idx0_7:$imm)))),
              (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
                           (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx,
                                             sme_elm_idx0_3:$imm)))),
              (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
                           (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx,
                                             sme_elm_idx0_1:$imm)))),
              (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
  }
}

//===----------------------------------------------------------------------===//
// SME2 Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SME2 single-multi ternary int/fp, two/four registers

// ZA-array accumulate with a multi-vector $Zn and a single vector $Zm.
// The 7-bit `op` is scattered over the encoding: op{6} -> Inst{22},
// op{5} -> Inst{20} (also selects the "vgx4"/"vgx2" text),
// op{4-2} -> Inst{12-10}, op{1-0} -> Inst{4-3}.
class sme2_dot_mla_add_sub_array_vg24_single<bits<7> op,
                                             MatrixOperand matrix_ty,
                                             RegisterOperand multi_vector_ty,
                                             ZPRRegOp zpr_ty,
                                             string mnemonic>
    : I<(outs matrix_ty:$ZAd),
        (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
             sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm),
        mnemonic,
        "\t$ZAd[$Rv, $imm3, " # !if(op{5}, "vgx4", "vgx2") # "], $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<4> Zm;
  bits<5> Zn;
  bits<2> Rv;
  bits<3> imm3;
  let Inst{31-23} = 0b110000010;
  let Inst{22}    = op{6}; // sz
  let Inst{21}    = 0b1;
  let Inst{20}    = op{5}; // vgx4
  let Inst{19-16} = Zm;
  let Inst{15}    = 0b0;
  let Inst{14-13} = Rv;
  let Inst{12-10} = op{4-2};
  let Inst{9-5}   = Zn;
  let Inst{4-3}   = op{1-0};
  let Inst{2-0}   = imm3;
  let Constraints = "$ZAd = $_ZAd";
}

// Instruction plus an alias that omits the "vgx2"/"vgx4" text.
multiclass sme2_dot_mla_add_sub_array_vg24_single<
    string mnemonic, bits<7> op, MatrixOperand matrix_ty,
    RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> {
  def NAME : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty,
                                                    multi_vector_ty, zpr_ty,
                                                    mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
}

// As above, plus a pseudo and the VG2 multi/single selection pattern for
// `intrinsic`.
multiclass sme2_dot_mla_add_sub_array_vg2_single<
    string mnemonic, bits<7> op, MatrixOperand matrix_ty,
    RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, ValueType vty,
    SDPatternOperator intrinsic> {
  def NAME : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty,
                                                    multi_vector_ty, zpr_ty,
                                                    mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;

  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7,
                                                      multi_vector_ty, zpr_ty,
                                                      SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                           zpr_ty, vty, tileslice16>;
}

// Four-register counterpart of sme2_dot_mla_add_sub_array_vg2_single; differs
// only in using the VG4 multi/single selection pattern.
multiclass sme2_dot_mla_add_sub_array_vg4_single<
    string mnemonic, bits<7> op, MatrixOperand matrix_ty,
    RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty, ValueType vty,
    SDPatternOperator intrinsic> {
  def NAME : sme2_dot_mla_add_sub_array_vg24_single<op, matrix_ty,
                                                    multi_vector_ty, zpr_ty,
                                                    mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;

  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, sme_elm_idx0_7,
                                                      multi_vector_ty, zpr_ty,
                                                      SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                           zpr_ty, vty, tileslice16>;
}

//===----------------------------------------------------------------------===//
// SME2 multiple vectors ternary INT/FP two and four registers

// ZA-array accumulate with two multi-vector sources ("vgx2" form). $Zn and
// $Zm are 4-bit fields. `op` is split as op{6} -> Inst{22},
// op{5-3} -> Inst{12-10}, op{2-0} -> Inst{5-3}.
class sme2_dot_mla_add_sub_array_vg2_multi<bits<7> op,
                                           MatrixOperand matrix_ty,
                                           RegisterOperand multi_vector_ty,
                                           string mnemonic>
    : I<(outs matrix_ty:$ZAd),
        (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
             sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm),
        mnemonic, "\t$ZAd[$Rv, $imm3, vgx2], $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<4> Zm;
  bits<4> Zn;
  bits<2> Rv;
  bits<3> imm3;
  let Inst{31-23} = 0b110000011;
  let Inst{22}    = op{6}; // sz
  let Inst{21}    = 0b1;
  let Inst{20-17} = Zm;
  let Inst{16-15} = 0b00;
  let Inst{14-13} = Rv;
  let Inst{12-10} = op{5-3};
  let Inst{9-6}   = Zn;
  let Inst{5-3}   = op{2-0};
  let Inst{2-0}   = imm3;
  let Constraints = "$ZAd = $_ZAd";
}

// Instruction, pseudo, VG2 multi/multi selection pattern for `intrinsic`, and
// an alias that omits the "vgx2" text.
multiclass sme2_dot_mla_add_sub_array_vg2_multi<
    string mnemonic, bits<7> op, MatrixOperand matrix_ty,
    RegisterOperand multi_vector_ty, ValueType zpr_ty,
    SDPatternOperator intrinsic> {
  def NAME : sme2_dot_mla_add_sub_array_vg2_multi<op, matrix_ty,
                                                  multi_vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7,
                                                     multi_vector_ty,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          zpr_ty, tileslice16>;

  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}

// "vgx4" counterpart of the class above: $Zn and $Zm shrink to 3-bit fields
// and the freed bits (Inst{17-15}, Inst{6}) become fixed.
class sme2_dot_mla_add_sub_array_vg4_multi<bits<7> op,
                                           MatrixOperand matrix_ty,
                                           RegisterOperand multi_vector_ty,
                                           string mnemonic>
    : I<(outs matrix_ty:$ZAd),
        (ins matrix_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rv,
             sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm),
        mnemonic, "\t$ZAd[$Rv, $imm3, vgx4], $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<3> Zm;
  bits<3> Zn;
  bits<2> Rv;
  bits<3> imm3;
  let Inst{31-23} = 0b110000011;
  let Inst{22}    = op{6}; // sz
  let Inst{21}    = 0b1;
  let Inst{20-18} = Zm;
  let Inst{17-15} = 0b010;
  let Inst{14-13} = Rv;
  let Inst{12-10} = op{5-3};
  let Inst{9-7}   = Zn;
  let Inst{6}     = 0b0;
  let Inst{5-3}   = op{2-0};
  let Inst{2-0}   = imm3;
  let Constraints = "$ZAd = $_ZAd";
}

// Instruction, pseudo, VG4 multi/multi selection pattern for `intrinsic`, and
// an alias that omits the "vgx4" text.
multiclass sme2_dot_mla_add_sub_array_vg4_multi<
    string mnemonic, bits<7> op, MatrixOperand matrix_ty,
    RegisterOperand multi_vector_ty, ValueType zpr_ty,
    SDPatternOperator intrinsic> {
  def NAME : sme2_dot_mla_add_sub_array_vg4_multi<op, matrix_ty,
                                                  multi_vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, sme_elm_idx0_7,
                                                     multi_vector_ty,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, sme_elm_idx0_7,
                                          zpr_ty, tileslice16>;

  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm3], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAd,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}

//===----------------------------------------------------------------------===//
// SME2 multiple vectors binary two or four registers

// Common encoding for ZA-array accumulate with one multi-vector source.
// `vg4` is placed in Inst{16} and selects the "vgx4"/"vgx2" text. The $Zm
// field (Inst{9-6}) is left for the vg2/vg4 subclasses to fill in.
class sme2_multivec_accum_add_sub<string mnemonic, bit sz, bit vg4,
                                  bits<3> op,
                                  MatrixOperand matrix_ty,
                                  RegisterOperand vector_ty>
    : I<(outs matrix_ty:$ZAdn),
        (ins matrix_ty:$_ZAdn, MatrixIndexGPR32Op8_11:$Rv,
             sme_elm_idx0_7:$imm3, vector_ty:$Zm),
        mnemonic,
        "\t$ZAdn[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zm",
        "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<3> imm3;
  let Inst{31-23} = 0b110000011;
  let Inst{22}    = sz;
  let Inst{21-19} = 0b100;
  let Inst{18}    = op{2};
  let Inst{17}    = 0b0;
  let Inst{16}    = vg4;
  let Inst{15}    = 0b0;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b111;
  let Inst{5}     = 0b0;
  let Inst{4-3}   = op{1-0};
  let Inst{2-0}   = imm3;

  let Constraints = "$ZAdn = $_ZAdn";
}

// Two-register form (vg4 = 0): $Zm is a 4-bit field in Inst{9-6}.
class sme2_multivec_accum_add_sub_vg2<string mnemonic, bit sz, bits<3> op,
                                      MatrixOperand matrix_ty,
                                      RegisterOperand vector_ty>
    : sme2_multivec_accum_add_sub<mnemonic, sz, 0b0, op, matrix_ty,
                                  vector_ty> {
  bits<4> Zm;
  let Inst{9-6} = Zm;
}

// Two-register instruction (op{3} is passed as `sz`, op{2-0} as the opcode),
// alias without "vgx2", pseudo and VG1x2 selection pattern.
multiclass sme2_multivec_accum_add_sub_vg2<string mnemonic, bits<4> op,
                                           MatrixOperand matrix_ty,
                                           RegisterOperand vector_ty,
                                           ValueType vty,
                                           SDPatternOperator intrinsic> {
  def NAME : sme2_multivec_accum_add_sub_vg2<mnemonic, op{3}, op{2-0},
                                             matrix_ty, vector_ty>,
             SMEPseudo2Instr<NAME, 1>;

  def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAdn,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      vector_ty:$Zm), 0>;

  def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty,
                                       SMEMatrixArray>;

  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7,
                                tileslice16>;
}

// Four-register form (vg4 = 1): $Zm is a 3-bit field in Inst{9-7} and
// Inst{6} is fixed to 0.
class sme2_multivec_accum_add_sub_vg4<string mnemonic, bit sz, bits<3> op,
                                      MatrixOperand matrix_ty,
                                      RegisterOperand vector_ty>
    : sme2_multivec_accum_add_sub<mnemonic, sz, 0b1, op, matrix_ty,
                                  vector_ty> {
  bits<3> Zm;
  let Inst{9-7} = Zm;
  let Inst{6}   = 0b0;
}

// Four-register instruction (op{3} is passed as `sz`, op{2-0} as the opcode),
// alias without "vgx4", pseudo and VG1x4 selection pattern.
multiclass sme2_multivec_accum_add_sub_vg4<string mnemonic, bits<4> op,
                                           MatrixOperand matrix_ty,
                                           RegisterOperand vector_ty,
                                           ValueType vty,
                                           SDPatternOperator intrinsic> {
  def NAME : sme2_multivec_accum_add_sub_vg4<mnemonic, op{3}, op{2-0},
                                             matrix_ty, vector_ty>,
             SMEPseudo2Instr<NAME, 1>;

  def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAdn,
                      MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
                      vector_ty:$Zm), 0>;

  def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty,
                                       SMEMatrixArray>;

  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7,
                                tileslice16>;
}

//===----------------------------------------------------------------------===//
// SME2 Multi-vector - Multiple and Single SVE Destructive
// Two and Four registers

// Destructive two-register multi-vector operation with a single-vector second
// source. $Zdn is tied to $_Zdn and encoded in 4 bits (Inst{4-1}); `op` is
// split as op{6-1} -> Inst{10-5} and op{0} -> Inst{0}.
class sme2_sve_destructive_vector_vg2_single<bits<2> sz, bits<7> op,
                                             RegisterOperand vector_ty,
                                             ZPRRegOp zpr_ty,
                                             string mnemonic>
    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>,
      Sched<[]> {
  bits<4> Zm;
  bits<4> Zdn;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b10;
  let Inst{19-16} = Zm;
  let Inst{15-11} = 0b10100;
  let Inst{10-5}  = op{6-1};
  let Inst{4-1}   = Zdn;
  let Inst{0}     = op{0};

  let Constraints = "$Zdn = $_Zdn";
}

// Floating-point instantiation: _H, _S and _D only.
multiclass sme2_fp_sve_destructive_vector_vg2_single<string mnemonic,
                                                     bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r,
                                                  ZPR4b16, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r,
                                                  ZPR4b32, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r,
                                                  ZPR4b64, mnemonic>;
}

// Integer instantiation: adds the _B form to the sizes above.
multiclass sme2_int_sve_destructive_vector_vg2_single<string mnemonic,
                                                      bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_b_mul_r,
                                                  ZPR4b8, mnemonic>;
  def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r,
                                                  ZPR4b16, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r,
                                                  ZPR4b32, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r,
                                                  ZPR4b64, mnemonic>;
}

// SME2.1 fmax/fmin instructions.
// Single _H definition using sz = 0b00 with .h register operands.
multiclass sme2p1_bf_max_min_vector_vg2_single<string mnemonic, bits<7>op> {
  def _H : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_h_mul_r,
                                                  ZPR4b16, mnemonic>;
}

// Destructive four-register group with a single-vector $Zm. Same layout as
// the two-register format except bits 15-11 = 0b10101, $Zdn is three bits
// wide (bits 4-2) and bit 1 is fixed to zero.
class sme2_sve_destructive_vector_vg4_single<bits<2> sz, bits<7> op,
                                             RegisterOperand vector_ty,
                                             ZPRRegOp zpr_ty,
                                             string mnemonic>
    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>, Sched<[]> {
  // Destination is tied to the first source.
  let Constraints = "$Zdn = $_Zdn";

  bits<3> Zdn;
  bits<4> Zm;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b10;
  let Inst{19-16} = Zm;
  let Inst{15-11} = 0b10101;
  let Inst{10-5}  = op{6-1};
  let Inst{4-2}   = Zdn;
  let Inst{1}     = 0b0;
  let Inst{0}     = op{0};
}

// FP variants: H/S/D only (sz = 0b01, 0b10, 0b11).
multiclass sme2_fp_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>;
}

// Integer variants: B/H/S/D (sz = 0b00 .. 0b11).
multiclass sme2_int_sve_destructive_vector_vg4_single<string mnemonic, bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_b_mul_r, ZPR4b8, mnemonic>;
  def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>;
}

// SME2.1 fmax/fmin instructions.
// Single _H definition using sz = 0b00 with .h register operands.
multiclass sme2p1_bf_max_min_vector_vg4_single<string mnemonic, bits<7>op> {
  def _H : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_h_mul_r,
                                                  ZPR4b16, mnemonic>;
}

// Destructive two-register group where $Zm is itself a register group of the
// same type as the destination. $Zm is four bits wide in bits 20-17 and
// bits 16-11 are fixed to 0b010110.
class sme2_sve_destructive_vector_vg2_multi<bits<2> sz, bits<7> op,
                                            RegisterOperand vector_ty,
                                            string mnemonic>
    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>, Sched<[]> {
  // Destination is tied to the first source.
  let Constraints = "$Zdn = $_Zdn";

  bits<4> Zdn;
  bits<4> Zm;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b1;
  let Inst{20-17} = Zm;
  let Inst{16-11} = 0b010110;
  let Inst{10-5}  = op{6-1};
  let Inst{4-1}   = Zdn;
  let Inst{0}     = op{0};
}

// FP variants: H/S/D only (sz = 0b01, 0b10, 0b11).
multiclass sme2_fp_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>;
}

// Integer variants: B/H/S/D (sz = 0b00 .. 0b11).
multiclass sme2_int_sve_destructive_vector_vg2_multi<string mnemonic, bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_b_mul_r, mnemonic>;
  def _H : sme2_sve_destructive_vector_vg2_multi<0b01, op, ZZ_h_mul_r, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg2_multi<0b10, op, ZZ_s_mul_r, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg2_multi<0b11, op, ZZ_d_mul_r, mnemonic>;
}

// SME2.1 fmax/fmin instructions.
// Single _H definition using sz = 0b00 with .h register operands.
multiclass sme2p1_bf_max_min_vector_vg2_multi<string mnemonic, bits<7>op> {
  def _H : sme2_sve_destructive_vector_vg2_multi<0b00, op, ZZ_h_mul_r,
                                                 mnemonic>;
}

// Destructive four-register group where $Zm is itself a register group of the
// same type as the destination. $Zm and $Zdn are three bits wide (bits 20-18
// and 4-2); bits 17-11 are fixed to 0b0010111 and bit 1 to zero.
class sme2_sve_destructive_vector_vg4_multi<bits<2> sz, bits<7> op,
                                            RegisterOperand vector_ty,
                                            string mnemonic>
    : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm),
        mnemonic, "\t$Zdn, $_Zdn, $Zm",
        "", []>, Sched<[]> {
  // Destination is tied to the first source.
  let Constraints = "$Zdn = $_Zdn";

  bits<3> Zdn;
  bits<3> Zm;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b1;
  let Inst{20-18} = Zm;
  let Inst{17-11} = 0b0010111;
  let Inst{10-5}  = op{6-1};
  let Inst{4-2}   = Zdn;
  let Inst{1}     = 0b0;
  let Inst{0}     = op{0};
}

// FP variants: H/S/D only (sz = 0b01, 0b10, 0b11).
multiclass sme2_fp_sve_destructive_vector_vg4_multi<string mnemonic, bits<7> op> {
  def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r, mnemonic>;
}

// Integer variants: B/H/S/D (sz = 0b00 .. 0b11).
multiclass sme2_int_sve_destructive_vector_vg4_multi<string mnemonic, bits<7> op> {
  def _B : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_b_mul_r, mnemonic>;
  def _H : sme2_sve_destructive_vector_vg4_multi<0b01, op, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_sve_destructive_vector_vg4_multi<0b10, op, ZZZZ_s_mul_r, mnemonic>;
  def _D : sme2_sve_destructive_vector_vg4_multi<0b11, op, ZZZZ_d_mul_r, mnemonic>;
}

// SME2.1 fmax/fmin instructions.
// Single _H definition using sz = 0b00 with .h register operands.
multiclass sme2p1_bf_max_min_vector_vg4_multi<string mnemonic, bits<7>op> {
  def _H : sme2_sve_destructive_vector_vg4_multi<0b00, op, ZZZZ_h_mul_r,
                                                 mnemonic>;
}

//===----------------------------------------------------------------------===//
// SME2 Multi-vector - Index/Single/Multi Array Vectors FMA sources

// Common part of the indexed long-MLA encodings (32-bit ZA array, .h sources).
// An empty `vg_acronym` drops the ", vgxN" token from the assembly string and
// clears bit 20; any other value sets it. Bits 15, 11-5 and 2-0 are filled in
// by the users of this class.
class sme2_mla_long_array_index_base<bits<2> op0, bits<2> op, Operand index_ty,
                                     RegisterOperand multi_vector_ty,
                                     string mnemonic, string vg_acronym="">
    : I<(outs MatrixOp32:$ZAda),
        (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm,
             multi_vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3),
        mnemonic, "\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm$i3",
        "", []>, Sched<[]> {
  // The ZA output operand is tied to the $_ZAda input operand.
  let Constraints = "$ZAda = $_ZAda";

  bits<2> Rv;
  bits<4> Zm;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = op0;
  let Inst{21}    = 0b0;
  let Inst{20}    = !if(!eq(vg_acronym, ""), 0, 1);
  let Inst{19-16} = Zm;
  let Inst{14-13} = Rv;
  let Inst{12}    = 0b1;
  let Inst{4-3}   = op;
}

// Single-vector indexed form. The three-bit lane index is split: i3{2} goes
// to bit 15 and i3{1-0} to bits 11-10.
multiclass sme2_mla_long_array_index<string mnemonic, bits<2> op0, bits<2> op,
                                     ValueType zpr_ty, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_index_base<op0, op, uimm3s2range, ZPR16,
                                             mnemonic>, SMEPseudo2Instr<NAME # _HtoS, 1> {
    bits<3> imm;
    bits<5> Zn;
    bits<3> i3;
    let Inst{15}    = i3{2};
    let Inst{11-10} = i3{1-0};
    let Inst{9-5}   = Zn;
    let Inst{2-0}   = imm;
  }

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm3s2range, ZPR16,
                                                          ZPR4b16, VectorIndexH32b_timm,
                                                          SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty,
                                      VectorIndexH32b_timm, tileslicerange3s2>;
}

// vgx2 indexed form. Here the lane index is split differently from the
// single-vector form: i3{2-1} -> bits 11-10, i3{0} -> bit 2.
class sme2_mla_long_array_vg2_index<string mnemonic, bits<2> op0, bits<2> op>
    : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZ_h_mul_r,
                                     mnemonic, "vgx2"> {
  bits<2> imm;
  bits<4> Zn;
  bits<3> i3;
  let Inst{15}    = 0b0;
  let Inst{11-10} = i3{2-1};
  let Inst{9-6}   = Zn;
  let Inst{5}     = 0b0;
  let Inst{2}     = i3{0};
  let Inst{1-0}   = imm;
}

// FP vgx2 indexed: op0 fixed to 0b10; instruction, pseudo, pattern and the
// "vgx2"-less alias.
multiclass sme2_fp_mla_long_array_vg2_index<string mnemonic, bits<2> op, ValueType zpr_ty,
                                            SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg2_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r,
                                                          ZPR4b16, VectorIndexH32b_timm,
                                                          SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty,
                                          VectorIndexH32b_timm, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                  (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}

// Integer vgx2 indexed: op0 fixed to 0b11, element type fixed to nxv8i16.
// Note the record suffix here is _S, unlike the _HtoS used by the siblings.
multiclass sme2_int_mla_long_array_vg2_index<string mnemonic, bits<2> op,
                                             SDPatternOperator intrinsic> {
  def _S : sme2_mla_long_array_vg2_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _S, 1>;

  def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _S, uimm2s2range, ZZ_h_mul_r,
                                                       ZPR4b16, VectorIndexH32b_timm,
                                                       SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME # _S, intrinsic, uimm2s2range, ZPR4b16, nxv8i16,
                                          VectorIndexH32b_timm, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                  (!cast<Instruction>(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}

// vgx4 indexed form: bit 15 set, three-bit $Zn in bits 9-7, bits 6-5 zero.
class sme2_mla_long_array_vg4_index<string mnemonic, bits<2> op0, bits<2> op>
    : sme2_mla_long_array_index_base<op0, op, uimm2s2range, ZZZZ_h_mul_r,
                                     mnemonic, "vgx4"> {
  bits<2> imm;
  bits<3> Zn;
  bits<3> i3;
  let Inst{15}    = 0b1;
  let Inst{11-10} = i3{2-1};
  let Inst{9-7}   = Zn;
  let Inst{6-5}   = 0b00;
  let Inst{2}     = i3{0};
  let Inst{1-0}   = imm;
}

// FP vgx4 indexed: op0 fixed to 0b10.
multiclass sme2_fp_mla_long_array_vg4_index<string mnemonic, bits<2> op, ValueType zpr_ty,
                                            SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b10, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r,
                                                          ZPR4b16, VectorIndexH32b_timm,
                                                          SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, zpr_ty,
                                          VectorIndexH32b_timm, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                  (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}

// Integer vgx4 indexed: op0 fixed to 0b11, element type fixed to nxv8i16.
multiclass sme2_int_mla_long_array_vg4_index<string mnemonic, bits<2> op,
                                             SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg4_index<mnemonic, 0b11, op>, SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r,
                                                          ZPR4b16, VectorIndexH32b_timm,
                                                          SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16,
                                          VectorIndexH32b_timm, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i3",
                  (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>;
}

// Common part of the non-indexed long-MLA encodings (single and multi
// sources). An empty `vg_acronym` drops the ", vgxN" token and sets bit 10;
// otherwise bit 10 is clear. Bits 20-16, 9-5 and 2-0 belong to the users.
class sme2_mla_long_array<bits<2>op0, bits<2> op,
                          MatrixOperand matrix_ty,
                          Operand index_ty,
                          RegisterOperand first_vector_ty,
                          RegisterOperand second_vector_ty,
                          string mnemonic, string vg_acronym="">
   : I<(outs matrix_ty:$ZAda),
       (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv,
            index_ty:$imm, first_vector_ty:$Zn, second_vector_ty:$Zm),
       mnemonic,"\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm",
       "", []> , Sched<[]> {
  // The ZA output operand is tied to the $_ZAda input operand.
  let Constraints = "$ZAda = $_ZAda";

  bits<2> Rv;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = op0;
  let Inst{21}    = 0b1;
  let Inst{15}    = 0b0;
  let Inst{14-13} = Rv;
  let Inst{12-11} = 0b01;
  let Inst{10}    = !if(!eq(vg_acronym, ""), 1, 0);
  let Inst{4-3}   = op;
}

// Single-vector x single-vector form on a 32-bit ZA array; bit 20 is zero.
multiclass sme2_mla_long_array_single<string mnemonic, bits<2> op0, bits<2> op,
                                      ValueType zpr_ty, SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array<op0, op, MatrixOp32, uimm3s2range, ZPR16, ZPR4b16,
                                  mnemonic> , SMEPseudo2Instr<NAME # _HtoS, 1>{
    bits<3> imm;
    bits<5> Zn;
    bits<4> Zm;
    let Inst{20}    = 0b0;
    let Inst{19-16} = Zm;
    let Inst{9-5}   = Zn;
    let Inst{2-0}   = imm;
  }

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm3s2range, ZPR16,
                                                           ZPR4b16, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm3s2range, ZPR4b16, zpr_ty,
                                       tileslicerange3s2>;
}

// Byte-source variant on a 16-bit ZA array: op0 = op = 0b00 and bit 20 set.
class sme2_mla_long_array_single_16b<string mnemonic>
    : sme2_mla_long_array<0b00, 0b00, MatrixOp16, uimm3s2range, ZPR8, ZPR4b8, mnemonic> {
  bits<3> imm;
  bits<5> Zn;
  bits<4> Zm;
  let Inst{20}    = 0b1;
  let Inst{19-16} = Zm;
  let Inst{9-5}   = Zn;
  let Inst{2-0}   = imm;
}

// Register-group x single-vector form shared by vgx2 and vgx4. `vg4` is
// written to bit 20 and `o2` to bit 2; $Zn keeps a five-bit field.
class sme2_mla_long_array_vg24_single<bits<2> op0, bit vg4, bits<2> op, bit o2,
                                      MatrixOperand matrix_ty, RegisterOperand multi_vector_ty,
                                      ZPRRegOp zpr_ty, string mnemonic, string vg_acronym>
    : sme2_mla_long_array<op0, op, matrix_ty, uimm2s2range, multi_vector_ty, zpr_ty,
                          mnemonic, vg_acronym> {
  bits<2> imm;
  bits<5> Zn;
  bits<4> Zm;
  let Inst{20}    = vg4;
  let Inst{19-16} = Zm;
  let Inst{9-5}   = Zn;
  let Inst{2}     = o2;
  let Inst{1-0}   = imm;
}

// FP vgx2 x single: op0 = 0b00, vg4 = 0; the 3-bit `op` supplies op{2-1} as
// the class' op and op{0} as o2.
multiclass sme2_fp_mla_long_array_vg2_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
                                             RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
                                             ValueType zpr_ty, SDPatternOperator intrinsic> {
  def NAME : sme2_mla_long_array_vg24_single<0b00, 0b0, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
                                             vector_ty, mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty,
                                                      vector_ty, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm2s2range, vector_ty, zpr_ty,
                                           tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>;
}

// Integer vgx2 x single: op0 = 0b01, vg4 = 0, o2 = 0, .h sources.
multiclass sme2_int_mla_long_array_vg2_single<string mnemonic, bits<2> op,
                                              SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b0, op, 0b0, MatrixOp32, ZZ_h, ZPR4b16, mnemonic,
                                              "vgx2">, SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h,
                                                           ZPR4b16, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16,
                                           tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>;
}

// FP vgx4 x single: op0 = 0b00, vg4 = 1; `op` is split as in the vgx2 case.
multiclass sme2_fp_mla_long_array_vg4_single<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
                                             RegisterOperand multi_vector_ty, ZPRRegOp vector_ty,
                                             ValueType zpr_ty, SDPatternOperator intrinsic> {
  def NAME : sme2_mla_long_array_vg24_single<0b00, 0b1, op{2-1}, op{0}, matrix_ty, multi_vector_ty,
                                             vector_ty, mnemonic, "vgx4">,
                                             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s2range, multi_vector_ty, vector_ty,
                                                      SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm2s2range, vector_ty, zpr_ty,
                                           tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, multi_vector_ty:$Zn, vector_ty:$Zm), 0>;
}

// Integer vgx4 x single: op0 = 0b01, vg4 = 1, o2 = 0, .h sources.
multiclass sme2_int_mla_long_array_vg4_single<string mnemonic, bits<2> op,
                                              SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg24_single<0b01, 0b1, op, 0b0, MatrixOp32, ZZZZ_h, ZPR4b16, mnemonic,
                                              "vgx4">, SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h,
                                                           ZPR4b16, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME # _HtoS, intrinsic, uimm2s2range, ZPR4b16, nxv8i16,
                                           tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>;
}

// Register-group x register-group, vgx2. op{1-0} is forwarded to the base
// class; op{2} is written to bit 5.
class sme2_mla_long_array_vg2_multi<string mnemonic, bits<2> op0, bits<3> op,
                                    MatrixOperand matrix_ty, RegisterOperand multi_vector_ty>
    : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range, multi_vector_ty, multi_vector_ty,
                          mnemonic, "vgx2"> {
  bits<2> imm;
  bits<4> Zn;
  bits<4> Zm;
  let Inst{20-17} = Zm;
  let Inst{16}    = 0b0;
  let Inst{9-6}   = Zn;
  let Inst{5}     = op{2};  // fp8
  let Inst{2}     = 0b0;
  let Inst{1-0}   = imm;
}

// FP vgx2 multi/multi: op0 fixed to 0b10.
multiclass sme2_fp_mla_long_array_vg2_multi<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
                                            RegisterOperand multi_vector_ty,
                                            ValueType zpr_ty, SDPatternOperator intrinsic> {

  def NAME : sme2_mla_long_array_vg2_multi<mnemonic, 0b10, op, matrix_ty, multi_vector_ty>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range, multi_vector_ty,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}

// Integer vgx2 multi/multi: op0 fixed to 0b11 and op{2} forced to zero via
// {0b0, op}. The alias spells the offset operand $imm2; the name is local to
// the alias and used consistently inside it.
multiclass sme2_int_mla_long_array_vg2_multi<string mnemonic, bits<2> op,
                                             SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg2_multi<mnemonic, 0b11, {0b0, op}, MatrixOp32, ZZ_h_mul_r>,
              SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range, ZZ_h_mul_r,
                                                          SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME # _HtoS, intrinsic, uimm2s2range, nxv8i16,
                                          tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
                  (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>;
}

// Register-group x register-group, vgx4: three-bit Zm/Zn, bits 17-16 = 0b01,
// bit 6 = 0, op{2} in bit 5.
class sme2_mla_long_array_vg4_multi<string mnemonic, bits<2> op0, bits<3> op,
                                    MatrixOperand matrix_ty,
                                    RegisterOperand multi_vector_ty>
    : sme2_mla_long_array<op0, op{1-0}, matrix_ty, uimm2s2range, multi_vector_ty, multi_vector_ty,
                          mnemonic, "vgx4"> {
  bits<2> imm;
  bits<3> Zn;
  bits<3> Zm;
  let Inst{20-18} = Zm;
  let Inst{17}    = 0b0;
  let Inst{16}    = 0b1;
  let Inst{9-7}   = Zn;
  let Inst{6}     = 0b0;
  let Inst{5}     = op{2};  //fp8
  let Inst{2}     = 0b0;
  let Inst{1-0}   = imm;
}

// FP vgx4 multi/multi: op0 fixed to 0b10.
multiclass sme2_fp_mla_long_array_vg4_multi<string mnemonic, bits<3> op, MatrixOperand matrix_ty,
                                            RegisterOperand multi_vector_ty, ValueType zpr_ty,
                                            SDPatternOperator intrinsic> {
  def NAME : sme2_mla_long_array_vg4_multi<mnemonic, 0b10, op, matrix_ty, multi_vector_ty>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm2s2range, multi_vector_ty,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm2s2range, zpr_ty, tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
                  (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm, multi_vector_ty:$Zn, multi_vector_ty:$Zm), 0>;
}

// Integer vgx4 multi/multi: op0 fixed to 0b11, op{2} forced to zero. As in
// the vgx2 sibling, the alias names the offset operand $imm2.
multiclass sme2_int_mla_long_array_vg4_multi<string mnemonic, bits<2> op,
                                             SDPatternOperator intrinsic> {
  def _HtoS : sme2_mla_long_array_vg4_multi<mnemonic, 0b11, {0b0, op}, MatrixOp32, ZZZZ_h_mul_r>,
              SMEPseudo2Instr<NAME # _HtoS, 1>;

  def _HtoS_PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME # _HtoS, uimm2s2range, ZZZZ_h_mul_r,
                                                          SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME # _HtoS, intrinsic, uimm2s2range, nxv8i16,
                                          tileslicerange2s2>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm",
                  (!cast<Instruction>(NAME #_HtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv,
                      uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>;
}

//===----------------------------------------------------------------------===//
// Two-register unary format (register group in, register group out).
// op{4-1} goes to bits 19-16 and op{0} to bit 5.
class sme2_frint_cvt_vg2_multi<bits<2>sz, bits<5>op, RegisterOperand first_ty,
                               RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<4> Zd;
  bits<4> Zn;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b10;
  let Inst{19-16} = op{4-1};
  let Inst{15-10} = 0b111000;
  let Inst{9-6}   = Zn;
  let Inst{5}     = op{0};
  let Inst{4-1}   = Zd;
  let Inst{0}     = 0b0;
}

// SME2 multi-vec FP to int convert two registers
// SME2 multi-vec int to FP two registers
multiclass sme2_fp_cvt_vg2_multi<string mnemonic, bits<5> op> {
  def NAME : sme2_frint_cvt_vg2_multi<0b00, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>;
}

// SME2 multi-vec FRINT two registers
multiclass sme2_frint_vector_vg2_multi<string mnemonic, bits<5> op> {
  def _S : sme2_frint_cvt_vg2_multi<0b10, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>;
}

// Four-register unary format shared by FRINT, ZIP and convert. The 7-bit
// `op` is split three ways: op{6-3} -> bits 19-16, op{2-1} -> bits 6-5,
// op{0} -> bit 1.
class sme2_frint_zip_cvt_vg4_multi<bits<2>sz, bits<7>op, RegisterOperand first_ty,
                                   RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<3> Zd;
  bits<3> Zn;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b11;
  let Inst{19-16} = op{6-3};
  let Inst{15-10} = 0b111000;
  let Inst{9-7}   = Zn;
  let Inst{6-5}   = op{2-1};
  let Inst{4-2}   = Zd;
  let Inst{1}     = op{0};
  let Inst{0}     = 0b0;
}

// SME2 multi-vec FP to int convert four registers
// SME2 multi-vec int to FP four registers
multiclass sme2_fp_cvt_vg4_multi<string mnemonic, bits<7> op> {
  def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>;
}

// SME2 multi-vec ZIP four registers (B/H/S/D element sizes)
multiclass sme2_zip_vector_vg4<string mnemonic, bits<7> op> {
  def _B : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_b_mul_r, ZZZZ_b_mul_r,
                                        mnemonic>;
  def _H : sme2_frint_zip_cvt_vg4_multi<0b01, op, ZZZZ_h_mul_r, ZZZZ_h_mul_r,
                                        mnemonic>;
  def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
                                        mnemonic>;
  def _D : sme2_frint_zip_cvt_vg4_multi<0b11, op, ZZZZ_d_mul_r, ZZZZ_d_mul_r,
                                        mnemonic>;
}

// SME2 multi-vec quadwords ZIP four registers
multiclass sme2_zip_vector_vg4_Q<string mnemonic, bits<7> op> {
  def NAME: sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_q_mul_r, ZZZZ_q_mul_r,
                                         mnemonic>;
}

// SME2 multi-vec FRINT four registers
multiclass sme2_frint_vector_vg4_multi<string mnemonic, bits<7> op> {
  def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r,
                                        mnemonic>;
}

// Two-register-group source to single-vector destination. op{4} -> bit 22,
// op{3-1} -> bits 18-16, op{0} -> bit 5; $Zd uses the full five-bit field.
class sme2_cvt_vg2_single<string mnemonic, bits<5> op,
                          RegisterOperand first_ty, RegisterOperand second_ty>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<5> Zd;
  bits<4> Zn;
  let Inst{31-23} = 0b110000010;
  let Inst{22}    = op{4};
  let Inst{21-19} = 0b100;
  let Inst{18-16} = op{3-1};
  let Inst{15-10} = 0b111000;
  let Inst{9-6}   = Zn;
  let Inst{5}     = op{0};
  let Inst{4-0}   = Zd;
}

// SME2 multi-vec FP down convert two registers
// SME2 multi-vec int down convert two registers
multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt,
                               ValueType in_vt, SDPatternOperator intrinsic> {
  def NAME : sme2_cvt_vg2_single<mnemonic, op, ZPR16, ZZ_s_mul_r>;
  def : SVE2p1_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
}

// SME2 multi-vec FP8 down convert two registers
multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op> {
  def NAME : sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>;
}

// Single-vector source to two-register-group destination (convert/unpack).
// `op` occupies bits 18-16 and `u` bit 0; $Zn uses the full five-bit field.
class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
                               RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<4> Zd;
  bits<5> Zn;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-19} = 0b100;
  let Inst{18-16} = op;
  let Inst{15-10} = 0b111000;
  let Inst{9-5}   = Zn;
  let Inst{4-1}   = Zd;
  let Inst{0}     = u;
}

// SME2 multi-vec unpack two registers
multiclass sme2_unpk_vector_vg2<string mnemonic, bit u> {
  def _H : sme2_cvt_unpk_vector_vg2<0b01, 0b101, u, ZZ_h_mul_r, ZPR8, mnemonic>;
  def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b101, u, ZZ_s_mul_r, ZPR16, mnemonic>;
  def _D : sme2_cvt_unpk_vector_vg2<0b11, 0b101, u, ZZ_d_mul_r, ZPR32, mnemonic>;
}

// SME2.1 multi-vec convert two registers
multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {
  def _S : sme2_cvt_unpk_vector_vg2<0b10, 0b000, l, ZZ_s_mul_r, ZPR16, mnemonic>;
}

// SME2 multi-vec FP8 up convert two registers
// NOTE(review): the def suffix is the literal text "_NAME", so the record is
// named <defm-name>_NAME. Presumably other files refer to it by that name;
// verify all users before changing it.
multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> {
  def _NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>;
}


// Four-register-group source to single-vector destination. `op` is split:
// op{2} -> bit 22 and op{1-0} -> bits 6-5; `op2` fills bits 19-16.
class sme2_cvt_vg4_single<bit sz, bits<3> op, bits<4>op2, RegisterOperand first_ty,
                          RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<5> Zd;
  bits<3> Zn;
  let Inst{31-24} = 0b11000001;
  let Inst{23}    = sz;
  let Inst{22}    = op{2};
  let Inst{21-20} = 0b11;
  let Inst{19-16} = op2;
  let Inst{15-10} = 0b111000;
  let Inst{9-7}   = Zn;
  let Inst{6-5}   = op{1-0};
  let Inst{4-0}   = Zd;
}

// SME2 multi-vec int down convert four registers
multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
  def _StoB : sme2_cvt_vg4_single<0, op, 0b0011, ZPR8, ZZZZ_s_mul_r, mnemonic>;
  def _DtoH : sme2_cvt_vg4_single<1, op, 0b0011, ZPR16, ZZZZ_d_mul_r, mnemonic>;

  def : SME2_Cvt_VG4_Pat<NAME # _StoB, intrinsic, nxv16i8, nxv4i32>;
  def : SME2_Cvt_VG4_Pat<NAME # _DtoH, intrinsic, nxv8i16, nxv2i64>;
}

//SME2 multi-vec FP8 down convert four registers
// NOTE(review): as with the two-register FP8 up-convert multiclass, the def
// suffix is the literal "_NAME"; confirm users before renaming.
multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N> {
  def _NAME : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic>;
}

// Two-register-group source to four-register-group destination. Everything
// but sz, the registers and `u` (bit 0) is fixed.
class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty,
                           RegisterOperand second_ty, string mnemonic>
    : I<(outs first_ty:$Zd), (ins second_ty:$Zn),
        mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> {
  bits<3> Zd;
  bits<4> Zn;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-10} = 0b110101111000;
  let Inst{9-6}   = Zn;
  let Inst{5}     = 0b0;
  let Inst{4-2}   = Zd;
  let Inst{1}     = 0b0;
  let Inst{0}     = u;
}

// SME2 multi-vec UNPK four registers
multiclass sme2_unpk_vector_vg4<string mnemonic, bit u> {
  def _H : sme2_unpk_vector_vg4<0b01, u, ZZZZ_h_mul_r, ZZ_b_mul_r, mnemonic>;
  def _S : sme2_unpk_vector_vg4<0b10, u, ZZZZ_s_mul_r, ZZ_h_mul_r, mnemonic>;
  def _D : sme2_unpk_vector_vg4<0b11, u, ZZZZ_d_mul_r, ZZ_s_mul_r, mnemonic>;
}

//===----------------------------------------------------------------------===//
// SME2 multi-vec CLAMP registers

// Shared clamp encoding. Bits 4-1 (the destination group) are supplied by
// the vg2/vg4 subclasses.
class sme2_clamp_vector_vg24_multi<bits<2> sz, bits<3> op1, bit u,
                                   RegisterOperand multi_vector_ty,
                                   ZPRRegOp vector_ty, string mnemonic>
    : I<(outs multi_vector_ty:$Zd),
        (ins multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm",
        "", []>, Sched<[]>{
  // Destination group is tied to the $_Zd input.
  let Constraints = "$Zd = $_Zd";

  bits<5> Zn;
  bits<5> Zm;

  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b1;
  let Inst{20-16} = Zm;
  let Inst{15-13} = 0b110;
  let Inst{12-10} = op1;
  let Inst{9-5}   = Zn;
  let Inst{0}     = u;
}

// Two-register clamp: four-bit $Zd in bits 4-1.
class sme2_clamp_vector_vg2_multi<bits<2> sz, bits<3> op1, bit u,
                                  RegisterOperand multi_vector_ty,
                                  ZPRRegOp vector_ty, string mnemonic>
    : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
                                   mnemonic>{
  bits<4> Zd;
  let Inst{4-1} = Zd;
}

// FP clamp, two registers: op1 = 0b000, u = 0, H/S/D.
multiclass sme2_fp_clamp_vector_vg2_multi<string mnemonic>{
  def _H : sme2_clamp_vector_vg2_multi<0b01, 0b000, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg2_multi<0b10, 0b000, 0b0, ZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg2_multi<0b11, 0b000, 0b0, ZZ_d_mul_r, ZPR64, mnemonic>;
}

// Integer clamp, two registers: op1 = 0b001, caller-supplied `u`, B/H/S/D.
multiclass sme2_int_clamp_vector_vg2_multi<string mnemonic, bit u>{
  def _B : sme2_clamp_vector_vg2_multi<0b00, 0b001, u, ZZ_b_mul_r, ZPR8, mnemonic>;
  def _H : sme2_clamp_vector_vg2_multi<0b01, 0b001, u, ZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg2_multi<0b10, 0b001, u, ZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg2_multi<0b11, 0b001, u, ZZ_d_mul_r, ZPR64, mnemonic>;
}

// SME2.1 multi-vec FCLAMP two registers
multiclass sme2p1_bfclamp_vector_vg2_multi<string mnemonic> {
  def _H : sme2_clamp_vector_vg2_multi<0b00, 0b000, 0b0, ZZ_h_mul_r, ZPR16,
                                       mnemonic>;
}

// Four-register clamp: three-bit $Zd in bits 4-2, bit 1 fixed to zero.
class sme2_clamp_vector_vg4_multi<bits<2> sz, bits<3> op1, bit u,
                                  RegisterOperand multi_vector_ty,
                                  ZPRRegOp vector_ty, string mnemonic>
    : sme2_clamp_vector_vg24_multi<sz, op1, u, multi_vector_ty, vector_ty,
                                   mnemonic>{
  bits<3> Zd;
  let Inst{4-2} = Zd;
  let Inst{1}   = 0b0;
}

// FP clamp, four registers: op1 = 0b010, u = 0, H/S/D.
multiclass sme2_fp_clamp_vector_vg4_multi<string mnemonic>{
  def _H : sme2_clamp_vector_vg4_multi<0b01, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg4_multi<0b10, 0b010, 0b0, ZZZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg4_multi<0b11, 0b010, 0b0, ZZZZ_d_mul_r, ZPR64, mnemonic>;
}

// Integer clamp, four registers: op1 = 0b011, caller-supplied `u`, B/H/S/D.
multiclass sme2_int_clamp_vector_vg4_multi<string mnemonic, bit u>{
  def _B : sme2_clamp_vector_vg4_multi<0b00, 0b011, u, ZZZZ_b_mul_r, ZPR8, mnemonic>;
  def _H : sme2_clamp_vector_vg4_multi<0b01, 0b011, u, ZZZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_clamp_vector_vg4_multi<0b10, 0b011, u, ZZZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_clamp_vector_vg4_multi<0b11, 0b011, u, ZZZZ_d_mul_r, ZPR64, mnemonic>;
}

// SME2.1 multi-vec FCLAMP four registers
multiclass sme2p1_bfclamp_vector_vg4_multi<string mnemonic> {
  def _H : sme2_clamp_vector_vg4_multi<0b00, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16,
                                       mnemonic>;
}

// SME2 multi-vec ZIP two registers
// `q` (bit 10) selects the quadword variant; `u` is written to bit 0.
class sme2_zip_vector_vg2<bits<2> sz, bit q, bit u,
                          RegisterOperand multi_vector_ty,
                          ZPRRegOp vector_ty, string mnemonic>
    : I<(outs multi_vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$Zd, $Zn, $Zm",
        "", []>, Sched<[]>{
  bits<4> Zd;
  bits<5> Zn;
  bits<5> Zm;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b1;
  let Inst{20-16} = Zm;
  let Inst{15-11} = 0b11010;
  let Inst{10}    = q;
  let Inst{9-5}   = Zn;
  let Inst{4-1}   = Zd;
  let Inst{0}     = u;
}

// B/H/S/D use q = 0 with sz = 0b00..0b11; _Q uses sz = 0b00 with q = 1.
multiclass sme2_zip_vector_vg2<string mnemonic, bit op> {
  def _B : sme2_zip_vector_vg2<0b00, 0b0, op, ZZ_b_mul_r, ZPR8, mnemonic>;
  def _H : sme2_zip_vector_vg2<0b01, 0b0, op, ZZ_h_mul_r, ZPR16, mnemonic>;
  def _S : sme2_zip_vector_vg2<0b10, 0b0, op, ZZ_s_mul_r, ZPR32, mnemonic>;
  def _D : sme2_zip_vector_vg2<0b11, 0b0, op, ZZ_d_mul_r, ZPR64, mnemonic>;
  def _Q : sme2_zip_vector_vg2<0b00, 0b1, op, ZZ_q_mul_r, ZPR128, mnemonic>;
}

//===----------------------------------------------------------------------===//
// SME2 Dot Products and MLA
class sme2_multi_vec_array_vg2_index<bits<2> sz, bits<6> op, MatrixOperand matrix_ty,
                                     RegisterOperand multi_vector_ty,
                                     ZPRRegOp vector_ty, Operand index_ty,
                                     string mnemonic>
   : I<(outs matrix_ty:$ZAda),
       (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3,
        multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i),
       mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i",
       "", []>, Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  bits<4> Zn;
  bits<3> imm3;
  let Inst{31-24} =
0b11000001; 2431 let Inst{23-22} = sz; 2432 let Inst{21-20} = 0b01; 2433 let Inst{19-16} = Zm; 2434 let Inst{15} = 0b0; 2435 let Inst{14-13} = Rv; 2436 let Inst{12-10} = op{5-3}; 2437 let Inst{9-6} = Zn; 2438 let Inst{5-3} = op{2-0}; 2439 let Inst{2-0} = imm3; 2440 2441 let Constraints = "$ZAda = $_ZAda"; 2442} 2443 2444// SME2 multi-vec ternary indexed two registers 32-bit 2445multiclass sme2_multi_vec_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<4> op, 2446 RegisterOperand multi_vector_ty, 2447 ZPRRegOp vector_ty, ValueType vt, 2448 SDPatternOperator intrinsic> { 2449 def NAME : sme2_multi_vec_array_vg2_index<sz, {op{3},?,?,op{2-0}}, MatrixOp32, multi_vector_ty, vector_ty, 2450 VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> { 2451 bits<2> i; 2452 let Inst{11-10} = i; 2453 } 2454 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>; 2455 2456 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>; 2457 2458 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2459 (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2460 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>; 2461} 2462 2463// SME2.1 multi-vec ternary indexed two registers 16-bit 2464// SME2 multi-vec indexed FP8 two-way dot product to FP16 two registers 2465multiclass sme2p1_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3> op, 2466 RegisterOperand multi_vector_ty, ZPRRegOp zpr_ty> { 2467 def NAME : sme2_multi_vec_array_vg2_index<sz, {op{2},?,?,op{1-0},?}, MatrixOp16, 2468 multi_vector_ty, zpr_ty, 2469 VectorIndexH, mnemonic> { 2470 bits<3> i; 2471 let Inst{11-10} = i{2-1}; 2472 let Inst{3} = i{0}; 2473 } 2474 2475 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2476 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, 
MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2477 multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>; 2478} 2479 2480// SME2 multi-vec indexed FP8 two-way vertical dot product to single precision 2481// two registers 2482class sme2_fp8_multi_vec_array_vg4_index<string mnemonic, bit T> 2483 : sme2_multi_vec_array_vg2_index<0b11, {0b01,?,0b0, T,?}, MatrixOp32, 2484 ZZ_b_mul_r, ZPR4b8, VectorIndexS, mnemonic> { 2485 2486 bits<2> i; 2487 let Inst{10} = i{1}; 2488 let Inst{3} = i{0}; 2489 let AsmString = !strconcat(mnemonic, "{\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i}"); 2490} 2491 2492// SME2 multi-vec ternary indexed two registers 64-bit 2493 2494class sme2_multi_vec_array_vg2_index_64b<bits<2> op, 2495 RegisterOperand multi_vector_ty, 2496 ZPRRegOp vector_ty, 2497 string mnemonic> 2498 : I<(outs MatrixOp64:$ZAda), 2499 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2500 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 2501 mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i1", 2502 "", []>, Sched<[]> { 2503 bits<4> Zm; 2504 bits<2> Rv; 2505 bits<1> i1; 2506 bits<4> Zn; 2507 bits<3> imm3; 2508 let Inst{31-20} = 0b110000011101; 2509 let Inst{19-16} = Zm; 2510 let Inst{15} = 0b0; 2511 let Inst{14-13} = Rv; 2512 let Inst{12-11} = 0b00; 2513 let Inst{10} = i1; 2514 let Inst{9-6} = Zn; 2515 let Inst{5} = 0b0; 2516 let Inst{4-3} = op; 2517 let Inst{2-0} = imm3; 2518 2519 let Constraints = "$ZAda = $_ZAda"; 2520} 2521 2522multiclass sme2_multi_vec_array_vg2_index_64b<string mnemonic, bits<2> op, 2523 RegisterOperand multi_vector_ty, 2524 ZPRRegOp vector_ty, ValueType vt, 2525 SDPatternOperator intrinsic> { 2526 def NAME : sme2_multi_vec_array_vg2_index_64b<op, multi_vector_ty, vector_ty, 2527 mnemonic>, SMEPseudo2Instr<NAME, 1>; 2528 2529 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexD32b_timm, SMEMatrixArray>; 2530 2531 def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, 
intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexD32b_timm, tileslice16>; 2532 2533 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1", 2534 (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2535 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>; 2536} 2537 2538class sme2_multi_vec_array_vg4_index<bit sz, bits<7> op, MatrixOperand matrix_ty, 2539 RegisterOperand multi_vector_ty, 2540 ZPRRegOp vector_ty, Operand index_ty, 2541 string mnemonic> 2542 : I<(outs matrix_ty:$ZAda), 2543 (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2544 multi_vector_ty:$Zn, vector_ty:$Zm, index_ty:$i), 2545 mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i", 2546 "", []>, Sched<[]> { 2547 bits<4> Zm; 2548 bits<2> Rv; 2549 bits<3> Zn; 2550 bits<3> imm3; 2551 let Inst{31-23} = 0b110000010; 2552 let Inst{22} = sz; 2553 let Inst{21-20} = 0b01; 2554 let Inst{19-16} = Zm; 2555 let Inst{15} = 0b1; 2556 let Inst{14-13} = Rv; 2557 let Inst{12-10} = op{6-4}; 2558 let Inst{9-7} = Zn; 2559 let Inst{6-3} = op{3-0}; 2560 let Inst{2-0} = imm3; 2561 2562 let Constraints = "$ZAda = $_ZAda"; 2563} 2564 2565// SME2 multi-vec ternary indexed four registers 32-bit 2566multiclass sme2_multi_vec_array_vg4_index_32b<string mnemonic, bits<4> op, 2567 RegisterOperand multi_vector_ty, 2568 ZPRRegOp vector_ty, ValueType vt, 2569 SDPatternOperator intrinsic> { 2570 def NAME : sme2_multi_vec_array_vg4_index<0b1, {op{3},?,?,0b0, op{2-0}}, MatrixOp32, multi_vector_ty, 2571 vector_ty, VectorIndexS32b_timm, mnemonic>, SMEPseudo2Instr<NAME, 1> { 2572 bits<2> i; 2573 let Inst{11-10} = i; 2574 } 2575 2576 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexS32b_timm, SMEMatrixArray>; 2577 2578 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vt, VectorIndexS32b_timm, tileslice16>; 2579 2580 def : InstAlias<mnemonic # "\t$ZAda[$Rv, 
$imm3], $Zn, $Zm$i", 2581 (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2582 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i), 0>; 2583} 2584 2585// SME2.1 multi-vec ternary indexed four registers 16-bit 2586multiclass sme2p1_multi_vec_array_vg4_index_16b<string mnemonic, bits<3> op, 2587 RegisterOperand multi_vector_ty, 2588 ZPRRegOp zpr_ty> { 2589 def NAME : sme2_multi_vec_array_vg4_index<0b0,{0b1,?,?,op,?}, MatrixOp16, 2590 multi_vector_ty, zpr_ty, 2591 VectorIndexH, mnemonic>{ 2592 bits<3> i; 2593 let Inst{11-10} = i{2-1}; 2594 let Inst{3} = i{0}; 2595 } 2596 2597 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2598 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, 2599 sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, VectorIndexH:$i), 0>; 2600} 2601 2602// SME2 multi-vec ternary indexed four registers 64-bit 2603class sme2_multi_vec_array_vg4_index_64b<bits<3> op, 2604 RegisterOperand multi_vector_ty, 2605 ZPRRegOp vector_ty, 2606 string mnemonic> 2607 : I<(outs MatrixOp64:$ZAda), 2608 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2609 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 2610 mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i1", 2611 "", []>, Sched<[]> { 2612 bits<4> Zm; 2613 bits<2> Rv; 2614 bits<1> i1; 2615 bits<3> Zn; 2616 bits<3> imm3; 2617 let Inst{31-20} = 0b110000011101; 2618 let Inst{19-16} = Zm; 2619 let Inst{15} = 0b1; 2620 let Inst{14-13} = Rv; 2621 let Inst{12} = 0b0; 2622 let Inst{11} = op{2}; 2623 let Inst{10} = i1; 2624 let Inst{9-7} = Zn; 2625 let Inst{6-5} = 0b00; 2626 let Inst{4-3} = op{1-0}; 2627 let Inst{2-0} = imm3; 2628 2629 let Constraints = "$ZAda = $_ZAda"; 2630} 2631 2632multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op, 2633 RegisterOperand multi_vector_ty, 2634 ZPRRegOp vector_ty, ValueType vty, 2635 SDPatternOperator intrinsic> { 2636 def NAME : 
sme2_multi_vec_array_vg4_index_64b<op, multi_vector_ty, vector_ty, 2637 mnemonic>, SMEPseudo2Instr<NAME, 1>; 2638 2639 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, multi_vector_ty, vector_ty, VectorIndexD32b_timm, SMEMatrixArray>; 2640 2641 def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, vector_ty, vty, VectorIndexD32b_timm, tileslice16>; 2642 2643 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i1", 2644 (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, 2645 multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), 0>; 2646} 2647 2648// FMLAL (multiple and indexed vector, FP8 to FP16) 2649class sme2_multi_vec_array_vg24_index_16b<bits<2> sz, bit vg4, bits<3> op, 2650 RegisterOperand multi_vector_ty, string mnemonic> 2651 : I<(outs MatrixOp16:$ZAda), 2652 (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, 2653 multi_vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 2654 mnemonic, "\t$ZAda[$Rv, $imm2, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i", 2655 "", []>, Sched<[]> { 2656 bits<4> Zm; 2657 bits<2> Rv; 2658 bits<4> i; 2659 bits<2> imm2; 2660 let Inst{31-24} = 0b11000001; 2661 let Inst{23-22} = sz; 2662 let Inst{21-20} = 0b01; 2663 let Inst{19-16} = Zm; 2664 let Inst{15} = vg4; 2665 let Inst{14-13} = Rv; 2666 let Inst{12} = op{2}; 2667 let Inst{11-10} = i{3-2}; 2668 let Inst{5-4} = op{1-0}; 2669 let Inst{3-2} = i{1-0}; 2670 let Inst{1-0} = imm2; 2671 2672 let Constraints = "$ZAda = $_ZAda"; 2673} 2674 2675multiclass sme2_multi_vec_array_vg2_index_16b<string mnemonic, bits<2> sz, bits<3>op> { 2676 def NAME : sme2_multi_vec_array_vg24_index_16b<sz, 0b0, op, ZZ_b_mul_r, mnemonic> { 2677 bits<4> Zn; 2678 let Inst{9-6} = Zn; 2679 } 2680 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", 2681 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, 2682 uimm2s2range:$imm2, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, 
VectorIndexB:$i), 0>; 2683} 2684 2685multiclass sme2_multi_vec_array_vg4_index_16b<string mnemonic, bits<2>sz, bits<3>op> { 2686 def NAME: sme2_multi_vec_array_vg24_index_16b<sz, 0b1, op, ZZZZ_b_mul_r, mnemonic> { 2687 bits<3> Zn; 2688 let Inst{9-7} = Zn; 2689 let Inst{6} = 0b0; 2690 } 2691 def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", 2692 (!cast<Instruction>(NAME) MatrixOp16:$ZAda, MatrixIndexGPR32Op8_11:$Rv, 2693 uimm2s2range:$imm2, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>; 2694} 2695 2696//===----------------------------------------------------------------------===// 2697// SME2 multi-vec indexed long long MLA one source 16-bit 2698class sme2_mla_ll_array_index_16b<string mnemonic, bits<2> sz,bits<2> op> 2699 : I<(outs MatrixOp16:$ZAda), 2700 (ins MatrixOp16:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm3s2range:$imm3, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 2701 mnemonic, "\t$ZAda[$Rv, $imm3], $Zn, $Zm$i", 2702 "", []>, Sched<[]> { 2703 bits<4> Zm; 2704 bits<2> Rv; 2705 bits<4> i; 2706 bits<5> Zn; 2707 bits<3> imm3; 2708 let Inst{31-24} = 0b11000001; 2709 let Inst{23-22} = sz; 2710 let Inst{21-20} = 0b00; 2711 let Inst{19-16} = Zm; 2712 let Inst{15} = i{3}; 2713 let Inst{14-13} = Rv; 2714 let Inst{12} = op{1}; 2715 let Inst{11-10} = i{2-1}; 2716 let Inst{9-5} = Zn; 2717 let Inst{4} = op{0}; 2718 let Inst{3} = i{0}; 2719 let Inst{2-0} = imm3; 2720 2721 let Constraints = "$ZAda = $_ZAda"; 2722} 2723 2724// SME2 multi-vec indexed long long MLA one source 32-bit 2725class sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op> 2726 : I<(outs MatrixOp32:$ZAda), 2727 (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 2728 mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", 2729 "", []>, Sched<[]> { 2730 bits<4> Zm; 2731 bits<2> Rv; 2732 bits<4> i; 2733 bits<5> Zn; 2734 bits<2> imm2; 2735 let Inst{31-24} = 0b11000001; 2736 let Inst{23-22} = sz; 2737 let Inst{21-20} 
= 0b00; 2738 let Inst{19-16} = Zm; 2739 let Inst{15} = i{3}; 2740 let Inst{14-13} = Rv; 2741 let Inst{12-10} = i{2-0}; 2742 let Inst{9-5} = Zn; 2743 let Inst{4-2} = op; 2744 let Inst{1-0} = imm2; 2745 2746 let Constraints = "$ZAda = $_ZAda"; 2747} 2748 2749multiclass sme2_mla_ll_array_index_32b<string mnemonic, bits<2> sz, bits<3> op, SDPatternOperator intrinsic> { 2750 def NAME : sme2_mla_ll_array_index_32b<mnemonic, sz, op>, SMEPseudo2Instr<NAME, 1>; 2751 2752 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR8, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>; 2753 2754 def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange2s4>; 2755} 2756 2757// SME2 multi-vec indexed long long MLA one source 64-bit 2758 2759class sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op> 2760 : I<(outs MatrixOp64:$ZAda), 2761 (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR16:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 2762 mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", 2763 "", []>, Sched<[]> { 2764 bits<4> Zm; 2765 bits<2> Rv; 2766 bits<3> i; 2767 bits<5> Zn; 2768 bits<2> imm2; 2769 let Inst{31-20} = 0b110000011000; 2770 let Inst{19-16} = Zm; 2771 let Inst{15} = i{2}; 2772 let Inst{14-13} = Rv; 2773 let Inst{12} = 0b0; 2774 let Inst{11-10} = i{1-0}; 2775 let Inst{9-5} = Zn; 2776 let Inst{4-3} = op; 2777 let Inst{2} = 0b0; 2778 let Inst{1-0} = imm2; 2779 2780 let Constraints = "$ZAda = $_ZAda"; 2781} 2782 2783multiclass sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> { 2784 def NAME : sme2_mla_ll_array_index_64b<mnemonic, op>, SMEPseudo2Instr<NAME, 1>; 2785 2786 def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>; 2787 2788 def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s4>; 2789} 2790 
// Shared encoding for the two- and four-source indexed long-long MLA forms
// accumulating into 32-bit ZA. The multiclasses below add the Zn field in
// Inst{9-6}; `vg4` selects both Inst{15} and the printed "vgx2"/"vgx4" text.
class sme2_mla_ll_array_vg24_index_32b<bits<2> sz, bit vg4, bits<3> op,
                                       RegisterOperand vector_ty,
                                       string mnemonic>
    : I<(outs MatrixOp32:$ZAda),
        (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
        mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  bits<4> i;
  bit     imm;
  let Inst{31-24} = 0b11000001;
  let Inst{23-22} = sz;
  let Inst{21-20} = 0b01;
  let Inst{19-16} = Zm;
  let Inst{15}    = vg4;
  let Inst{14-13} = Rv;
  let Inst{12}    = 0b0;
  let Inst{11-10} = i{3-2};
  let Inst{5-3}   = op;
  let Inst{2-1}   = i{1-0};
  let Inst{0}     = imm;

  let Constraints = "$ZAda = $_ZAda";
}

// SME2 multi-vec indexed long long MLA two sources 32-bit

multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<2> sz, bits<3> op, SDPatternOperator intrinsic> {
  def NAME: sme2_mla_ll_array_vg24_index_32b<sz, 0b0, op, ZZ_b_mul_r, mnemonic>,
            SMEPseudo2Instr<NAME, 1> {
    bits<4> Zn;
    let Inst{9-6} = Zn;
  }

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range,
                                                     ZZ_b_mul_r, ZPR4b8,
                                                     VectorIndexB32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b8,
                                          nxv16i8, VectorIndexB32b_timm,
                                          tileslicerange1s4>;

  // Parse-only alias without the "vgx2" specifier.
  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
        (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>;
}

// SME2 multi-vec indexed long long MLA four sources 32-bit

multiclass sme2_mla_ll_array_vg4_index_32b<string mnemonic, bits<2> sz, bits<4> op, SDPatternOperator intrinsic> {
  // Only the low three bits of `op` go to the base class; op{3} lands in
  // Inst{6}, next to the 3-bit Zn group field.
  def NAME: sme2_mla_ll_array_vg24_index_32b<sz, 0b1, op{2-0}, ZZZZ_b_mul_r, mnemonic>,
            SMEPseudo2Instr<NAME, 1> {
    bits<3> Zn;
    let Inst{9-7} = Zn;
    let Inst{6}   = op{3};
  }

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range,
                                                     ZZZZ_b_mul_r, ZPR4b8,
                                                     VectorIndexB32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b8,
                                          nxv16i8, VectorIndexB32b_timm,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
        (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>;
}

// Shared encoding for the two- and four-source indexed long-long MLA forms
// accumulating into 64-bit ZA. Inst{9-6} is supplied by the multiclasses.
class sme2_mla_ll_array_vg24_index_64b<bit vg4, bits<2> op,
                                       RegisterOperand vector_ty,
                                       string mnemonic>
    : I<(outs MatrixOp64:$ZAda),
        (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i),
        mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  bits<3> i;
  bit     imm;
  let Inst{31-20} = 0b110000011001;
  let Inst{19-16} = Zm;
  let Inst{15}    = vg4;
  let Inst{14-13} = Rv;
  let Inst{12-11} = 0b00;
  let Inst{10}    = i{2};
  let Inst{5}     = 0b0;
  let Inst{4-3}   = op;
  let Inst{2-1}   = i{1-0};
  let Inst{0}     = imm;

  let Constraints = "$ZAda = $_ZAda";
}

// SME2 multi-vec indexed long long MLA two sources 64-bit

multiclass sme2_mla_ll_array_vg2_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
  def NAME: sme2_mla_ll_array_vg24_index_64b<0b0, op, ZZ_h_mul_r, mnemonic>,
            SMEPseudo2Instr<NAME, 1> {
    bits<4> Zn;
    let Inst{9-6} = Zn;
  }

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range,
                                                     ZZ_h_mul_r, ZPR4b16,
                                                     VectorIndexH32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b16,
                                          nxv8i16, VectorIndexH32b_timm,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
        (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>;
}

// SME2 multi-vec indexed long long MLA four sources 64-bit

multiclass sme2_mla_ll_array_vg4_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
  def NAME: sme2_mla_ll_array_vg24_index_64b<0b1, op, ZZZZ_h_mul_r, mnemonic>,
            SMEPseudo2Instr<NAME, 1> {
    bits<3> Zn;
    let Inst{9-7} = Zn;
    let Inst{6}   = 0b0;
  }

  def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range,
                                                     ZZZZ_h_mul_r, ZPR4b16,
                                                     VectorIndexH32b_timm,
                                                     SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b16,
                                          nxv8i16, VectorIndexH32b_timm,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
        (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>;
}


// SME2 multiple and single vector long long FMA one source

class sme2_mla_ll_array_single<string mnemonic, bits<5> op,
                               MatrixOperand matrix_ty, ZPRRegOp vector_ty,
                               ZPRRegOp zpr_ty>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm,
             vector_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda[$Rv, $imm], $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  bits<5> Zn;
  bits<2> imm;
  let Inst{31-23} = 0b110000010;
  let Inst{22}    = op{4}; //sz
  let Inst{21}    = 0b1;
  let Inst{20}    = op{3}; //fp8
  let Inst{19-16} = Zm;
  let Inst{15}    = 0b0;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b001;
  let Inst{9-5}   = Zn;
  let Inst{4-2}   = op{2-0};
  let Inst{1-0}   = imm;

  let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_mla_ll_array_single<string mnemonic, bits<5> op,
                                    MatrixOperand matrix_ty, ZPRRegOp vector_ty,
                                    ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_single<mnemonic, op, matrix_ty, vector_ty, zpr_ty>,
             SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s4range,
                                                             vector_ty, zpr_ty,
                                                             SMEMatrixArray>;

  def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME, intrinsic, uimm2s4range, zpr_ty, vt,
                                       tileslicerange2s4>;
}

// Multiple-and-single vector encoding shared by the vg2 and vg4 forms.
// op{4} doubles as the vg4 selector: it is encoded in Inst{20} and also
// chooses the "vgx4"/"vgx2" text in the assembly string.
class sme2_mla_ll_array_vg24_single<bits<6> op, MatrixOperand matrix_ty,
                                    RegisterOperand vector_ty, ZPRRegOp zpr_ty,
                                    string mnemonic>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda[$Rv, $imm, " # !if(op{4}, "vgx4", "vgx2") # "], $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  bits<5> Zn;
  bit     imm;
  let Inst{31-23} = 0b110000010;
  let Inst{22}    = op{5}; //sz
  let Inst{21}    = 0b1;
  let Inst{20}    = op{4}; //vg4
  let Inst{19-16} = Zm;
  let Inst{15}    = 0b0;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b000;
  let Inst{9-5}   = Zn;
  let Inst{4-1}   = op{3-0};
  let Inst{0}     = imm;

  let Constraints = "$ZAda = $_ZAda";
}

// SME2 single-multi long long MLA two and four sources

multiclass sme2_mla_ll_array_vg24_single<string mnemonic, bits<6> op,
                                         MatrixOperand matrix_ty,
                                         RegisterOperand multi_vector_ty,
                                         ZPRRegOp zpr_ty> {
  def NAME: sme2_mla_ll_array_vg24_single<op, matrix_ty, multi_vector_ty,
                                          zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;

  def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm1s4range,
                                                             multi_vector_ty,
                                                             zpr_ty,
                                                             SMEMatrixArray>;

  def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm], $Zn, $Zm",
        (!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
}

multiclass sme2_mla_ll_array_vg2_single<string mnemonic, bits<5> op,
                                        MatrixOperand matrix_ty,
                                        RegisterOperand multi_vector_ty,
                                        ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {

  // The 5-bit `op` is widened to the 6 bits the class expects by appending
  // a zero low bit.
  defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, {op, 0b0}, matrix_ty,
                                           multi_vector_ty, zpr_ty>;

  def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm1s4range, zpr_ty,
                                           vt, tileslicerange1s4>;
}

multiclass sme2_mla_ll_array_vg4_single<string mnemonic, bits<5> op,
                                        MatrixOperand matrix_ty,
                                        RegisterOperand multi_vector_ty,
                                        ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
  defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, {op, 0b0}, matrix_ty,
                                           multi_vector_ty, zpr_ty>;

  def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm1s4range, zpr_ty,
                                           vt, tileslicerange1s4>;
}

// SME2 multiple vectors long long MLA two sources

class sme2_mla_ll_array_vg2_multi<bits<5> op, MatrixOperand matrix_ty,
                                  RegisterOperand vector_ty,string mnemonic>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$ZAda[$Rv, $imm, vgx2], $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<4> Zm;
  bits<2> Rv;
  bits<4> Zn;
  bit     imm;
  let Inst{31-23} = 0b110000011;
  let Inst{22}    = op{4}; // sz
  let Inst{21}    = 0b1;
  let Inst{20-17} = Zm;
  let Inst{16-15} = 0b00;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b000;
  let Inst{9-6}   = Zn;
  let Inst{5-2}   = op{3-0};
  let Inst{1}     = 0b0;
  let Inst{0}     = imm;

  let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_mla_ll_array_vg2_multi<string mnemonic, bits<5> op,
                                       MatrixOperand matrix_ty,
                                       RegisterOperand vector_ty,
                                       ValueType vt, SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_vg2_multi<op, matrix_ty, vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range,
                                                     vector_ty, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
        (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
}

// SME2 multiple vectors long long MLA four sources

class sme2_mla_ll_array_vg4_multi<bits<5> op,MatrixOperand matrix_ty,
                                  RegisterOperand vector_ty,
                                  string mnemonic>
    : I<(outs matrix_ty:$ZAda),
        (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
             vector_ty:$Zn, vector_ty:$Zm),
        mnemonic, "\t$ZAda[$Rv, $imm, vgx4], $Zn, $Zm",
        "", []>, Sched<[]> {
  bits<3> Zm;
  bits<2> Rv;
  bits<3> Zn;
  bit     imm;
  let Inst{31-23} = 0b110000011;
  let Inst{22}    = op{4}; // sz
  let Inst{21}    = 0b1;
  let Inst{20-18} = Zm;
  let Inst{17-15} = 0b010;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b000;
  let Inst{9-7}   = Zn;
  let Inst{6}     = 0b0;
  let Inst{5-2}   = op{3-0};
  let Inst{1}     = 0b0;
  let Inst{0}     = imm;

  let Constraints = "$ZAda = $_ZAda";
}

multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<5> op,
                                       MatrixOperand matrix_ty,
                                       RegisterOperand vector_ty,
                                       ValueType vt, SDPatternOperator intrinsic> {
  def NAME : sme2_mla_ll_array_vg4_multi<op, matrix_ty, vector_ty, mnemonic>,
             SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range,
                                                     vector_ty, SMEMatrixArray>;

  def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt,
                                          tileslicerange1s4>;

  def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
        (!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
}

//===----------------------------------------------------------------------===//
// SME2 Outer Product and Accumulate

multiclass sme2_int_mopx_tile<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
  def NAME : sme_int_outer_product_inst<op, 0b0, 0b1, TileOp32, ZPR16, mnemonic>,
             SMEPseudo2Instr<NAME, 1> {
    // 2-bit tile number in Inst{1-0}; Inst{2} is fixed to zero.
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2}   = 0b0;
  }

  def _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv8i1, nxv8i16>;
}

multiclass sme2_int_bmopx_tile<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
  def NAME : sme_outer_product_widening_inst<op, ZPR32, mnemonic>, SMEPseudo2Instr<NAME, 1>;

  def _PSEUDO : sme_outer_product_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;

  def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv4i1, nxv4i32>;
}

//===----------------------------------------------------------------------===//
// SME2 Zero Lookup Table.
3121class sme2_zero_zt<string mnemonic, bits<4> opc> 3122 : I<(outs ZTR:$ZT), (ins ), 3123 mnemonic, "\t\\{ $ZT \\}", 3124 "", []>, Sched<[]> { 3125 let Inst{31-4} = 0b1100000001001000000000000000; 3126 let Inst{3-0} = opc; 3127} 3128 3129multiclass sme2_zero_zt<string mnemonic, bits<4> opc> { 3130 def NAME : sme2_zero_zt<mnemonic, opc>; 3131 def NAME # _PSEUDO 3132 : Pseudo<(outs), (ins ZTR:$ZT), []>, Sched<[]> { 3133 // Translated to actual instruction in AArch64ISelLowering.cpp 3134 let usesCustomInserter = 1; 3135 } 3136 def : Pat<(int_aarch64_sme_zero_zt (imm_to_zt untyped:$zt)), 3137 (!cast<Instruction>(NAME # _PSEUDO) $zt)>; 3138} 3139 3140//===----------------------------------------------------------------------===// 3141// SME2 lookup table load/store 3142class sme2_spill_fill_vector<string mnemonic, bits<8> opc> 3143 : I<!if(opc{7}, (outs ), (outs ZTR:$ZTt)), 3144 !if(opc{7}, (ins ZTR:$ZTt, GPR64sp:$Rn), (ins GPR64sp:$Rn)), 3145 mnemonic, "\t$ZTt, [$Rn]", 3146 "", []>, Sched<[]> { 3147 bits<5> Rn; 3148 let Inst{31-22} = 0b1110000100; 3149 let Inst{21-16} = opc{7-2}; 3150 let Inst{15-10} = 0b100000; 3151 let Inst{9-5} = Rn; 3152 let Inst{4-2} = 0b000; 3153 let Inst{1-0} = opc{1-0}; 3154 3155 let mayLoad = !not(opc{7}); 3156 let mayStore = opc{7}; 3157} 3158 3159 3160multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc, SDPatternOperator op> { 3161 def NAME : sme2_spill_fill_vector<mnemonic, opc>; 3162 def NAME # _PSEUDO 3163 : Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>, Sched<[]> { 3164 // Translated to actual instruction in AArch64ISelLowering.cpp 3165 let usesCustomInserter = 1; 3166 } 3167 def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base), 3168 (!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>; 3169} 3170 3171//===----------------------------------------------------------------------===/// 3172// SME2 move to/from lookup table 3173class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc> 3174 : I<(outs GPR64:$Rt), (ins 
ZTR:$ZTt, uimm3s8:$imm3),
        mnemonic, "\t$Rt, $ZTt[$imm3]",
        "", []>,
      Sched<[]> {
  bits<3> imm3;
  bits<5> Rt;
  let Inst{31-15} = 0b11000000010011000;
  let Inst{14-12} = imm3;
  let Inst{11-5}  = opc;
  let Inst{4-0}   = Rt;
}

// Scalar -> lookup table: same field layout as the ZT -> scalar form, with a
// different fixed prefix in Inst{31-15} and ZTR as the output.
class sme2_movt_scalar_to_zt<string mnemonic, bits<7> opc>
    : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt),
        mnemonic, "\t$ZTt[$imm3], $Rt",
        "", []>,
      Sched<[]> {
  bits<3> imm3;
  bits<5> Rt;
  let Inst{31-15} = 0b11000000010011100;
  let Inst{14-12} = imm3;
  let Inst{11-5}  = opc;
  let Inst{4-0}   = Rt;
}

// SME2 move vector to lookup table
// Two-bit offset ($off2, printed with "mul vl") in Inst{13-12}, source
// vector register in Inst{4-0}.
class sme2_movt_zt_to_zt<string mnemonic, bits<7> opc>
    : I<(outs ZTR:$ZTt), (ins sme_elm_idx0_3:$off2, ZPRAny:$Zt),
        mnemonic, "\t$ZTt[$off2, mul vl], $Zt",
        "", []>,
      Sched<[]> {
  bits<5> Zt;
  bits<2> off2;
  let Inst{31-14} = 0b110000000100111100;
  let Inst{13-12} = off2;
  let Inst{11-5}  = opc;
  let Inst{4-0}   = Zt;
}

// Real instruction plus an alias that omits the offset (offset operand 0).
multiclass sme2_movt_zt_to_zt<string mnemonic, bits<7> opc> {
  def NAME : sme2_movt_zt_to_zt<mnemonic, opc>;

  def : InstAlias<mnemonic # "\t$ZTt, $Zt",
                  (!cast<Instruction>(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>;
}

//===----------------------------------------------------------------------===//
// SME2 lookup table expand one register

// Common encoding for the single-register lookup-table expand forms.
// `opc` is split across Inst{18-14} and Inst{11-10}; `sz` sits between them.
class sme2_luti_vector_index<bits<2> sz, bits<7> opc, RegisterOperand vector_ty,
                             AsmVectorIndexOpnd index_ty, string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<5> Zd;
  let Inst{31-19} = 0b1100000011001;
  let Inst{18-14} = opc{6-2};
  let Inst{13-12} = sz;
  let Inst{11-10} = opc{1-0};
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;
}

class sme2_luti2_vector_index<bits<2> sz, RegisterOperand vector_ty,
                              string mnemonic>
    :
sme2_luti_vector_index<sz, {1,?,?,?,?,0,0}, vector_ty, VectorIndexB32b_timm, mnemonic> {
  // The four unset opc bits are filled by the 4-bit index.
  bits<4> i;
  let Inst{17-14} = i;
}

// _B/_H/_S instructions plus one selection pattern per result type.
// The $zn source is typed nxv16i8 in every pattern; only the result type
// (and therefore the chosen instruction) varies.
multiclass sme2_luti2_vector_index<string mnemonic, SDPatternOperator intrinsic> {
  def _B : sme2_luti2_vector_index<0b00, ZPR8,  mnemonic>;
  def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>;
  def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>;

  def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                 (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
  def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexB32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexB32b_timm:$imm))>;
}

// Same base encoding with opc = {0,1,?,?,?,0,0}; the three unset bits are
// filled by a 3-bit index.
class sme2_luti4_vector_index<bits<2> sz, RegisterOperand vector_ty,
                              string mnemonic>
    : sme2_luti_vector_index<sz, {0,1,?,?,?,0,0}, vector_ty, VectorIndexH32b_timm, mnemonic> {
  bits<3> i;
  let Inst{16-14} = i;
}

multiclass sme2_luti4_vector_index<string mnemonic, SDPatternOperator intrinsic> {
  def _B : sme2_luti4_vector_index<0b00, ZPR8,
mnemonic>;
  def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>;
  def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>;

  // One pattern per result type; $zn is typed nxv16i8 in every pattern.
  def : Pat<(nxv16i8 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _B) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv8i16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv4i32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv8f16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv8bf16 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                 (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _H) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
  def : Pat<(nxv4f32 (intrinsic (imm_to_zt untyped:$zt), nxv16i8:$zn,
                                (i32 VectorIndexH32b_timm:$imm))),
            (!cast<Instruction>(NAME # _S) $zt, nxv16i8:$zn,
                                           (i32 VectorIndexH32b_timm:$imm))>;
}

// SME2 lookup table expand two contiguous registers
// Zd is a 4-bit field in Inst{4-1}; Inst{0} is fixed to zero.
class sme2_luti_vector_vg2_index<bits<2> sz, bits<6> opc, RegisterOperand vector_ty,
                                 AsmVectorIndexOpnd index_ty, string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<4> Zd;
  let Inst{31-19} = 0b1100000010001;
  let Inst{18-15} = opc{5-2};
  let Inst{14}    = 0b1;
  let Inst{13-12} = sz;
  let Inst{11-10} = opc{1-0};
  let Inst{9-5}   = Zn;
  let Inst{4-1}   = Zd;
  let Inst{0}     = 0b0;
}

class sme2_luti2_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg2_index<sz, {1,?,?,?,0,0}, vector_ty, VectorIndexH, mnemonic> {
  // 3-bit index fills the unset opc bits.
  bits<3> i;
  let Inst{17-15} = i;
}

multiclass sme2_luti2_vector_vg2_index<string mnemonic> {
  def _B : sme2_luti2_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>;
  def _H : sme2_luti2_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>;
  def _S : sme2_luti2_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>;
}

class sme2_luti4_vector_vg2_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg2_index<sz, {0,1,?,?,0,0}, vector_ty, VectorIndexS, mnemonic> {
  // 2-bit index fills the unset opc bits.
  bits<2> i;
  let Inst{16-15} = i;
}

multiclass sme2_luti4_vector_vg2_index<string mnemonic> {
  def _B : sme2_luti4_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>;
  def _H : sme2_luti4_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>;
  def _S : sme2_luti4_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>;
}

// SME2 lookup table expand four contiguous registers
// Zd is a 3-bit field in Inst{4-2}; Inst{1-0} are fixed to zero.
class sme2_luti_vector_vg4_index<bits<2> sz, bits<5>opc, RegisterOperand vector_ty,
                                 AsmVectorIndexOpnd index_ty, string mnemonic>
    : I<(outs vector_ty:$Zd),
        (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>,
      Sched<[]> {
  bits<5> Zn;
  bits<3> Zd;
  let Inst{31-19} = 0b1100000010001;
  let Inst{18-16} = opc{4-2};
  let Inst{15-14} = 0b10;
  let Inst{13-12} = sz;
  let Inst{11-10} = opc{1-0};
  let Inst{9-5}   = Zn;
  let Inst{4-2}   = Zd;
  let Inst{1-0}   = 0b00;
}

class sme2_luti2_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg4_index<sz, {1,?,?,0,0}, vector_ty, VectorIndexS, mnemonic> {
  // 2-bit index fills the unset opc bits.
  bits<2> i;
  let Inst{17-16} = i;
}

multiclass sme2_luti2_vector_vg4_index<string mnemonic> {
  def _B : sme2_luti2_vector_vg4_index<0b00, ZZZZ_b_mul_r,
mnemonic>;
  def _H : sme2_luti2_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_luti2_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
}

class sme2_luti4_vector_vg4_index<bits<2> sz, RegisterOperand vector_ty,
                                  string mnemonic>
    : sme2_luti_vector_vg4_index<sz, {0,1,?,0,0}, vector_ty, VectorIndexD, mnemonic> {
  // 1-bit index fills the single unset opc bit.
  bits<1> i;
  let Inst{16} = i;
}

// Only _H and _S variants are defined for this form.
multiclass sme2_luti4_vector_vg4_index<string mnemonic> {
  def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>;
}

//===----------------------------------------------------------------------===//
// SME2 MOV

// Common encoding for the two-register vector -> tile move. Inst{2-0} is left
// for the per-size definitions to fill with tile number and/or immediate.
// The tile is both read ($_ZAd) and written ($ZAd); the two are tied.
class sme2_mova_vec_to_tile_vg2_multi_base<bits<2> sz, bit v,
                                           RegisterOperand tile_ty,
                                           Operand index_ty,
                                           RegisterOperand vector_ty,
                                           string mnemonic>
    : I<(outs tile_ty:$ZAd),
        (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm, vector_ty:$Zn),
        mnemonic, "\t$ZAd[$Rs, $imm], $Zn",
        "", []>,
      Sched<[]> {
  bits<2> Rs;
  bits<4> Zn;
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;
  let Inst{21-16} = 0b000100;
  let Inst{15}    = v;
  let Inst{14-13} = Rs;
  let Inst{12-10} = 0b000;
  let Inst{9-6}   = Zn;
  let Inst{5-3}   = 0b000;

  let Constraints = "$ZAd = $_ZAd";
}

// Emits one anonymous InstAlias for `inst`.
//   prefer     - forwarded as the InstAlias priority argument.
//   vg_acronym - when non-empty, ", <vg_acronym>" is appended inside the
//                square brackets of the alias syntax.
multiclass sme2_mova_vec_to_tile_or_array_aliases<int prefer, Instruction inst,
                                                  RegisterOperand tile_or_array_ty,
                                                  RegisterOperand rv_ty,
                                                  Operand index_ty,
                                                  RegisterOperand vector_ty,
                                                  string mnemonic,
                                                  string vg_acronym=""> {
  def : InstAlias<mnemonic # "\t$ZAd[$Rs, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn",
                  (inst tile_or_array_ty:$ZAd, rv_ty:$Rs, index_ty:$imm, vector_ty:$Zn), prefer>;

}

// SME2 move vector to tile, two registers
//
// For each element size (_B/_H/_S/_D) this emits the real instruction, a
// _PSEUDO, the selection patterns for `intrinsic`, and the "mov"/"mova"
// assembly aliases. Each alias is declared exactly once.
multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic, SDPatternOperator intrinsic> {
  // `v` selects the TileVectorOpV* operand, otherwise TileVectorOpH*.
  defvar tile_b = !if(v, TileVectorOpV8,  TileVectorOpH8);
  defvar tile_h = !if(v, TileVectorOpV16, TileVectorOpH16);
  defvar tile_s = !if(v, TileVectorOpV32, TileVectorOpH32);
  defvar tile_d = !if(v, TileVectorOpV64, TileVectorOpH64);

  // Inst{2-0} is split between tile number (ZAd) and immediate; the split
  // shifts by one bit per element size.
  def _B : sme2_mova_vec_to_tile_vg2_multi_base<0b00, v, tile_b,
                                                uimm3s2range, ZZ_b_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _B, 1> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }

  def _H : sme2_mova_vec_to_tile_vg2_multi_base<0b01, v, tile_h,
                                                uimm2s2range, ZZ_h_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _H, 1> {
    bits<1> ZAd;
    bits<2> imm;
    let Inst{2}   = ZAd;
    let Inst{1-0} = imm;
  }

  def _S : sme2_mova_vec_to_tile_vg2_multi_base<0b10, v, tile_s,
                                                uimm1s2range, ZZ_s_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _S, 1> {
    bits<2> ZAd;
    bits<1> imm;
    let Inst{2-1} = ZAd;
    let Inst{0}   = imm;
  }

  def _D : sme2_mova_vec_to_tile_vg2_multi_base<0b11, v, tile_d,
                                                uimm0s2range, ZZ_d_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _D, 1> {
    bits<3> ZAd;
    let Inst{2-0} = ZAd;
  }

  def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
  def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
  def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
  def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;

  def : SME2_Tile_VG2_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8,  uimm3s2range, tileslicerange3s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16,  uimm2s2range, tileslicerange2s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16,  uimm2s2range, tileslicerange2s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm2s2range, tileslicerange2s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32,  uimm1s2range, tileslicerange1s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32,  uimm1s2range, tileslicerange1s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64,  uimm0s2range, tileslicerange0s2>;
  def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64,  uimm0s2range, tileslicerange0s2>;

  // "mov" aliases, priority 1.
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B), tile_b,
                                                MatrixIndexGPR32Op12_15,
                                                uimm3s2range, ZZ_b_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _H), tile_h,
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s2range, ZZ_h_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _S), tile_s,
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s2range, ZZ_s_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _D), tile_d,
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s2range, ZZ_d_mul_r, "mov">;

  // "mova" aliases, priority 0.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B), tile_b,
                                                MatrixIndexGPR32Op12_15,
                                                uimm3s2range, ZZ_b_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H), tile_h,
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s2range, ZZ_h_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S), tile_s,
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s2range, ZZ_s_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D), tile_d,
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s2range, ZZ_d_mul_r, "mova">;
}

// Instantiates the base multiclass twice: _H with v = 0 and _V with v = 1.
multiclass sme2_mova_vec_to_tile_vg2_multi<string mnemonic,
                                           SDPatternOperator int_h, SDPatternOperator int_v>{
  defm _H : sme2_mova_vec_to_tile_vg2_multi_base<0b0, mnemonic, int_h>;
  defm _V : sme2_mova_vec_to_tile_vg2_multi_base<0b1, mnemonic, int_v>;
}

// Common encoding for the four-register vector -> tile move. `op` seeds
// Inst{2-0}; the per-size definitions overwrite its unset bits.
class sme2_mova_vec_to_tile_vg4_multi_base<bits<2> sz, bit v, bits<3> op,
                                           RegisterOperand tile_ty,
                                           Operand index_ty,
                                           RegisterOperand vector_ty,
                                           string mnemonic>
    : I<(outs tile_ty:$ZAd),
        (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm,
             vector_ty:$Zn),
        mnemonic,
        "\t$ZAd[$Rs, $imm], $Zn",
        "", []>,
      Sched<[]> {
  bits<2> Rs;
  bits<3> Zn;
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;
  let Inst{21-16} = 0b000100;
  let Inst{15}    = v;
  let Inst{14-13} = Rs;
  let Inst{12-10} = 0b001;
  let Inst{9-7}   = Zn;
let Inst{6-3}   = 0b0000;
  let Inst{2-0}   = op;
  let Constraints = "$ZAd = $_ZAd";
}

// SME2 move vector to tile, four registers
//
// For each element size (_B/_H/_S/_D) this emits the real instruction, a
// _PSEUDO, the selection patterns for `intrinsic`, and the "mov"/"mova"
// assembly aliases.
multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic, SDPatternOperator intrinsic> {
  // `v` selects the TileVectorOpV* operand, otherwise TileVectorOpH*.
  defvar tile_b = !if(v, TileVectorOpV8,  TileVectorOpH8);
  defvar tile_h = !if(v, TileVectorOpV16, TileVectorOpH16);
  defvar tile_s = !if(v, TileVectorOpV32, TileVectorOpH32);
  defvar tile_d = !if(v, TileVectorOpV64, TileVectorOpH64);

  // _B/_H/_S pin Inst{2} to zero via op = {0,?,?}; _D leaves all three bits
  // open for the tile number.
  def _B : sme2_mova_vec_to_tile_vg4_multi_base<0b00, v, {0,?,?}, tile_b,
                                                uimm2s4range, ZZZZ_b_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _B, 1> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }

  def _H : sme2_mova_vec_to_tile_vg4_multi_base<0b01, v, {0,?,?}, tile_h,
                                                uimm1s4range, ZZZZ_h_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _H, 1> {
    bits<1> ZAd;
    bits<1> imm;
    let Inst{1} = ZAd;
    let Inst{0} = imm;
  }

  def _S : sme2_mova_vec_to_tile_vg4_multi_base<0b10, v, {0,?,?}, tile_s,
                                                uimm0s4range, ZZZZ_s_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _S, 1> {
    bits<2> ZAd;
    let Inst{1-0} = ZAd;
  }

  def _D : sme2_mova_vec_to_tile_vg4_multi_base<0b11, v, {?,?,?}, tile_d,
                                                uimm0s4range, ZZZZ_d_mul_r,
                                                mnemonic>,
           SMEPseudo2Instr<NAME # _D, 1> {
    bits<3> ZAd;
    let Inst{2-0} = ZAd;
  }

  def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
  def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
  def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
  def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;

  def : SME2_Tile_VG4_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8,  uimm2s4range, tileslicerange2s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16,  uimm1s4range, tileslicerange1s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16,  uimm1s4range, tileslicerange1s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm1s4range, tileslicerange1s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32,  uimm0s4range, tileslicerange0s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32,  uimm0s4range, tileslicerange0s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64,  uimm0s4range, tileslicerange0s4>;
  def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64,  uimm0s4range, tileslicerange0s4>;

  // "mov" aliases, priority 1.
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B), tile_b,
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s4range, ZZZZ_b_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _H), tile_h,
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s4range, ZZZZ_h_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _S), tile_s,
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_s_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _D), tile_d,
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_d_mul_r, "mov">;

  // "mova" aliases, priority 0.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _B), tile_b,
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s4range, ZZZZ_b_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _H), tile_h,
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s4range, ZZZZ_h_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _S), tile_s,
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_s_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME # _D), tile_d,
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s4range, ZZZZ_d_mul_r, "mova">;

}

// Instantiates the base multiclass twice: _H with v = 0 and _V with v = 1.
multiclass sme2_mova_vec_to_tile_vg4_multi<string mnemonic,
                                           SDPatternOperator int_h, SDPatternOperator int_v>{
  defm _H : sme2_mova_vec_to_tile_vg4_multi_base<0b0, mnemonic, int_h>;
  defm _V : sme2_mova_vec_to_tile_vg4_multi_base<0b1, mnemonic, int_v>;
}

// SME Move into Array
// Shared encoding for the two- and four-register vector -> array moves.
// `op` seeds Inst{10-6}; the instantiating multiclass overwrites part of it
// with the Zn field. The array is both read ($_ZAd) and written ($ZAd).
class sme2_mova_vec_to_array_vg24_multi< bits<5> op, RegisterOperand array_ty,
                                        RegisterOperand vector_ty,
                                        string mnemonic,
                                        string vg_acronym="">
    : I<(outs array_ty:$ZAd),
        (ins array_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm,
             vector_ty:$Zn),
        mnemonic, "\t$ZAd[$Rs, $imm, " # vg_acronym # "], $Zn",
        "", []>,
      Sched<[]> {
  bits<2> Rs;
  bits<3> imm;
  let Inst{31-15} = 0b11000000000001000;
  let Inst{14-13} = Rs;
  let Inst{12-11} = 0b01;
  let Inst{10-6}  = op;
  let Inst{5-3}   = 0b000;
  let Inst{2-0}   = imm;

  let Constraints = "$ZAd = $_ZAd";
}

// MOVA (vector to array, two registers)
multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic, SDPatternOperator intrinsic> {
  def NAME : sme2_mova_vec_to_array_vg24_multi<{0,?,?,?,?}, MatrixOp64,
                                               ZZ_d_mul_r, mnemonic, "vgx2">,
             SMEPseudo2Instr<NAME, 1> {
    bits<4> Zn;
    let Inst{9-6} = Zn;
  }

  def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;

  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv16i8, sme_elm_idx0_7,
tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8i16,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8f16,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4i32,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv4f32,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64,  sme_elm_idx0_7, tileslice16>;

  // "mova" without a vector-group suffix, all four element sizes.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp8,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_b_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp16,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_h_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp32,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_s_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp64,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_d_mul_r, "mova">;

  // "mova" with "vgx2". The 64-bit form is the instruction's own syntax, so
  // only the 8/16/32-bit spellings are added here.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp8,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_b_mul_r, "mova", "vgx2">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp16,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_h_mul_r, "mova", "vgx2">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp32,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_s_mul_r, "mova", "vgx2">;

  // "mov" without a vector-group suffix, all four element sizes.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp8,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_b_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp16,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_h_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp32,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_s_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp64,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_d_mul_r, "mov">;

  // "mov" with "vgx2"; only the 64-bit spelling gets priority 1.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp8,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_b_mul_r, "mov", "vgx2">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp16,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_h_mul_r, "mov", "vgx2">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp32,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_s_mul_r, "mov", "vgx2">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME), MatrixOp64,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZ_d_mul_r, "mov", "vgx2">;
}

// MOVA (vector to array, four registers)
//
// Same layout as the two-register form, but with op = {1,?,?,?,0}, a 3-bit
// Zn in Inst{9-7}, the ZZZZ_* register tuples and the "vgx4" suffix.
multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic, SDPatternOperator intrinsic> {
  def NAME : sme2_mova_vec_to_array_vg24_multi<{1,?,?,?,0}, MatrixOp64,
                                               ZZZZ_d_mul_r, mnemonic, "vgx4">,
             SMEPseudo2Instr<NAME, 1> {
    bits<3> Zn;
    let Inst{9-7} = Zn;
  }

  def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;

  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv16i8,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8i16,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8f16,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv8bf16, sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4i32,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv4f32,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64,  sme_elm_idx0_7, tileslice16>;
  def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64,  sme_elm_idx0_7, tileslice16>;

  // "mova" without a vector-group suffix, all four element sizes.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp8,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_b_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp16,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_h_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp32,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_s_mul_r, "mova">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp64,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_d_mul_r, "mova">;

  // "mova" with "vgx4" for the 8/16/32-bit spellings.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp8,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_b_mul_r, "mova", "vgx4">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp16,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_h_mul_r, "mova", "vgx4">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp32,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_s_mul_r, "mova", "vgx4">;

  // "mov" without a vector-group suffix, all four element sizes.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp8,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_b_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp16,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_h_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp32,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_s_mul_r, "mov">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp64,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_d_mul_r, "mov">;

  // "mov" with "vgx4"; only the 64-bit spelling gets priority 1.
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp8,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_b_mul_r, "mov", "vgx4">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp16,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_h_mul_r, "mov", "vgx4">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME), MatrixOp32,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_s_mul_r, "mov", "vgx4">;
  defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME), MatrixOp64,
                                                MatrixIndexGPR32Op8_11,
                                                sme_elm_idx0_7, ZZZZ_d_mul_r, "mov", "vgx4">;

}

// Common encoding for the two-register tile -> vector move.
// When op{1} is set the tile is additionally listed as an output ($_ZAn)
// tied to the $ZAn input; otherwise the only output is the vector.
// Inst{7-5} is left for the per-size definitions (tile number / immediate).
class sme2_mova_tile_to_vec_vg2_multi_base<bits<2> sz, bit v, bits<3> op,
                                           RegisterOperand vector_ty,
                                           RegisterOperand tile_ty,
                                           Operand index_ty,
                                           string mnemonic>
    : I<!if(op{1}, (outs vector_ty:$Zd, tile_ty:$_ZAn), (outs vector_ty:$Zd)),
        (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
        mnemonic,
        "\t$Zd, $ZAn[$Rs, $imm]",
        "", []>,
      Sched<[]> {
  bits<4> Zd;
  bits<2> Rs;
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;
  let Inst{21-16} = 0b000110;
  let Inst{15}    = v;
  let Inst{14-13} = Rs;
  let Inst{12-11} = 0b00;
  let Inst{10-8}  = op;
  let Inst{4-1}   = Zd;
  let Inst{0}     = 0b0;

  let Constraints = !if(op{1}, "$ZAn = $_ZAn", "");
}

multiclass
sme2_mova_tile_or_array_to_vec_aliases<int op, Instruction inst,
                                                  RegisterOperand vector_ty,
                                                  RegisterOperand tile_or_array_ty,
                                                  RegisterOperand rv_ty,
                                                  Operand index_ty,
                                                  string mnemonic,
                                                  string vg_acronym=""> {
  // One anonymous InstAlias for `inst`; `op` is forwarded as the alias
  // priority. A non-empty vg_acronym is appended as ", <vg_acronym>" inside
  // the square brackets.
  def : InstAlias<mnemonic # "\t$Zd, $ZAn[$Rs, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]",
                  (inst vector_ty:$Zd, tile_or_array_ty:$ZAn, rv_ty:$Rs, index_ty:$imm), op>;

}

// Per-size (_B/_H/_S/_D) tile -> vector instructions for two registers,
// plus their assembly aliases. "mov" aliases are only added when the
// mnemonic is "mova".
multiclass sme2_mova_tile_to_vec_vg2_multi_inst<bit v, bits<3> opc, string mnemonic> {
  // `v` selects the TileVectorOpV* operand, otherwise TileVectorOpH*.
  defvar tile_b = !if(v, TileVectorOpV8,  TileVectorOpH8);
  defvar tile_h = !if(v, TileVectorOpV16, TileVectorOpH16);
  defvar tile_s = !if(v, TileVectorOpV32, TileVectorOpH32);
  defvar tile_d = !if(v, TileVectorOpV64, TileVectorOpH64);

  // Inst{7-5} is split between tile number (ZAn) and immediate; the split
  // shifts by one bit per element size.
  def _B : sme2_mova_tile_to_vec_vg2_multi_base<0b00, v, opc, ZZ_b_mul_r,
                                                tile_b, uimm3s2range, mnemonic> {
    bits<3> imm;
    let Inst{7-5} = imm;
  }

  def _H : sme2_mova_tile_to_vec_vg2_multi_base<0b01, v, opc, ZZ_h_mul_r,
                                                tile_h, uimm2s2range, mnemonic> {
    bits<1> ZAn;
    bits<2> imm;
    let Inst{7}   = ZAn;
    let Inst{6-5} = imm;
  }

  def _S : sme2_mova_tile_to_vec_vg2_multi_base<0b10, v, opc, ZZ_s_mul_r,
                                                tile_s, uimm1s2range, mnemonic> {
    bits<2> ZAn;
    bits<1> imm;
    let Inst{7-6} = ZAn;
    let Inst{5}   = imm;
  }

  def _D : sme2_mova_tile_to_vec_vg2_multi_base<0b11, v, opc, ZZ_d_mul_r,
                                                tile_d, uimm0s2range, mnemonic> {
    bits<3> ZAn;
    let Inst{7-5} = ZAn;
  }

  // "mov" aliases, priority 1.
  if !eq(mnemonic, "mova") then {
    defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _B),
                                                  ZZ_b_mul_r, tile_b,
                                                  MatrixIndexGPR32Op12_15,
                                                  uimm3s2range, "mov">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _H),
                                                  ZZ_h_mul_r, tile_h,
                                                  MatrixIndexGPR32Op12_15,
                                                  uimm2s2range, "mov">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _S),
                                                  ZZ_s_mul_r, tile_s,
                                                  MatrixIndexGPR32Op12_15,
                                                  uimm1s2range, "mov">;
    defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _D),
                                                  ZZ_d_mul_r, tile_d,
                                                  MatrixIndexGPR32Op12_15,
                                                  uimm0s2range, "mov">;
  }

  // Aliases under the instruction's own mnemonic, priority 0.
  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _B),
                                                ZZ_b_mul_r, tile_b,
                                                MatrixIndexGPR32Op12_15,
                                                uimm3s2range, mnemonic>;
  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _H),
                                                ZZ_h_mul_r, tile_h,
                                                MatrixIndexGPR32Op12_15,
                                                uimm2s2range, mnemonic>;
  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _S),
                                                ZZ_s_mul_r, tile_s,
                                                MatrixIndexGPR32Op12_15,
                                                uimm1s2range, mnemonic>;
  defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _D),
                                                ZZ_d_mul_r, tile_d,
                                                MatrixIndexGPR32Op12_15,
                                                uimm0s2range, mnemonic>;

}

// SME2 move tile to vector, two registers
multiclass sme2_mova_tile_to_vec_vg2_multi<string mnemonic>{
  defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b000, mnemonic>;
  defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b000, mnemonic>;
}

// SME2p1 move tile to vector and zero tile, two registers
multiclass sme2p1_movaz_tile_to_vec_vg2<string mnemonic>{
  defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, 0b010, mnemonic>;
  defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, 0b010, mnemonic>;
}

class sme2_mova_tile_to_vec_vg4_multi_base<bits<2> sz, bit v, bits<6> op,
                                           RegisterOperand vector_ty,
                                           RegisterOperand tile_ty,
                                           Operand index_ty,
                                           string mnemonic>
    :
I<!if(op{4}, (outs vector_ty:$Zd, tile_ty:$_ZAn), (outs vector_ty:$Zd)), 4057 (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm), 4058 mnemonic, 4059 "\t$Zd, $ZAn[$Rs, $imm]", 4060 "", []>, Sched<[]> { 4061 bits<3> Zd; 4062 bits<2> Rs; 4063 let Inst{31-24} = 0b11000000; 4064 let Inst{23-22} = sz; 4065 let Inst{21-16} = 0b000110; 4066 let Inst{15} = v; 4067 let Inst{14-13} = Rs; 4068 let Inst{12-11} = 0b00; 4069 let Inst{10-5} = op{5-0}; 4070 let Inst{4-2} = Zd; 4071 let Inst{1-0} = 0b00; 4072 4073 let Constraints = !if(op{4}, "$ZAn = $_ZAn", ""); 4074} 4075 4076multiclass sme2_mova_tile_to_vec_vg4_multi_base<bit v, bits<3> opc, string mnemonic> { 4077 4078 def _B : sme2_mova_tile_to_vec_vg4_multi_base<0b00, v, {opc,0,?,?}, 4079 ZZZZ_b_mul_r, 4080 !if(v, TileVectorOpV8, 4081 TileVectorOpH8), 4082 uimm2s4range, mnemonic> { 4083 bits<2> imm; 4084 let Inst{6-5} = imm; 4085 } 4086 4087 def _H : sme2_mova_tile_to_vec_vg4_multi_base<0b01, v, {opc,0,?,?}, 4088 ZZZZ_h_mul_r, 4089 !if(v, TileVectorOpV16, 4090 TileVectorOpH16), 4091 uimm1s4range, mnemonic> { 4092 bits<1> ZAn; 4093 bits<1> imm; 4094 let Inst{6} = ZAn; 4095 let Inst{5} = imm; 4096 } 4097 4098 def _S : sme2_mova_tile_to_vec_vg4_multi_base<0b10, v, {opc,0,?,?}, 4099 ZZZZ_s_mul_r, 4100 !if(v, TileVectorOpV32, 4101 TileVectorOpH32), 4102 uimm0s4range, mnemonic> { 4103 bits<2> ZAn; 4104 let Inst{6-5} = ZAn; 4105 } 4106 4107 def _D : sme2_mova_tile_to_vec_vg4_multi_base<0b11, v, {opc,?,?,?}, 4108 ZZZZ_d_mul_r, 4109 !if(v, TileVectorOpV64, 4110 TileVectorOpH64), 4111 uimm0s4range, mnemonic> { 4112 bits<3> ZAn; 4113 let Inst{7-5} = ZAn; 4114 } 4115 4116 if !eq(mnemonic, "mova") then { 4117 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _B), 4118 ZZZZ_b_mul_r, 4119 !if(v, TileVectorOpV8, 4120 TileVectorOpH8), 4121 MatrixIndexGPR32Op12_15, 4122 uimm2s4range, "mov">; 4123 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _H), 4124 ZZZZ_h_mul_r, 4125 !if(v, 
TileVectorOpV16, 4126 TileVectorOpH16), 4127 MatrixIndexGPR32Op12_15, 4128 uimm1s4range, "mov">; 4129 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _S), 4130 ZZZZ_s_mul_r, 4131 !if(v, TileVectorOpV32, 4132 TileVectorOpH32), 4133 MatrixIndexGPR32Op12_15, 4134 uimm0s4range, "mov">; 4135 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME # _D), 4136 ZZZZ_d_mul_r, 4137 !if(v, TileVectorOpV64, 4138 TileVectorOpH64), 4139 MatrixIndexGPR32Op12_15, 4140 uimm0s4range, "mov">; 4141 } 4142 4143 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _B), 4144 ZZZZ_b_mul_r, 4145 !if(v, TileVectorOpV8, 4146 TileVectorOpH8), 4147 MatrixIndexGPR32Op12_15, 4148 uimm2s4range, mnemonic>; 4149 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _H), 4150 ZZZZ_h_mul_r, 4151 !if(v, TileVectorOpV16, 4152 TileVectorOpH16), 4153 MatrixIndexGPR32Op12_15, 4154 uimm1s4range, mnemonic>; 4155 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _S), 4156 ZZZZ_s_mul_r, 4157 !if(v, TileVectorOpV32, 4158 TileVectorOpH32), 4159 MatrixIndexGPR32Op12_15, 4160 uimm0s4range, mnemonic>; 4161 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME # _D), 4162 ZZZZ_d_mul_r, 4163 !if(v, TileVectorOpV64, 4164 TileVectorOpH64), 4165 MatrixIndexGPR32Op12_15, 4166 uimm0s4range, mnemonic>; 4167 4168} 4169 4170// SME2 move tile to vector, four registers 4171multiclass sme2_mova_tile_to_vec_vg4_multi<string mnemonic>{ 4172 defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b100, mnemonic>; 4173 defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b100, mnemonic>; 4174} 4175 4176// SME2p1 move tile to vector and zero tile, four registers 4177multiclass sme2p1_movaz_tile_to_vec_vg4<string mnemonic>{ 4178 defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, 0b110, mnemonic>; 4179 defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, 0b110, mnemonic>; 4180} 4181 4182 4183class 
sme2_mova_array_to_vec_vg24_multi<bits<4>op, RegisterOperand vector_ty, 4184 RegisterOperand array_ty, 4185 string mnemonic, string vg_acronym> 4186 : I<!if(op{2}, (outs vector_ty:$Zd, array_ty:$_ZAn), (outs vector_ty:$Zd)), 4187 (ins array_ty:$ZAn, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm), 4188 mnemonic, 4189 "\t$Zd, $ZAn[$Rs, $imm, " # vg_acronym # "]", 4190 "", []>, Sched<[]> { 4191 bits<2> Rs; 4192 bits<3> imm; 4193 let Inst{31-15} = 0b11000000000001100; 4194 let Inst{14-13} = Rs; 4195 let Inst{12-11} = 0b01; 4196 let Inst{10-8} = op{3-1}; 4197 let Inst{7-5} = imm; 4198 let Inst{1} = op{0}; 4199 let Inst{0} = 0b0; 4200 let Constraints = !if(op{2}, "$ZAn = $_ZAn", ""); 4201} 4202 4203// move array to vector, two registers. 4204multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> { 4205 def NAME : sme2_mova_array_to_vec_vg24_multi<{opc,?}, ZZ_d_mul_r, MatrixOp64, 4206 mnemonic, "vgx2"> { 4207 bits<4> Zd; 4208 let Inst{4-1} = Zd; 4209 } 4210 4211 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4212 ZZ_b_mul_r, MatrixOp8, 4213 MatrixIndexGPR32Op8_11, 4214 sme_elm_idx0_7, mnemonic>; 4215 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4216 ZZ_h_mul_r, MatrixOp16, 4217 MatrixIndexGPR32Op8_11, 4218 sme_elm_idx0_7, mnemonic>; 4219 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4220 ZZ_s_mul_r, MatrixOp32, 4221 MatrixIndexGPR32Op8_11, 4222 sme_elm_idx0_7, mnemonic>; 4223 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4224 ZZ_d_mul_r, MatrixOp64, 4225 MatrixIndexGPR32Op8_11, 4226 sme_elm_idx0_7, mnemonic>; 4227 4228 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4229 ZZ_b_mul_r, MatrixOp8, 4230 MatrixIndexGPR32Op8_11, 4231 sme_elm_idx0_7, mnemonic, "vgx2">; 4232 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4233 ZZ_h_mul_r, MatrixOp16, 4234 MatrixIndexGPR32Op8_11, 4235 
sme_elm_idx0_7, mnemonic, "vgx2">; 4236 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4237 ZZ_s_mul_r, MatrixOp32, 4238 MatrixIndexGPR32Op8_11, 4239 sme_elm_idx0_7, mnemonic, "vgx2">; 4240 4241 if !eq(mnemonic, "mova") then { 4242 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4243 ZZ_b_mul_r, MatrixOp8, 4244 MatrixIndexGPR32Op8_11, 4245 sme_elm_idx0_7, "mov">; 4246 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4247 ZZ_h_mul_r, MatrixOp16, 4248 MatrixIndexGPR32Op8_11, 4249 sme_elm_idx0_7, "mov">; 4250 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4251 ZZ_s_mul_r, MatrixOp32, 4252 MatrixIndexGPR32Op8_11, 4253 sme_elm_idx0_7, "mov">; 4254 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4255 ZZ_d_mul_r, MatrixOp64, 4256 MatrixIndexGPR32Op8_11, 4257 sme_elm_idx0_7, "mov">; 4258 4259 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4260 ZZ_b_mul_r, MatrixOp8, 4261 MatrixIndexGPR32Op8_11, 4262 sme_elm_idx0_7, "mov", "vgx2">; 4263 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4264 ZZ_h_mul_r, MatrixOp16, 4265 MatrixIndexGPR32Op8_11, 4266 sme_elm_idx0_7, "mov", "vgx2">; 4267 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4268 ZZ_s_mul_r, MatrixOp32, 4269 MatrixIndexGPR32Op8_11, 4270 sme_elm_idx0_7, "mov", "vgx2">; 4271 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME), 4272 ZZ_d_mul_r, MatrixOp64, 4273 MatrixIndexGPR32Op8_11, 4274 sme_elm_idx0_7, "mov", "vgx2">; 4275 } 4276} 4277 4278// move array to vector, four registers 4279multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> { 4280 def NAME : sme2_mova_array_to_vec_vg24_multi<opc, ZZZZ_d_mul_r, MatrixOp64, 4281 mnemonic, "vgx4"> { 4282 bits<3> Zd; 4283 let Inst{4-2} = Zd; 4284 } 4285 4286 defm : sme2_mova_tile_or_array_to_vec_aliases<0, 
!cast<Instruction>(NAME), 4287 ZZZZ_b_mul_r, MatrixOp8, 4288 MatrixIndexGPR32Op8_11, 4289 sme_elm_idx0_7, mnemonic>; 4290 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4291 ZZZZ_h_mul_r, MatrixOp16, 4292 MatrixIndexGPR32Op8_11, 4293 sme_elm_idx0_7, mnemonic>; 4294 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4295 ZZZZ_s_mul_r, MatrixOp32, 4296 MatrixIndexGPR32Op8_11, 4297 sme_elm_idx0_7, mnemonic>; 4298 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4299 ZZZZ_d_mul_r, MatrixOp64, 4300 MatrixIndexGPR32Op8_11, 4301 sme_elm_idx0_7, mnemonic>; 4302 4303 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4304 ZZZZ_b_mul_r, MatrixOp8, 4305 MatrixIndexGPR32Op8_11, 4306 sme_elm_idx0_7, mnemonic, "vgx4">; 4307 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4308 ZZZZ_h_mul_r, MatrixOp16, 4309 MatrixIndexGPR32Op8_11, 4310 sme_elm_idx0_7, mnemonic, "vgx4">; 4311 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4312 ZZZZ_s_mul_r, MatrixOp32, 4313 MatrixIndexGPR32Op8_11, 4314 sme_elm_idx0_7, mnemonic, "vgx4">; 4315 4316 if !eq(mnemonic, "mova") then { 4317 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4318 ZZZZ_b_mul_r, MatrixOp8, 4319 MatrixIndexGPR32Op8_11, 4320 sme_elm_idx0_7, "mov">; 4321 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4322 ZZZZ_h_mul_r, MatrixOp16, 4323 MatrixIndexGPR32Op8_11, 4324 sme_elm_idx0_7, "mov">; 4325 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4326 ZZZZ_s_mul_r, MatrixOp32, 4327 MatrixIndexGPR32Op8_11, 4328 sme_elm_idx0_7, "mov">; 4329 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4330 ZZZZ_d_mul_r, MatrixOp64, 4331 MatrixIndexGPR32Op8_11, 4332 sme_elm_idx0_7, "mov">; 4333 4334 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4335 ZZZZ_b_mul_r, 
MatrixOp8, 4336 MatrixIndexGPR32Op8_11, 4337 sme_elm_idx0_7, "mov", "vgx4">; 4338 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4339 ZZZZ_h_mul_r, MatrixOp16, 4340 MatrixIndexGPR32Op8_11, 4341 sme_elm_idx0_7, "mov", "vgx4">; 4342 defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast<Instruction>(NAME), 4343 ZZZZ_s_mul_r, MatrixOp32, 4344 MatrixIndexGPR32Op8_11, 4345 sme_elm_idx0_7, "mov", "vgx4">; 4346 defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast<Instruction>(NAME), 4347 ZZZZ_d_mul_r, MatrixOp64, 4348 MatrixIndexGPR32Op8_11, 4349 sme_elm_idx0_7, "mov", "vgx4">; 4350 } 4351} 4352 4353//===----------------------------------------------------------------------===// 4354// SME2 multi-vec saturating shift right narrow 4355class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> 4356 : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), 4357 mnemonic, "\t$Zd, $Zn, $imm4", 4358 "", []>, Sched<[]> { 4359 bits<4> imm4; 4360 bits<4> Zn; 4361 bits<5> Zd; 4362 let Inst{31-21} = 0b11000001111; 4363 let Inst{20} = op; 4364 let Inst{19-16} = imm4; 4365 let Inst{15-10} = 0b110101; 4366 let Inst{9-6} = Zn; 4367 let Inst{5} = u; 4368 let Inst{4-0} = Zd; 4369} 4370 4371multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> { 4372 def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>; 4373 4374 def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>; 4375} 4376 4377class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty, 4378 RegisterOperand vector_ty, Operand imm_ty, 4379 string mnemonic> 4380 : I<(outs zpr_ty:$Zd), (ins vector_ty:$Zn, imm_ty:$imm), 4381 mnemonic, "\t$Zd, $Zn, $imm", 4382 "", []>, Sched<[]> { 4383 bits<3> Zn; 4384 bits<5> Zd; 4385 let Inst{31-24} = 0b11000001; 4386 let Inst{23-22} = sz; 4387 let Inst{21} = 0b1; 4388 // Inst{20-16} = imm5; 4389 let Inst{15-11} = 0b11011; 4390 let Inst{10} = op{2}; 4391 let Inst{9-7} = Zn; 4392 
let Inst{6-5} = op{1-0}; 4393 let Inst{4-0} = Zd; 4394} 4395 4396multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> { 4397 def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32, 4398 mnemonic>{ 4399 bits<5> imm; 4400 let Inst{20-16} = imm; 4401 } 4402 def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64, 4403 mnemonic> { 4404 bits<6> imm; 4405 let Inst{22} = imm{5}; 4406 let Inst{20-16} = imm{4-0}; 4407 } 4408 4409 def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>; 4410 def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>; 4411} 4412 4413//===----------------------------------------------------------------------===// 4414// SME2 Multi-vector - SVE Select 4415class sme2_sel_vector_vg24<bits<2> sz, bits<4> op, RegisterOperand vector_ty, 4416 string mnemonic> 4417 : I<(outs vector_ty:$Zd), 4418 (ins PNRAny_p8to15:$PNg, vector_ty:$Zn, vector_ty:$Zm), 4419 mnemonic, "\t$Zd, $PNg, $Zn, $Zm", 4420 "", []>, Sched<[]> { 4421 bits<3> PNg; 4422 let Inst{31-24} = 0b11000001; 4423 let Inst{23-22} = sz; 4424 let Inst{21} = 0b1; 4425 let Inst{17-16} = op{3-2}; 4426 let Inst{15-13} = 0b100; 4427 let Inst{12-10} = PNg; 4428 let Inst{6} = op{1}; 4429 let Inst{5} = 0b0; 4430 let Inst{1} = op{0}; 4431 let Inst{0} = 0b0; 4432} 4433 4434class sme2_sel_vector_vg2<bits<2> sz, RegisterOperand vector_ty, 4435 string mnemonic> 4436 : sme2_sel_vector_vg24<sz, {?,0,?,?}, vector_ty, mnemonic> { 4437 bits<4> Zm; 4438 bits<4> Zn; 4439 bits<4> Zd; 4440 let Inst{20-17} = Zm; 4441 let Inst{9-6} = Zn; 4442 let Inst{4-1} = Zd; 4443} 4444 4445multiclass sme2_sel_vector_vg2<string mnemonic>{ 4446 def _B : sme2_sel_vector_vg2<0b00, ZZ_b_mul_r, mnemonic>; 4447 def _H : sme2_sel_vector_vg2<0b01, ZZ_h_mul_r, mnemonic>; 4448 def _S : sme2_sel_vector_vg2<0b10, ZZ_s_mul_r, mnemonic>; 4449 def _D : sme2_sel_vector_vg2<0b11, ZZ_d_mul_r, mnemonic>; 4450} 
// Four-register select: 3-bit register fields on top of the shared
// sme2_sel_vector_vg24 encoding, with op fixed to 0b0100.
class sme2_sel_vector_vg4<bits<2> sz, RegisterOperand vector_ty,
                          string mnemonic>
    : sme2_sel_vector_vg24<sz, 0b0100, vector_ty, mnemonic> {
  bits<3> Zm;
  bits<3> Zn;
  bits<3> Zd;
  let Inst{20-18} = Zm;
  let Inst{9-7} = Zn;
  let Inst{4-2} = Zd;
}

// One four-register select per element size (sz = 0b00..0b11).
multiclass sme2_sel_vector_vg4<string mnemonic> {
  def _B : sme2_sel_vector_vg4<0b00, ZZZZ_b_mul_r, mnemonic>;
  def _H : sme2_sel_vector_vg4<0b01, ZZZZ_h_mul_r, mnemonic>;
  def _S : sme2_sel_vector_vg4<0b10, ZZZZ_s_mul_r, mnemonic>;
  def _D : sme2_sel_vector_vg4<0b11, ZZZZ_d_mul_r, mnemonic>;
}

//===----------------------------------------------------------------------===//
// Non contiguous Load and Store

// Two-register load, scalar base plus scalar index ([$Rn, $Rm]).
//   msz - placed in Inst{14-13}.
//   n   - placed in Inst{3}.
// $Zt is 4 bits wide and is split around `n`: Zt{3} in Inst{4}, Zt{2-0} in
// Inst{2-0}.
class sme2_ld_vector_vg2_multi_scalar_scalar<
    bits<2> msz, bit n,
    RegisterOperand multi_vector_ty,
    RegisterOperand gpr_ty,
    string mnemonic>
    : I<(outs multi_vector_ty:$Zt),
        (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]",
        "", []>, Sched<[]> {
  bits<5> Rm;
  bits<3> PNg;
  bits<5> Rn;
  bits<4> Zt;
  let Inst{31-21} = 0b10100001000;
  let Inst{20-16} = Rm;
  let Inst{15} = 0b0;
  let Inst{14-13} = msz;
  let Inst{12-10} = PNg;
  let Inst{9-5} = Rn;
  let Inst{4} = Zt{3};
  let Inst{3} = n;
  let Inst{2-0} = Zt{2-0};

  let mayLoad = 1;
}

// Four-register load, scalar base plus scalar index. Differs from the
// two-register form in Inst{15} (0b1), a 3-bit $Zt (Zt{2} in Inst{4},
// Zt{1-0} in Inst{1-0}) and a fixed 0 in Inst{2}.
class sme2_ld_vector_vg4_multi_scalar_scalar<
    bits<2> msz, bit n,
    RegisterOperand multi_vector_ty,
    RegisterOperand gpr_ty,
    string mnemonic>
    : I<(outs multi_vector_ty:$Zt),
        (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]",
        "", []>, Sched<[]> {
  bits<5> Rm;
  bits<3> PNg;
  bits<5> Rn;
  bits<3> Zt;
  let Inst{31-21} = 0b10100001000;
  let Inst{20-16} = Rm;
  let Inst{15} = 0b1;
  let Inst{14-13} = msz;
  let Inst{12-10} = PNg;
  let Inst{9-5} = Rn;
  let Inst{4} = Zt{2};
  let Inst{3} = n;
  let Inst{2} = 0b0;
  let Inst{1-0} = Zt{1-0};

  let mayLoad = 1;
}

// Shared encoding for the two- and four-register loads with an immediate
// offset ([$Rn, $imm4, mul vl]).
//   op - op{1} goes to Inst{15}, op{0} to Inst{2}.
// The $Zt bits are assigned by the instantiating multiclasses.
class sme2_ld_vector_vg24_multi_scalar_immediate<
    bits<2> msz, bit n, bits<2> op,
    RegisterOperand multi_vector_ty,
    Operand index_ty,
    string mnemonic>
    : I<(outs multi_vector_ty:$Zt),
        (ins PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4),
        mnemonic, "\t$Zt, $PNg/z, [$Rn, $imm4, mul vl]",
        "", []>, Sched<[]> {
  bits<4> imm4;
  bits<3> PNg;
  bits<5> Rn;
  let Inst{31-20} = 0b101000010100;
  let Inst{19-16} = imm4;
  let Inst{15} = op{1};
  let Inst{14-13} = msz;
  let Inst{12-10} = PNg;
  let Inst{9-5} = Rn;
  let Inst{3} = n;
  let Inst{2} = op{0};

  let mayLoad = 1;
}

// Two-register immediate-offset load, plus an alias that omits the offset
// and encodes it as 0.
multiclass sme2_ld_vector_vg2_multi_scalar_immediate<
    bits<2> msz, bit n,
    RegisterOperand multi_vector_ty,
    Operand index_ty,
    string mnemonic> {
  // The unset low bit of `op` (Inst{2}) is covered by Zt{2-0}.
  def NAME : sme2_ld_vector_vg24_multi_scalar_immediate<
      msz, n, {0,?}, multi_vector_ty, index_ty, mnemonic> {
    bits<4> Zt;
    let Inst{4} = Zt{3};
    let Inst{2-0} = Zt{2-0};
  }

  def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]",
                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
}

// Four-register immediate-offset load (op = 0b10), plus an alias that omits
// the offset and encodes it as 0.
multiclass sme2_ld_vector_vg4_multi_scalar_immediate<
    bits<2> msz, bit n,
    RegisterOperand multi_vector_ty,
    Operand index_ty,
    string mnemonic> {
  def NAME : sme2_ld_vector_vg24_multi_scalar_immediate<
      msz, n, 0b10, multi_vector_ty, index_ty, mnemonic> {
    bits<3> Zt;
    let Inst{4} = Zt{2};
    let Inst{1-0} = Zt{1-0};
  }

  def : InstAlias<mnemonic # "\t$Zt, $PNg/z, [$Rn]",
                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
}

//===----------------------------------------------------------------------===//
// SME2 Non-Contiguous Store

// Two-register store, scalar base plus scalar index. Field layout matches
// sme2_ld_vector_vg2_multi_scalar_scalar; the fixed bits in Inst{31-21}
// differ and $Zt is an input.
class sme2_st_vector_vg2_multi_scalar_scalar<
    bits<2> msz, bit n,
    RegisterOperand multi_vector_ty,
    RegisterOperand gpr_ty,
    string mnemonic>
    : I<(outs),
        (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]",
        "", []>, Sched<[]> {
  bits<5> Rm;
  bits<3> PNg;
  bits<5> Rn;
  bits<4> Zt;
  let Inst{31-21} = 0b10100001001;
  let Inst{20-16} = Rm;
  let Inst{15} = 0b0;
  let Inst{14-13} = msz;
  let Inst{12-10} = PNg;
  let Inst{9-5} = Rn;
  let Inst{4} = Zt{3};
  let Inst{3} = n;
  let Inst{2-0} = Zt{2-0};

  let mayStore = 1;
}

// Four-register store, scalar base plus scalar index. Field layout matches
// sme2_ld_vector_vg4_multi_scalar_scalar.
class sme2_st_vector_vg4_multi_scalar_scalar<
    bits<2> msz, bit n,
    RegisterOperand multi_vector_ty,
    RegisterOperand gpr_ty,
    string mnemonic>
    : I<(outs),
        (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]",
        "", []>, Sched<[]> {
  bits<5> Rm;
  bits<3> PNg;
  bits<5> Rn;
  bits<3> Zt;
  let Inst{31-21} = 0b10100001001;
  let Inst{20-16} = Rm;
  let Inst{15} = 0b1;
  let Inst{14-13} = msz;
  let Inst{12-10} = PNg;
  let Inst{9-5} = Rn;
  let Inst{4} = Zt{2};
  let Inst{3} = n;
  let Inst{2} = 0b0;
  let Inst{1-0} = Zt{1-0};

  let mayStore = 1;
}

// Shared encoding for the two- and four-register stores with an immediate
// offset. op{1} goes to Inst{15}, op{0} to Inst{2}; the $Zt bits are assigned
// by the instantiating multiclasses.
class sme2_st_vector_vg24_multi_scalar_immediate<
    bits<2> msz, bit n, bits<2> op,
    RegisterOperand multi_vector_ty,
    Operand index_ty,
    string mnemonic>
    : I<(outs),
        (ins multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, index_ty:$imm4),
        mnemonic, "\t$Zt, $PNg, [$Rn, $imm4, mul vl]",
        "", []>, Sched<[]> {
  bits<4> imm4;
  bits<3> PNg;
  bits<5> Rn;
  let Inst{31-20} = 0b101000010110;
  let Inst{19-16} = imm4;
  let Inst{15} = op{1};
  let Inst{14-13} = msz;
  let Inst{12-10} = PNg;
  let Inst{9-5} = Rn;
  let Inst{3} = n;
  let Inst{2} = op{0};

  let mayStore = 1;
}


// Two-register immediate-offset store, plus an alias that omits the offset
// and encodes it as 0.
multiclass sme2_st_vector_vg2_multi_scalar_immediate<
    bits<2> msz, bit n,
    RegisterOperand multi_vector_ty,
    Operand index_ty,
    string mnemonic> {
  // The unset low bit of `op` (Inst{2}) is covered by Zt{2-0}.
  def NAME : sme2_st_vector_vg24_multi_scalar_immediate<
      msz, n, {0,?}, multi_vector_ty, index_ty, mnemonic> {
    bits<4> Zt;
    let Inst{4} = Zt{3};
    let Inst{2-0} = Zt{2-0};
  }

  def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
}

// Four-register immediate-offset store (op = 0b10), plus an alias that omits
// the offset and encodes it as 0.
multiclass sme2_st_vector_vg4_multi_scalar_immediate<
    bits<2> msz, bit n,
    RegisterOperand multi_vector_ty,
    Operand index_ty,
    string mnemonic> {
  def NAME : sme2_st_vector_vg24_multi_scalar_immediate<
      msz, n, 0b10, multi_vector_ty, index_ty, mnemonic> {
    bits<3> Zt;
    let Inst{4} = Zt{2};
    let Inst{1-0} = Zt{1-0};
  }

  def : InstAlias<mnemonic # "\t$Zt, $PNg, [$Rn]",
                  (!cast<Instruction>(NAME) multi_vector_ty:$Zt, PNRAny_p8to15:$PNg, GPR64sp:$Rn, 0), 1>;
}

//===----------------------------------------------------------------------===//
// SME2.1
//===----------------------------------------------------------------------===//
// SME zeroing move array to vector

// Shared encoding for the single-vector zeroing move out of a tile.
//   sz - placed in Inst{23-22}.
//   q  - placed in Inst{16}.
//   v  - placed in Inst{15}.
// The tile is always both an input ($_ZAn) and a tied output ($ZAn).
// Inst{8-5} (tile number and/or immediate) is assigned per element size in
// the multiclass below.
class sme2p1_movaz_tile_to_vec_base<
    bits<2> sz, bit q, bit v, ZPRRegOp vector_ty,
    RegisterOperand tile_ty, Operand index_ty,
    string mnemonic>
    : I<(outs vector_ty:$Zd, tile_ty:$ZAn),
        (ins tile_ty:$_ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm),
        mnemonic, "\t$Zd, $ZAn[$Rs, $imm]",
        "", []>, Sched<[]> {
  bits<2> Rs;
  bits<5> Zd;
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;
  let Inst{21-17} = 0b00001;
  let Inst{16} = q;
  let Inst{15} = v;
  let Inst{14-13} = Rs;
  let Inst{12-9} = 0b0001;
  let Inst{4-0} = Zd;
  let Constraints = "$ZAn = $_ZAn";
}

// Instantiates _B/_H/_S/_D/_Q for one value of `v`. Across the sizes the
// four bits Inst{8-5} are shared between the tile number and the immediate:
// _B uses all four for imm, _Q uses all four for ZAn.
multiclass sme2p1_movaz_tile_to_vec_base<bit v, string mnemonic> {
  def _B : sme2p1_movaz_tile_to_vec_base<
      0b00, 0b0, v, ZPR8,
      !if(v, TileVectorOpV8, TileVectorOpH8),
      sme_elm_idx0_15, mnemonic> {
    bits<4> imm;
    let Inst{8-5} = imm;
  }

  def _H : sme2p1_movaz_tile_to_vec_base<
      0b01, 0b0, v, ZPR16,
      !if(v, TileVectorOpV16, TileVectorOpH16),
      sme_elm_idx0_7, mnemonic> {
    bits<1> ZAn;
    bits<3> imm;
    let Inst{8} = ZAn;
    let Inst{7-5} = imm;
  }

  def _S : sme2p1_movaz_tile_to_vec_base<
      0b10, 0b0, v, ZPR32,
      !if(v, TileVectorOpV32, TileVectorOpH32),
      sme_elm_idx0_3, mnemonic> {
    bits<2> ZAn;
    bits<2> imm;
    let Inst{8-7} = ZAn;
    let Inst{6-5} = imm;
  }

  def _D : sme2p1_movaz_tile_to_vec_base<
      0b11, 0b0, v, ZPR64,
      !if(v, TileVectorOpV64, TileVectorOpH64),
      sme_elm_idx0_1, mnemonic> {
    bits<3> ZAn;
    bits<1> imm;
    let Inst{8-6} = ZAn;
    let Inst{5} = imm;
  }

  // _Q shares sz = 0b11 with _D and is distinguished by q = 0b1.
  def _Q : sme2p1_movaz_tile_to_vec_base<
      0b11, 0b1, v, ZPR128,
      !if(v, TileVectorOpV128, TileVectorOpH128),
      sme_elm_idx0_0, mnemonic> {
    bits<4> ZAn;
    let Inst{8-5} = ZAn;
  }
}

// _H uses v = 0b0 and _V uses v = 0b1.
multiclass sme2p1_movaz_tile_to_vec<string mnemonic> {
  defm _H : sme2p1_movaz_tile_to_vec_base<0b0, mnemonic>;
  defm _V : sme2p1_movaz_tile_to_vec_base<0b1, mnemonic>;
}

//===----------------------------------------------------------------------===//
// SME2.1 multiple vectors zero array

// Shared encoding for the array-zeroing instructions.
//   opc        - opc{5-3} goes to Inst{17-15}, opc{2-0} to Inst{2-0}.
//   index_ty   - operand class of $imm.
//   vg_acronym - optional; when non-empty it is printed as ", <vg_acronym>"
//                after the immediate.
// The array is both an input ($_ZAd) and a tied output ($ZAd).
class sme2p1_zero_matrix<bits<6> opc, Operand index_ty, string mnemonic,
                         string vg_acronym = "">
    : I<(outs MatrixOp64:$ZAd),
        (ins MatrixOp64:$_ZAd, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm),
        mnemonic, "\t$ZAd[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "]",
        "", []>, Sched<[]> {
  bits<2> Rv;
  let Inst{31-18} = 0b11000000000011;
  let Inst{17-15} = opc{5-3};
  let Inst{14-13} = Rv;
  let Inst{12-3} = 0b0000000000;
  let Inst{2-0} = opc{2-0};
  let Constraints = "$ZAd = $_ZAd";
}

// One def per addressing form. In every case the '?' bits at the bottom of
// `opc` are left unset and overridden with the immediate, whose width
// (3, 2 or 1 bits) matches the number of '?' bits.
multiclass sme2p1_zero_matrix<string mnemonic> {
  def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx2"> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }
  def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }
  def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, "vgx2"> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }
  def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, "vgx4"> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }
  def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, "vgx4"> {
    bits<3> imm;
    let Inst{2-0} = imm;
  }
  def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic> {
    bits<2> imm;
    let Inst{1-0} = imm;
  }
  def _VG2_4Z : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2"> {
    bits<1> imm;
    let Inst{0} = imm;
  }
  def _VG4_4Z : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4"> {
    bits<1> imm;
    let Inst{0} = imm;
  }
}

//===----------------------------------------------------------------------===//
// SME2.1 lookup table expand two non-contiguous registers

// Shared encoding for the two-register strided lookups.
//   op - placed in Inst{18-15}; subclasses leave its low bits unset and
//        override them with the index $i.
//   sz - placed in Inst{13-12}.
// $Zd is 4 bits wide: Zd{3} in Inst{4}, Zd{2-0} in Inst{2-0}, Inst{3} fixed 0.
class sme2p1_luti_vector_vg2_index<
    bits<4> op, bits<2> sz, RegisterOperand vector_ty,
    AsmVectorIndexOpnd index_ty,
    string mnemonic>
    : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>, Sched<[]> {
  bits<5> Zn;
  bits<4> Zd;
  let Inst{31-19} = 0b1100000010011;
  let Inst{18-15} = op;
  let Inst{14} = 0b1;
  let Inst{13-12} = sz;
  let Inst{11-10} = 0b00;
  let Inst{9-5} = Zn;
  let Inst{4} = Zd{3};
  let Inst{3} = 0b0;
  let Inst{2-0} = Zd{2-0};
}

// luti2, two registers: 3-bit index in Inst{17-15}.
class sme2p1_luti2_vector_vg2_index<
    bits<2> sz, RegisterOperand vector_ty,
    AsmVectorIndexOpnd index_ty,
    string mnemonic>
    : sme2p1_luti_vector_vg2_index<{1,?,?,?}, sz, vector_ty, index_ty, mnemonic> {
  bits<3> i;
  let Inst{17-15} = i;
}

multiclass sme2p1_luti2_vector_vg2_index<string mnemonic> {
  def _B : sme2p1_luti2_vector_vg2_index<
      0b00, ZZ_b_strided, VectorIndexH, mnemonic>;
  def _H : sme2p1_luti2_vector_vg2_index<
      0b01, ZZ_h_strided, VectorIndexH, mnemonic>;
}

// luti4, two registers: 2-bit index in Inst{16-15}.
class sme2p1_luti4_vector_vg2_index<
    bits<2> sz, RegisterOperand vector_ty,
    AsmVectorIndexOpnd index_ty,
    string mnemonic>
    : sme2p1_luti_vector_vg2_index<{0b01,?,?}, sz, vector_ty, index_ty, mnemonic> {
  bits<2> i;
  let Inst{16-15} = i;
}

multiclass sme2p1_luti4_vector_vg2_index<string mnemonic> {
  def _B : sme2p1_luti4_vector_vg2_index<
      0b00, ZZ_b_strided, VectorIndexS, mnemonic>;
  def _H : sme2p1_luti4_vector_vg2_index<
      0b01, ZZ_h_strided, VectorIndexS, mnemonic>;
}

// SME2.1 lookup table expand four non-contiguous registers

// Shared encoding for the four-register strided lookups.
//   op - placed in Inst{18-16}; subclasses override its unset low bits with
//        the index $i.
//   sz - placed in Inst{13-12}.
// $Zd is 3 bits wide: Zd{2} in Inst{4}, Zd{1-0} in Inst{1-0}, Inst{3-2}
// fixed 0.
class sme2p1_luti_vector_vg4_index<
    bits<3> op, bits<2> sz, RegisterOperand vector_ty,
    AsmVectorIndexOpnd index_ty,
    string mnemonic>
    : I<(outs vector_ty:$Zd), (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i),
        mnemonic, "\t$Zd, $ZTt, $Zn$i",
        "", []>, Sched<[]> {
  bits<5> Zn;
  bits<3> Zd;
  let Inst{31-19} = 0b1100000010011;
  let Inst{18-16} = op;
  let Inst{15-14} = 0b10;
  let Inst{13-12} = sz;
  let Inst{11-10} = 0b00;
  let Inst{9-5} = Zn;
  let Inst{4} = Zd{2};
  let Inst{3-2} = 0b00;
  let Inst{1-0} = Zd{1-0};
}

// luti2, four registers: 2-bit index in Inst{17-16}.
class sme2p1_luti2_vector_vg4_index<
    bits<2> sz, RegisterOperand vector_ty,
    AsmVectorIndexOpnd index_ty,
    string mnemonic>
    : sme2p1_luti_vector_vg4_index<{1,?,?}, sz, vector_ty, index_ty, mnemonic> {
  bits<2> i;
  let Inst{17-16} = i;
}

multiclass sme2p1_luti2_vector_vg4_index<string mnemonic> {
  def _B : sme2p1_luti2_vector_vg4_index<
      0b00, ZZZZ_b_strided, VectorIndexS, mnemonic>;
  def _H : sme2p1_luti2_vector_vg4_index<
      0b01, ZZZZ_h_strided, VectorIndexS, mnemonic>;
}

// luti4, four registers: single index bit in Inst{16}.
class sme2p1_luti4_vector_vg4_index<
    bits<2> sz, RegisterOperand vector_ty,
    AsmVectorIndexOpnd index_ty,
    string mnemonic>
    : sme2p1_luti_vector_vg4_index<{0b01,?}, sz, vector_ty, index_ty, mnemonic> {
  bit i;
  let Inst{16} = i;
}

// Only an _H variant is defined for this form.
multiclass sme2p1_luti4_vector_vg4_index<string mnemonic> {
  def _H : sme2p1_luti4_vector_vg4_index<0b01, ZZZZ_h_strided, VectorIndexD, mnemonic>;
}

// SME2 lookup table two source registers expand to four contiguous destination registers
//   sz - placed in Inst{13-12}.
//   op - placed in Inst{11-10}.
// $Zd occupies Inst{4-2} as one contiguous 3-bit field.
class sme2_luti4_vector_vg4<bits<2> sz, bits<2> op, string mnemonic>
    : I<(outs ZZZZ_b_mul_r:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
        mnemonic, "\t$Zd, $ZTt, $Zn",
        "", []>, Sched<[]> {
  bits<4> Zn;
  bits<3> Zd;
  let Inst{31-14} = 0b110000001000101100;
  let Inst{13-12} = sz;
  let Inst{11-10} = op;
  let Inst{9-6} = Zn;
  let Inst{5} = 0b0;
  let Inst{4-2} = Zd;
  let Inst{1-0} = 0b00;
}

// SME2 lookup table two source registers expand to four non-contiguous destination registers
// Same parameters as sme2_luti4_vector_vg4, but the destination is a
// ZZZZ_b_strided list and $Zd is split: Zd{2} in Inst{4}, Zd{1-0} in
// Inst{1-0}, with Inst{3-2} fixed to 0.
class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
    : I<(outs ZZZZ_b_strided:$Zd), (ins ZTR:$ZTt, ZZ_mul_r:$Zn),
        mnemonic, "\t$Zd, $ZTt, $Zn",
        "", []>, Sched<[]> {
  bits<4> Zn;
  bits<3> Zd;
  let Inst{31-14} = 0b110000001001101100;
  let Inst{13-12} = sz;
  let Inst{11-10} = op;
  let Inst{9-6} = Zn;
  let Inst{5} = 0b0;
  let Inst{4} = Zd{2};
  let Inst{3-2} = 0b00;
  let Inst{1-0} = Zd{1-0};
}