//=-- SMEInstrFormats.td - AArch64 SME Instruction classes -*- tablegen -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Scalable Matrix Extension (SME) Instruction Class Definitions.
//
//===----------------------------------------------------------------------===//

// Complex patterns used where an intrinsic passes the tile as an immediate
// but the selected instruction takes a tile operand (see the
// `(imm2tile untyped:$tile)` uses below). One pattern per element size; the
// C++ selector is instantiated with the first tile register of that size.
def imm_to_tile8   : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAB0>",  []>;
def imm_to_tile16  : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAH0>",  []>;
def imm_to_tile32  : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAS0>",  []>;
def imm_to_tile64  : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAD0>",  []>;
def imm_to_tile128 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAQ0>",  []>;

// Complex patterns that match an i32 slice index and yield two operands, used
// below as (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm).
// NOTE(review): the meaning of the SelectSMETileSlice template argument is
// defined by the C++ selector, not in this file.
def tileslice8   : ComplexPattern<i32 , 2, "SelectSMETileSlice<4>", []>;
def tileslice16  : ComplexPattern<i32 , 2, "SelectSMETileSlice<3>", []>;
def tileslice32  : ComplexPattern<i32 , 2, "SelectSMETileSlice<2>", []>;
def tileslice64  : ComplexPattern<i32 , 2, "SelectSMETileSlice<1>", []>;
def tileslice128 : ComplexPattern<i32 , 2, "SelectSMETileSlice<0>", []>; // nop

// Address pattern yielding (base, imm4) for the "scalar + immediate (mul vl)"
// forms of the ZA spill/fill patterns below.
def am_sme_indexed_b4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [], [SDNPWantRoot]>;

//===----------------------------------------------------------------------===//
// SME Outer Products
//===----------------------------------------------------------------------===//

// Pseudo selected from the outer-product intrinsics. The tile is carried as an
// immediate ($tile) instead of a tile register operand.
class sme_outer_product_pseudo<ZPRRegOp zpr_ty>
    : Pseudo<(outs), (ins i64imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
                          zpr_ty:$zn, zpr_ty:$zm), []>,
      Sched<[]> {
  // Translated to the actual instructions in AArch64ISelLowering.cpp
  let usesCustomInserter = 1;
}

// Common encoding of the floating-point outer-product instructions.
//   S  - encoded in Inst{4}.
//   sz - encoded in Inst{22}.
// The tile operand is both read and written ($ZAda tied to $_ZAda); its
// encoding bits (Inst{2-0}) are supplied by the instantiating multiclass.
class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
                                ZPRRegOp zpr_ty, string mnemonic>
    : I<(outs za_ty:$ZAda),
        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-23} = 0b100000001;
  let Inst{22}    = sz;
  let Inst{21}    = 0b0;
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = S;
  let Inst{3}     = 0b0;

  let Constraints = "$ZAda = $_ZAda";
}

// 32-bit FP outer product: real instruction (2-bit tile number), the matching
// _PSEUDO, and the pattern selecting intrinsic `op` to the pseudo.
multiclass sme_outer_product_fp32<bit S, string mnemonic, SDPatternOperator op> {
  def NAME : sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2}   = 0b0;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32>;

  def : Pat<(op imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
                (nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)),
            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
}

// 64-bit FP outer product: as above, with a 3-bit tile number.
multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
  def NAME : sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64>;

  def : Pat<(op imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
                (nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)),
            (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
}

// Common encoding of the integer outer-product instructions.
//   u0 - Inst{24}, u1 - Inst{21}, S - Inst{4}, sz - Inst{22}.
// Tile bits (Inst{2-0}) are supplied by the instantiating multiclass.
class sme_int_outer_product_inst<bit u0, bit u1, bit S, bit sz,
                                 MatrixTileOperand za_ty, ZPRRegOp zpr_ty,
                                 string mnemonic>
    : I<(outs za_ty:$ZAda),
        (ins za_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn, zpr_ty:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-25} = 0b1010000;
  let Inst{24}    = u0;
  let Inst{23}    = 0b1;
  let Inst{22}    = sz;
  let Inst{21}    = u1;
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = S;
  let Inst{3}     = 0b0;

  let Constraints = "$ZAda = $_ZAda";
}

// Integer outer product into a 32-bit tile from 8-bit element vectors.
// opc{2}, opc{1}, opc{0} map to u0, u1, S respectively.
multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
                                     SDPatternOperator op> {
  def NAME : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b0, TileOp32,
                                        ZPR8, mnemonic> {
    bits<2> ZAda;
    let Inst{1-0} = ZAda;
    let Inst{2}   = 0b0;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8>;

  def : Pat<(op imm0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
                (nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)),
            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
}

// Integer outer product into a 64-bit tile from 16-bit element vectors.
// opc{2}, opc{1}, opc{0} map to u0, u1, S respectively.
multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
                                     SDPatternOperator op> {
  def NAME : sme_int_outer_product_inst<opc{2}, opc{1}, opc{0}, 0b1, TileOp64,
                                        ZPR16, mnemonic> {
    bits<3> ZAda;
    let Inst{2-0} = ZAda;
  }

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;

  def : Pat<(op imm0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
                (nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)),
            (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
}

// Widening outer product: 16-bit element vectors accumulated into a 32-bit
// tile. `op` (Inst{21}) is 0 for the bf16 multiclass and 1 for the f16
// multiclass below; S is encoded in Inst{4}.
class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
    : I<(outs TileOp32:$ZAda),
        (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, ZPR16:$Zn, ZPR16:$Zm),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm",
        "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  bits<2> ZAda;
  let Inst{31-22} = 0b1000000110;
  let Inst{21}    = op;
  let Inst{20-16} = Zm;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4}     = S;
  let Inst{3-2}   = 0b00;
  let Inst{1-0}   = ZAda;

  let Constraints = "$ZAda = $_ZAda";
}

// bf16 widening outer product: instruction, pseudo and selection pattern.
multiclass sme_bf16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
  def NAME : sme_outer_product_widening_inst<0b0, S, mnemonic>;

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;

  def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
                (nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)),
            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
}

// f16 widening outer product: instruction, pseudo and selection pattern.
multiclass sme_f16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
  def NAME : sme_outer_product_widening_inst<0b1, S, mnemonic>;

  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;

  def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
                (nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)),
            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
}

//===----------------------------------------------------------------------===//
// SME Add Vector to Tile
//===----------------------------------------------------------------------===//

// Common encoding of the add-vector-to-tile instructions.
//   op - Inst{22} (0 in the u32 class, 1 in the u64 class below).
//   V  - Inst{16}.
// Tile bits (Inst{2-0}) are supplied by the derived classes.
class sme_add_vector_to_tile_inst<bit op, bit V, MatrixTileOperand tile_ty,
                                  ZPRRegOp zpr_ty, string mnemonic>
    : I<(outs tile_ty:$ZAda),
        (ins tile_ty:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn),
        mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn",
        "", []>, Sched<[]> {
  bits<3> Pm;
  bits<3> Pn;
  bits<5> Zn;
  let Inst{31-23} = 0b110000001;
  let Inst{22}    = op;
  let Inst{21-17} = 0b01000;
  let Inst{16}    = V;
  let Inst{15-13} = Pm;
  let Inst{12-10} = Pn;
  let Inst{9-5}   = Zn;
  let Inst{4-3}   = 0b00;

  let Constraints = "$ZAda = $_ZAda";
}

// 32-bit tile variant: 2-bit tile number in Inst{1-0}.
class sme_add_vector_to_tile_u32<bit V, string mnemonic>
    : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> {
  bits<2> ZAda;
  let Inst{2}   = 0b0;
  let Inst{1-0} = ZAda;
}

// 64-bit tile variant: 3-bit tile number in Inst{2-0}.
class sme_add_vector_to_tile_u64<bit V, string mnemonic>
    : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> {
  bits<3> ZAda;
  let Inst{2-0} = ZAda;
}

// Pseudo selected from the addha/addva intrinsics; the tile is an immediate.
class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty>
    : Pseudo<(outs),
             (ins i64imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
      Sched<[]> {
  // Translated to the actual instructions in AArch64ISelLowering.cpp
  let usesCustomInserter = 1;
}

def ADDHA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
def ADDVA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;

def : Pat<(int_aarch64_sme_addha
            imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
            (nxv4i32 ZPR32:$zn)),
          (ADDHA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
def : Pat<(int_aarch64_sme_addva
            imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
            (nxv4i32 ZPR32:$zn)),
          (ADDVA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;

// The 64-bit element forms are only available under the HasSMEI64 predicate.
let Predicates = [HasSMEI64] in {
def ADDHA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
def ADDVA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;

def : Pat<(int_aarch64_sme_addha
            imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
            (nxv2i64 ZPR64:$zn)),
          (ADDHA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
def : Pat<(int_aarch64_sme_addva
            imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
            (nxv2i64 ZPR64:$zn)),
          (ADDVA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
}

//===----------------------------------------------------------------------===//
// SME Contiguous Loads
//===----------------------------------------------------------------------===//

// Common encoding of the scalar+scalar tile-slice loads.
//   Q   - Inst{24}, msz - Inst{23-22}, V - Inst{15}.
// Inst{21} = 0 distinguishes loads from the stores below (which set it to 1).
// Inst{3-0} (tile number / slice immediate) is supplied per element size.
class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
                         string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Rn;
  let Inst{31-25} = 0b1110000;
  let Inst{24}    = Q;
  let Inst{23-22} = msz;
  let Inst{21}    = 0b0;
  let Inst{20-16} = Rm;
  let Inst{15}    = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;

  let mayLoad = 1;
}

// Concrete load format: binds the operand lists and the canonical assembly
// string (tile slice in braces, zeroing predicate).
class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
                         MatrixTileVectorOperand tile_ty, bit is_col,
                         Operand imm_ty, RegisterOperand gpr_ty>
    : sme_mem_ld_ss_base<
        Q, is_col, msz, (outs tile_ty:$ZAt),
        (ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
             gpr_ty:$Rm),
        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;

// Assembly aliases shared by loads and stores for one element size:
//   - the brace-less spelling with an explicit register offset (parse only),
//   - the braced spelling with the offset omitted (XZR), preferred for printing,
//   - the brace-less spelling with the offset omitted (parse only).
// pg_suffix is appended to the predicate (e.g. "/z" for loads).
multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
                                   MatrixTileVectorOperand tile_ty,
                                   Operand imm_ty, RegisterOperand gpr_ty,
                                   string pg_suffix=""> {
  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
  // Default XZR offset aliases
  def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv, $imm]\\}, $Pg" # pg_suffix # ", [$Rn]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 1>;
  def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn]",
                  (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}

// Instantiates the aliases above for every element size (b/h/w/d/q), choosing
// vertical or horizontal tile-vector operands according to is_col. `inst` is
// the instruction name prefix; the size suffix (_B.._Q) is appended here.
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
                              string pg_suffix=""> {
  defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
                                 !if(is_col, TileVectorOpV8, TileVectorOpH8),
                                 sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
                                 !if(is_col, TileVectorOpV16, TileVectorOpH16),
                                 sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
                                 !if(is_col, TileVectorOpV32, TileVectorOpH32),
                                 sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
                                 !if(is_col, TileVectorOpV64, TileVectorOpH64),
                                 sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
  defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
                                 !if(is_col, TileVectorOpV128, TileVectorOpH128),
                                 sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
}

// Load aliases: "ld1" mnemonic prefix with a zeroing ("/z") predicate.
multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
  defm NAME : sme_mem_ss_aliases<"ld1", inst, is_col, "/z">;
}

// Selection patterns for a load intrinsic to its pseudo `Inst`:
//   - plain base address (offset register is XZR),
//   - base + register offset matched through `addr`; given a higher
//     AddedComplexity so it is preferred when it matches.
multiclass sme_mem_ld_ss_patterns<Instruction Inst, SDPatternOperator Load,
                                  Operand tile_ty, Operand offset_ty,
                                  ComplexPattern addr,
                                  ComplexPattern tileslice> {
  // base, tileslice
  def : Pat<(Load PPR3bAny:$pg, GPR64sp:$base, tile_ty:$tile,
                  (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
            (Inst tile_ty:$tile, $idx, $imm, $pg, $base, XZR)>;

  // reg + reg, tileslice
  let AddedComplexity = 1 in {
    def : Pat<(Load PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
                    tile_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                                   offset_ty:$imm))),
              (Inst tile_ty:$tile, $idx, $imm, $pg, $base, $offset)>;
  }
}

// Pseudo selected from the ld1* intrinsics; the tile is an immediate.
class sme_load_pseudo
    : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx,
                          i64imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
      Sched<[]> {
  // Translated to the actual instructions in AArch64ISelLowering.cpp
  let usesCustomInserter = 1;
  let mayLoad = 1;
}

// All load definitions for one slice direction (is_col selects the vertical
// operands/intrinsics, otherwise horizontal): the five sized instructions,
// their aliases, the five pseudos and the selection patterns.
// Inst{3-0} is split between tile number (high bits) and slice immediate
// (low bits); the split moves by one bit per element-size step.
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
  def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
                              is_col, sme_elm_idx0_15, GPR64shifted8> {
    bits<4> imm;
    let Inst{3-0} = imm;
  }
  def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
                              is_col, sme_elm_idx0_7, GPR64shifted16> {
    bits<1> ZAt;
    bits<3> imm;
    let Inst{3}   = ZAt;
    let Inst{2-0} = imm;
  }
  def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
                              is_col, sme_elm_idx0_3, GPR64shifted32> {
    bits<2> ZAt;
    bits<2> imm;
    let Inst{3-2} = ZAt;
    let Inst{1-0} = imm;
  }
  def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
                              is_col, sme_elm_idx0_1, GPR64shifted64> {
    bits<3> ZAt;
    bits<1> imm;
    let Inst{3-1} = ZAt;
    let Inst{0}   = imm;
  }
  def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
                              is_col, sme_elm_idx0_0, GPR64shifted128> {
    bits<4> ZAt;
    let Inst{3-0} = ZAt;
  }

  defm : sme_mem_ld_ss_aliases<NAME, is_col>;

  // Pseudo instructions for lowering intrinsics, using immediates instead of
  // tile registers.
  def _PSEUDO_B : sme_load_pseudo;
  def _PSEUDO_H : sme_load_pseudo;
  def _PSEUDO_S : sme_load_pseudo;
  def _PSEUDO_D : sme_load_pseudo;
  def _PSEUDO_Q : sme_load_pseudo;

  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
                                !if(is_col, int_aarch64_sme_ld1b_vert,
                                            int_aarch64_sme_ld1b_horiz),
                                sme_elm_idx0_0, imm0_15, am_sve_regreg_lsl0,
                                tileslice8>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                !if(is_col, int_aarch64_sme_ld1h_vert,
                                            int_aarch64_sme_ld1h_horiz),
                                imm0_1, imm0_7, am_sve_regreg_lsl1,
                                tileslice16>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
                                !if(is_col, int_aarch64_sme_ld1w_vert,
                                            int_aarch64_sme_ld1w_horiz),
                                imm0_3, imm0_3, am_sve_regreg_lsl2,
                                tileslice32>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
                                !if(is_col, int_aarch64_sme_ld1d_vert,
                                            int_aarch64_sme_ld1d_horiz),
                                imm0_7, imm0_1, am_sve_regreg_lsl3,
                                tileslice64>;
  defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                !if(is_col, int_aarch64_sme_ld1q_vert,
                                            int_aarch64_sme_ld1q_horiz),
                                imm0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
                                tileslice128>;
}

// Top-level load multiclass: horizontal (_H) and vertical (_V) families.
multiclass sme_mem_ld_ss<string mnemonic> {
  defm _H : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_mem_ld_v_ss<mnemonic, /*is_col=*/0b1>;
}

//===----------------------------------------------------------------------===//
// SME Contiguous Stores
//===----------------------------------------------------------------------===//

// Common encoding of the scalar+scalar tile-slice stores. Identical to the
// load encoding except Inst{21} = 1. Stores are additionally marked as having
// side effects.
class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
                         string mnemonic, string argstr>
    : I<(outs), ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<5> Rm;
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Rn;
  let Inst{31-25} = 0b1110000;
  let Inst{24}    = Q;
  let Inst{23-22} = msz;
  let Inst{21}    = 0b1;
  let Inst{20-16} = Rm;
  let Inst{15}    = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;

  let mayStore = 1;
  let hasSideEffects = 1;
}

// Concrete store format: the tile slice is an input operand and the predicate
// carries no "/z" suffix.
class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
                         MatrixTileVectorOperand tile_ty, bit is_col,
                         Operand imm_ty, RegisterOperand gpr_ty>
    : sme_mem_st_ss_base<
        Q, is_col, msz,
        (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
             GPR64sp:$Rn, gpr_ty:$Rm),
        mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;

// Store aliases: "st1" mnemonic prefix, no predicate suffix.
multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
  defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
}

// Selection patterns for a store intrinsic. Unlike the loads, these select
// the real instruction directly: the immediate tile is converted through the
// `imm2tile` complex pattern.
multiclass sme_mem_st_ss_patterns<Instruction Inst, SDPatternOperator Store,
                                  Operand offset_ty,
                                  ComplexPattern imm2tile,
                                  ComplexPattern addr,
                                  ComplexPattern tileslice> {
  // base, tileslice
  def : Pat<(Store PPR3bAny:$pg, GPR64sp:$base, (imm2tile untyped:$tile),
                   (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
            (Inst $tile, $idx, $imm, $pg, $base, XZR)>;

  // reg + reg, tileslice
  let AddedComplexity = 1 in {
    def : Pat<(Store PPR3bAny:$pg, (addr GPR64sp:$base, GPR64:$offset),
                     (imm2tile untyped:$tile),
                     (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, offset_ty:$imm))),
              (Inst $tile, $idx, $imm, $pg, $base, $offset)>;
  }
}

// All store definitions for one slice direction: the five sized instructions,
// their aliases and the selection patterns. Inst{3-0} is split between tile
// number and slice immediate exactly as for the loads.
multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
  def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
                              !if(is_col, TileVectorOpV8, TileVectorOpH8),
                              is_col, sme_elm_idx0_15, GPR64shifted8> {
    bits<4> imm;
    let Inst{3-0} = imm;
  }
  def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
                              !if(is_col, TileVectorOpV16, TileVectorOpH16),
                              is_col, sme_elm_idx0_7, GPR64shifted16> {
    bits<1> ZAt;
    bits<3> imm;
    let Inst{3}   = ZAt;
    let Inst{2-0} = imm;
  }
  def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
                              !if(is_col, TileVectorOpV32, TileVectorOpH32),
                              is_col, sme_elm_idx0_3, GPR64shifted32> {
    bits<2> ZAt;
    bits<2> imm;
    let Inst{3-2} = ZAt;
    let Inst{1-0} = imm;
  }
  def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
                              !if(is_col, TileVectorOpV64, TileVectorOpH64),
                              is_col, sme_elm_idx0_1, GPR64shifted64> {
    bits<3> ZAt;
    bits<1> imm;
    let Inst{3-1} = ZAt;
    let Inst{0}   = imm;
  }
  def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
                              !if(is_col, TileVectorOpV128, TileVectorOpH128),
                              is_col, sme_elm_idx0_0, GPR64shifted128> {
    bits<4> ZAt;
    let Inst{3-0} = ZAt;
  }

  defm : sme_mem_st_ss_aliases<NAME, is_col>;

  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
                                !if(is_col, int_aarch64_sme_st1b_vert,
                                            int_aarch64_sme_st1b_horiz),
                                imm0_15, imm_to_tile8, am_sve_regreg_lsl0,
                                tileslice8>;
  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
                                !if(is_col, int_aarch64_sme_st1h_vert,
                                            int_aarch64_sme_st1h_horiz),
                                imm0_7, imm_to_tile16, am_sve_regreg_lsl1,
                                tileslice16>;
  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
                                !if(is_col, int_aarch64_sme_st1w_vert,
                                            int_aarch64_sme_st1w_horiz),
                                imm0_3, imm_to_tile32, am_sve_regreg_lsl2,
                                tileslice32>;
  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
                                !if(is_col, int_aarch64_sme_st1d_vert,
                                            int_aarch64_sme_st1d_horiz),
                                imm0_1, imm_to_tile64, am_sve_regreg_lsl3,
                                tileslice64>;
  defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
                                !if(is_col, int_aarch64_sme_st1q_vert,
                                            int_aarch64_sme_st1q_horiz),
                                sme_elm_idx0_0, imm_to_tile128,
                                am_sve_regreg_lsl4, tileslice128>;
}

// Top-level store multiclass: horizontal (_H) and vertical (_V) families.
multiclass sme_mem_st_ss<string mnemonic> {
  defm _H : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_mem_st_v_ss<mnemonic, /*is_col=*/0b1>;
}

//===----------------------------------------------------------------------===//
// SME Save and Restore Array
//===----------------------------------------------------------------------===//

// Common encoding of the ZA array spill/fill instructions. isStore selects
// Inst{21}. Only $imm4 is encoded (Inst{3-0}); the $offset operand appears in
// the assembly string but has no encoding field of its own here.
class sme_spill_fill_base<bit isStore, dag outs, dag ins, string opcodestr>
    : I<outs, ins, opcodestr, "\t$ZAt[$Rv, $imm4], [$Rn, $offset, mul vl]", "",
        []>,
      Sched<[]> {
  bits<2> Rv;
  bits<5> Rn;
  bits<4> imm4;
  let Inst{31-22} = 0b1110000100;
  let Inst{21}    = isStore;
  let Inst{20-15} = 0b000000;
  let Inst{14-13} = Rv;
  let Inst{12-10} = 0b000;
  let Inst{9-5}   = Rn;
  let Inst{4}     = 0b0;
  let Inst{3-0}   = imm4;
}

// Spill (store) form: ZA is an input operand.
let mayStore = 1 in
class sme_spill_inst<string opcodestr>
    : sme_spill_fill_base<0b1, (outs),
                          (ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
                               sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
                               imm0_15:$offset),
                          opcodestr>;
// Fill (load) form: ZA is the output operand.
let mayLoad = 1 in
class sme_fill_inst<string opcodestr>
    : sme_spill_fill_base<0b0, (outs MatrixOp:$ZAt),
                          (ins MatrixIndexGPR32Op12_15:$Rv,
                               sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
                               imm0_15:$offset),
                          opcodestr>;

// Spill instruction, its "no offset" alias and the patterns selecting the
// str intrinsic directly to the real instruction.
multiclass sme_spill<string opcodestr> {
  def NAME : sme_spill_inst<opcodestr>;
  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
  // base
  def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
            (!cast<Instruction>(NAME) ZA, $idx, 0, $base, 0)>;
  // scalar + immediate (mul vl)
  let AddedComplexity = 2 in {
    def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx,
                                   (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)),
              (!cast<Instruction>(NAME) ZA, $idx, 0, $base, $imm4)>;
  }
}

// Fill instruction, its "no offset" alias, and a _PSEUDO (no explicit ZA
// operand) that the ldr intrinsic patterns select to.
multiclass sme_fill<string opcodestr> {
  def NAME : sme_fill_inst<opcodestr>;
  def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
                  (!cast<Instruction>(NAME) MatrixOp:$ZAt,
                   MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
  def NAME # _PSEUDO
      : Pseudo<(outs),
               (ins MatrixIndexGPR32Op12_15:$idx, imm0_15:$imm4,
                    GPR64sp:$base), []>,
        Sched<[]> {
    // Translated to actual instruction in AArch64ISelLowering.cpp
    let usesCustomInserter = 1;
    let mayLoad = 1;
  }
  // base
  def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
            (!cast<Instruction>(NAME # _PSEUDO) $idx, 0, $base)>;
  // scalar + immediate (mul vl)
  let AddedComplexity = 2 in {
    def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx,
                                   (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)),
              (!cast<Instruction>(NAME # _PSEUDO) $idx, $imm4, $base)>;
  }
}

//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//

// Common encoding of the vector-to-tile-slice moves.
//   Q - Inst{16}, V - Inst{15}, sz - Inst{23-22}.
// Inst{3-0} (tile number / slice immediate) is supplied per element size.
class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                              string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Zn;
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;
  let Inst{21-17} = 0b00000;
  let Inst{16}    = Q;
  let Inst{15}    = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4}     = 0b0;
}

// Concrete vector-to-tile format. The tile slice is tied ($ZAd = $_ZAd)
// because the move is merging-predicated ("/m").
class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty,
                              bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
                              string mnemonic>
    : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
        (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
        mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">{

  let Constraints = "$ZAd = $_ZAd";
}


// "mov" alias for a vector-to-tile instruction (preferred for printing).
multiclass sme_vector_to_tile_aliases<Instruction inst,
                                      MatrixTileVectorOperand tile_ty,
                                      ZPRRegOp zpr_ty, Operand imm_ty> {
  def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
                  (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
}

// Selection patterns for the write intrinsics to pseudo `inst`:
//   - slice index held entirely in a register (immediate 0),
//   - slice index matched by `tileslice` into register + immediate; preferred
//     via AddedComplexity when it matches.
multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt,
                                       ValueType ppr_vt, Operand imm_ty,
                                       Operand offset_ty,
                                       SDPatternOperator op,
                                       ComplexPattern tileslice> {
  def : Pat<(op imm_ty:$tile, MatrixIndexGPR32Op12_15:$idx,
                (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
            (inst imm_ty:$tile, $idx, 0, $pg, $zn)>;
  let AddedComplexity = 1 in {
    def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                                offset_ty:$imm)),
                  (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
              (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
  }
}

// Pseudo selected from the write intrinsics; the tile is an immediate.
class sme_mova_insert_pseudo
    : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx,
                          i64imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
      Sched<[]> {
  // Translated to the actual instructions in AArch64ISelLowering.cpp
  let usesCustomInserter = 1;
}

// All vector-to-tile definitions for one slice direction: the five sized
// instructions, the pseudos, the "mov" aliases and the selection patterns
// (one per supported vector value type).
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
  def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
                                                          TileVectorOpH8),
                                   is_col, sme_elm_idx0_15, ZPR8, mnemonic> {
    bits<4> imm;
    let Inst{3-0} = imm;
  }
  def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16,
                                                          TileVectorOpH16),
                                   is_col, sme_elm_idx0_7, ZPR16, mnemonic> {
    bits<1> ZAd;
    bits<3> imm;
    let Inst{3}   = ZAd;
    let Inst{2-0} = imm;
  }
  def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32,
                                                          TileVectorOpH32),
                                   is_col, sme_elm_idx0_3, ZPR32, mnemonic> {
    bits<2> ZAd;
    bits<2> imm;
    let Inst{3-2} = ZAd;
    let Inst{1-0} = imm;
  }
  def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64,
                                                          TileVectorOpH64),
                                   is_col, sme_elm_idx0_1, ZPR64, mnemonic> {
    bits<3> ZAd;
    bits<1> imm;
    let Inst{3-1} = ZAd;
    let Inst{0}   = imm;
  }
  def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128,
                                                          TileVectorOpH128),
                                   is_col, sme_elm_idx0_0, ZPR128, mnemonic> {
    bits<4> ZAd;
    // Declared but not placed in Inst: all four low bits hold the tile number.
    bits<1> imm;
    let Inst{3-0} = ZAd;
  }

  // Pseudo instructions for lowering intrinsics, using immediates instead of
  // tile registers.
  def _PSEUDO_B : sme_mova_insert_pseudo;
  def _PSEUDO_H : sme_mova_insert_pseudo;
  def _PSEUDO_S : sme_mova_insert_pseudo;
  def _PSEUDO_D : sme_mova_insert_pseudo;
  def _PSEUDO_Q : sme_mova_insert_pseudo;

  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
                                    !if(is_col, TileVectorOpV8,
                                                TileVectorOpH8),
                                    ZPR8, sme_elm_idx0_15>;
  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
                                    !if(is_col, TileVectorOpV16,
                                                TileVectorOpH16),
                                    ZPR16, sme_elm_idx0_7>;
  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
                                    !if(is_col, TileVectorOpV32,
                                                TileVectorOpH32),
                                    ZPR32, sme_elm_idx0_3>;
  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
                                    !if(is_col, TileVectorOpV64,
                                                TileVectorOpH64),
                                    ZPR64, sme_elm_idx0_1>;
  defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
                                    !if(is_col, TileVectorOpV128,
                                                TileVectorOpH128),
                                    ZPR128, sme_elm_idx0_0>;

  defvar op = !if(is_col, int_aarch64_sme_write_vert,
                          int_aarch64_sme_write_horiz);

  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
                                     nxv16i8, nxv16i1, sme_elm_idx0_0, imm0_15,
                                     op, tileslice8>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                     nxv8i16, nxv8i1, sme_elm_idx0_1, imm0_7,
                                     op, tileslice16>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                     nxv8f16, nxv8i1, sme_elm_idx0_1, imm0_7,
                                     op, tileslice16>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                     nxv8bf16, nxv8i1, sme_elm_idx0_1, imm0_7,
                                     op, tileslice16>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
                                     nxv4i32, nxv4i1, sme_elm_idx0_3, imm0_3,
                                     op, tileslice32>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
                                     nxv4f32, nxv4i1, sme_elm_idx0_3, imm0_3,
                                     op, tileslice32>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
                                     nxv2i64, nxv2i1, sme_elm_idx0_7, imm0_1,
                                     op, tileslice64>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
                                     nxv2f64, nxv2i1, sme_elm_idx0_7, imm0_1,
                                     op, tileslice64>;

  // The 128-bit (quadword) writes use a separate intrinsic and accept every
  // vector value type.
  defvar opq = !if(is_col, int_aarch64_sme_writeq_vert,
                           int_aarch64_sme_writeq_horiz);

  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     nxv16i8, nxv16i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     nxv8i16, nxv8i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     nxv8f16, nxv8i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     nxv8bf16, nxv8i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     nxv4i32, nxv4i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     nxv4f32, nxv4i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     nxv2i64, nxv2i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;
  defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                     nxv2f64, nxv2i1, sme_elm_idx0_15,
                                     sme_elm_idx0_0, opq, tileslice128>;
}

// Top-level vector-to-tile multiclass: horizontal (_H) and vertical (_V).
multiclass sme_vector_to_tile<string mnemonic> {
  defm _H : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_vector_v_to_tile<mnemonic, /*is_col=*/0b1>;
}

// Common encoding of the tile-slice-to-vector moves. Differs from the
// vector-to-tile encoding in Inst{21-17} (0b00001) and in placing the tile
// number / slice immediate in Inst{8-5} (supplied per element size) with the
// destination vector in Inst{4-0}.
class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
                              string mnemonic, string argstr>
    : I<outs, ins, mnemonic, argstr, "", []>, Sched<[]> {
  bits<2> Rv;
  bits<3> Pg;
  bits<5> Zd;
  let Inst{31-24} = 0b11000000;
  let Inst{23-22} = sz;
  let Inst{21-17} = 0b00001;
  let Inst{16}    = Q;
  let Inst{15}    = V;
  let Inst{14-13} = Rv;
  let Inst{12-10} = Pg;
  let Inst{9}     = 0b0;
  let Inst{4-0}   = Zd;
}

// Concrete tile-to-vector format. The destination vector is tied
// ($Zd = $_Zd) because the move is merging-predicated ("/m").
class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
                              MatrixTileVectorOperand tile_ty,
                              bit is_col, Operand imm_ty, string mnemonic>
    : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
        (ins zpr_ty:$_Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]"> {

  let Constraints = "$Zd = $_Zd";
}

// "mov" alias for a tile-to-vector instruction (preferred for printing).
multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
                                      MatrixTileVectorOperand tile_ty,
                                      Operand imm_ty > {
  def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv, $imm]",
                  (inst zpr_ty:$Zd, PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm), 1>;
}

// Selection patterns for the read intrinsics. These select the real
// instruction directly, converting the immediate tile through `imm2tile`:
//   - slice index held entirely in a register (immediate 0),
//   - slice index matched by `tileslice` into register + immediate; preferred
//     via AddedComplexity when it matches.
multiclass sme_tile_to_vector_patterns<Instruction inst, ValueType zpr_vt,
                                       ValueType ppr_vt, Operand offset_ty,
                                       ComplexPattern imm2tile,
                                       ComplexPattern tileslice,
                                       SDPatternOperator op> {
  def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
                        (imm2tile untyped:$tile), MatrixIndexGPR32Op12_15:$idx)),
            (inst $passthru, $pg, $tile, $idx, 0)>;
  let AddedComplexity = 1 in {
    def : Pat<(zpr_vt (op (zpr_vt ZPRAny:$passthru), (ppr_vt PPR3bAny:$pg),
                          (imm2tile untyped:$tile),
                          (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
                                          offset_ty:$imm)))),
              (inst $passthru, $pg, $tile, $idx, $imm)>;
  }
}

// All tile-to-vector definitions for one slice direction: the five sized
// instructions, the "mov" aliases and the selection patterns (one per
// supported vector value type).
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
  def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
                                                                TileVectorOpH8),
                                   is_col, sme_elm_idx0_15, mnemonic> {
    bits<4> imm;
    let Inst{8-5} = imm;
  }
  def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16,
                                                                 TileVectorOpH16),
                                   is_col, sme_elm_idx0_7, mnemonic> {
    bits<1> ZAn;
    bits<3> imm;
    let Inst{8}   = ZAn;
    let Inst{7-5} = imm;
  }
  def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32,
                                                                 TileVectorOpH32),
                                   is_col, sme_elm_idx0_3, mnemonic> {
    bits<2> ZAn;
    bits<2> imm;
    let Inst{8-7} = ZAn;
    let Inst{6-5} = imm;
  }
  def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64,
                                                                 TileVectorOpH64),
                                   is_col, sme_elm_idx0_1, mnemonic> {
    bits<3> ZAn;
    bits<1> imm;
    let Inst{8-6} = ZAn;
    let Inst{5}   = imm;
  }
  def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128,
                                                                  TileVectorOpH128),
                                   is_col, sme_elm_idx0_0, mnemonic> {
    bits<4> ZAn;
    let Inst{8-5} = ZAn;
  }

  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
                                    !if(is_col, TileVectorOpV8,
                                                TileVectorOpH8), sme_elm_idx0_15>;
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
                                    !if(is_col, TileVectorOpV16,
                                                TileVectorOpH16), sme_elm_idx0_7>;
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
                                    !if(is_col, TileVectorOpV32,
                                                TileVectorOpH32), sme_elm_idx0_3>;
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
                                    !if(is_col, TileVectorOpV64,
                                                TileVectorOpH64), sme_elm_idx0_1>;
  defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
                                    !if(is_col, TileVectorOpV128,
                                                TileVectorOpH128), sme_elm_idx0_0>;

  defvar op = !if(is_col, int_aarch64_sme_read_vert,
                          int_aarch64_sme_read_horiz);

  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
                                     nxv16i8, nxv16i1, imm0_15,
                                     imm_to_tile8, tileslice8, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
                                     nxv8i16, nxv8i1, imm0_7,
                                     imm_to_tile16, tileslice16, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
                                     nxv8f16, nxv8i1, imm0_7,
                                     imm_to_tile16, tileslice16, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
                                     nxv8bf16, nxv8i1, imm0_7,
                                     imm_to_tile16, tileslice16, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
                                     nxv4i32, nxv4i1, imm0_3,
                                     imm_to_tile32, tileslice32, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
                                     nxv4f32, nxv4i1, imm0_3,
                                     imm_to_tile32, tileslice32, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
                                     nxv2i64, nxv2i1, imm0_1,
                                     imm_to_tile64, tileslice64, op>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
                                     nxv2f64, nxv2i1, imm0_1,
                                     imm_to_tile64, tileslice64, op>;

  // The 128-bit (quadword) reads use a separate intrinsic and accept every
  // vector value type.
  defvar opq = !if(is_col, int_aarch64_sme_readq_vert,
                           int_aarch64_sme_readq_horiz);

  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     nxv16i8, nxv16i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     nxv8i16, nxv8i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     nxv8f16, nxv8i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     nxv8bf16, nxv8i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     nxv4i32, nxv4i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     nxv4f32, nxv4i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     nxv2i64, nxv2i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;
  defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _Q),
                                     nxv2f64, nxv2i1, sme_elm_idx0_0,
                                     imm_to_tile128, tileslice128, opq>;
}

// Top-level tile-to-vector multiclass: horizontal (_H) and vertical (_V).
multiclass sme_tile_to_vector<string mnemonic> {
  defm _H : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b0>;
  defm _V : sme_tile_to_vector_v<mnemonic, /*is_col=*/0b1>;
}

//===----------------------------------------------------------------------===//
// SME Zero
//===----------------------------------------------------------------------===//

// NOTE: This definition isn't really correct because there are outputs, i.e.
// the tile registers being zeroed. We fix this up in a custom inserter that
// marks the appropriate registers as being implicitly defined.
// Encoding of the ZERO instruction. The single operand is an 8-bit tile mask
// placed in the low byte; all remaining bits are fixed. No outputs are
// declared on the instruction itself.
class sme_zero_inst<string mnemonic>
    : I<(outs), (ins MatrixTileList:$imm),
        mnemonic, "\t$imm", "", []>,
      Sched<[]> {
  bits<8> imm;

  let Inst{7-0}  = imm;
  let Inst{31-8} = 0b110000000000100000000000;
}

// Defines the ZERO instruction, its named-tile assembly aliases, a pseudo
// with a custom inserter, and the pattern selecting int_aarch64_sme_zero to
// that pseudo.
multiclass sme_zero<string mnemonic> {
  def NAME : sme_zero_inst<mnemonic>;

  // The real instruction, bound once for use in the alias table below.
  defvar ZeroInst = !cast<Instruction>(NAME);

  // Whole array and 16-bit tiles.
  def : InstAlias<"zero\t\\{za\\}",               (ZeroInst 0b11111111), 1>;
  def : InstAlias<"zero\t\\{za0.h\\}",            (ZeroInst 0b01010101), 1>;
  def : InstAlias<"zero\t\\{za1.h\\}",            (ZeroInst 0b10101010), 1>;

  // Single 32-bit tiles.
  def : InstAlias<"zero\t\\{za0.s\\}",            (ZeroInst 0b00010001), 1>;
  def : InstAlias<"zero\t\\{za1.s\\}",            (ZeroInst 0b00100010), 1>;
  def : InstAlias<"zero\t\\{za2.s\\}",            (ZeroInst 0b01000100), 1>;
  def : InstAlias<"zero\t\\{za3.s\\}",            (ZeroInst 0b10001000), 1>;

  // Pairs of 32-bit tiles.
  def : InstAlias<"zero\t\\{za0.s,za1.s\\}",      (ZeroInst 0b00110011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za3.s\\}",      (ZeroInst 0b10011001), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s\\}",      (ZeroInst 0b01100110), 1>;
  def : InstAlias<"zero\t\\{za2.s,za3.s\\}",      (ZeroInst 0b11001100), 1>;

  // Triples of 32-bit tiles.
  def : InstAlias<"zero\t\\{za0.s,za1.s,za2.s\\}", (ZeroInst 0b01110111), 1>;
  def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (ZeroInst 0b10111011), 1>;
  def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (ZeroInst 0b11011101), 1>;
  def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (ZeroInst 0b11101110), 1>;

  def NAME # _PSEUDO : Pseudo<(outs), (ins i64imm:$tilelist), []>,
                       Sched<[]> {
    // Expanded to the real instruction by the custom inserter in
    // AArch64ISelLowering.cpp.
    let usesCustomInserter = 1;
  }

  // The pseudo, bound once for the selection pattern.
  defvar ZeroPseudo = !cast<Instruction>(NAME # _PSEUDO);

  def : Pat<(int_aarch64_sme_zero imm:$imm),
            (ZeroPseudo imm:$imm)>;
}
//===----------------------------------------------------------------------===//
// SVE2 Instructions
//===----------------------------------------------------------------------===//

// Encoding of the predicated, merging REVD-style permute on 128-bit elements.
// The destination is destructive: the extra input $_Zd is tied to $Zd.
class sve2_int_perm_revd<string asm>
    : I<(outs ZPR128:$Zd), (ins ZPR128:$_Zd, PPR3bAny:$Pg, ZPR128:$Zn),
        asm, "\t$Zd, $Pg/m, $Zn", "", []>,
      Sched<[]> {
  bits<5> Zd;
  bits<3> Pg;
  bits<5> Zn;
  let Inst{31-24} = 0b00000101;
  let Inst{23-22} = 0b00; // size
  let Inst{21-13} = 0b101110100;
  let Inst{12-10} = Pg;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveUnary;
  let ElementSize = ZPR128.ElementSize;
}

// Defines the instruction and selects `op` to it for each integer vector
// type. All four patterns target the same (single) instruction.
multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
  def NAME : sve2_int_perm_revd<asm>;

  def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1,  nxv8i16, !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1,  nxv4i32, !cast<Instruction>(NAME)>;
  def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1,  nxv2i64, !cast<Instruction>(NAME)>;
}

// Encoding of the destructive clamp instructions ("$Zd, $Zn, $Zm").
//   sz - element size field, Inst{23-22}.
//   U  - encoded at Inst{10}.
//
// The tied input $_Zd is listed FIRST in the input operands. The multiclass
// below selects through SVE_3_Op_Pat with three same-typed operands; that
// helper is defined outside this file and is expected to forward them to the
// instruction in order. With $_Zd first, the first intrinsic operand (the
// value being clamped, which is also the destination) lands in the tied
// register and the following two land in $Zn and $Zm. This also matches the
// operand ordering used by the other destructive classes in this file.
// NOTE(review): this changes the MachineInstr operand order of the clamp
// instructions; confirm no C++ code indexes their operands positionally.
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
        asm, "\t$Zd, $Zn, $Zm", "", []>,
      Sched<[]> {
  bits<5> Zm;
  bits<5> Zn;
  bits<5> Zd;
  let Inst{31-24} = 0b01000100;
  let Inst{23-22} = sz;
  let Inst{21}    = 0b0;
  let Inst{20-16} = Zm;
  let Inst{15-11} = 0b11000;
  let Inst{10}    = U;
  let Inst{9-5}   = Zn;
  let Inst{4-0}   = Zd;

  let Constraints = "$Zd = $_Zd";
  let DestructiveInstType = DestructiveOther;
  let ElementSize = zpr_ty.ElementSize;
}

// Defines the _B/_H/_S/_D clamp instructions and selects `op` to the one
// whose element size matches the vector type.
multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
  def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
  def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
  def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
  def _D : sve2_clamp<asm, 0b11, U, ZPR64>;

  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}

// Common encoding of the predicate-select instructions
// ("$Pd, $Pn, $Pm[$Rv, $imm]"). Inst{23-22} and Inst{20-18} are left unset
// here; each derived def fills them with its immediate and size-specific
// fixed bits.
class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
    : I<(outs PPRAny:$Pd), (ins PPRAny:$Pn, ppr_ty:$Pm,
                            MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
        asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
      Sched<[]> {
  bits<2> Rv;
  bits<4> Pn;
  bits<4> Pm;
  bits<4> Pd;
  let Inst{31-24} = 0b00100101;
  let Inst{21}    = 0b1;
  let Inst{17-16} = Rv;
  let Inst{15-14} = 0b01;
  let Inst{13-10} = Pn;
  let Inst{9}     = 0b0;
  let Inst{8-5}   = Pm;
  let Inst{4}     = 0b0;
  let Inst{3-0}   = Pd;
}

// Defines the _B/_H/_S/_D predicate-select instructions and their selection
// patterns. The immediate narrows as the element size grows (4, 3, 2, 1
// bits); the bits it vacates in Inst{20-18} and Inst{22} carry fixed values.
multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
  def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
    bits<4> imm;
    let Inst{23-22} = imm{3-2};
    let Inst{20-19} = imm{1-0};
    let Inst{18}    = 0b1;
  }
  def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
    bits<3> imm;
    let Inst{23-22} = imm{2-1};
    let Inst{20}    = imm{0};
    let Inst{19-18} = 0b10;
  }
  def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
    bits<2> imm;
    let Inst{23-22} = imm{1-0};
    let Inst{20-18} = 0b100;
  }
  def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
    bits<1> imm;
    let Inst{23}    = imm;
    let Inst{22}    = 0b1;
    let Inst{20-18} = 0b000;
  }

  // Plain register index: the immediate operand is 0.
  def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
             MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
  def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
             MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
  def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
             MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
  def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
             MatrixIndexGPR32Op12_15:$idx)),
            (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;

  // Register + immediate index matched through the tileslice complex
  // patterns; AddedComplexity is raised relative to the patterns above.
  let AddedComplexity = 1 in {
    def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
               (i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))),
              (!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
    def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
               (i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))),
              (!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
    def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
               (i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))),
              (!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
    def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
               (i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))),
              (!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
  }
}