//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel AMX instruction
// set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMX instructions

// Tile configuration, tile load and tile store instructions that are shared by
// both encodings. Every record name gets `Suffix` appended and is additionally
// gated on the `HasEGPR` predicate argument. The encoding classes (VEX or
// EVEX) are attached at the `defm` sites below, not here.
multiclass AMX_TILE_COMMON<string Suffix, Predicate HasEGPR> {
let Predicates = [HasAMXTILE, HasEGPR, In64BitMode] in {
  // LDTILECFG lists TMM0-TMM7 as implicit definitions.
  let hasSideEffects = 1,
      Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
  def LDTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
                           "ldtilecfg\t$src",
                           [(int_x86_ldtilecfg addr:$src)]>,
                         T8, PS;
  let hasSideEffects = 1 in
  def STTILECFG#Suffix : I<0x49, MRM0m, (outs), (ins opaquemem:$src),
                           "sttilecfg\t$src",
                           [(int_x86_sttilecfg addr:$src)]>,
                         T8, PD;
  // The tile loads/stores carry no selection pattern; they only differ in
  // opcode prefix (XD / PD / XS) and in the mayLoad / mayStore flag.
  let mayLoad = 1 in
  def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                           (ins sibmem:$src),
                           "tileloadd\t{$src, $dst|$dst, $src}", []>,
                         T8, XD;
  let mayLoad = 1 in
  def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                             (ins sibmem:$src),
                             "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
                           T8, PD;
  let mayStore = 1 in
  def TILESTORED#Suffix : I<0x4b, MRMDestMemFSIB, (outs),
                            (ins sibmem:$dst, TILE:$src),
                            "tilestored\t{$src, $dst|$dst, $src}", []>,
                          T8, XS;
} // Predicates
} // multiclass AMX_TILE_COMMON

let SchedRW = [WriteSystem] in {
  // Instantiate the common tile instructions twice: unsuffixed with VEX under
  // NoEGPR, and with the "_EVEX" suffix with EVEX/NoCD8 under HasEGPR.
  defm "" : AMX_TILE_COMMON<"", NoEGPR>, VEX;
  defm "" : AMX_TILE_COMMON<"_EVEX", HasEGPR>, EVEX, NoCD8;

  let Predicates = [HasAMXTILE, In64BitMode] in {
    // TILERELEASE lists TMM0-TMM7 as implicit definitions.
    let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
                        "tilerelease", [(int_x86_tilerelease)]>, VEX, T8, PS;
    def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
                     "tilezero\t$dst", []>,
                     VEX, T8, XD;

    // Pseudo instructions for register allocation (RA).
    // NOTE(review): the GR16 operands presumably carry the tile shape --
    // confirm against the code that creates and expands these pseudos.
    let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
        Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
    let isPseudo = true, mayLoad = 1 in
    def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                     GR16:$src2,
                                                     opaquemem:$src3), []>;
    let isPseudo = true, mayLoad = 1 in
    def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                       GR16:$src2,
                                                       opaquemem:$src3), []>;
    let isPseudo = true, mayStore = 1 in
    def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
                                            GR16:$src2, opaquemem:$src3,
                                            TILE:$src4), []>;
    // Selected directly from int_x86_tilezero_internal.
    let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
        canFoldAsLoad = 1 in
    def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
                             [(set TILE:$dst, (int_x86_tilezero_internal
                               GR16:$src1, GR16:$src2))]>;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      let mayLoad = 1 in
      def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
      let mayLoad = 1 in
      def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
                                          sibmem:$src2), []>;
      // NOTE(review): the store uses i8mem while the loads above use sibmem --
      // confirm that the difference is intentional.
      let mayStore = 1 in
      def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
      def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
                              [(int_x86_tilezero timm:$src)]>;
    }
  } // Predicates
} // SchedRW

// AMX-INT8
let Predicates = [HasAMXINT8, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // $src1 is tied to $dst, so the assembly strings only name $dst, $src2
    // and $src3. The four variants share opcode 0x5e and differ in prefix.
    let Constraints = "$src1 = $dst" in {
      def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8, XD;
      def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8, XS;
      def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8, PD;
      def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX, VVVV, T8;
    }

    // Pseudo instructions for register allocation (RA).
    // Each is selected from the matching int_x86_*_internal intrinsic; the
    // tile operand $src4 is tied to $dst.
    let isPseudo = true, Constraints = "$src4 = $dst" in {
      def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE: $dst,
                              (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBSUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE: $dst,
                              (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBUSDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE: $dst,
                              (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBUUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE: $dst,
                              (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
    }

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbssd timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbsud timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbusd timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbuud timm:$src1,
                               timm:$src2, timm:$src3)]>;
    }
  } // SchedRW
} // HasAMXINT8, In64BitMode

// AMX-BF16
let Predicates = [HasAMXBF16, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // $src1 is tied to $dst, so the assembly string names $dst only.
    let Constraints = "$src1 = $dst" in
    def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      []>, VEX, VVVV, T8, XS;

    // Pseudo instruction for register allocation (RA).
    let isPseudo = true, Constraints = "$src4 = $dst" in
    def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE: $dst,
                              (int_x86_tdpbf16ps_internal GR16:$src1,
                                GR16:$src2, GR16:$src3, TILE:$src4,
                                TILE:$src5, TILE:$src6))]>;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
                               u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpbf16ps timm:$src1,
                                 timm:$src2, timm:$src3)]>;
    }
  } // SchedRW
} // HasAMXBF16, In64BitMode

// AMX-FP16
let Predicates = [HasAMXFP16, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // The assembly string names $src1 rather than $dst; the two are tied by
    // the constraint below.
    let Constraints = "$src1 = $dst" in {
      def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                        (ins TILE:$src1, TILE:$src2, TILE:$src3),
                        "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                        []>, VEX, VVVV, T8, XD;
    }

    // Pseudo instruction for register allocation (RA).
    let isPseudo = true, Constraints = "$src4 = $dst" in {
      def PTDPFP16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
                                GR16:$src2, GR16:$src3, TILE:$src4,
                                TILE:$src5, TILE:$src6),
                                [(set TILE: $dst,
                                (int_x86_tdpfp16ps_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;
    }

    // Immediate-operand pseudo, lowered through the custom inserter.
    let usesCustomInserter = 1 in {
      def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                               u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpfp16ps timm:$src1,
                                 timm:$src2, timm:$src3)]>;
    }
  } // SchedRW
} // HasAMXFP16, In64BitMode

// AMX-COMPLEX
let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // The assembly strings name $src1 rather than $dst; the two are tied by
    // the constraint below.
    let Constraints = "$src1 = $dst" in {
      def TCMMIMFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                           (ins TILE:$src1, TILE:$src2, TILE:$src3),
                           "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                           []>, T8, PD, VEX, VVVV;
      def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                           (ins TILE:$src1, TILE:$src2, TILE:$src3),
                           "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                           []>, VEX, VVVV, WIG, T8;

    } // Constraints = "$src1 = $dst"

    // NOTE(review): unlike the other *V pseudos in this file, these two are
    // not wrapped in `isPseudo = true` -- confirm whether that is intentional.
    let Constraints = "$src4 = $dst" in {
      def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6),
                                   [(set TILE: $dst,
                                   (int_x86_tcmmimfp16ps_internal GR16:$src1, GR16:$src2,
                                   GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6),
                                   [(set TILE: $dst,
                                   (int_x86_tcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
                                   GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
    }

    // Immediate-operand pseudos, lowered through the custom inserter.
    let usesCustomInserter = 1 in {
      def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                                  u8imm:$src2, u8imm:$src3),
                                  [(int_x86_tcmmimfp16ps timm:$src1,
                                    timm:$src2, timm:$src3)]>;
      def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                                  u8imm:$src2, u8imm:$src3),
                                  [(int_x86_tcmmrlfp16ps timm:$src1,
                                    timm:$src2, timm:$src3)]>;
    }
  } // SchedRW = [WriteSystem]
} // HasAMXCOMPLEX, In64BitMode