//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel AMX instruction
// set.
//
// Each feature scope below follows the same layout:
//   1. The encodable instructions (class I, with opcode and asm string).
//   2. "V" pseudo instructions for register allocation, which carry extra
//      GR16 operands in front of the tile operands.
//   3. usesCustomInserter pseudo instructions that take u8imm immediates
//      instead of tile registers.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMX instructions

// Definitions available when HasAMXTILE holds in 64-bit mode.
let Predicates = [HasAMXTILE, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // LDTILECFG is declared as defining every tile register (TMM0-TMM7).
    let hasSideEffects = 1,
        Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
                       "ldtilecfg\t$src",
                       [(int_x86_ldtilecfg addr:$src)]>, VEX, T8PS;
    let hasSideEffects = 1 in
    def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
                       "sttilecfg\t$src",
                       [(int_x86_sttilecfg addr:$src)]>, VEX, T8PD;
    let mayLoad = 1 in
    def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                      (ins sibmem:$src),
                      "tileloadd\t{$src, $dst|$dst, $src}", []>,
                      VEX, T8XD;
    let mayLoad = 1 in
    def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                        (ins sibmem:$src),
                        "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
                        VEX, T8PD;
    // TILERELEASE is likewise declared as defining TMM0-TMM7.
    let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
                        "tilerelease", [(int_x86_tilerelease)]>, VEX, T8PS;
    let mayStore = 1 in
    def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs),
                       (ins sibmem:$dst, TILE:$src),
                       "tilestored\t{$src, $dst|$dst, $src}", []>,
                       VEX, T8XS;
    def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
                     "tilezero\t$dst", []>,
                     VEX, T8XD;

    // Pseudo instruction for RA.
    let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
        Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
    let isPseudo = true, mayLoad = 1 in
    def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                     GR16:$src2,
                                                     opaquemem:$src3), []>;
    let isPseudo = true, mayLoad = 1 in
    def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                       GR16:$src2,
                                                       opaquemem:$src3), []>;
    let isPseudo = true, mayStore = 1 in
    def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
                                            GR16:$src2, opaquemem:$src3,
                                            TILE:$src4), []>;
    let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
        canFoldAsLoad = 1 in
    def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
                             [(set TILE:$dst, (int_x86_tilezero_internal
                               GR16:$src1, GR16:$src2))]>;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      let mayLoad = 1 in
      def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
      let mayLoad = 1 in
      def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
                                          sibmem:$src2), []>;
      let mayStore = 1 in
      def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
      def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
                              [(int_x86_tilezero timm:$src)]>;
    } // usesCustomInserter
  } // SchedRW
} // HasAMXTILE

// Definitions available when HasAMXINT8 holds in 64-bit mode.
let Predicates = [HasAMXINT8, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // The four encodings share opcode 0x5e and differ only in their prefix
    // class (T8XD / T8XS / T8PD / T8PS). $src1 is tied to $dst.
    let Constraints = "$src1 = $dst" in {
      def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX_4V, T8XD;
      def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX_4V, T8XS;
      def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX_4V, T8PD;
      def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX_4V, T8PS;
    } // Constraints = "$src1 = $dst"

    // Pseudo instruction for RA.
    // Three GR16 operands precede the tile operands; $src4 is tied to $dst.
    let isPseudo = true, Constraints = "$src4 = $dst" in {
      def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                (int_x86_tdpbssd_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;
      def PTDPBSUDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                (int_x86_tdpbsud_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;
      def PTDPBUSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                (int_x86_tdpbusd_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;
      def PTDPBUUDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                (int_x86_tdpbuud_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;
    } // isPseudo, Constraints = "$src4 = $dst"

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbssd timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbsud timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbusd timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbuud timm:$src1,
                               timm:$src2, timm:$src3)]>;
    } // usesCustomInserter
  } // SchedRW
} // HasAMXINT8

// Definitions available when HasAMXBF16 holds in 64-bit mode.
let Predicates = [HasAMXBF16, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    let Constraints = "$src1 = $dst" in
    def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      []>, VEX_4V, T8XS;

    // Pseudo instruction for RA.
    let isPseudo = true, Constraints = "$src4 = $dst" in
    def PTDPBF16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                                (int_x86_tdpbf16ps_internal GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6))]>;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
                               u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpbf16ps timm:$src1,
                                 timm:$src2, timm:$src3)]>;
    } // usesCustomInserter
  } // SchedRW
} // HasAMXBF16

// AMX-FP16
// Definitions available when HasAMXFP16 holds in 64-bit mode.
let Predicates = [HasAMXFP16, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // The asm string names $src1 where the INT8/BF16 forms name $dst; the
    // two operands are tied by the constraint below.
    let Constraints = "$src1 = $dst" in {
      def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                        (ins TILE:$src1, TILE:$src2, TILE:$src3),
                        "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                        []>, VEX_4V, T8XD;
    } // Constraints = "$src1 = $dst"

    // Pseudo instruction for RA.
    let isPseudo = true, Constraints = "$src4 = $dst" in {
      def PTDPFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                GR16:$src2, GR16:$src3, TILE:$src4,
                                TILE:$src5, TILE:$src6),
                                [(set TILE:$dst,
                                  (int_x86_tdpfp16ps_internal GR16:$src1,
                                    GR16:$src2, GR16:$src3, TILE:$src4,
                                    TILE:$src5, TILE:$src6))]>;
    } // isPseudo, Constraints = "$src4 = $dst"

    // Pseudo instruction using immediates instead of tile registers.
    let usesCustomInserter = 1 in {
      def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                               u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpfp16ps timm:$src1,
                                 timm:$src2, timm:$src3)]>;
    } // usesCustomInserter
  } // SchedRW
} // HasAMXFP16

// AMX-COMPLEX
// Definitions available when HasAMXCOMPLEX holds in 64-bit mode.
let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // Both encodings share opcode 0x6c; $src1 is tied to $dst.
    let Constraints = "$src1 = $dst" in {
      def TCMMIMFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                           (ins TILE:$src1, TILE:$src2, TILE:$src3),
                           "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                           []>, T8PD, VEX_4V;
      def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                           (ins TILE:$src1, TILE:$src2, TILE:$src3),
                           "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                           []>, VEX_4V, WIG, T8PS;

    } // Constraints = "$src1 = $dst"

    // Pseudo instructions with the same operand shape as the other "V"
    // pseudos in this file (three GR16 operands, then tiles).
    // NOTE(review): unlike every other "V" pseudo in this file, these are not
    // wrapped in `isPseudo = true` -- confirm whether that is intentional.
    let Constraints = "$src4 = $dst" in {
      def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6),
                                   [(set TILE:$dst,
                                     (int_x86_tcmmimfp16ps_internal GR16:$src1,
                                       GR16:$src2, GR16:$src3, TILE:$src4,
                                       TILE:$src5, TILE:$src6))]>;
      def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6),
                                   [(set TILE:$dst,
                                     (int_x86_tcmmrlfp16ps_internal GR16:$src1,
                                       GR16:$src2, GR16:$src3, TILE:$src4,
                                       TILE:$src5, TILE:$src6))]>;
    } // Constraints = "$src4 = $dst"

    // Pseudo instructions using immediates instead of tile registers.
    let usesCustomInserter = 1 in {
      def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                                  u8imm:$src2, u8imm:$src3),
                                  [(int_x86_tcmmimfp16ps timm:$src1,
                                    timm:$src2, timm:$src3)]>;
      def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                                  u8imm:$src2, u8imm:$src3),
                                  [(int_x86_tcmmrlfp16ps timm:$src1,
                                    timm:$src2, timm:$src3)]>;
    } // usesCustomInserter
  } // SchedRW = [WriteSystem]
} // HasAMXCOMPLEX