// xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAMX.td (revision c8e7f78a3d28ff6e6223ed136ada8e1e2f34965e)
//===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel AMX instruction
// set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMX instructions

// Instructions and pseudos that require HasAMXTILE in 64-bit mode.
// Every definition in this group is scheduled as WriteSystem.
let Predicates = [HasAMXTILE, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // Tile configuration load/store. Both take a single memory operand and are
    // selected from their intrinsics. LDTILECFG is additionally declared to
    // define every tile register (TMM0-TMM7).
    let hasSideEffects = 1,
        Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
                       "ldtilecfg\t$src",
                       [(int_x86_ldtilecfg addr:$src)]>, VEX, T8PS;
    let hasSideEffects = 1 in
    def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
                       "sttilecfg\t$src",
                       [(int_x86_sttilecfg addr:$src)]>, VEX, T8PD;

    // Tile loads. The two forms share opcode 0x4b and the sibmem operand and
    // differ only in mnemonic and prefix (T8XD vs. T8PD).
    let mayLoad = 1 in
    def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                      (ins sibmem:$src),
                      "tileloadd\t{$src, $dst|$dst, $src}", []>,
                      VEX, T8XD;
    let mayLoad = 1 in
    def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
                        (ins sibmem:$src),
                        "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
                        VEX, T8PD;

    // TILERELEASE has no operands; like LDTILECFG it is declared to define
    // all of TMM0-TMM7.
    let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
                        "tilerelease", [(int_x86_tilerelease)]>, VEX, T8PS;

    // Tile store: memory destination, tile source.
    let mayStore = 1 in
    def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs),
                       (ins sibmem:$dst, TILE:$src),
                       "tilestored\t{$src, $dst|$dst, $src}", []>,
                       VEX, T8XS;

    // TILEZERO produces a tile and takes no inputs.
    def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
                     "tilezero\t$dst", []>,
                     VEX, T8XD;

    // Pseudo instructions for RA.
    // The load/store/zero "V" forms below carry two GR16 operands in addition
    // to the memory or tile operand.
    // NOTE(review): the GR16 operands presumably describe the tile shape --
    // confirm against the code that creates these pseudos.
    let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
        Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
    def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
    let isPseudo = true, mayLoad = 1 in
    def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                     GR16:$src2,
                                                     opaquemem:$src3), []>;
    let isPseudo = true, mayLoad = 1 in
    def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                                       GR16:$src2,
                                                       opaquemem:$src3), []>;
    let isPseudo = true, mayStore = 1 in
    def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
                                            GR16:$src2, opaquemem:$src3,
                                            TILE:$src4), []>;
    // PTILEZEROV is the only pseudo in this group with a selection pattern;
    // it is also marked rematerializable and as cheap as a move.
    let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
        canFoldAsLoad = 1 in
      def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
                                [(set TILE:$dst, (int_x86_tilezero_internal
                                  GR16:$src1, GR16:$src2))]>;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      let mayLoad = 1 in
      def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
      let mayLoad = 1 in
      def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
                                          sibmem:$src2), []>;
      let mayStore = 1 in
      def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
      def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
                              [(int_x86_tilezero timm:$src)]>;
    } // usesCustomInserter
  } // SchedRW
} // Predicates = [HasAMXTILE, In64BitMode]

// Instructions and pseudos that require HasAMXINT8 in 64-bit mode.
// Every definition in this group is scheduled as WriteSystem.
let Predicates = [HasAMXINT8, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // The four TDPB* instructions share opcode 0x5e, format and operand list;
    // they differ only in mnemonic and prefix (T8XD / T8XS / T8PD / T8PS).
    // $src1 is tied to $dst, so only $dst, $src2 and $src3 are printed.
    let Constraints = "$src1 = $dst" in {
      def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX_4V, T8XD;
      def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX_4V, T8XS;
      def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX_4V, T8PD;
      def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
                      VEX_4V, T8PS;
    } // Constraints = "$src1 = $dst"

    // Pseudo instructions for RA.
    // Each takes three GR16 operands followed by three tiles; the first tile
    // input ($src4) is tied to $dst. Each is selected from the matching
    // *_internal intrinsic.
    let isPseudo = true, Constraints = "$src4 = $dst" in {
      def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                              (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBSUDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                              (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
                               GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBUSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                              (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTDPBUUDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                              GR16:$src2, GR16:$src3, TILE:$src4,
                              TILE:$src5, TILE:$src6),
                              [(set TILE:$dst,
                              (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
                              GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
    } // isPseudo, Constraints = "$src4 = $dst"

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbssd timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbsud timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbusd timm:$src1,
                               timm:$src2, timm:$src3)]>;
      def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
                             u8imm:$src2, u8imm:$src3),
                             [(int_x86_tdpbuud timm:$src1,
                               timm:$src2, timm:$src3)]>;
    } // usesCustomInserter
  } // SchedRW
} // Predicates = [HasAMXINT8, In64BitMode]

// Instruction and pseudos that require HasAMXBF16 in 64-bit mode.
// Every definition in this group is scheduled as WriteSystem.
let Predicates = [HasAMXBF16, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // $src1 is tied to $dst, so only $dst, $src2 and $src3 are printed.
    let Constraints = "$src1 = $dst" in
    def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                      (ins TILE:$src1, TILE:$src2, TILE:$src3),
                      "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                      []>, VEX_4V, T8XS;

    // Pseudo instruction for RA.
    // Takes three GR16 operands followed by three tiles; the first tile input
    // ($src4) is tied to $dst. Selected from int_x86_tdpbf16ps_internal.
    let isPseudo = true, Constraints = "$src4 = $dst" in
      def PTDPBF16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                 GR16:$src2, GR16:$src3, TILE:$src4,
                                 TILE:$src5, TILE:$src6),
                                 [(set TILE:$dst,
                                  (int_x86_tdpbf16ps_internal GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6))]>;

    let usesCustomInserter = 1 in {
      // Pseudo instructions, using immediates instead of tile registers.
      // To be translated to the actual instructions in X86ISelLowering.cpp
      def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
                               u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpbf16ps timm:$src1,
                                 timm:$src2, timm:$src3)]>;
    } // usesCustomInserter
  } // SchedRW
} // Predicates = [HasAMXBF16, In64BitMode]

// AMX-FP16
// Instruction and pseudos that require HasAMXFP16 in 64-bit mode.
// Every definition in this group is scheduled as WriteSystem.
let Predicates = [HasAMXFP16, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // NOTE(review): the asm string names $src1 here, whereas the TDPB* and
    // TDPBF16PS definitions in this file name $dst. The two operands are tied
    // by the constraint below; confirm the difference is intentional.
    let Constraints = "$src1 = $dst" in {
      def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
                        (ins TILE:$src1, TILE:$src2, TILE:$src3),
                        "tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                        []>, VEX_4V, T8XD;
    } // Constraints = "$src1 = $dst"

    // Pseudo instruction for RA.
    // Takes three GR16 operands followed by three tiles; the first tile input
    // ($src4) is tied to $dst. Selected from int_x86_tdpfp16ps_internal.
    let isPseudo = true, Constraints = "$src4 = $dst" in {
      def PTDPFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                 GR16:$src2, GR16:$src3, TILE:$src4,
                                 TILE:$src5, TILE:$src6),
                                 [(set TILE:$dst,
                                  (int_x86_tdpfp16ps_internal GR16:$src1,
                                   GR16:$src2, GR16:$src3, TILE:$src4,
                                   TILE:$src5, TILE:$src6))]>;
    } // isPseudo, Constraints = "$src4 = $dst"

    // Pseudo using immediates instead of tile registers; selected from
    // int_x86_tdpfp16ps and marked usesCustomInserter.
    let usesCustomInserter = 1 in {
      def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                               u8imm:$src2, u8imm:$src3),
                               [(int_x86_tdpfp16ps timm:$src1,
                                 timm:$src2, timm:$src3)]>;
    } // usesCustomInserter
  } // SchedRW
} // Predicates = [HasAMXFP16, In64BitMode]

// Instructions and pseudos that require HasAMXCOMPLEX in 64-bit mode.
// Every definition in this group is scheduled as WriteSystem.
let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
  let SchedRW = [WriteSystem] in {
    // Both instructions share opcode 0x6c, format and operand list; they
    // differ in mnemonic and prefix (T8PD vs. T8PS), and only TCMMRLFP16PS
    // carries WIG.
    // NOTE(review): the asm strings name $src1 rather than $dst; the two
    // operands are tied by the constraint below.
    let Constraints = "$src1 = $dst" in {
      def TCMMIMFP16PS   : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                            (ins TILE:$src1, TILE:$src2, TILE:$src3),
                            "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                            []>, T8PD, VEX_4V;
      def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
                            (ins TILE:$src1, TILE:$src2, TILE:$src3),
                            "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
                            []>, VEX_4V, WIG, T8PS;

    } // Constraints = "$src1 = $dst"

    // "V" pseudos: three GR16 operands followed by three tiles, with the first
    // tile input ($src4) tied to $dst. Each is selected from the matching
    // *_internal intrinsic.
    // NOTE(review): unlike the other "V" pseudo groups in this file, this let
    // does not set isPseudo = true -- confirm whether that is intentional.
    let Constraints = "$src4 = $dst" in {
      def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6),
                                  [(set TILE:$dst,
                                  (int_x86_tcmmimfp16ps_internal GR16:$src1, GR16:$src2,
                                   GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
      def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
                                  GR16:$src2, GR16:$src3, TILE:$src4,
                                  TILE:$src5, TILE:$src6),
                                  [(set TILE:$dst,
                                  (int_x86_tcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
                                   GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
    } // Constraints = "$src4 = $dst"

    // Pseudos using immediates instead of tile registers; selected from the
    // plain intrinsics and marked usesCustomInserter.
    let usesCustomInserter = 1 in {
      def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                                u8imm:$src2, u8imm:$src3),
                                [(int_x86_tcmmimfp16ps timm:$src1,
                                  timm:$src2, timm:$src3)]>;
      def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
                                u8imm:$src2, u8imm:$src3),
                                [(int_x86_tcmmrlfp16ps timm:$src1,
                                  timm:$src2, timm:$src3)]>;
    } // usesCustomInserter
  } // SchedRW = [WriteSystem]
} // Predicates = [HasAMXCOMPLEX, In64BitMode]
