xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX10.td (revision 700637cbb5e582861067a11aaca4d053546871d2)
1//===-- X86InstrAVX10.td - AVX10 Instruction Set -----------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX10 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
15// VNNI FP16
// VDPPHPS instantiates avx512_dpf16ps_sizes with opcode 0x52, the mnemonic
// "vdpphps", the X86dpfp16ps node and the avx512vl_f16_info type family, in
// the SSEPackedSingle execution domain.
// NOTE(review): the two predicate lists presumably gate the 128/256-bit forms
// and the 512-bit form respectively -- confirm against avx512_dpf16ps_sizes,
// which is not defined in this part of the file.
16let ExeDomain = SSEPackedSingle in
17defm VDPPHPS : avx512_dpf16ps_sizes<0x52, "vdpphps", X86dpfp16ps, avx512vl_f16_info,
18                                    [HasAVX10_2], [HasAVX10_2_512]>,
19                    T8, PS, EVEX_CD8<32, CD8VF>;
20
21// VNNI INT8
// Six records built from VNNI_common, all scheduled with SchedWriteVecIMul and
// all passing the predicate lists [HasAVX10_2] and [HasAVX10_2_512].
// Each name pair shares a prefix class and differs only in opcode: the plain
// name uses 0x50, the "S"-suffixed name uses 0x51.
//   VPDPBSSD / VPDPBSSDS : XD prefix, fifth argument 1
//   VPDPBSUD / VPDPBSUDS : XS prefix, fifth argument 0
//   VPDPBUUD / VPDPBUUDS : PS prefix, fifth argument 1
// NOTE(review): the fifth argument is presumably a commutability flag (it is 1
// only where both multiplicands have the same signedness) -- confirm against
// the VNNI_common definition, which is not visible here.
22defm VPDPBSSD   : VNNI_common<0x50, "vpdpbssd", X86vpdpbssd, SchedWriteVecIMul, 1,
23                              [HasAVX10_2], [HasAVX10_2_512]>, XD;
24defm VPDPBSSDS  : VNNI_common<0x51, "vpdpbssds", X86vpdpbssds, SchedWriteVecIMul, 1,
25                              [HasAVX10_2], [HasAVX10_2_512]>, XD;
26defm VPDPBSUD   : VNNI_common<0x50, "vpdpbsud", X86vpdpbsud, SchedWriteVecIMul, 0,
27                              [HasAVX10_2], [HasAVX10_2_512]>, XS;
28defm VPDPBSUDS  : VNNI_common<0x51, "vpdpbsuds", X86vpdpbsuds, SchedWriteVecIMul, 0,
29                              [HasAVX10_2], [HasAVX10_2_512]>, XS;
30defm VPDPBUUD   : VNNI_common<0x50, "vpdpbuud", X86vpdpbuud, SchedWriteVecIMul, 1,
31                              [HasAVX10_2], [HasAVX10_2_512]>, PS;
32defm VPDPBUUDS  : VNNI_common<0x51, "vpdpbuuds", X86vpdpbuuds, SchedWriteVecIMul, 1,
33                              [HasAVX10_2], [HasAVX10_2_512]>, PS;
34
35// VNNI INT16
// Six records built from VNNI_common, all scheduled with SchedWriteVecIMul and
// all passing the predicate lists [HasAVX10_2] and [HasAVX10_2_512].
// The plain name of each pair uses opcode 0xd2, the "S"-suffixed name 0xd3.
//   VPDPWSUD / VPDPWSUDS : XS prefix, fifth argument 0
//   VPDPWUSD / VPDPWUSDS : PD prefix, fifth argument 0
//   VPDPWUUD / VPDPWUUDS : PS prefix, fifth argument 1
// NOTE(review): the fifth argument is presumably a commutability flag --
// confirm against the VNNI_common definition, which is not visible here.
36defm VPDPWSUD   : VNNI_common<0xd2, "vpdpwsud", X86vpdpwsud, SchedWriteVecIMul, 0,
37                              [HasAVX10_2], [HasAVX10_2_512]>, XS;
38defm VPDPWSUDS  : VNNI_common<0xd3, "vpdpwsuds", X86vpdpwsuds, SchedWriteVecIMul, 0,
39                              [HasAVX10_2], [HasAVX10_2_512]>, XS;
40defm VPDPWUSD   : VNNI_common<0xd2, "vpdpwusd", X86vpdpwusd, SchedWriteVecIMul, 0,
41                              [HasAVX10_2], [HasAVX10_2_512]>, PD;
42defm VPDPWUSDS  : VNNI_common<0xd3, "vpdpwusds", X86vpdpwusds, SchedWriteVecIMul, 0,
43                              [HasAVX10_2], [HasAVX10_2_512]>, PD;
44defm VPDPWUUD   : VNNI_common<0xd2, "vpdpwuud", X86vpdpwuud, SchedWriteVecIMul, 1,
45                              [HasAVX10_2], [HasAVX10_2_512]>, PS;
46defm VPDPWUUDS  : VNNI_common<0xd3, "vpdpwuuds", X86vpdpwuuds, SchedWriteVecIMul, 1,
47                              [HasAVX10_2], [HasAVX10_2_512]>, PS;
48
49// VMPSADBW
// Instantiates avx512_common_3Op_rm_imm8 with opcode 0x42, the X86Vmpsadbw
// node, SchedWritePSADBW scheduling, and two type families: avx512vl_i16_info
// followed by avx512vl_i8_info. A single predicate, HasAVX10_2, is passed.
// NOTE(review): presumably the i16 family describes the result and the i8
// family the sources -- confirm against avx512_common_3Op_rm_imm8.
50defm VMPSADBW : avx512_common_3Op_rm_imm8<0x42, X86Vmpsadbw, "vmpsadbw", SchedWritePSADBW,
51                                          avx512vl_i16_info, avx512vl_i8_info,
52                                          HasAVX10_2>,
53                    XS, EVEX_CD8<32, CD8VF>;
54
55//-------------------------------------------------
56// AVX10 MINMAX instructions
57//-------------------------------------------------
58
// Packed MINMAX forms for one vector type.
//   OpStr  - assembly mnemonic.
//   VTI    - vector type info; supplies the register class, memory operands,
//            load fragments, broadcast string and execution domain.
//   OpNode - node matched as (OpNode src1, src2, timm:$src3).
// Every record uses opcode 0x52 through AVX512_maskable, takes an i32u8imm
// third source, is marked as using MXCSR and as able to raise an FP exception.
59multiclass avx10_minmax_packed_base<string OpStr, X86VectorVTInfo VTI, SDNode OpNode> {
60  let ExeDomain = VTI.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    // rri: both sources in registers.
61    defm rri : AVX512_maskable<0x52, MRMSrcReg, VTI, (outs VTI.RC:$dst),
62                                (ins VTI.RC:$src1, VTI.RC:$src2, i32u8imm:$src3), OpStr,
63                                "$src3, $src2, $src1", "$src1, $src2, $src3",
64                                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
65                                                (i32 timm:$src3)))>,
66                                EVEX, VVVV, Sched<[WriteFMAX]>;
    // rmi: second source loaded from memory through VTI.LdFrag.
67    defm rmi : AVX512_maskable<0x52, MRMSrcMem, VTI, (outs VTI.RC:$dst),
68                                (ins VTI.RC:$src1, VTI.MemOp:$src2, i32u8imm:$src3), OpStr,
69                                "$src3, $src2, $src1", "$src1, $src2, $src3",
70                                (VTI.VT (OpNode VTI.RC:$src1, (VTI.LdFrag addr:$src2),
71                                                (i32 timm:$src3)))>,
72                                EVEX, VVVV,
73                                Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    // rmbi: second source is a scalar memory operand matched through
    // VTI.BroadcastLdFrag; the assembly string appends VTI.BroadcastStr and
    // the record carries EVEX_B.
74    defm rmbi : AVX512_maskable<0x52, MRMSrcMem, VTI, (outs VTI.RC:$dst),
75                                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, i32u8imm:$src3),
76                                OpStr, "$src3, ${src2}"#VTI.BroadcastStr#", $src1",
77                                "$src1, ${src2}"#VTI.BroadcastStr#", $src3",
78                                (VTI.VT (OpNode VTI.RC:$src1, (VTI.BroadcastLdFrag addr:$src2),
79                                                (i32 timm:$src3)))>,
80                                EVEX, VVVV, EVEX_B,
81                                Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
82  }
83}
84
// 512-bit register-register MINMAX form with {sae}.
//   OpStr  - assembly mnemonic.
//   VTI    - vector type family; only VTI.info512 is used.
//   OpNode - node matched as (OpNode src1, src2, timm:$src3).
// The record clears Uses and mayRaiseFPException, and carries EVEX_B and
// EVEX_V512. It is gated on HasAVX10_2_512 so that it agrees with the other
// 512-bit records in this file (the Z form of avx10_minmax_packed and the Zrrb
// forms of avx10_sat_cvt_rc / avx10_sat_cvt_sae); without a predicate the
// record would not be tied to any subtarget feature.
85multiclass avx10_minmax_packed_sae<string OpStr, AVX512VLVectorVTInfo VTI, SDNode OpNode> {
86  let Predicates = [HasAVX10_2_512], Uses = []<Register>, mayRaiseFPException = 0 in
87    defm Zrrib : AVX512_maskable<0x52, MRMSrcReg, VTI.info512, (outs VTI.info512.RC:$dst),
88                                (ins VTI.info512.RC:$src1, VTI.info512.RC:$src2, i32u8imm:$src3), OpStr,
89                                "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
90                                (VTI.info512.VT (OpNode (VTI.info512.VT VTI.info512.RC:$src1),
91                                                        (VTI.info512.VT VTI.info512.RC:$src2),
92                                                        (i32 timm:$src3)))>,
93                                EVEX, VVVV, EVEX_B, EVEX_V512, Sched<[WriteFMAX]>;
94}
95
// Expands avx10_minmax_packed_base once per vector width of the VTI family:
//   Z    - VTI.info512, EVEX_V512, requires HasAVX10_2_512
//   Z256 - VTI.info256, EVEX_V256, requires HasAVX10_2
//   Z128 - VTI.info128, EVEX_V128, requires HasAVX10_2
// OpStr and OpNode are forwarded unchanged.
96multiclass avx10_minmax_packed<string OpStr, AVX512VLVectorVTInfo VTI, SDNode OpNode> {
97  let Predicates = [HasAVX10_2_512] in
98    defm Z    :   avx10_minmax_packed_base<OpStr, VTI.info512, OpNode>, EVEX_V512;
99  let Predicates = [HasAVX10_2] in {
100    defm Z256 :   avx10_minmax_packed_base<OpStr, VTI.info256, OpNode>, EVEX_V256;
101    defm Z128 :   avx10_minmax_packed_base<OpStr, VTI.info128, OpNode>, EVEX_V128;
102  }
103}
104
// Scalar MINMAX forms, all at opcode 0x53 and all requiring HasAVX10_2.
//   OpStr     - assembly mnemonic.
//   _         - vector type info; supplies FRC, RC, VT, the scalar memory
//               operand, load fragments and the execution domain.
//   OpNode    - node used by every non-SAE pattern.
//   OpNodeSAE - node used by the {sae} register form only.
// NOTE(review): unlike avx10_minmax_packed_base, the non-SAE forms here set
// mayRaiseFPException = 1 without also setting Uses = [MXCSR] -- confirm this
// is intended.
105multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
106                                SDNode OpNodeSAE> {
107  let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in {
108    let mayRaiseFPException = 1 in {
      // isCodeGenOnly records operating on the _.FRC register class; the
      // memory form loads through _.ScalarLdFrag.
109      let isCodeGenOnly = 1 in {
110        def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst),
111                            (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
112                             !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
113                             [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, (i32 timm:$src3)))]>,
114                       Sched<[WriteFMAX]>;
115
116        def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst),
117                            (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
118                             !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
119                             [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2),
120                                                       (i32 timm:$src3)))]>,
121                       Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
122      }
      // Maskable records on VR128X. They reuse the rri/rmi base names and
      // pass "_Int" as the last AVX512_maskable argument (presumably a name
      // suffix that keeps them distinct from the defs above -- confirm
      // against AVX512_maskable). The memory form matches
      // _.ScalarIntMemFrags.
123      defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
124                                 (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
125                                  OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
126                                  (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
127                                                (i32 timm:$src3))),
128                                 0, 0, 0, vselect_mask, "", "_Int">,
129                       Sched<[WriteFMAX]>;
130
131      defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
132                                 (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
133                                  OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
134                                  (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
135                                                (i32 timm:$src3))),
136                                 0, 0, 0, vselect_mask, "", "_Int">,
137                       Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
138    }
    // {sae} register form: matches OpNodeSAE, carries EVEX_B, and is declared
    // with no register uses and mayRaiseFPException = 0.
139    let Uses = []<Register>, mayRaiseFPException = 0 in
140      defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
141                                  (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
142                                   OpStr, "$src3, {sae}, $src2, $src1",
143                                   "$src1, $src2, {sae}, $src3",
144                                   (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
145                                                    (i32 timm:$src3))),
146                                  0, 0, 0, vselect_mask, "", "_Int">,
147                       Sched<[WriteFMAX]>, EVEX_B;
148  }
149}
150
151
// MINMAX instantiations.
// Packed: VMINMAXBF16 uses only avx10_minmax_packed (no {sae} form) and is
// wrapped in mayRaiseFPException = 0; VMINMAXPD/PH/PS combine
// avx10_minmax_packed (X86vminmax) with avx10_minmax_packed_sae
// (X86vminmaxSae).
// NOTE(review): avx10_minmax_packed_base sets mayRaiseFPException = 1 in an
// inner let; check which of the two settings wins for VMINMAXBF16.
152let mayRaiseFPException = 0 in
153defm VMINMAXBF16 : avx10_minmax_packed<"vminmaxbf16", avx512vl_bf16_info, X86vminmax>,
154                   AVX512XDIi8Base, EVEX_CD8<16, CD8VF>, TA;
155
156defm VMINMAXPD : avx10_minmax_packed<"vminmaxpd", avx512vl_f64_info, X86vminmax>,
157                 avx10_minmax_packed_sae<"vminmaxpd", avx512vl_f64_info, X86vminmaxSae>,
158                 AVX512PDIi8Base, REX_W, TA, EVEX_CD8<64, CD8VF>;
159
160defm VMINMAXPH : avx10_minmax_packed<"vminmaxph", avx512vl_f16_info, X86vminmax>,
161                 avx10_minmax_packed_sae<"vminmaxph", avx512vl_f16_info, X86vminmaxSae>,
162                 AVX512PSIi8Base, TA, EVEX_CD8<16, CD8VF>;
163
164defm VMINMAXPS : avx10_minmax_packed<"vminmaxps", avx512vl_f32_info, X86vminmax>,
165                 avx10_minmax_packed_sae<"vminmaxps", avx512vl_f32_info, X86vminmaxSae>,
166                 AVX512PDIi8Base, TA, EVEX_CD8<32, CD8VF>;
167
// Scalar: each passes X86vminmaxs and X86vminmaxsSae to avx10_minmax_scalar
// with a 128-bit type info, and uses VEX_LIG with a CD8VT1 tuple.
168defm VMINMAXSD : avx10_minmax_scalar<"vminmaxsd", v2f64x_info, X86vminmaxs, X86vminmaxsSae>,
169                 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
170defm VMINMAXSH : avx10_minmax_scalar<"vminmaxsh", v8f16x_info, X86vminmaxs, X86vminmaxsSae>,
171                 AVX512PSIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>, TA;
172defm VMINMAXSS : avx10_minmax_scalar<"vminmaxss", v4f32x_info, X86vminmaxs, X86vminmaxsSae>,
173                 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;
174
175//-------------------------------------------------
176// AVX10 SATCVT instructions
177//-------------------------------------------------
178
// One-source conversion forms for a single vector width.
//   Opc      - opcode byte shared by all three records.
//   OpStr    - assembly mnemonic.
//   sched    - scheduling class; the memory forms use its Folded and
//              ReadAfterFold members.
//   DestInfo - type info of the result (register class and VT).
//   SrcInfo  - type info of the source (register class, memory operands,
//              load fragments, broadcast string).
//   MaskNode - node applied to the source in every pattern.
// Records: rr (register source), rm (memory source via SrcInfo.LdFrag) and
// rmb (scalar memory source via SrcInfo.BroadcastLdFrag, with EVEX_B and
// SrcInfo.BroadcastStr appended in the assembly string).
179multiclass avx10_sat_cvt_rmb<bits<8> Opc, string OpStr, X86FoldableSchedWrite sched,
180                             X86VectorVTInfo DestInfo,
181                             X86VectorVTInfo SrcInfo,
182                             SDNode MaskNode> {
183  defm rr: AVX512_maskable<Opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
184                           (ins SrcInfo.RC:$src), OpStr, "$src", "$src",
185                           (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src)))>,
186                          Sched<[sched]>;
187  defm rm: AVX512_maskable<Opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
188                           (ins SrcInfo.MemOp:$src), OpStr, "$src", "$src",
189                           (DestInfo.VT (MaskNode (SrcInfo.VT
190                           (SrcInfo.LdFrag addr:$src))))>,
191                          Sched<[sched.Folded, sched.ReadAfterFold]>;
192  defm rmb: AVX512_maskable<Opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
193                            (ins SrcInfo.ScalarMemOp:$src), OpStr,
194                            "${src}"#SrcInfo.BroadcastStr, "${src}"#SrcInfo.BroadcastStr,
195                            (DestInfo.VT (MaskNode (SrcInfo.VT
196                            (SrcInfo.BroadcastLdFrag addr:$src))))>, EVEX_B,
197                            Sched<[sched.Folded, sched.ReadAfterFold]>;
198}
199
200// Conversion with rounding control (RC)
// Defines a single 512-bit register form, Zrrb, that takes an extra
// AVX512RC:$rc operand and passes it to MaskNode as (i32 timm:$rc). The record
// requires HasAVX10_2_512, is marked as using MXCSR, is scheduled with
// sched.ZMM and carries EVEX, EVEX_RC and EVEX_B.
201multiclass avx10_sat_cvt_rc<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
202                            AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
203                            SDNode MaskNode> {
204  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in
205  defm Zrrb : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info512,
206                              (outs DestInfo.info512.RC:$dst),
207                              (ins SrcInfo.info512.RC:$src, AVX512RC:$rc),
208                              OpStr, "$rc, $src", "$src, $rc",
209                              (DestInfo.info512.VT
210                                (MaskNode (SrcInfo.info512.VT SrcInfo.info512.RC:$src),
211                                          (i32 timm:$rc)))>,
212                             Sched<[sched.ZMM]>, EVEX, EVEX_RC, EVEX_B;
213}
214
215// Conversion with SAE
// Defines a single 512-bit register form, Zrrb, whose assembly string carries
// "{sae}" and whose pattern applies Node to the source register. The record
// requires HasAVX10_2_512, is marked as using MXCSR, is scheduled with
// sched.ZMM and carries EVEX and EVEX_B.
// NOTE(review): the MINMAX {sae} forms in this file clear Uses, whereas this
// one keeps Uses = [MXCSR] -- confirm the difference is intended.
216multiclass avx10_sat_cvt_sae<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
217                             AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
218                             SDNode Node> {
219  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in
220  defm Zrrb : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info512,
221                             (outs DestInfo.info512.RC:$dst),
222                             (ins SrcInfo.info512.RC:$src),
223                             OpStr, "{sae}, $src", "$src, {sae}",
224                             (DestInfo.info512.VT
225                               (Node (SrcInfo.info512.VT SrcInfo.info512.RC:$src)))>,
226                             Sched<[sched.ZMM]>, EVEX, EVEX_B;
227}
228
// Expands avx10_sat_cvt_rmb once per vector width, pairing the matching
// members of DestInfo and SrcInfo and the matching member of sched:
//   Z    - info512, sched.ZMM, EVEX_V512, requires HasAVX10_2_512
//   Z256 - info256, sched.YMM, EVEX_V256, requires HasAVX10_2
//   Z128 - info128, sched.XMM, EVEX_V128, requires HasAVX10_2
// Note the parameter order: MaskNode comes before DestInfo/SrcInfo here, but
// after them in avx10_sat_cvt_rmb.
229multiclass avx10_sat_cvt_base<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
230                               SDNode MaskNode, AVX512VLVectorVTInfo DestInfo,
231                               AVX512VLVectorVTInfo SrcInfo> {
232  let Predicates = [HasAVX10_2_512] in
233  defm Z : avx10_sat_cvt_rmb<Opc, OpStr, sched.ZMM,
234           DestInfo.info512, SrcInfo.info512,
235           MaskNode>,
236      EVEX, EVEX_V512;
237  let Predicates = [HasAVX10_2] in {
238    defm Z256
239        : avx10_sat_cvt_rmb<Opc, OpStr, sched.YMM,
240           DestInfo.info256, SrcInfo.info256,
241          MaskNode>,
242          EVEX, EVEX_V256;
243    defm Z128
244        : avx10_sat_cvt_rmb<Opc, OpStr, sched.XMM,
245          DestInfo.info128, SrcInfo.info128,
246          MaskNode>,
247          EVEX, EVEX_V128;
248  }
249}
250
// Non-truncating saturating conversions. All use T_MAP5 and
// SchedWriteVecIMul; opcode 0x69 with X86vcvtp2ibs for the *IBS names and
// 0x6b with X86vcvtp2iubs for the *IUBS names.
// The BF16 sources (i16 result, AVX512XDIi8Base) use only avx10_sat_cvt_base,
// so they get no rounding-control form.
251defm VCVTBF162IBS : avx10_sat_cvt_base<0x69, "vcvtbf162ibs",
252                                       SchedWriteVecIMul, X86vcvtp2ibs,
253                                       avx512vl_i16_info, avx512vl_bf16_info>,
254                      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
255defm VCVTBF162IUBS : avx10_sat_cvt_base<0x6b, "vcvtbf162iubs",
256                                        SchedWriteVecIMul, X86vcvtp2iubs,
257                                        avx512vl_i16_info, avx512vl_bf16_info>,
258                       AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
259
// FP16 sources (i16 result, AVX512PSIi8Base): base forms plus the
// rounding-control form built on X86vcvtp2ibsRnd / X86vcvtp2iubsRnd.
260defm VCVTPH2IBS : avx10_sat_cvt_base<0x69, "vcvtph2ibs", SchedWriteVecIMul,
261                                     X86vcvtp2ibs, avx512vl_i16_info,
262                                     avx512vl_f16_info>,
263                  avx10_sat_cvt_rc<0x69, "vcvtph2ibs", SchedWriteVecIMul,
264                                   avx512vl_i16_info, avx512vl_f16_info,
265                                   X86vcvtp2ibsRnd>,
266                  AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
267defm VCVTPH2IUBS : avx10_sat_cvt_base<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
268                                      X86vcvtp2iubs, avx512vl_i16_info,
269                                      avx512vl_f16_info>,
270                   avx10_sat_cvt_rc<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
271                                    avx512vl_i16_info, avx512vl_f16_info,
272                                    X86vcvtp2iubsRnd>,
273                   AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
274
// FP32 sources (i32 result, AVX512PDIi8Base): base forms plus the
// rounding-control form.
275defm VCVTPS2IBS : avx10_sat_cvt_base<0x69, "vcvtps2ibs", SchedWriteVecIMul,
276                                     X86vcvtp2ibs, avx512vl_i32_info,
277                                     avx512vl_f32_info>,
278                  avx10_sat_cvt_rc<0x69, "vcvtps2ibs", SchedWriteVecIMul,
279                                   avx512vl_i32_info, avx512vl_f32_info,
280                                   X86vcvtp2ibsRnd>,
281                  AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
282defm VCVTPS2IUBS : avx10_sat_cvt_base<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
283                                      X86vcvtp2iubs, avx512vl_i32_info,
284                                      avx512vl_f32_info>,
285                   avx10_sat_cvt_rc<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
286                                    avx512vl_i32_info, avx512vl_f32_info,
287                                    X86vcvtp2iubsRnd>,
288                   AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
289
// Truncating saturating conversions. All use T_MAP5 and SchedWriteVecIMul;
// opcode 0x68 with X86vcvttp2ibs for the *IBS names and 0x6a with
// X86vcvttp2iubs for the *IUBS names.
// The BF16 sources (i16 result, AVX512XDIi8Base) use only avx10_sat_cvt_base,
// so they get no {sae} form.
290defm VCVTTBF162IBS : avx10_sat_cvt_base<0x68, "vcvttbf162ibs",
291                                        SchedWriteVecIMul, X86vcvttp2ibs,
292                                        avx512vl_i16_info, avx512vl_bf16_info>,
293                       AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
294defm VCVTTBF162IUBS : avx10_sat_cvt_base<0x6a, "vcvttbf162iubs",
295                                         SchedWriteVecIMul, X86vcvttp2iubs,
296                                         avx512vl_i16_info, avx512vl_bf16_info>,
297                        AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
298
// FP16 sources (i16 result, AVX512PSIi8Base): base forms plus the {sae} form
// built on X86vcvttp2ibsSAE / X86vcvttp2iubsSAE.
299defm VCVTTPH2IBS : avx10_sat_cvt_base<0x68, "vcvttph2ibs", SchedWriteVecIMul,
300                                      X86vcvttp2ibs, avx512vl_i16_info,
301                                      avx512vl_f16_info>,
302                   avx10_sat_cvt_sae<0x68, "vcvttph2ibs", SchedWriteVecIMul,
303                                     avx512vl_i16_info, avx512vl_f16_info,
304                                     X86vcvttp2ibsSAE>,
305                   AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
306defm VCVTTPH2IUBS : avx10_sat_cvt_base<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
307                                       X86vcvttp2iubs, avx512vl_i16_info,
308                                       avx512vl_f16_info>,
309                    avx10_sat_cvt_sae<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
310                                      avx512vl_i16_info, avx512vl_f16_info,
311                                      X86vcvttp2iubsSAE>,
312                    AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
313
// FP32 sources (i32 result, AVX512PDIi8Base): base forms plus the {sae} form.
314defm VCVTTPS2IBS : avx10_sat_cvt_base<0x68, "vcvttps2ibs", SchedWriteVecIMul,
315                                      X86vcvttp2ibs, avx512vl_i32_info,
316                                      avx512vl_f32_info>,
317                   avx10_sat_cvt_sae<0x68, "vcvttps2ibs", SchedWriteVecIMul,
318                                     avx512vl_i32_info, avx512vl_f32_info,
319                                     X86vcvttp2ibsSAE>,
320                   AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
321defm VCVTTPS2IUBS : avx10_sat_cvt_base<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
322                                       X86vcvttp2iubs, avx512vl_i32_info,
323                                       avx512vl_f32_info>,
324                    avx10_sat_cvt_sae<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
325                                      avx512vl_i32_info, avx512vl_f32_info,
326                                      X86vcvttp2iubsSAE>,
327                    AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
328
329//-------------------------------------------------
330// AVX10 SATCVT-DS instructions
331//-------------------------------------------------
332
333// Convert Double to Signed/Unsigned Doubleword with truncation.
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic; also the stem of the "x"/"y" aliases.
//   OpNode     - node for the unmasked patterns.
//   MaskOpNode - node for the masked patterns.
//   OpNodeSAE  - node for the {sae} forms.
//   sched      - per-width scheduling classes (ZMM/YMM/XMM).
// The 128-bit and 256-bit sources both produce a 4 x i32 result, so the
// multiclass also defines AT&T aliases with an "x" or "y" mnemonic suffix that
// name the source width explicitly.
334multiclass avx10_cvttpd2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
335                            SDNode MaskOpNode, SDNode OpNodeSAE,
336                            X86SchedWriteWidths sched> {
  // 512-bit source (v8f64 -> v8i32), including the {sae} form.
337  let Predicates = [HasAVX10_2_512] in {
338    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
339                            MaskOpNode, sched.ZMM>,
340             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
341                               OpNodeSAE, sched.ZMM>, EVEX_V512;
342  }
  // 128-bit and 256-bit sources. The Z128 form passes null_frag for both
  // nodes, so it is defined without selection patterns of its own; it also
  // overrides the broadcast string, mnemonic suffix, memory operand and mask
  // register class.
343  let Predicates = [HasAVX10_2] in {
344    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
345                              null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
346                              f128mem, VK2WM>, EVEX_V128;
347    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
348                              MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
349  }
350
  // 256-bit {sae} form, defined with hasEVEX_U = 1.
351  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
352    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNodeSAE,
353                                  sched.YMM>, EVEX_V256;
354  }
355
356
  // "x"-suffixed aliases for the Z128 register and broadcast forms
  // (unmasked, merge-masked and zero-masked). Every alias passes 0 and "att"
  // as its trailing InstAlias arguments.
357  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
358                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
359                   VR128X:$src), 0, "att">;
360  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
361                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
362                   VK2WM:$mask, VR128X:$src), 0, "att">;
363  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
364                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
365                   VK2WM:$mask, VR128X:$src), 0, "att">;
366  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
367                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
368                   f64mem:$src), 0, "att">;
369  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
370                  "$dst {${mask}}, ${src}{1to2}}",
371                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
372                   VK2WM:$mask, f64mem:$src), 0, "att">;
373  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
374                  "$dst {${mask}} {z}, ${src}{1to2}}",
375                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
376                   VK2WM:$mask, f64mem:$src), 0, "att">;
377
  // "y"-suffixed aliases for the Z256 register, {sae} register and broadcast
  // forms (unmasked, merge-masked and zero-masked).
378  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
379                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
380                   VR256X:$src), 0, "att">;
381  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst|$dst, $src {sae}}",
382                  (!cast<Instruction>(NAME # "Z256rrb") VR128X:$dst,
383                   VR256X:$src), 0, "att">;
384  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
385                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
386                   VK4WM:$mask, VR256X:$src), 0, "att">;
387  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}}|$dst {${mask}}, $src {sae}}",
388                  (!cast<Instruction>(NAME # "Z256rrbk") VR128X:$dst,
389                   VK4WM:$mask, VR256X:$src), 0, "att">;
390  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
391                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
392                   VK4WM:$mask, VR256X:$src), 0, "att">;
393  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src {sae}}",
394                  (!cast<Instruction>(NAME # "Z256rrbkz") VR128X:$dst,
395                   VK4WM:$mask, VR256X:$src), 0, "att">;
396  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
397                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
398                   f64mem:$src), 0, "att">;
399  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
400                  "$dst {${mask}}, ${src}{1to4}}",
401                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
402                   VK4WM:$mask, f64mem:$src), 0, "att">;
403  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
404                  "$dst {${mask}} {z}, ${src}{1to4}}",
405                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
406                   VK4WM:$mask, f64mem:$src), 0, "att">;
407}
408
409// Convert Double to Signed/Unsigned Quadword with truncation, saturation enabled
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   OpNode     - node for the unmasked patterns.
//   MaskOpNode - node for the masked patterns.
//   OpNodeRnd  - node passed to avx512_vcvt_fp_sae for the {sae} forms.
//   sched      - per-width scheduling classes (ZMM/YMM/XMM).
// Source and result have the same element count at every width
// (v8f64->v8i64, v4f64->v4i64, v2f64->v2i64), so no width-suffixed aliases
// are defined here.
410multiclass avx10_cvttpd2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
411                            SDNode MaskOpNode, SDNode OpNodeRnd,
412                            X86SchedWriteWidths sched> {
   // 512-bit form plus its {sae} form.
413   let Predicates = [HasAVX10_2_512] in {
414     defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
415                            MaskOpNode, sched.ZMM>,
416              avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
417                                OpNodeRnd, sched.ZMM>, EVEX_V512;
418   }
   // 128-bit and 256-bit forms.
419   let Predicates = [HasAVX10_2] in {
420     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
421                               MaskOpNode, sched.XMM>, EVEX_V128;
422     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
423                               MaskOpNode, sched.YMM>, EVEX_V256;
424   }
   // 256-bit {sae} form, defined with hasEVEX_U = 1.
425   let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
426     defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f64x_info,
427                                   OpNodeRnd, sched.YMM>, EVEX_V256;
428   }
429}
430
431// Convert Float to Signed/Unsigned Quadword with truncation
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   OpNode     - node for the unmasked patterns.
//   MaskOpNode - node for the masked patterns.
//   OpNodeRnd  - node passed to avx512_vcvt_fp_sae for the {sae} forms.
//   sched      - per-width scheduling classes (ZMM/YMM/XMM).
// The source vector is half the width of the result (v8f32->v8i64,
// v4f32->v4i64, v4f32->v2i64).
432multiclass avx10_cvttps2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
433                            SDNode MaskOpNode, SDNode OpNodeRnd,
434                            X86SchedWriteWidths sched> {
  // 512-bit result plus its {sae} form.
435  let Predicates = [HasAVX10_2_512] in {
436    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
437                           MaskOpNode, sched.ZMM>,
438             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
439                               OpNodeRnd, sched.ZMM>, EVEX_V512;
440  }
  // The Z128 form uses an f64mem operand with a "{1to2}" broadcast string and
  // supplies explicit memory patterns: a 64-bit load (loadf64) placed in a
  // vector with scalar_to_vector and bitcast to v4f32.
441  let Predicates = [HasAVX10_2] in {
442    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
443                              MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
444                              (v2i64 (OpNode (bc_v4f32 (v2f64
445                               (scalar_to_vector (loadf64 addr:$src)))))),
446                              (v2i64 (MaskOpNode (bc_v4f32 (v2f64
447                               (scalar_to_vector (loadf64 addr:$src))))))>,
448                              EVEX_V128;
449    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
450                              MaskOpNode, sched.YMM>, EVEX_V256;
451  }
452
  // 256-bit {sae} form, defined with hasEVEX_U = 1.
453  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
454    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNodeRnd,
455                                  sched.YMM>, EVEX_V256;
456  }
457}
458
459// Convert Float to Signed/Unsigned Doubleword with truncation
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   OpNode     - node for the unmasked patterns.
//   MaskOpNode - node for the masked patterns.
//   OpNodeSAE  - node for the 512-bit {sae} form.
//   sched      - per-width scheduling classes (ZMM/YMM/XMM).
// Source and result have the same element count at every width
// (v16f32->v16i32, v8f32->v8i32, v4f32->v4i32).
// NOTE(review): unlike the other three multiclasses in this section, no
// 256-bit {sae} form (hasEVEX_U = 1) is defined here -- confirm whether that
// omission is intended.
460multiclass avx10_cvttps2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
461                            SDNode MaskOpNode,
462                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  // 512-bit form plus its {sae} form.
463  let Predicates = [HasAVX10_2_512] in {
464    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
465                           MaskOpNode, sched.ZMM>,
466             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
467                                OpNodeSAE, sched.ZMM>, EVEX_V512;
468  }
469
  // 128-bit and 256-bit forms.
470  let Predicates = [HasAVX10_2] in {
471    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
472                              MaskOpNode, sched.XMM>, EVEX_V128;
473    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
474                              MaskOpNode, sched.YMM>, EVEX_V256;
475  }
476}
477
// SATCVT-DS instantiations. All use T_MAP5; the signed names use opcode 0x6D
// with the X86cvttp2sis nodes and the unsigned names use 0x6C with the
// X86cvttp2uis nodes. In each case the same node is passed for both the
// unmasked and the masked pattern, and the *SAE node for the {sae} forms.
// The PD-source forms carry REX_W and a 64-bit CD8 element size; the
// PS-source forms use a 32-bit element size, with CD8VH for the PS->QQ forms.
// NOTE(review): VCVTTPD2DQS lists PD and, later in the same list, PS, whereas
// VCVTTPD2UDQS lists only PS. Presumably the later class wins and the PD is
// redundant -- confirm against the TableGen inheritance rules before relying
// on it.
478defm VCVTTPD2DQS : avx10_cvttpd2dqs<0x6D, "vcvttpd2dqs", X86cvttp2sis,
479                                    X86cvttp2sis, X86cvttp2sisSAE,
480                                    SchedWriteCvtPD2DQ>,
481                                    PD, REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
482defm VCVTTPD2UDQS : avx10_cvttpd2dqs<0x6C, "vcvttpd2udqs", X86cvttp2uis,
483                                     X86cvttp2uis, X86cvttp2uisSAE,
484                                     SchedWriteCvtPD2DQ>,
485                                     REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
486defm VCVTTPS2DQS : avx10_cvttps2dqs<0x6D, "vcvttps2dqs", X86cvttp2sis,
487                                    X86cvttp2sis, X86cvttp2sisSAE,
488                                    SchedWriteCvtPS2DQ>, T_MAP5,PS,
489                                    EVEX_CD8<32, CD8VF>;
490defm VCVTTPS2UDQS : avx10_cvttps2dqs<0x6C, "vcvttps2udqs", X86cvttp2uis,
491                                     X86cvttp2uis, X86cvttp2uisSAE,
492                                     SchedWriteCvtPS2DQ>, T_MAP5,PS,
493                                     EVEX_CD8<32, CD8VF>;
494defm VCVTTPD2QQS : avx10_cvttpd2qqs<0x6D, "vcvttpd2qqs", X86cvttp2sis,
495                                    X86cvttp2sis, X86cvttp2sisSAE,
496                                    SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
497                                    EVEX_CD8<64, CD8VF>;
498defm VCVTTPS2QQS : avx10_cvttps2qqs<0x6D, "vcvttps2qqs", X86cvttp2sis,
499                                    X86cvttp2sis, X86cvttp2sisSAE,
500                                    SchedWriteCvtPS2DQ>, T_MAP5,PD,
501                                    EVEX_CD8<32, CD8VH>;
502defm VCVTTPD2UQQS : avx10_cvttpd2qqs<0x6C, "vcvttpd2uqqs", X86cvttp2uis,
503                                     X86cvttp2uis, X86cvttp2uisSAE,
504                                     SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
505                                     EVEX_CD8<64, CD8VF>;
506defm VCVTTPS2UQQS : avx10_cvttps2qqs<0x6C, "vcvttps2uqqs", X86cvttp2uis,
507                                     X86cvttp2uis, X86cvttp2uisSAE,
508                                     SchedWriteCvtPS2DQ>, T_MAP5,PD,
509                                     EVEX_CD8<32, CD8VH>;
510
511let Predicates = [HasAVX10_2] in {
512// Register-form patterns selecting the saturating packed conversions from
513// fp_to_[su]int_sat / X86fp2[su]isat come first, one group per instruction.
514// The explicit VCVTTPD2DQSZ128 / VCVTTPD2UDQSZ128 patterns follow them.
515
516// VCVTTPD2DQS
517def : Pat<(v4i32(X86fp2sisat(v2f64 VR128X:$src))),
518          (VCVTTPD2DQSZ128rr VR128X:$src)>;
519def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
520          (VCVTTPD2DQSZ256rr VR256X:$src)>;
521def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
522          (VCVTTPD2DQSZrr VR512:$src)>;
523
524// VCVTTPD2QQS
525def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
526          (VCVTTPD2QQSZ128rr VR128X:$src)>;
527def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
528          (VCVTTPD2QQSZ256rr VR256X:$src)>;
529def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
530          (VCVTTPD2QQSZrr VR512:$src)>;
531
532// VCVTTPD2UDQS
533def : Pat<(v4i32(X86fp2uisat(v2f64 VR128X:$src))),
534          (VCVTTPD2UDQSZ128rr VR128X:$src)>;
535def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
536          (VCVTTPD2UDQSZ256rr VR256X:$src)>;
537def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
538          (VCVTTPD2UDQSZrr VR512:$src)>;
539
540// VCVTTPD2UQQS
541def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
542          (VCVTTPD2UQQSZ128rr VR128X:$src)>;
543def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
544          (VCVTTPD2UQQSZ256rr VR256X:$src)>;
545def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
546          (VCVTTPD2UQQSZrr VR512:$src)>;
547
548// VCVTTPS2DQS
549def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
550          (VCVTTPS2DQSZ128rr VR128X:$src)>;
551def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
552          (VCVTTPS2DQSZ256rr VR256X:$src)>;
553def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
554          (VCVTTPS2DQSZrr VR512:$src)>;
555
556// VCVTTPS2QQS
557def : Pat<(v2i64(X86fp2sisat(v4f32 VR128X:$src))),
558          (VCVTTPS2QQSZ128rr VR128X:$src)>;
559def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
560          (VCVTTPS2QQSZ256rr VR128X:$src)>;
561def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
562          (VCVTTPS2QQSZrr VR256X:$src)>;
563
564// VCVTTPS2UDQS
565def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
566          (VCVTTPS2UDQSZ128rr VR128X:$src)>;
567def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
568          (VCVTTPS2UDQSZ256rr VR256X:$src)>;
569def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
570          (VCVTTPS2UDQSZrr VR512:$src)>;
571
572// VCVTTPS2UQQS
573def : Pat<(v2i64(X86fp2uisat(v4f32 VR128X:$src))),
574          (VCVTTPS2UQQSZ128rr VR128X:$src)>;
575def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
576          (VCVTTPS2UQQSZ256rr VR128X:$src)>;
577def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
578          (VCVTTPS2UQQSZrr VR256X:$src)>;
579
// Special patterns to allow use of X86mcvttp2sis / X86mcvttp2uis for masking.
// Instruction patterns have been disabled with null_frag, so every form
// (rr, rm, rmb and their merge-masked "k" / zero-masked "kz" variants) is
// spelled out explicitly here with a VK2WM mask.
// Patterns VCVTTPD2DQSZ128
580def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
581          (VCVTTPD2DQSZ128rr VR128X:$src)>;
582def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
583          (VCVTTPD2DQSZ128rm addr:$src)>;
584def : Pat<(v4i32 (X86cvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)))),
585          (VCVTTPD2DQSZ128rmb addr:$src)>;
586def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
587           VK2WM:$mask),
588          (VCVTTPD2DQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
589def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
590           VK2WM:$mask),
591          (VCVTTPD2DQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
592def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
593           VK2WM:$mask),
594          (VCVTTPD2DQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
595def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
596           VK2WM:$mask),
597          (VCVTTPD2DQSZ128rmkz VK2WM:$mask, addr:$src)>;
598def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
599           (v4i32 VR128X:$src0), VK2WM:$mask),
600          (VCVTTPD2DQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
601def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
602           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
603          (VCVTTPD2DQSZ128rmbkz VK2WM:$mask, addr:$src)>;
604
605// Patterns VCVTTPD2UDQSZ128
// Unmasked forms mirror the signed group above: rr, rm (full-vector load),
// rmb (64-bit broadcast load). The rm pattern replaces what used to be a
// second, identical copy of the rmb pattern, so a plain loadv2f64 operand is
// now folded into the instruction like it is for VCVTTPD2DQSZ128.
606def : Pat<(v4i32 (X86cvttp2uis (v2f64 VR128X:$src))),
607          (VCVTTPD2UDQSZ128rr VR128X:$src)>;
608def : Pat<(v4i32 (X86cvttp2uis (loadv2f64 addr:$src))),
609          (VCVTTPD2UDQSZ128rm addr:$src)>;
610def : Pat<(v4i32 (X86cvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)))),
611          (VCVTTPD2UDQSZ128rmb addr:$src)>;
612def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
613           VK2WM:$mask),
614          (VCVTTPD2UDQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
615def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
616           VK2WM:$mask),
617          (VCVTTPD2UDQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
618def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
619           VK2WM:$mask),
620          (VCVTTPD2UDQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
621def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
622           VK2WM:$mask),
623          (VCVTTPD2UDQSZ128rmkz VK2WM:$mask, addr:$src)>;
624def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
625           (v4i32 VR128X:$src0), VK2WM:$mask),
626          (VCVTTPD2UDQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
627def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
628           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
629          (VCVTTPD2UDQSZ128rmbkz VK2WM:$mask, addr:$src)>;
630}
631
632// Convert scalar float/double to signed/unsigned int 32/64 with truncation and saturation.
// Parameters:
//   opc, asm   - opcode byte and mnemonic shared by every record defined here.
//   _SrcRC     - VT info of the FP source (FRC is used by rr, RC by the _Int
//                register forms, ScalarMemOp/IntScalarMemOp by the memory forms).
//   _DstRC     - VT info of the integer destination; its EltVT is passed to
//                OpNode as the second operand in the rr/rm patterns.
//   OpNode     - node matched by the codegen-only rr/rm forms.
//   OpNodeInt  - node matched by rr_Int/rm_Int.
//   OpNodeSAE  - node matched by the {sae} form rrb_Int.
//   sched      - scheduling class; memory forms use sched.Folded and
//                sched.ReadAfterFold.
633multiclass avx10_cvt_s_ds<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
634                          X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
635                          SDNode OpNodeInt, SDNode OpNodeSAE,
636                          X86FoldableSchedWrite sched> {
637  let Predicates = [HasAVX10_2], ExeDomain = _SrcRC.ExeDomain in {
    // rr/rm operate on the scalar FP register class / scalar load fragment and
    // are marked isCodeGenOnly.
638    let isCodeGenOnly = 1 in {
639    def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
640                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
641                      [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src, _DstRC.EltVT))]>,
642                    EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
643    def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
644                    !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
645                      [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src), _DstRC.EltVT))]>,
646                    EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  // The brace below closes `let isCodeGenOnly`; the Predicates/ExeDomain `let`
  // stays open until the next closing brace at this indentation.
647  }
  // _Int forms take the full vector register class (or the scalar-int memory
  // fragments) and match OpNodeInt.
648  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
649                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
650                        [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
651                      EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // {sae} form: sets EVEX_B, lists MXCSR in Uses and, unlike the other forms,
  // does not carry SIMD_EXC.
652  let Uses = [MXCSR] in
653  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
654                       !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
655                        [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
656                      EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
657  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
658                      (ins _SrcRC.IntScalarMemOp:$src),
659                      !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
660                        [(set _DstRC.RC:$dst,
661                          (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
662                      EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>,
663                      SIMD_EXC;
664  }
665}
666
// Scalar saturating conversions built from avx10_cvt_s_ds. Opcode 0x6D is
// paired with fp_to_sint_sat, 0x6C with fp_to_uint_sat. The 32-bit and 64-bit
// destination variants share one mnemonic string and differ by i32x_info vs
// i64x_info plus REX_W. ss sources use XS and EVEX_CD8<32, CD8VT1>; sd sources
// use XD and EVEX_CD8<64, CD8VT1>.
// NOTE(review): the sd variants also pass X86cvttss2Int / X86cvttss2UInt (and
// their SAE counterparts) — presumably these nodes are not f32-specific
// despite the "ss" in the name; confirm against their definitions.
667defm VCVTTSS2SIS: avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i32x_info,
668                                 fp_to_sint_sat, X86cvttss2Int,
669                                 X86cvttss2IntSAE, WriteCvtSS2I>,
670                                 T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
671defm VCVTTSS2SI64S: avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i64x_info,
672                                   fp_to_sint_sat, X86cvttss2Int,
673                                   X86cvttss2IntSAE, WriteCvtSS2I>,
674                                   REX_W, T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
675defm VCVTTSD2SIS: avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i32x_info,
676                                 fp_to_sint_sat, X86cvttss2Int,
677                                 X86cvttss2IntSAE, WriteCvtSD2I>,
678                                 T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
679defm VCVTTSD2SI64S: avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i64x_info,
680                                   fp_to_sint_sat, X86cvttss2Int,
681                                   X86cvttss2IntSAE, WriteCvtSD2I>,
682                                   REX_W, T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
683defm VCVTTSS2USIS: avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i32x_info,
684                                  fp_to_uint_sat, X86cvttss2UInt,
685                                  X86cvttss2UIntSAE, WriteCvtSS2I>,
686                                  T_MAP5,XS, EVEX_CD8<32, CD8VT1>;
687defm VCVTTSS2USI64S: avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i64x_info,
688                                    fp_to_uint_sat, X86cvttss2UInt,
689                                     X86cvttss2UIntSAE, WriteCvtSS2I>,
690                                    T_MAP5,XS,REX_W, EVEX_CD8<32, CD8VT1>;
691defm VCVTTSD2USIS: avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i32x_info,
692                                  fp_to_uint_sat, X86cvttss2UInt,
693                                  X86cvttss2UIntSAE, WriteCvtSD2I>,
694                                  T_MAP5,XD, EVEX_CD8<64, CD8VT1>;
695defm VCVTTSD2USI64S: avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i64x_info,
696                                    fp_to_uint_sat, X86cvttss2UInt,
697                                    X86cvttss2UIntSAE, WriteCvtSD2I>,
698                                    T_MAP5,XD, REX_W, EVEX_CD8<64, CD8VT1>;
699
700//-------------------------------------------------
701// AVX10 CONVERT instructions
702//-------------------------------------------------
703
// Rounding-control register form (rrb) of a two-source conversion.
//   _Src      - VT info of both source operands ($src1, $src2).
//   _         - VT info of the destination.
//   OpNodeRnd - node taking the two sources plus an i32 immediate $rc.
// The record sets EVEX_B and EVEX_RC and lists MXCSR in Uses.
704multiclass avx10_cvt2ps2ph_rc<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
705                              X86VectorVTInfo _Src, X86VectorVTInfo _,
706                              SDNode OpNodeRnd> {
707  let Uses = [MXCSR] in
708    defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
709                               (ins _Src.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
710                               "$rc, $src2, $src1", "$src1, $src2, $rc",
711                               (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
712                                                (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
713                              EVEX, VVVV, EVEX_B, EVEX_RC, PD, Sched<[sched]>;
714}
715
716//TODO: Merge into avx512_binop_all, difference is rounding control added here.
// Instantiates a two-source conversion at all three vector widths.
//   Z    - 512-bit, under HasAVX10_2_512 with Uses = [MXCSR]; combines
//          avx512_binop_rm2 with the rounding-control form from
//          avx10_cvt2ps2ph_rc.
//   Z256 - 256-bit, under HasAVX10_2; avx512_binop_rm2 only.
//   Z128 - 128-bit, under HasAVX10_2; avx512_binop_rm2 only.
// OpNode is used by the avx512_binop_rm2 forms, OpNodeRnd only by Z's
// rounding-control form.
717multiclass avx10_cvt2ps2ph<bits<8> opc, string OpcodeStr,
718                           X86SchedWriteWidths sched,
719                           AVX512VLVectorVTInfo _SrcVTInfo,
720                           AVX512VLVectorVTInfo _DstVTInfo,
721                           SDNode OpNode, SDNode OpNodeRnd> {
722  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in {
723    defm Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
724                              _SrcVTInfo.info512, _DstVTInfo.info512,
725                              _SrcVTInfo.info512>,
726             avx10_cvt2ps2ph_rc<opc, OpcodeStr, sched.ZMM,
727                                _SrcVTInfo.info512, _DstVTInfo.info512,
728                                OpNodeRnd>,
729             EVEX_V512, EVEX_CD8<32, CD8VF>;
730  }
731  let Predicates = [HasAVX10_2] in {
732    defm Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
733                                 _SrcVTInfo.info256, _DstVTInfo.info256,
734                                 _SrcVTInfo.info256>,
735                                EVEX_V256, EVEX_CD8<32, CD8VF>;
736    defm Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
737                                 _SrcVTInfo.info128, _DstVTInfo.info128,
738                                 _SrcVTInfo.info128>,
739                EVEX_V128, EVEX_CD8<32, CD8VF>;
740  }
741}
742
// VCVT2PS2PHX: two f32 vector sources -> f16 vector (avx512vl_f32_info to
// avx512vl_f16_info), opcode 0x67 in the T8 map. X86vfpround2 drives the
// regular forms, X86vfpround2Rnd the 512-bit rounding-control form.
743defm VCVT2PS2PHX : avx10_cvt2ps2ph<0x67, "vcvt2ps2phx",
744                                   SchedWriteCvtPD2PS,
745                                   avx512vl_f32_info, avx512vl_f16_info,
746                                   X86vfpround2, X86vfpround2Rnd>, T8;
747
// Two-source f16 -> 8-bit-element conversions (avx512vl_f16_info to
// avx512vl_i8_info), all built with avx512_binop_all and the XD prefix.
// Opcode / map per instruction: VCVT2PH2BF8 0x74 in T8, VCVT2PH2BF8S 0x74 in
// T_MAP5, VCVT2PH2HF8 0x18 in T_MAP5, VCVT2PH2HF8S 0x1b in T_MAP5.
// The predicate lists here are passed as ([HasAVX10_2_512], [HasAVX10_2]) —
// presumably the 512-bit list comes first in avx512_binop_all's signature;
// verify against that multiclass.
748defm VCVT2PH2BF8 : avx512_binop_all<0x74, "vcvt2ph2bf8", SchedWriteCvtPD2PS,
749                                     avx512vl_f16_info, avx512vl_i8_info,
750                                     X86vcvt2ph2bf8, [HasAVX10_2_512], [HasAVX10_2]>,
751                                    EVEX_CD8<16, CD8VF>, T8, XD;
752defm VCVT2PH2BF8S : avx512_binop_all<0x74, "vcvt2ph2bf8s", SchedWriteCvtPD2PS,
753                                      avx512vl_f16_info, avx512vl_i8_info,
754                                      X86vcvt2ph2bf8s, [HasAVX10_2_512], [HasAVX10_2]>,
755                                     EVEX_CD8<16, CD8VF>, T_MAP5, XD;
756defm VCVT2PH2HF8 : avx512_binop_all<0x18, "vcvt2ph2hf8", SchedWriteCvtPD2PS,
757                                     avx512vl_f16_info, avx512vl_i8_info,
758                                     X86vcvt2ph2hf8, [HasAVX10_2_512], [HasAVX10_2]>,
759                                    EVEX_CD8<16, CD8VF>, T_MAP5, XD;
760defm VCVT2PH2HF8S : avx512_binop_all<0x1b, "vcvt2ph2hf8s", SchedWriteCvtPD2PS,
761                                      avx512vl_f16_info, avx512vl_i8_info,
762                                      X86vcvt2ph2hf8s, [HasAVX10_2_512], [HasAVX10_2]>,
763                                     EVEX_CD8<16, CD8VF>, T_MAP5, XD;
764
765//TODO: Merge into avx512_vcvt_fp, difference is one more source register here.
// Two-source packed conversion at a single vector width, defined through
// AVX512_maskable_cvt in three addressing forms:
//   rr  - $src2 is a register.
//   rm  - $src2 is a full-vector memory operand (MemOp, LdFrag).
//   rmb - $src2 is a scalar memory operand that is broadcast
//         (ScalarMemOp, BroadcastLdFrag); sets EVEX_B.
// Each form supplies three `ins` lists (unmasked, merge-masked with $src0,
// zero-masked) and three DAGs: OpNode for the unmasked case, and MaskOpNode
// wrapped in vselect_mask with either $src0 or ImmAllZerosV.
//   vt_dst / vt_src1 / vt_src2 - VT info of destination and the two sources.
//   Broadcast, MemOp, MaskRC   - default to vt_src2's BroadcastStr, MemOp, KRCWM.
//   LdDAG / MaskLdDAG          - load DAGs used by the rm form.
// NOTE(review): rm and rmb use Sched<[sched]> rather than
// sched.Folded/sched.ReadAfterFold as the memory forms of other multiclasses
// in this file do — confirm this is intended.
766multiclass avx10_convert_3op_packed<bits<8> OpCode, string OpcodeStr,
767                                    X86VectorVTInfo vt_dst, X86VectorVTInfo vt_src1,
768                                    X86VectorVTInfo vt_src2, SDPatternOperator OpNode,
769                                    SDPatternOperator MaskOpNode, X86FoldableSchedWrite sched,
770                                    string Broadcast = vt_src2.BroadcastStr,
771                                    X86MemOperand MemOp = vt_src2.MemOp,
772                                    RegisterClass MaskRC = vt_src2.KRCWM,
773                                    dag LdDAG = (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1),
774                                                           (vt_src2.VT (vt_src2.LdFrag addr:$src2)))),
775                                    dag MaskLdDAG = (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
776                                                               (vt_src2.VT (vt_src2.LdFrag addr:$src2))))> {
  // Register-register form.
777  defm rr : AVX512_maskable_cvt<OpCode, MRMSrcReg, vt_dst, (outs vt_dst.RC:$dst),
778                      (ins vt_src1.RC:$src1, vt_src2.RC:$src2),
779                      (ins vt_dst.RC:$src0, MaskRC:$mask, vt_src1.RC:$src1, vt_src2.RC:$src2),
780                      (ins MaskRC:$mask, vt_src1.RC:$src1, vt_src2.RC:$src2),
781                      OpcodeStr, "$src2, $src1", "$src1, $src2",
782                      (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1),
783                                         (vt_src2.VT vt_src2.RC:$src2))),
784                      (vselect_mask MaskRC:$mask,
785                        (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
786                        (vt_src2.VT vt_src2.RC:$src2))),
787                        vt_dst.RC:$src0),
788                      (vselect_mask MaskRC:$mask,
789                        (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
790                        (vt_src2.VT vt_src2.RC:$src2))),
791                        vt_dst.ImmAllZerosV)>,
792                      EVEX, VVVV, Sched<[sched]>;
  // Full-vector memory form; reuses the LdDAG / MaskLdDAG parameters.
793  let mayLoad = 1 in
794  defm rm : AVX512_maskable_cvt<OpCode, MRMSrcMem, vt_dst, (outs vt_dst.RC:$dst),
795                      (ins vt_src1.RC:$src1, MemOp:$src2),
796                      (ins vt_dst.RC:$src0, MaskRC:$mask, vt_src1.RC:$src1, MemOp:$src2),
797                      (ins MaskRC:$mask, vt_src1.RC:$src1, MemOp:$src2),
798                      OpcodeStr, "$src2, $src1", "$src1, $src2",
799                      LdDAG,
800                      (vselect_mask MaskRC:$mask, MaskLdDAG, vt_dst.RC:$src0),
801                      (vselect_mask MaskRC:$mask, MaskLdDAG, vt_dst.ImmAllZerosV)>,
802                      EVEX, VVVV, Sched<[sched]>;
803
  // Broadcast memory form; the asm string appends the Broadcast suffix to $src2.
804  let mayLoad = 1 in
805  defm rmb : AVX512_maskable_cvt<OpCode, MRMSrcMem, vt_dst, (outs vt_dst.RC:$dst),
806                      (ins vt_src1.RC:$src1, vt_src2.ScalarMemOp:$src2),
807                      (ins vt_dst.RC:$src0, MaskRC:$mask, vt_src1.RC:$src1,
808                           vt_src2.ScalarMemOp:$src2),
809                      (ins MaskRC:$mask, vt_src1.RC:$src1, vt_src2.ScalarMemOp:$src2),
810                      OpcodeStr,
811                      "${src2}"#Broadcast#", $src1", "$src1, ${src2}"#Broadcast,
812                      (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1), (vt_src2.VT
813                                  (vt_src2.BroadcastLdFrag addr:$src2)))),
814                      (vselect_mask MaskRC:$mask,
815                                       (vt_dst.VT
816                                        (MaskOpNode
817                                         (vt_src1.VT vt_src1.RC:$src1), (vt_src2.VT
818                                          (vt_src2.BroadcastLdFrag addr:$src2)))),
819                                       vt_dst.RC:$src0),
820                      (vselect_mask MaskRC:$mask,
821                                       (vt_dst.VT
822                                        (MaskOpNode
823                                         (vt_src1.VT vt_src1.RC:$src1),
824                                         (vt_src2.VT
825                                          (vt_src2.BroadcastLdFrag addr:$src2)))),
826                                       vt_dst.ImmAllZerosV)>,
827                      EVEX, VVVV, EVEX_B, Sched<[sched]>;
828}
829
830//TODO: Merge into avx512_cvt_trunc
// Instantiates avx10_convert_3op_packed at all three widths.
//   Z    - HasAVX10_2_512: dst = vt_dst.info256, src1 = vt_dst.info512,
//          src2 = vt_src.info512.
//   Z256 - HasAVX10_2:     dst = vt_dst.info128, src1 = vt_dst.info256,
//          src2 = vt_src.info256.
//   Z128 - HasAVX10_2:     dst = src1 = vt_dst.info128, src2 = vt_src.info128,
//          with null_frag for both nodes so the instruction carries no
//          patterns of its own.
// Z and Z256 pass OpNode for both the unmasked and the masked node; the
// MaskOpNode parameter is only used by the explicit Z128 patterns below,
// where it takes (src1, src2, passthru-or-zero, mask).
// bcast128 / loadVT128 / maskRC128 default to vt_src.info128's
// BroadcastLdFrag / LdFrag / KRCWM.
831multiclass avx10_convert_3op<bits<8> OpCode, string OpcodeStr,
832           AVX512VLVectorVTInfo vt_dst, AVX512VLVectorVTInfo vt_src,
833           X86SchedWriteWidths sched,
834           SDPatternOperator OpNode,
835           SDPatternOperator MaskOpNode,
836           PatFrag bcast128 = vt_src.info128.BroadcastLdFrag,
837           PatFrag loadVT128 = vt_src.info128.LdFrag,
838           RegisterClass maskRC128 = vt_src.info128.KRCWM> {
839  let Predicates = [HasAVX10_2_512] in
840    defm Z : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info256,
841               vt_dst.info512, vt_src.info512, OpNode, OpNode, sched.ZMM>,
842               EVEX_V512, EVEX_CD8<16, CD8VF>;
843  let Predicates = [HasAVX10_2] in {
844    defm Z256 : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info128,
845                  vt_dst.info256, vt_src.info256, OpNode, OpNode, sched.YMM>,
846                  EVEX_V256, EVEX_CD8<16, CD8VF>;
847    defm Z128 : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info128,
848                  vt_dst.info128, vt_src.info128,
849                  null_frag, null_frag, sched.XMM>,
850                  EVEX_V128, EVEX_CD8<16, CD8VF>;
851    // Special patterns to allow use of MaskOpNode for masking 128 version. Instruction
852    // patterns have been disabled with null_frag.
    // Register source: unmasked, merge-masked (rrk), zero-masked (rrkz).
853    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
854                                         (vt_src.info128.VT VR128X:$src2))),
855              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src1, VR128X:$src2)>;
856    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
857                          (vt_src.info128.VT VR128X:$src2),
858                          (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
859              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask,
860                          VR128X:$src1, VR128X:$src2)>;
861    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
862                          (vt_src.info128.VT VR128X:$src2),
863                          vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
864              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask,
865                          VR128X:$src1, VR128X:$src2)>;
866
    // Full-vector load source: rm, rmk, rmkz.
867    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
868                                         (loadVT128 addr:$src2))),
869              (!cast<Instruction>(NAME # "Z128rm") VR128X:$src1, addr:$src2)>;
870    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
871                          (loadVT128 addr:$src2),
872                          (vt_dst.info128.VT VR128X:$src0),
873                          maskRC128:$mask),
874              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask,
875                          VR128X:$src1, addr:$src2)>;
876    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
877                          (loadVT128 addr:$src2),
878                          vt_dst.info128.ImmAllZerosV,
879                          maskRC128:$mask),
880              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask,
881                          VR128X:$src1, addr:$src2)>;
882
    // Broadcast load source: rmb, rmbk, rmbkz.
883    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
884                                         (vt_src.info128.VT (bcast128 addr:$src2)))),
885              (!cast<Instruction>(NAME # "Z128rmb") VR128X:$src1, addr:$src2)>;
886    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
887                          (vt_src.info128.VT (bcast128 addr:$src2)),
888                          (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
889              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask,
890                           VR128X:$src1, addr:$src2)>;
891    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
892                          (vt_src.info128.VT (bcast128 addr:$src2)),
893                          vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
894              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask,
895                           VR128X:$src1, addr:$src2)>;
896  }
897}
898
// VCVTBIASPH2* family, built with avx10_convert_3op (dst avx512vl_i8_info,
// src avx512vl_f16_info), all with the PS prefix. Each passes an unmasked
// node (X86vcvtbiasph2*) and a masked node (X86vmcvtbiasph2*).
// Opcode / map: BF8 0x74 in T8, BF8S 0x74 in T_MAP5, HF8 0x18 in T_MAP5,
// HF8S 0x1b in T_MAP5.
899defm VCVTBIASPH2BF8 : avx10_convert_3op<0x74, "vcvtbiasph2bf8",
900                                        avx512vl_i8_info, avx512vl_f16_info,
901                                        SchedWriteCvtPD2PS,
902                                        X86vcvtbiasph2bf8, X86vmcvtbiasph2bf8>,
903                                        T8, PS;
904defm VCVTBIASPH2BF8S : avx10_convert_3op<0x74, "vcvtbiasph2bf8s",
905                                         avx512vl_i8_info, avx512vl_f16_info,
906                                         SchedWriteCvtPD2PS,
907                                         X86vcvtbiasph2bf8s, X86vmcvtbiasph2bf8s>,
908                                         T_MAP5, PS;
909defm VCVTBIASPH2HF8 : avx10_convert_3op<0x18, "vcvtbiasph2hf8",
910                                        avx512vl_i8_info, avx512vl_f16_info,
911                                        SchedWriteCvtPD2PS,
912                                        X86vcvtbiasph2hf8, X86vmcvtbiasph2hf8>,
913                                        T_MAP5, PS;
914defm VCVTBIASPH2HF8S : avx10_convert_3op<0x1b, "vcvtbiasph2hf8s",
915                                         avx512vl_i8_info, avx512vl_f16_info,
916                                         SchedWriteCvtPD2PS,
917                                         X86vcvtbiasph2hf8s, X86vmcvtbiasph2hf8s>,
918                                         T_MAP5, PS;
919
// Single-source VCVTPH2* family, built with avx512_cvt_trunc_ne (dst
// avx512vl_i8_info, src avx512vl_f16_info), all with the XS prefix and
// EVEX_CD8<16, CD8VF>. Each passes an unmasked node (X86vcvtph2*) and a
// masked node (X86vmcvtph2*).
// Opcode / map: BF8 0x74 in T8, BF8S 0x74 in T_MAP5, HF8 0x18 in T_MAP5,
// HF8S 0x1b in T_MAP5.
// The predicate lists here are passed as ([HasAVX10_2], [HasAVX10_2_512]),
// the opposite order from the avx512_binop_all uses earlier in this file —
// presumably matching avx512_cvt_trunc_ne's parameter order; verify.
920defm VCVTPH2BF8 : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8", avx512vl_i8_info,
921                                        avx512vl_f16_info, SchedWriteCvtPD2PS,
922                                        X86vcvtph2bf8, X86vmcvtph2bf8,
923                                        [HasAVX10_2], [HasAVX10_2_512]>,
924                                        T8, XS, EVEX_CD8<16, CD8VF>;
925
926defm VCVTPH2BF8S : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8s", avx512vl_i8_info,
927                                         avx512vl_f16_info, SchedWriteCvtPD2PS,
928                                         X86vcvtph2bf8s, X86vmcvtph2bf8s,
929                                         [HasAVX10_2], [HasAVX10_2_512]>,
930                                         T_MAP5, XS, EVEX_CD8<16, CD8VF>;
931
932defm VCVTPH2HF8 : avx512_cvt_trunc_ne<0x18, "vcvtph2hf8", avx512vl_i8_info,
933                                        avx512vl_f16_info, SchedWriteCvtPD2PS,
934                                        X86vcvtph2hf8, X86vmcvtph2hf8,
935                                        [HasAVX10_2], [HasAVX10_2_512]>,
936                                        T_MAP5, XS, EVEX_CD8<16, CD8VF>;
937
938defm VCVTPH2HF8S : avx512_cvt_trunc_ne<0x1b, "vcvtph2hf8s", avx512vl_i8_info,
939                                         avx512vl_f16_info, SchedWriteCvtPD2PS,
940                                         X86vcvtph2hf8s, X86vmcvtph2hf8s,
941                                         [HasAVX10_2], [HasAVX10_2_512]>,
942                                         T_MAP5, XS, EVEX_CD8<16, CD8VF>;
943
// Single-source packed conversion at one vector width with register (rr) and
// memory (rm) forms only — no broadcast form is defined here.
//   _dest / _src - VT info of destination and source.
//   OpNode       - node used for both pattern slots of AVX512_maskable_split.
//   x86memop     - memory operand class for the rm form.
//   ld_dag       - load DAG wrapped in _src.VT for the rm form; defaults to
//                  (load addr:$src).
// The rm form is scheduled with sched.Folded only.
944multiclass avx10_convert_2op_nomb_packed<bits<8> opc, string OpcodeStr,
945                                         X86VectorVTInfo _dest, X86VectorVTInfo _src,
946                                         SDNode OpNode, X86MemOperand x86memop,
947                                         X86FoldableSchedWrite sched,
948                                         dag ld_dag = (load addr:$src)> {
949  let ExeDomain = _dest.ExeDomain in {
950  defm rr : AVX512_maskable_split<opc, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
951                                  (ins _src.RC:$src), OpcodeStr, "$src", "$src",
952                                  (OpNode (_src.VT _src.RC:$src)),
953                                  (OpNode (_src.VT _src.RC:$src))>,
954                                 Sched<[sched]>;
955  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _dest, (outs _dest.RC:$dst),
956                                  (ins x86memop:$src), OpcodeStr, "$src", "$src",
957                                  (OpNode (_src.VT ld_dag)),
958                                  (OpNode (_src.VT ld_dag))>,
959                                 Sched<[sched.Folded]>;
960  }
961}
962
// Instantiates avx10_convert_2op_nomb_packed at all three widths, always with
// WriteCvtPH2PSZ as the scheduling class:
//   Z    - HasAVX10_2_512: dest info512, src info256, f256mem.
//   Z128 - HasAVX10_2:     dest info128, src info128, f64mem.
//   Z256 - HasAVX10_2:     dest info256, src info128, f128mem.
// In each case the memory operand is half the destination vector width.
963multiclass avx10_convert_2op_nomb<string OpcodeStr, AVX512VLVectorVTInfo _dest,
964                                  AVX512VLVectorVTInfo _src, bits<8> opc, SDNode OpNode> {
965  let Predicates = [HasAVX10_2_512] in
966  defm Z : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info512, _src.info256,
967                                         OpNode, f256mem, WriteCvtPH2PSZ>, EVEX_V512;
968  let Predicates = [HasAVX10_2] in {
969  defm Z128 : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info128, _src.info128,
970                                            OpNode, f64mem, WriteCvtPH2PSZ>, EVEX_V128;
971  defm Z256 : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info256, _src.info128,
972                                            OpNode, f128mem, WriteCvtPH2PSZ>, EVEX_V256;
973  }
974}
975
// VCVTHF82PH: 8-bit-element source (avx512vl_i8_info) -> f16 destination
// (avx512vl_f16_info), opcode 0x1e in T_MAP5, selected via X86vcvthf82ph.
976defm VCVTHF82PH : avx10_convert_2op_nomb<"vcvthf82ph", avx512vl_f16_info,
977                                         avx512vl_i8_info, 0x1e, X86vcvthf82ph>,
978                  AVX512XDIi8Base, T_MAP5, EVEX, EVEX_CD8<16, CD8VH>;
979
980//-------------------------------------------------
981// AVX10 BF16 instructions
982//-------------------------------------------------
983
984// VADDBF16, VSUBBF16, VMULBF16, VDIVBF16, VMAXBF16, VMINBF16
// Intrinsic-based BF16 binary op at all three widths. Instead of taking an
// SDNode, the node is looked up by name as
// "int_x86_avx10_" # OpcodeStr # "bf16" # <128|256|512>, and the same
// intrinsic is passed for both node arguments of avx512_fp_packed.
//   Z             - HasAVX10_2_512, v32bf16_info, sched.PH.ZMM.
//   Z128 / Z256   - HasAVX10_2, v8bf16x_info / v16bf16x_info,
//                   sched.PH.XMM / sched.PH.YMM.
// All widths use T_MAP5, PD and EVEX_CD8<16, CD8VF>.
985multiclass avx10_fp_binop_int_bf16<bits<8> opc, string OpcodeStr,
986                                      X86SchedWriteSizes sched,
987                                      bit IsCommutable = 0> {
988  let Predicates = [HasAVX10_2_512] in
989    defm Z : avx512_fp_packed<opc, OpcodeStr,
990                              !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16512"),
991                              !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16512"),
992                              v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
993                              T_MAP5, PD, EVEX_CD8<16, CD8VF>;
994  let Predicates = [HasAVX10_2] in {
995    defm Z128 : avx512_fp_packed<opc, OpcodeStr,
996                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16128"),
997                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16128"),
998                                 v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
999                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1000    defm Z256 : avx512_fp_packed<opc, OpcodeStr,
1001                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16256"),
1002                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16256"),
1003                                 v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
1004                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1005  }
1006}
1007
// SDNode-based BF16 binary op at all three widths, built on avx512_fp_packed.
//   OpNode       - node for the unmasked pattern.
//   MaskOpNode   - node for the masked patterns; defaults to OpNode.
//   IsCommutable - forwarded to avx512_fp_packed; defaults to 0.
//   Z            - HasAVX10_2_512, v32bf16_info, sched.PH.ZMM.
//   Z128 / Z256  - HasAVX10_2, v8bf16x_info / v16bf16x_info,
//                  sched.PH.XMM / sched.PH.YMM.
// All widths use T_MAP5, PD and EVEX_CD8<16, CD8VF>.
1008multiclass avx10_fp_binop_bf16<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
1009                                X86SchedWriteSizes sched,
1010                                bit IsCommutable = 0,
1011                                SDPatternOperator MaskOpNode = OpNode> {
1012  let Predicates = [HasAVX10_2_512] in
1013    defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
1014                              v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
1015                              T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1016  let Predicates = [HasAVX10_2] in {
1017    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
1018                                 v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
1019                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1020    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
1021                                 v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
1022                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
1023  }
1024}
1025
// BF16 arithmetic instructions. The enclosing `let` gives every record an
// empty Uses list and mayRaiseFPException = 0.
// add/sub/mul/div select from the generic fadd/fsub/fmul/fdiv nodes (add and
// mul are marked commutable); min/max go through the intrinsic-based
// multiclass and are not marked commutable.
// NOTE(review): presumably the empty Uses list overrides an MXCSR use that
// avx512_fp_packed would otherwise add — confirm against that multiclass.
1026let Uses = []<Register>, mayRaiseFPException = 0 in {
1027defm VADDBF16 : avx10_fp_binop_bf16<0x58, "vadd", fadd, SchedWriteFAddSizes, 1>;
1028defm VSUBBF16 : avx10_fp_binop_bf16<0x5C, "vsub", fsub, SchedWriteFAddSizes, 0>;
1029defm VMULBF16 : avx10_fp_binop_bf16<0x59, "vmul", fmul, SchedWriteFMulSizes, 1>;
1030defm VDIVBF16 : avx10_fp_binop_bf16<0x5E, "vdiv", fdiv, SchedWriteFDivSizes, 0>;
1031defm VMINBF16 : avx10_fp_binop_int_bf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>;
1032defm VMAXBF16 : avx10_fp_binop_int_bf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
1033}
1034
1035// VCOMISBF16
// Scalar BF16 compare, opcode 0x2F in T_MAP5 with PD; Defs lists EFLAGS, Uses
// is empty and mayRaiseFPException is 0. Two defm's share the VCOMISBF16Z
// prefix:
//   - sse12_ord_cmp on FR16X with null_frag, i.e. no selection pattern yet
//     (see the TODO below);
//   - sse12_ord_cmp_int on VR128X with X86comi over v8bf16, marked
//     isCodeGenOnly.
1036let Uses = []<Register>, mayRaiseFPException = 0,
1037  Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
1038  //TODO: Replace null_frag with X86fcmp to support lowering `fcmp oeq bfloat *`
1039  //which may require extend supports on BFR16X, loadbf16, ...
1040  defm VCOMISBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16,
1041                                   "comisbf16", SSEPackedSingle>, T_MAP5, PD, EVEX,
1042                                   VEX_LIG, EVEX_CD8<16, CD8VT1>;
1043
1044  let isCodeGenOnly = 1 in {
1045    defm VCOMISBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
1046                                         sse_load_bf16, "comisbf16", SSEPackedSingle>,
1047                                         T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
1048  }
1049}
1050
1051// VCMPBF16
// Packed BF16 compare at one vector width, opcode 0xC2, writing a mask
// register (_.KRC). The mnemonic is "vcmp" # _.Suffix and the condition code
// is the u8imm operand $cc. Three forms are defined through
// AVX512_maskable_cmp, each with an X86cmpm pattern and an X86cmpm_su pattern:
//   rri  - register $src2.
//   rmi  - full-vector memory $src2 (_.MemOp, _.LdFrag).
//   rmbi - broadcast scalar memory $src2 (_.ScalarMemOp, _.BroadcastLdFrag);
//          sets EVEX_B.
// All forms have mayRaiseFPException = 0.
1052multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1053  let mayRaiseFPException = 0 in {
  // NOTE(review): the trailing `1` argument is presumably IsCommutable —
  // confirm against AVX512_maskable_cmp's signature.
1054  defm rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1055                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1056                   "vcmp"#_.Suffix,
1057                   "$cc, $src2, $src1", "$src1, $src2, $cc",
1058                   (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1059                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1060                   1>, Sched<[sched]>;
1061
1062  defm rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1063                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
1064                "vcmp"#_.Suffix,
1065                "$cc, $src2, $src1", "$src1, $src2, $cc",
1066                (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
1067                         timm:$cc),
1068                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
1069                            timm:$cc)>,
1070                Sched<[sched.Folded, sched.ReadAfterFold]>;
1071
1072  defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1073                (outs _.KRC:$dst),
1074                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1075                "vcmp"#_.Suffix,
1076                "$cc, ${src2}"#_.BroadcastStr#", $src1",
1077                "$src1, ${src2}"#_.BroadcastStr#", $cc",
1078                (X86cmpm (_.VT _.RC:$src1),
1079                         (_.VT (_.BroadcastLdFrag addr:$src2)),
1080                         timm:$cc),
1081                (X86cmpm_su (_.VT _.RC:$src1),
1082                            (_.VT (_.BroadcastLdFrag addr:$src2)),
1083                            timm:$cc)>,
1084                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
1085  }
1086}
1087
// Expands avx10_vcmp_common_bf16 for the three vector widths of `_`:
// Z (512-bit) requires HasAVX10_2_512, Z128/Z256 require HasAVX10_2. Each
// width picks the matching member of `sched` (ZMM/XMM/YMM).
1088multiclass avx10_vcmp_bf16<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
1089  let Predicates = [HasAVX10_2_512] in
1090    defm Z : avx10_vcmp_common_bf16<sched.ZMM, _.info512>, EVEX_V512;
1091  let Predicates = [HasAVX10_2] in {
1092    defm Z128 : avx10_vcmp_common_bf16<sched.XMM, _.info128>, EVEX_V128;
1093    defm Z256 : avx10_vcmp_common_bf16<sched.YMM, _.info256>, EVEX_V256;
1094  }
1095}
1096
// Packed BF16 compares, instantiated with the SchedWriteFCmp scheduling
// classes and the avx512vl_bf16_info type bundle.
1097defm VCMPBF16 : avx10_vcmp_bf16<SchedWriteFCmp, avx512vl_bf16_info>,
1098                                 AVX512XDIi8Base, EVEX, VVVV,
1099                                 EVEX_CD8<16, CD8VF>, TA;
1100
1101
1102// VSQRTBF16
// Packed BF16 square root for the three vector widths. The mnemonic is
// OpcodeStr with "bf16" appended. The 512-bit form (Z) is gated on
// HasAVX10_2_512, the 128/256-bit forms (Z128/Z256) on HasAVX10_2; each width
// uses the matching sched.PH.{ZMM,XMM,YMM} scheduling class.
1103multiclass avx10_sqrt_packed_bf16<bits<8> opc, string OpcodeStr,
1104                                  X86SchedWriteSizes sched> {
1105  let Predicates = [HasAVX10_2_512] in
1106  defm Z : avx512_sqrt_packed<opc, OpcodeStr#"bf16",
1107                              sched.PH.ZMM, v32bf16_info>,
1108                              EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
1109  let Predicates = [HasAVX10_2] in {
1110    defm Z128 : avx512_sqrt_packed<opc, OpcodeStr#"bf16",
1111                                   sched.PH.XMM, v8bf16x_info>,
1112                                   EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
1113    defm Z256 : avx512_sqrt_packed<opc, OpcodeStr#"bf16",
1114                                   sched.PH.YMM, v16bf16x_info>,
1115                                   EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
1116  }
1117}
1118
// VSQRTBF16 (opcode 0x51): instantiated with no implicit register uses and
// marked as not raising FP exceptions.
1119let Uses = []<Register>, mayRaiseFPException = 0 in
1120defm VSQRTBF16 : avx10_sqrt_packed_bf16<0x51, "vsqrt", SchedWriteFSqrtSizes>;
1121
1122// VRSQRTBF16, VRCPBF16, VGETEXPBF16 (VSQRTBF16 is defined separately above)
// Wraps avx512_fp14_p for packed BF16 at the three vector widths. The mnemonic
// is OpcodeStr with "bf16" appended and OpNode is forwarded unchanged.
// BF16Z (512-bit) is gated on HasAVX10_2_512; BF16Z128/BF16Z256 on HasAVX10_2.
1123multiclass avx10_fp14_bf16<bits<8> opc, string OpcodeStr, SDNode OpNode,
1124                            X86SchedWriteWidths sched> {
1125  let Predicates = [HasAVX10_2_512] in
1126  defm BF16Z : avx512_fp14_p<opc, OpcodeStr#"bf16",
1127                             OpNode, sched.ZMM, v32bf16_info>,
1128                             EVEX_V512;
1129  let Predicates = [HasAVX10_2] in {
1130    defm BF16Z128 : avx512_fp14_p<opc, OpcodeStr#"bf16",
1131                                  OpNode, sched.XMM, v8bf16x_info>,
1132                                  EVEX_V128;
1133    defm BF16Z256 : avx512_fp14_p<opc, OpcodeStr#"bf16",
1134                                  OpNode, sched.YMM, v16bf16x_info>,
1135                                  EVEX_V256;
1136  }
1137}
1138
// Instantiations of avx10_fp14_bf16. The multiclass appends "BF16Z*" to the
// record names and "bf16" to the mnemonics, so these yield VRSQRTBF16Z*,
// VRCPBF16Z* and VGETEXPBF16Z*.
1139defm VRSQRT  : avx10_fp14_bf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>,
1140                                T_MAP6, PS, EVEX_CD8<16, CD8VF>;
1141defm VRCP    : avx10_fp14_bf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>,
1142                                T_MAP6, PS, EVEX_CD8<16, CD8VF>;
1143defm VGETEXP : avx10_fp14_bf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>,
1144                                T_MAP6, PS, EVEX_CD8<16, CD8VF>;
1145
1146// VSCALEFBF16
// Packed BF16 scalef for the three vector widths, built on avx512_fp_scalef_p
// with the X86scalef node. Z (512-bit) is gated on HasAVX10_2_512; Z128/Z256
// on HasAVX10_2. All widths carry the same T_MAP6, PS and EVEX_CD8<16, CD8VF>
// mixins (listed in a different order for Z than for Z128/Z256).
1147multiclass avx10_fp_scalef_bf16<bits<8> opc, string OpcodeStr,
1148                                X86SchedWriteWidths sched> {
1149  let Predicates = [HasAVX10_2_512] in
1150    defm Z : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32bf16_info>,
1151                                EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
1152  let Predicates = [HasAVX10_2] in {
1153    defm Z128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8bf16x_info>,
1154                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
1155    defm Z256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16bf16x_info>,
1156                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
1157  }
1158}
1159
// VSCALEFBF16 (opcode 0x2C): no implicit register uses, not marked as raising
// FP exceptions; scheduled with the SchedWriteFAdd classes.
1160let Uses = []<Register>, mayRaiseFPException = 0 in
1161defm VSCALEFBF16 : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>;
1162
1163// VREDUCEBF16, VRNDSCALEBF16, VGETMANTBF16
// Expands avx512_unary_fp_packed_imm for the three vector widths of `_`,
// forwarding opc, OpcodeStr, OpNode and MaskOpNode unchanged. Z (512-bit) is
// gated on HasAVX10_2_512; Z128/Z256 on HasAVX10_2.
1164multiclass avx10_common_unary_fp_packed_imm_bf16<string OpcodeStr,
1165            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
1166            SDPatternOperator MaskOpNode, X86SchedWriteWidths sched> {
1167  let Predicates = [HasAVX10_2_512] in
1168    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
1169                                           sched.ZMM, _.info512>, EVEX_V512;
1170  let Predicates = [HasAVX10_2] in {
1171    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
1172                                           sched.XMM, _.info128>, EVEX_V128;
1173    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
1174                                           sched.YMM, _.info256>, EVEX_V256;
1175  }
1176}
1177
// Packed BF16 reduce / round-scale / get-mantissa instantiations. All three
// share the same encoding mixins and SchedWriteFRnd, have no implicit register
// uses and are marked as not raising FP exceptions. VRNDSCALEBF16 is the only
// one whose unmasked node (X86any_VRndScale) differs from its masked node
// (X86VRndScale).
1178let Uses = []<Register>, mayRaiseFPException = 0 in {
1179defm VREDUCEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vreduce", avx512vl_bf16_info, 0x56,
1180                            X86VReduce, X86VReduce, SchedWriteFRnd>,
1181                            AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
1182defm VRNDSCALEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vrndscale", avx512vl_bf16_info, 0x08,
1183                            X86any_VRndScale, X86VRndScale, SchedWriteFRnd>,
1184                            AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
1185defm VGETMANTBF16 : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26,
1186                            X86VGetMant, X86VGetMant, SchedWriteFRnd>,
1187                            AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
1188}
1189
1190// VFPCLASSBF16
// Packed BF16 fpclass for the three vector widths, built on
// avx512_vector_fpclass with the avx512vl_bf16_info types. Each width passes a
// distinct string ("z", "x", "y") and an empty register list to the base
// multiclass. Z (512-bit) is gated on HasAVX10_2_512; Z128/Z256 on HasAVX10_2.
// NOTE(review): the "z"/"x"/"y" argument is presumably a mnemonic-suffix /
// alias disambiguator - confirm against avx512_vector_fpclass.
1191multiclass avx10_fp_fpclass_bf16<string OpcodeStr, bits<8> opcVec,
1192                                  X86SchedWriteWidths sched> {
1193  let Predicates = [HasAVX10_2_512] in
1194    defm Z : avx512_vector_fpclass<opcVec, OpcodeStr, sched.ZMM,
1195                                   avx512vl_bf16_info.info512, "z",
1196                                   []<Register>>, EVEX_V512;
1197  let Predicates = [HasAVX10_2] in {
1198    defm Z128 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.XMM,
1199                                      avx512vl_bf16_info.info128, "x",
1200                                      []<Register>>, EVEX_V128;
1201    defm Z256 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.YMM,
1202                                      avx512vl_bf16_info.info256, "y",
1203                                      []<Register>>, EVEX_V256;
1204  }
1205}
1206
// VFPCLASSBF16 (opcode 0x66), scheduled with the SchedWriteFCmp classes.
1207defm VFPCLASSBF16 : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>,
1208                                      AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
1209
1210// VF[,N]M[ADD,SUB][132,213,231]BF16
// 213-form packed BF16 FMA for the three vector widths, built on
// avx512_fma3p_213_rm. OpNode and MaskOpNode are forwarded unchanged. Z
// (512-bit) is gated on HasAVX10_2_512; Z128/Z256 on HasAVX10_2. All widths
// use T_MAP6, PS and EVEX_CD8<16, CD8VF>.
1211multiclass avx10_fma3p_213_bf16<bits<8> opc, string OpcodeStr,
1212                                SDPatternOperator OpNode, SDNode MaskOpNode,
1213                                X86SchedWriteWidths sched> {
1214  let Predicates = [HasAVX10_2_512] in
1215    defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
1216                               sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
1217                               EVEX_CD8<16, CD8VF>;
1218  let Predicates = [HasAVX10_2] in {
1219    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
1220                               sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
1221                               EVEX_CD8<16, CD8VF>;
1222    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
1223                               sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
1224                               EVEX_CD8<16, CD8VF>;
1225  }
1226}
1227
// 213-form BF16 FMA family (opcodes 0xA8/0xAA/0xAC/0xAE). Each entry passes an
// "any_" node as OpNode and the plain node as MaskOpNode. No implicit register
// uses; not marked as raising FP exceptions.
1228let Uses = []<Register>, mayRaiseFPException = 0 in {
1229defm VFMADD213BF16 : avx10_fma3p_213_bf16<0xA8, "vfmadd213bf16", any_fma,
1230                                          fma, SchedWriteFMA>;
1231defm VFMSUB213BF16 : avx10_fma3p_213_bf16<0xAA, "vfmsub213bf16", X86any_Fmsub,
1232                                          X86Fmsub, SchedWriteFMA>;
1233defm VFNMADD213BF16 : avx10_fma3p_213_bf16<0xAC, "vfnmadd213bf16", X86any_Fnmadd,
1234                                           X86Fnmadd, SchedWriteFMA>;
1235defm VFNMSUB213BF16 : avx10_fma3p_213_bf16<0xAE, "vfnmsub213bf16", X86any_Fnmsub,
1236                                           X86Fnmsub, SchedWriteFMA>;
1237}
1238
// 231-form packed BF16 FMA for the three vector widths, built on
// avx512_fma3p_231_rm. OpNode and MaskOpNode are forwarded unchanged. Z
// (512-bit) is gated on HasAVX10_2_512; Z128/Z256 on HasAVX10_2. All widths
// use T_MAP6, PS and EVEX_CD8<16, CD8VF>.
1239multiclass avx10_fma3p_231_bf16<bits<8> opc, string OpcodeStr,
1240                                 SDPatternOperator OpNode, SDNode MaskOpNode,
1241                                 X86SchedWriteWidths sched> {
1242  let Predicates = [HasAVX10_2_512] in
1243    defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
1244                               sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
1245                               EVEX_CD8<16, CD8VF>;
1246  let Predicates = [HasAVX10_2] in {
1247    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
1248                               sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
1249                               EVEX_CD8<16, CD8VF>;
1250    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
1251                               sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
1252                               EVEX_CD8<16, CD8VF>;
1253  }
1254}
1255
// 231-form BF16 FMA family (opcodes 0xB8/0xBA/0xBC/0xBE). Each entry passes an
// "any_" node as OpNode and the plain node as MaskOpNode. No implicit register
// uses; not marked as raising FP exceptions.
1256let Uses = []<Register>, mayRaiseFPException = 0 in {
1257defm VFMADD231BF16 : avx10_fma3p_231_bf16<0xB8, "vfmadd231bf16", any_fma,
1258                                          fma, SchedWriteFMA>;
1259defm VFMSUB231BF16 : avx10_fma3p_231_bf16<0xBA, "vfmsub231bf16", X86any_Fmsub,
1260                                          X86Fmsub, SchedWriteFMA>;
1261defm VFNMADD231BF16 : avx10_fma3p_231_bf16<0xBC, "vfnmadd231bf16", X86any_Fnmadd,
1262                                           X86Fnmadd, SchedWriteFMA>;
1263defm VFNMSUB231BF16 : avx10_fma3p_231_bf16<0xBE, "vfnmsub231bf16", X86any_Fnmsub,
1264                                           X86Fnmsub, SchedWriteFMA>;
1265}
1266
// 132-form packed BF16 FMA for the three vector widths, built on
// avx512_fma3p_132_rm. OpNode and MaskOpNode are forwarded unchanged. Z
// (512-bit) is gated on HasAVX10_2_512; Z128/Z256 on HasAVX10_2. All widths
// use T_MAP6, PS and EVEX_CD8<16, CD8VF>.
1267multiclass avx10_fma3p_132_bf16<bits<8> opc, string OpcodeStr,
1268                                 SDPatternOperator OpNode, SDNode MaskOpNode,
1269                                 X86SchedWriteWidths sched> {
1270  let Predicates = [HasAVX10_2_512] in
1271    defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
1272                                 sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
1273                                 EVEX_CD8<16, CD8VF>;
1274  let Predicates = [HasAVX10_2] in {
1275    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
1276                                    sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
1277                                    EVEX_CD8<16, CD8VF>;
1278    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
1279                                    sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
1280                                    EVEX_CD8<16, CD8VF>;
1281  }
1282}
1283
// 132-form BF16 FMA family (opcodes 0x98/0x9A/0x9C/0x9E). Each entry passes an
// "any_" node as OpNode and the plain node as MaskOpNode. No implicit register
// uses; not marked as raising FP exceptions.
1284let Uses = []<Register>, mayRaiseFPException = 0 in {
1285defm VFMADD132BF16 : avx10_fma3p_132_bf16<0x98, "vfmadd132bf16", any_fma,
1286                                          fma, SchedWriteFMA>;
1287defm VFMSUB132BF16 : avx10_fma3p_132_bf16<0x9A, "vfmsub132bf16", X86any_Fmsub,
1288                                             X86Fmsub, SchedWriteFMA>;
1289defm VFNMADD132BF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132bf16", X86any_Fnmadd,
1290                                           X86Fnmadd, SchedWriteFMA>;
1291defm VFNMSUB132BF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132bf16", X86any_Fnmsub,
1292                                           X86Fnmsub, SchedWriteFMA>;
1293}
1294
1295//-------------------------------------------------
1296// AVX10  COMEF instructions
1297//-------------------------------------------------
// Scalar compare-to-EFLAGS on a scalar register class RC. Emits a
// register/register form (rr) and a register/memory form (rm), both
// isCodeGenOnly, 128-bit EVEX encoded, marked as possibly raising FP
// exceptions and tagged SIMD_EXC. The rm form loads its second operand
// through ld_frag and uses the folded-load scheduling classes.
1298multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT,
1299                        SDPatternOperator OpNode, string OpcodeStr,
1300                        X86MemOperand x86memop, PatFrag ld_frag,
1301                        Domain d, X86FoldableSchedWrite sched = WriteFComX>{
1302  let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in {
1303    def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
1304                    OpcodeStr#"\t{$src2, $src1|$src1, $src2}",
1305                    [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>,
1306                    EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
1307    let mayLoad = 1 in {
1308      def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
1309                      OpcodeStr#"\t{$src2, $src1|$src1, $src2}",
1310                      [(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>,
1311                      EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1312    }
1313  }
1314}
1315
// Scalar compare-to-EFLAGS on the vector register class of `_`. Emits:
//   rr_Int  - register / register, matched through OpNode
//   rm_Int  - register / scalar memory operand, loaded through _.LdFrag
//   rrb_Int - register / register with {sae} (EVEX_B), no selection pattern
// All forms are 128-bit EVEX encoded, marked as possibly raising FP exceptions
// and tagged SIMD_EXC.
1316multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
1317                             string OpcodeStr,
1318                             Domain d,
1319                             X86FoldableSchedWrite sched = WriteFComX> {
1320  let ExeDomain = d, mayRaiseFPException = 1 in {
1321    def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
1322                        !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1323                        [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>,
1324                        EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
    // The memory form folds a load, so it uses the folded-load scheduling
    // classes, matching the rm form of avx10_com_ef (it previously used the
    // register-only class).
1325    let mayLoad = 1 in {
1326      def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2),
1327                          !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1328                          [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>,
1329                          EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
1330    }
1331    def rrb_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
1332                        !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
1333                        []>,
1334                        EVEX, EVEX_V128, EVEX_B, Sched<[sched]>, SIMD_EXC;
1335  }
1336}
1337
// VCOMX*/VUCOMX* scalar compares. Every record defines EFLAGS, uses MXCSR and
// requires HasAVX10_2. The first three defms create the scalar-register
// VUCOMX forms via avx10_com_ef (X86ucomi512); the remaining six create the
// vector-register "_Int" forms via avx10_com_ef_int for both VCOMX
// (opcode 0x2f, X86comi512) and VUCOMX (opcode 0x2e, X86ucomi512).
// NOTE(review): the SH variants omit VEX_LIG while SD/SS carry it - confirm
// this is intentional.
1338let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
1339  defm VUCOMXSDZ  :  avx10_com_ef<0x2e, FR64X, f64, X86ucomi512,
1340                                  "vucomxsd", f64mem, loadf64, SSEPackedDouble>,
1341                                  TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
1342  defm VUCOMXSHZ  :  avx10_com_ef<0x2e, FR16X, f16, X86ucomi512,
1343                                  "vucomxsh", f16mem, loadf16, SSEPackedSingle>,
1344                                  T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
1345  defm VUCOMXSSZ  :  avx10_com_ef<0x2e, FR32X, f32, X86ucomi512,
1346                                  "vucomxss", f32mem, loadf32, SSEPackedSingle>,
1347                                  TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1348  defm VCOMXSDZ   :  avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
1349                                      "vcomxsd", SSEPackedDouble>,
1350                                      TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
1351  defm VCOMXSHZ   :  avx10_com_ef_int<0x2f, v8f16x_info, X86comi512,
1352                                      "vcomxsh", SSEPackedSingle>,
1353                                      T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
1354  defm VCOMXSSZ   :  avx10_com_ef_int<0x2f, v4f32x_info, X86comi512,
1355                                      "vcomxss", SSEPackedSingle>,
1356                                      TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1357  defm VUCOMXSDZ  :  avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512,
1358                                      "vucomxsd", SSEPackedDouble>,
1359                                      TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
1360  defm VUCOMXSHZ  :  avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512,
1361                                      "vucomxsh", SSEPackedSingle>,
1362                                      T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
1363  defm VUCOMXSSZ  :  avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512,
1364                                      "vucomxss", SSEPackedSingle>,
1365                                      TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1366}
1367
1368//-------------------------------------------------
1369// AVX10 MOVZXC (COPY) instructions
1370//-------------------------------------------------
// XMM-to-XMM / memory forms of vmovd and vmovw, all gated on HasAVX10_2.
// Only the two "rr" defs carry selection patterns (X86vzmovl on v4i32 and
// v8i16); the rm/mr/rr2 defs are isCodeGenOnly + ForceDisassemble with empty
// patterns. Each `let ... in` without braces applies only to the single def
// that follows it, so the InstAlias records are not affected by them.
1371let Predicates = [HasAVX10_2] in {
  // vmovd xmm, xmm: matches X86vzmovl on v4i32.
1372  def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
1373                                    (ins VR128X:$src),
1374                                    "vmovd\t{$src, $dst|$dst, $src}",
1375                                    [(set VR128X:$dst, (v4i32 (X86vzmovl
1376                                    (v4i32 VR128X:$src))))]>, EVEX,
1377                                    Sched<[WriteVecMoveFromGpr]>;
1378
  // vmovd load form (32-bit memory source).
1379let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
1380  def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
1381                                    (ins i32mem:$src),
1382                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
1383                                     EVEX_CD8<32, CD8VT1>,
1384                                     Sched<[WriteVecLoad]>;
1385
  // vmovd store form (32-bit memory destination).
1386let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
1387  def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
1388                                    (ins i32mem:$dst, VR128X:$src),
1389                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
1390                                    EVEX_CD8<32, CD8VT1>,
1391                                    Sched<[WriteVecStore]>;
1392
  // Alternate register/register encoding of vmovd (opcode 0xD6), reachable
  // from assembly through the "vmovd.s" alias below.
1393let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
1394  def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
1395                                     (ins VR128X:$src),
1396                                     "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
1397                                     Sched<[WriteVecMoveFromGpr]>;
1398  def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
1399                  (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;
1400
  // vmovw xmm, xmm: matches X86vzmovl on v8i16.
1401def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
1402                                  (ins VR128X:$src),
1403                                  "vmovw\t{$src, $dst|$dst, $src}",
1404                                  [(set VR128X:$dst, (v8i16 (X86vzmovl
1405                                  (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
1406                                  Sched<[WriteVecMoveFromGpr]>;
1407
  // vmovw load form (16-bit memory source).
1408let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
1409  def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
1410                                    (ins i16mem:$src),
1411                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
1412                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
1413                                    Sched<[WriteVecLoad]>;
1414
  // vmovw store form. The destination is a 16-bit memory operand, matching
  // the i16mem load form above and the EVEX_CD8<16, CD8VT1> element size
  // (it was previously declared as i32mem).
1415let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
1416  def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
1417                                    (ins i16mem:$dst, VR128X:$src),
1418                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
1419                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
1420                                    Sched<[WriteVecStore]>;
1421
  // Alternate register/register encoding of vmovw (opcode 0x7E), reachable
  // from assembly through the "vmovw.s" alias below.
1422let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
1423  def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
1424                                     (ins VR128X:$src),
1425                                     "vmovw\t{$src, $dst|$dst, $src}",
1426                                     []>, EVEX, T_MAP5,
1427                                     Sched<[WriteVecMoveFromGpr]>;
1428  def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
1429                  (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
1430}
1431
1432// MOVRS
// Single memory-source form ("m") of a MOVRS vector load for vector type `_`.
// The selection pattern calls the intrinsic whose name is built as
// "int_x86_avx10_" # OpStr # _.Size, applied to the address operand.
1433multiclass vmovrs_p<bits<8> opc, string OpStr, X86VectorVTInfo _> {
1434  let ExeDomain = _.ExeDomain in {
1435    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
1436                            (ins _.MemOp:$src), OpStr, "$src", "$src",
1437                            (_.VT (!cast<Intrinsic>("int_x86_avx10_"#OpStr#_.Size)
1438                                   addr:$src))>, EVEX;
1439  }
1440}
1441
// Expands vmovrs_p for the three vector widths of _Vec. Every width requires
// HasMOVRS and In64BitMode; Z (512-bit) additionally requires HasAVX10_2_512,
// Z128/Z256 require HasAVX10_2.
1442multiclass vmovrs_p_vl<bits<8> opc, string OpStr, AVX512VLVectorVTInfo _Vec> {
1443  let Predicates = [HasMOVRS, HasAVX10_2_512, In64BitMode] in
1444    defm Z : vmovrs_p<opc, OpStr, _Vec.info512>, EVEX_V512;
1445  let Predicates = [HasMOVRS, HasAVX10_2, In64BitMode] in {
1446    defm Z128 : vmovrs_p<opc, OpStr, _Vec.info128>, EVEX_V128;
1447    defm Z256 : vmovrs_p<opc, OpStr, _Vec.info256>, EVEX_V256;
1448  }
1449}
1450
// VMOVRS{B,W,D,Q}: all use opcode 0x6f in T_MAP5 and WriteVecLoad. B/W use the
// XD prefix and D/Q use XS; W and Q additionally set REX_W. The EVEX_CD8
// element size follows the element type (8/16/32/64).
1451defm VMOVRSB : vmovrs_p_vl<0x6f, "vmovrsb", avx512vl_i8_info>,
1452                          T_MAP5, XD, EVEX_CD8<8, CD8VF>, Sched<[WriteVecLoad]>;
1453defm VMOVRSW : vmovrs_p_vl<0x6f, "vmovrsw", avx512vl_i16_info>,
1454                          T_MAP5, XD, REX_W, EVEX_CD8<16, CD8VF>, Sched<[WriteVecLoad]>;
1455defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
1456                          T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
1457defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
1458                          T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;
1459
1460// SM4(EVEX)
// EVEX-encoded SM4 instructions for the three vector widths, built on
// SM4_Base. Z128/Z256 require HasSM4 + HasAVX10_2 and set AddedComplexity = 1;
// Z (512-bit) requires HasSM4 + HasAVX10_2_512 and does not set
// AddedComplexity.
// NOTE(review): AddedComplexity = 1 presumably makes these win over the
// VEX-encoded SM4 patterns - confirm against X86InstrSSE.td.
1461multiclass avx10_sm4_base<string OpStr> {
1462  // SM4_Base is in X86InstrSSE.td.
1463  let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in {
1464    defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
1465    defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
1466  }
1467  let Predicates = [HasSM4, HasAVX10_2_512] in
1468    defm Z : SM4_Base<OpStr, VR512, "512", loadv16i32, i512mem>, EVEX_V512;
1469}
1470
// EVEX SM4 instantiations; the two differ only in mnemonic and prefix
// (XS for vsm4key4, XD for vsm4rnds4).
1470defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
1471defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;
1473