//===-- X86InstrAVX10.td - AVX10 Instruction Set -----------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX10 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// VNNI FP16
let ExeDomain = SSEPackedSingle in
defm VDPPHPS : avx512_dpf16ps_sizes<0x52, "vdpphps", X86dpfp16ps, avx512vl_f16_info,
                                    [HasAVX10_2], [HasAVX10_2_512]>,
               T8, PS, EVEX_CD8<32, CD8VF>;

// VNNI INT8
// Each signedness flavour shares opcode 0x50 (0x51 for the saturating "S"
// form) and is distinguished by its prefix class (XD / XS / PS).
defm VPDPBSSD  : VNNI_common<0x50, "vpdpbssd", X86vpdpbssd, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, XD;
defm VPDPBSSDS : VNNI_common<0x51, "vpdpbssds", X86vpdpbssds, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, XD;
defm VPDPBSUD  : VNNI_common<0x50, "vpdpbsud", X86vpdpbsud, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPBSUDS : VNNI_common<0x51, "vpdpbsuds", X86vpdpbsuds, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPBUUD  : VNNI_common<0x50, "vpdpbuud", X86vpdpbuud, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;
defm VPDPBUUDS : VNNI_common<0x51, "vpdpbuuds", X86vpdpbuuds, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;

// VNNI INT16
// Same scheme as INT8: opcode 0xd2 (0xd3 for the saturating form), selected by
// prefix class (XS / PD / PS).
defm VPDPWSUD  : VNNI_common<0xd2, "vpdpwsud", X86vpdpwsud, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPWSUDS : VNNI_common<0xd3, "vpdpwsuds", X86vpdpwsuds, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, XS;
defm VPDPWUSD  : VNNI_common<0xd2, "vpdpwusd", X86vpdpwusd, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, PD;
defm VPDPWUSDS : VNNI_common<0xd3, "vpdpwusds", X86vpdpwusds, SchedWriteVecIMul, 0,
                             [HasAVX10_2], [HasAVX10_2_512]>, PD;
defm VPDPWUUD  : VNNI_common<0xd2, "vpdpwuud", X86vpdpwuud, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;
defm VPDPWUUDS : VNNI_common<0xd3, "vpdpwuuds", X86vpdpwuuds, SchedWriteVecIMul, 1,
                             [HasAVX10_2], [HasAVX10_2_512]>, PS;

// VMPSADBW
defm VMPSADBW : avx512_common_3Op_rm_imm8<0x42, X86Vmpsadbw, "vmpsadbw", SchedWritePSADBW,
                                          avx512vl_i16_info, avx512vl_i8_info,
                                          HasAVX10_2>,
                XS, EVEX_CD8<32, CD8VF>;

//-------------------------------------------------
// AVX10 MINMAX instructions
//-------------------------------------------------

// Packed MINMAX forms for a single vector type VTI, all at opcode 0x52 with an
// 8-bit immediate as the last source:
//   rri  - register source
//   rmi  - full-vector memory source
//   rmbi - broadcast memory source (EVEX_B)
// All three are marked as reading MXCSR and possibly raising FP exceptions.
multiclass avx10_minmax_packed_base<string OpStr, X86VectorVTInfo VTI, SDNode OpNode> {
  let ExeDomain = VTI.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable<0x52, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                               (ins VTI.RC:$src1, VTI.RC:$src2, i32u8imm:$src3), OpStr,
                               "$src3, $src2, $src1", "$src1, $src2, $src3",
                               (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                               (i32 timm:$src3)))>,
               EVEX, VVVV, Sched<[WriteFMAX]>;
    defm rmi : AVX512_maskable<0x52, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                               (ins VTI.RC:$src1, VTI.MemOp:$src2, i32u8imm:$src3), OpStr,
                               "$src3, $src2, $src1", "$src1, $src2, $src3",
                               (VTI.VT (OpNode VTI.RC:$src1, (VTI.LdFrag addr:$src2),
                                               (i32 timm:$src3)))>,
               EVEX, VVVV,
               Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    defm rmbi : AVX512_maskable<0x52, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, i32u8imm:$src3),
                                OpStr, "$src3, ${src2}"#VTI.BroadcastStr#", $src1",
                                "$src1, ${src2}"#VTI.BroadcastStr#", $src3",
                                (VTI.VT (OpNode VTI.RC:$src1, (VTI.BroadcastLdFrag addr:$src2),
                                                (i32 timm:$src3)))>,
                EVEX, VVVV, EVEX_B,
                Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
  }
}

// 512-bit register-register {sae} form of packed MINMAX (Zrrib). It is marked
// as using no registers implicitly and as not raising FP exceptions.
multiclass avx10_minmax_packed_sae<string OpStr, AVX512VLVectorVTInfo VTI, SDNode OpNode> {
  let Uses = []<Register>, mayRaiseFPException = 0 in
  defm Zrrib : AVX512_maskable<0x52, MRMSrcReg, VTI.info512, (outs VTI.info512.RC:$dst),
                               (ins VTI.info512.RC:$src1, VTI.info512.RC:$src2, i32u8imm:$src3), OpStr,
                               "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                               (VTI.info512.VT (OpNode (VTI.info512.VT VTI.info512.RC:$src1),
                                                       (VTI.info512.VT VTI.info512.RC:$src2),
                                                       (i32 timm:$src3)))>,
               EVEX, VVVV, EVEX_B, EVEX_V512, Sched<[WriteFMAX]>;
}

// Instantiates avx10_minmax_packed_base for all three vector lengths: the
// 512-bit form requires HasAVX10_2_512, the 256/128-bit forms HasAVX10_2.
multiclass avx10_minmax_packed<string OpStr, AVX512VLVectorVTInfo VTI, SDNode OpNode> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_minmax_packed_base<OpStr, VTI.info512, OpNode>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_minmax_packed_base<OpStr, VTI.info256, OpNode>, EVEX_V256;
    defm Z128 : avx10_minmax_packed_base<OpStr, VTI.info128, OpNode>, EVEX_V128;
  }
}

// Scalar MINMAX at opcode 0x53:
//   rri / rmi         - isCodeGenOnly forms operating on the scalar FP register
//                       class (_.FRC)
//   rri / rmi ("_Int") - maskable forms operating on VR128X
//   rrib ("_Int")      - maskable {sae} form selected by OpNodeSAE
multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
                               SDNode OpNodeSAE> {
  let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in {
    let mayRaiseFPException = 1 in {
      let isCodeGenOnly = 1 in {
        def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst),
                            (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
                            !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                            [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, (i32 timm:$src3)))]>,
                  Sched<[WriteFMAX]>;

        def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst),
                            (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                            !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                            [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2),
                                                      (i32 timm:$src3)))]>,
                  Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
      }
      defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
                                 (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
                                 OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                                 (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                               (i32 timm:$src3))),
                                 0, 0, 0, vselect_mask, "", "_Int">,
                 Sched<[WriteFMAX]>;

      defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
                                 (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                                 OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                                 (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                                               (i32 timm:$src3))),
                                 0, 0, 0, vselect_mask, "", "_Int">,
                 Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    }
    let Uses = []<Register>, mayRaiseFPException = 0 in
    defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
                                (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
                                OpStr, "$src3, {sae}, $src2, $src1",
                                "$src1, $src2, {sae}, $src3",
                                (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                                 (i32 timm:$src3))),
                                0, 0, 0, vselect_mask, "", "_Int">,
                Sched<[WriteFMAX]>, EVEX_B;
  }
}


// The BF16 flavour has no {sae} variant and is marked as not raising FP
// exceptions.
let mayRaiseFPException = 0 in
defm VMINMAXBF16 : avx10_minmax_packed<"vminmaxbf16", avx512vl_bf16_info, X86vminmax>,
                   AVX512XDIi8Base, EVEX_CD8<16, CD8VF>, TA;

defm VMINMAXPD : avx10_minmax_packed<"vminmaxpd", avx512vl_f64_info, X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxpd", avx512vl_f64_info, X86vminmaxSae>,
                 AVX512PDIi8Base, REX_W, TA, EVEX_CD8<64, CD8VF>;

defm VMINMAXPH : avx10_minmax_packed<"vminmaxph", avx512vl_f16_info, X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxph", avx512vl_f16_info, X86vminmaxSae>,
                 AVX512PSIi8Base, TA, EVEX_CD8<16, CD8VF>;

defm VMINMAXPS : avx10_minmax_packed<"vminmaxps", avx512vl_f32_info, X86vminmax>,
                 avx10_minmax_packed_sae<"vminmaxps", avx512vl_f32_info, X86vminmaxSae>,
                 AVX512PDIi8Base, TA, EVEX_CD8<32, CD8VF>;

defm VMINMAXSD : avx10_minmax_scalar<"vminmaxsd", v2f64x_info, X86vminmaxs, X86vminmaxsSae>,
                 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<64, CD8VT1>, REX_W;
defm VMINMAXSH : avx10_minmax_scalar<"vminmaxsh", v8f16x_info, X86vminmaxs, X86vminmaxsSae>,
                 AVX512PSIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<16, CD8VT1>, TA;
defm VMINMAXSS : avx10_minmax_scalar<"vminmaxss", v4f32x_info, X86vminmaxs, X86vminmaxsSae>,
                 AVX512AIi8Base, VEX_LIG, EVEX, VVVV, EVEX_CD8<32, CD8VT1>;

//-------------------------------------------------
// AVX10 SATCVT instructions
//-------------------------------------------------

// Single-source maskable conversion for one vector length:
//   rr  - register source
//   rm  - full-vector memory source
//   rmb - broadcast memory source (EVEX_B)
multiclass avx10_sat_cvt_rmb<bits<8> Opc, string OpStr, X86FoldableSchedWrite sched,
                             X86VectorVTInfo DestInfo,
                             X86VectorVTInfo SrcInfo,
                             SDNode MaskNode> {
  defm rr: AVX512_maskable<Opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                           (ins SrcInfo.RC:$src), OpStr, "$src", "$src",
                           (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src)))>,
           Sched<[sched]>;
  defm rm: AVX512_maskable<Opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                           (ins SrcInfo.MemOp:$src), OpStr, "$src", "$src",
                           (DestInfo.VT (MaskNode (SrcInfo.VT
                                                   (SrcInfo.LdFrag addr:$src))))>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm rmb: AVX512_maskable<Opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                            (ins SrcInfo.ScalarMemOp:$src), OpStr,
                            "${src}"#SrcInfo.BroadcastStr, "${src}"#SrcInfo.BroadcastStr,
                            (DestInfo.VT (MaskNode (SrcInfo.VT
                                                    (SrcInfo.BroadcastLdFrag addr:$src))))>, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}

// Conversion with rounding control (RC)
// Only the 512-bit register form (Zrrb) is defined; it takes an extra
// AVX512RC operand.
multiclass avx10_sat_cvt_rc<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
                            SDNode MaskNode> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in
  defm Zrrb : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info512,
                              (outs DestInfo.info512.RC:$dst),
                              (ins SrcInfo.info512.RC:$src, AVX512RC:$rc),
                              OpStr, "$rc, $src", "$src, $rc",
                              (DestInfo.info512.VT
                                (MaskNode (SrcInfo.info512.VT SrcInfo.info512.RC:$src),
                                          (i32 timm:$rc)))>,
              Sched<[sched.ZMM]>, EVEX, EVEX_RC, EVEX_B;
}

// Conversion with SAE
// Only the 512-bit register form (Zrrb) is defined.
multiclass avx10_sat_cvt_sae<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
                             SDNode Node> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in
  defm Zrrb : AVX512_maskable<Opc, MRMSrcReg, DestInfo.info512,
                              (outs DestInfo.info512.RC:$dst),
                              (ins SrcInfo.info512.RC:$src),
                              OpStr, "{sae}, $src", "$src, {sae}",
                              (DestInfo.info512.VT
                                (Node (SrcInfo.info512.VT SrcInfo.info512.RC:$src)))>,
              Sched<[sched.ZMM]>, EVEX, EVEX_B;
}

// Instantiates avx10_sat_cvt_rmb for the 512-bit (HasAVX10_2_512) and the
// 256/128-bit (HasAVX10_2) vector lengths.
multiclass avx10_sat_cvt_base<bits<8> Opc, string OpStr, X86SchedWriteWidths sched,
                              SDNode MaskNode, AVX512VLVectorVTInfo DestInfo,
                              AVX512VLVectorVTInfo SrcInfo> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_sat_cvt_rmb<Opc, OpStr, sched.ZMM,
                             DestInfo.info512, SrcInfo.info512,
                             MaskNode>,
           EVEX, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z256
        : avx10_sat_cvt_rmb<Opc, OpStr, sched.YMM,
                            DestInfo.info256, SrcInfo.info256,
                            MaskNode>,
          EVEX, EVEX_V256;
    defm Z128
        : avx10_sat_cvt_rmb<Opc, OpStr, sched.XMM,
                            DestInfo.info128, SrcInfo.info128,
                            MaskNode>,
          EVEX, EVEX_V128;
  }
}

defm VCVTBF162IBS : avx10_sat_cvt_base<0x69, "vcvtbf162ibs",
                                       SchedWriteVecIMul, X86vcvtp2ibs,
                                       avx512vl_i16_info, avx512vl_bf16_info>,
                    AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTBF162IUBS : avx10_sat_cvt_base<0x6b, "vcvtbf162iubs",
                                        SchedWriteVecIMul, X86vcvtp2iubs,
                                        avx512vl_i16_info, avx512vl_bf16_info>,
                     AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTPH2IBS : avx10_sat_cvt_base<0x69, "vcvtph2ibs", SchedWriteVecIMul,
                                     X86vcvtp2ibs, avx512vl_i16_info,
                                     avx512vl_f16_info>,
                  avx10_sat_cvt_rc<0x69, "vcvtph2ibs", SchedWriteVecIMul,
                                   avx512vl_i16_info, avx512vl_f16_info,
                                   X86vcvtp2ibsRnd>,
                  AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTPH2IUBS : avx10_sat_cvt_base<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
                                      X86vcvtp2iubs, avx512vl_i16_info,
                                      avx512vl_f16_info>,
                   avx10_sat_cvt_rc<0x6b, "vcvtph2iubs", SchedWriteVecIMul,
                                    avx512vl_i16_info, avx512vl_f16_info,
                                    X86vcvtp2iubsRnd>,
                   AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTPS2IBS : avx10_sat_cvt_base<0x69, "vcvtps2ibs", SchedWriteVecIMul,
                                     X86vcvtp2ibs, avx512vl_i32_info,
                                     avx512vl_f32_info>,
                  avx10_sat_cvt_rc<0x69, "vcvtps2ibs", SchedWriteVecIMul,
                                   avx512vl_i32_info, avx512vl_f32_info,
                                   X86vcvtp2ibsRnd>,
                  AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTPS2IUBS : avx10_sat_cvt_base<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
                                      X86vcvtp2iubs, avx512vl_i32_info,
                                      avx512vl_f32_info>,
                   avx10_sat_cvt_rc<0x6b, "vcvtps2iubs", SchedWriteVecIMul,
                                    avx512vl_i32_info, avx512vl_f32_info,
                                    X86vcvtp2iubsRnd>,
                   AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;

defm VCVTTBF162IBS : avx10_sat_cvt_base<0x68, "vcvttbf162ibs",
                                        SchedWriteVecIMul, X86vcvttp2ibs,
                                        avx512vl_i16_info, avx512vl_bf16_info>,
                     AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTTBF162IUBS : avx10_sat_cvt_base<0x6a, "vcvttbf162iubs",
                                         SchedWriteVecIMul, X86vcvttp2iubs,
                                         avx512vl_i16_info, avx512vl_bf16_info>,
                      AVX512XDIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTTPH2IBS : avx10_sat_cvt_base<0x68, "vcvttph2ibs", SchedWriteVecIMul,
                                      X86vcvttp2ibs, avx512vl_i16_info,
                                      avx512vl_f16_info>,
                   avx10_sat_cvt_sae<0x68, "vcvttph2ibs", SchedWriteVecIMul,
                                     avx512vl_i16_info, avx512vl_f16_info,
                                     X86vcvttp2ibsSAE>,
                   AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;
defm VCVTTPH2IUBS : avx10_sat_cvt_base<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
                                       X86vcvttp2iubs, avx512vl_i16_info,
                                       avx512vl_f16_info>,
                    avx10_sat_cvt_sae<0x6a, "vcvttph2iubs", SchedWriteVecIMul,
                                      avx512vl_i16_info, avx512vl_f16_info,
                                      X86vcvttp2iubsSAE>,
                    AVX512PSIi8Base, T_MAP5, EVEX_CD8<16, CD8VF>;

defm VCVTTPS2IBS : avx10_sat_cvt_base<0x68, "vcvttps2ibs", SchedWriteVecIMul,
                                      X86vcvttp2ibs, avx512vl_i32_info,
                                      avx512vl_f32_info>,
                   avx10_sat_cvt_sae<0x68, "vcvttps2ibs", SchedWriteVecIMul,
                                     avx512vl_i32_info, avx512vl_f32_info,
                                     X86vcvttp2ibsSAE>,
                   AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2IUBS : avx10_sat_cvt_base<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
                                       X86vcvttp2iubs, avx512vl_i32_info,
                                       avx512vl_f32_info>,
                    avx10_sat_cvt_sae<0x6a, "vcvttps2iubs", SchedWriteVecIMul,
                                      avx512vl_i32_info, avx512vl_f32_info,
                                      X86vcvttp2iubsSAE>,
                    AVX512PDIi8Base, T_MAP5, EVEX_CD8<32, CD8VF>;

//-------------------------------------------------
// AVX10 SATCVT-DS instructions
//-------------------------------------------------

// Convert Double to Signed/Unsigned Doubleword with truncation.
// The Z128 form is instantiated with null_frag, so it carries no selection
// patterns of its own; they are provided by standalone Pat<> records in this
// file. The 128- and 256-bit sources both produce a VR128X result, so "x"/"y"
// suffixed AT&T aliases are added to disambiguate the source width.
multiclass avx10_cvttpd2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeSAE,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
                               f128mem, VK2WM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNodeSAE,
                                   sched.YMM>, EVEX_V256;
  }


  // "x"-suffixed aliases for the 128-bit source forms.
  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
                   VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
                   VK2WM:$mask, VR128X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
                   VK2WM:$mask, f64mem:$src), 0, "att">;

  // "y"-suffixed aliases for the 256-bit source forms, including {sae}.
  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst|$dst, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrb") VR128X:$dst,
                   VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}}|$dst {${mask}}, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrbk") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{{sae} $src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src {sae}}",
                  (!cast<Instruction>(NAME # "Z256rrbkz") VR128X:$dst,
                   VK4WM:$mask, VR256X:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
                   f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
                  "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
                  "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
                   VK4WM:$mask, f64mem:$src), 0, "att">;
}

// Convert Double to Signed/Unsigned Quadword with truncation and saturation
// enabled.
multiclass avx10_cvttpd2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f64x_info,
                                   OpNodeRnd, sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Quadword with truncation
// The Z128 form reads only 64 bits of memory (f64mem), so it supplies explicit
// load DAGs built from a 64-bit scalar load bitcast to v4f32.
multiclass avx10_cvttps2qqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
                               (v2i64 (OpNode (bc_v4f32 (v2f64
                                 (scalar_to_vector (loadf64 addr:$src)))))),
                               (v2i64 (MaskOpNode (bc_v4f32 (v2f64
                                 (scalar_to_vector (loadf64 addr:$src))))))>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }

  let Predicates = [HasAVX10_2], hasEVEX_U=1 in {
    defm Z256 : avx512_vcvt_fp_sae<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNodeRnd,
                                   sched.YMM>, EVEX_V256;
  }
}

// Convert Float to Signed/Unsigned Doubleword with truncation
multiclass avx10_cvttps2dqs<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDNode MaskOpNode,
                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            MaskOpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeSAE, sched.ZMM>, EVEX_V512;
  }

  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               MaskOpNode, sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               MaskOpNode, sched.YMM>, EVEX_V256;
  }
}

// Signed forms use opcode 0x6D, unsigned forms 0x6C. The redundant leading PD
// prefix class on VCVTTPD2DQS was dropped: the later PS in the same list
// already overrode it, and VCVTTPD2UDQS uses PS alone.
defm VCVTTPD2DQS : avx10_cvttpd2dqs<0x6D, "vcvttpd2dqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPD2DQ>,
                   REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
defm VCVTTPD2UDQS : avx10_cvttpd2dqs<0x6C, "vcvttpd2udqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPD2DQ>,
                    REX_W, T_MAP5,PS, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2DQS : avx10_cvttps2dqs<0x6D, "vcvttps2dqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5,PS,
                   EVEX_CD8<32, CD8VF>;
defm VCVTTPS2UDQS : avx10_cvttps2dqs<0x6C, "vcvttps2udqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPS2DQ>, T_MAP5,PS,
                    EVEX_CD8<32, CD8VF>;
defm VCVTTPD2QQS : avx10_cvttpd2qqs<0x6D, "vcvttpd2qqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
                   EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQS : avx10_cvttps2qqs<0x6D, "vcvttps2qqs", X86cvttp2sis,
                                    X86cvttp2sis, X86cvttp2sisSAE,
                                    SchedWriteCvtPS2DQ>, T_MAP5,PD,
                   EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQS : avx10_cvttpd2qqs<0x6C, "vcvttpd2uqqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPD2DQ>, REX_W, T_MAP5,PD,
                    EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQS : avx10_cvttps2qqs<0x6C, "vcvttps2uqqs", X86cvttp2uis,
                                     X86cvttp2uis, X86cvttp2uisSAE,
                                     SchedWriteCvtPS2DQ>, T_MAP5,PD,
                    EVEX_CD8<32, CD8VH>;

let Predicates = [HasAVX10_2] in {
// Selection patterns mapping the saturating FP-to-integer nodes onto the
// register forms of the SATCVT-DS instructions.

// VCVTTPD2DQS
def : Pat<(v4i32(X86fp2sisat(v2f64 VR128X:$src))),
          (VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
          (VCVTTPD2DQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
          (VCVTTPD2DQSZrr VR512:$src)>;

// VCVTTPD2QQS
def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
          (VCVTTPD2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
          (VCVTTPD2QQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
          (VCVTTPD2QQSZrr VR512:$src)>;

// VCVTTPD2UDQS
def : Pat<(v4i32(X86fp2uisat(v2f64 VR128X:$src))),
          (VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
          (VCVTTPD2UDQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
          (VCVTTPD2UDQSZrr VR512:$src)>;

// VCVTTPD2UQQS
def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
          (VCVTTPD2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
          (VCVTTPD2UQQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
          (VCVTTPD2UQQSZrr VR512:$src)>;

// VCVTTPS2DQS
def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
          (VCVTTPS2DQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
          (VCVTTPS2DQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
          (VCVTTPS2DQSZrr VR512:$src)>;

// VCVTTPS2QQS
def : Pat<(v2i64(X86fp2sisat(v4f32 VR128X:$src))),
          (VCVTTPS2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
          (VCVTTPS2QQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
          (VCVTTPS2QQSZrr VR256X:$src)>;

// VCVTTPS2UDQS
def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
          (VCVTTPS2UDQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
          (VCVTTPS2UDQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
          (VCVTTPS2UDQSZrr VR512:$src)>;

// VCVTTPS2UQQS
def : Pat<(v2i64(X86fp2uisat(v4f32 VR128X:$src))),
          (VCVTTPS2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
          (VCVTTPS2UQQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
          (VCVTTPS2UQQSZrr VR256X:$src)>;

// Special patterns to allow use of X86mcvttp2sis/X86mcvttp2uis for masking.
// Instruction patterns have been disabled with null_frag.
// Patterns VCVTTPD2DQSZ128
def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
          (VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
          (VCVTTPD2DQSZ128rm addr:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTTPD2DQSZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2DQSZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
           (v4i32 VR128X:$src0), VK2WM:$mask),
          (VCVTTPD2DQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2sis (v2f64 (X86VBroadcastld64 addr:$src)),
           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
          (VCVTTPD2DQSZ128rmbkz VK2WM:$mask, addr:$src)>;

// Patterns VCVTTPD2UDQSZ128
// Mirrors the signed group above: rr, rm and rmb for the unmasked node,
// followed by the merge-/zero-masked variants of each.
def : Pat<(v4i32 (X86cvttp2uis (v2f64 VR128X:$src))),
          (VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2uis (loadv2f64 addr:$src))),
          (VCVTTPD2UDQSZ128rm addr:$src)>;
def : Pat<(v4i32 (X86cvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTTPD2UDQSZ128rmb addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rrkz VK2WM:$mask, VR128X:$src)>;
def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
           VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmkz VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
           (v4i32 VR128X:$src0), VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86mcvttp2uis (v2f64 (X86VBroadcastld64 addr:$src)),
           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
          (VCVTTPD2UDQSZ128rmbkz VK2WM:$mask, addr:$src)>;
}

// Convert scalar float/double to signed/unsigned int 32/64 with truncation and saturation.
633multiclass avx10_cvt_s_ds<bits<8> opc, string asm, X86VectorVTInfo _SrcRC, 634 X86VectorVTInfo _DstRC, SDPatternOperator OpNode, 635 SDNode OpNodeInt, SDNode OpNodeSAE, 636 X86FoldableSchedWrite sched> { 637 let Predicates = [HasAVX10_2], ExeDomain = _SrcRC.ExeDomain in { 638 let isCodeGenOnly = 1 in { 639 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src), 640 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 641 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src, _DstRC.EltVT))]>, 642 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 643 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src), 644 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 645 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src), _DstRC.EltVT))]>, 646 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; 647 } 648 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), 649 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 650 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>, 651 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC; 652 let Uses = [MXCSR] in 653 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), 654 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"), 655 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>, 656 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>; 657 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), 658 (ins _SrcRC.IntScalarMemOp:$src), 659 !strconcat(asm,"\t{$src, $dst|$dst, $src}"), 660 [(set _DstRC.RC:$dst, 661 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>, 662 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, 663 SIMD_EXC; 664 } 665} 666 667defm VCVTTSS2SIS: avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i32x_info, 668 fp_to_sint_sat, X86cvttss2Int, 669 X86cvttss2IntSAE, WriteCvtSS2I>, 670 T_MAP5,XS, EVEX_CD8<32, CD8VT1>; 671defm VCVTTSS2SI64S: avx10_cvt_s_ds<0x6D, "vcvttss2sis", f32x_info, i64x_info, 672 
fp_to_sint_sat, X86cvttss2Int, 673 X86cvttss2IntSAE, WriteCvtSS2I>, 674 REX_W, T_MAP5,XS, EVEX_CD8<32, CD8VT1>; 675defm VCVTTSD2SIS: avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i32x_info, 676 fp_to_sint_sat, X86cvttss2Int, 677 X86cvttss2IntSAE, WriteCvtSD2I>, 678 T_MAP5,XD, EVEX_CD8<64, CD8VT1>; 679defm VCVTTSD2SI64S: avx10_cvt_s_ds<0x6D, "vcvttsd2sis", f64x_info, i64x_info, 680 fp_to_sint_sat, X86cvttss2Int, 681 X86cvttss2IntSAE, WriteCvtSD2I>, 682 REX_W, T_MAP5,XD, EVEX_CD8<64, CD8VT1>; 683defm VCVTTSS2USIS: avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i32x_info, 684 fp_to_uint_sat, X86cvttss2UInt, 685 X86cvttss2UIntSAE, WriteCvtSS2I>, 686 T_MAP5,XS, EVEX_CD8<32, CD8VT1>; 687defm VCVTTSS2USI64S: avx10_cvt_s_ds<0x6C, "vcvttss2usis", f32x_info, i64x_info, 688 fp_to_uint_sat, X86cvttss2UInt, 689 X86cvttss2UIntSAE, WriteCvtSS2I>, 690 T_MAP5,XS,REX_W, EVEX_CD8<32, CD8VT1>; 691defm VCVTTSD2USIS: avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i32x_info, 692 fp_to_uint_sat, X86cvttss2UInt, 693 X86cvttss2UIntSAE, WriteCvtSD2I>, 694 T_MAP5,XD, EVEX_CD8<64, CD8VT1>; 695defm VCVTTSD2USI64S: avx10_cvt_s_ds<0x6C, "vcvttsd2usis", f64x_info, i64x_info, 696 fp_to_uint_sat, X86cvttss2UInt, 697 X86cvttss2UIntSAE, WriteCvtSD2I>, 698 T_MAP5,XD, REX_W, EVEX_CD8<64, CD8VT1>; 699 700//------------------------------------------------- 701// AVX10 CONVERT instructions 702//------------------------------------------------- 703 704multiclass avx10_cvt2ps2ph_rc<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, 705 X86VectorVTInfo _Src, X86VectorVTInfo _, 706 SDNode OpNodeRnd> { 707 let Uses = [MXCSR] in 708 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), 709 (ins _Src.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr, 710 "$rc, $src2, $src1", "$src1, $src2, $rc", 711 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1), 712 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>, 713 EVEX, VVVV, EVEX_B, EVEX_RC, PD, Sched<[sched]>; 714} 715 716//TODO: Merge into avx512_binop_all, 
// difference is rounding control added here.
// Two-source conversion at all three vector widths. The 512-bit record also
// pulls in the rounding-control form (avx10_cvt2ps2ph_rc) and lists MXCSR in
// Uses; the 256/128-bit records only get the forms from avx512_binop_rm2.
multiclass avx10_cvt2ps2ph<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched,
                           AVX512VLVectorVTInfo _SrcVTInfo,
                           AVX512VLVectorVTInfo _DstVTInfo,
                           SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX10_2_512], Uses = [MXCSR] in {
    defm Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                              _SrcVTInfo.info512, _DstVTInfo.info512,
                              _SrcVTInfo.info512>,
             avx10_cvt2ps2ph_rc<opc, OpcodeStr, sched.ZMM,
                                _SrcVTInfo.info512, _DstVTInfo.info512,
                                OpNodeRnd>,
             EVEX_V512, EVEX_CD8<32, CD8VF>;
  }
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                 _SrcVTInfo.info256, _DstVTInfo.info256,
                                 _SrcVTInfo.info256>,
                EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                 _SrcVTInfo.info128, _DstVTInfo.info128,
                                 _SrcVTInfo.info128>,
                EVEX_V128, EVEX_CD8<32, CD8VF>;
  }
}

defm VCVT2PS2PHX : avx10_cvt2ps2ph<0x67, "vcvt2ps2phx",
                                   SchedWriteCvtPD2PS,
                                   avx512vl_f32_info, avx512vl_f16_info,
                                   X86vfpround2, X86vfpround2Rnd>, T8;

defm VCVT2PH2BF8 : avx512_binop_all<0x74, "vcvt2ph2bf8", SchedWriteCvtPD2PS,
                                    avx512vl_f16_info, avx512vl_i8_info,
                                    X86vcvt2ph2bf8, [HasAVX10_2_512], [HasAVX10_2]>,
                                    EVEX_CD8<16, CD8VF>, T8, XD;
defm VCVT2PH2BF8S : avx512_binop_all<0x74, "vcvt2ph2bf8s", SchedWriteCvtPD2PS,
                                     avx512vl_f16_info, avx512vl_i8_info,
                                     X86vcvt2ph2bf8s, [HasAVX10_2_512], [HasAVX10_2]>,
                                     EVEX_CD8<16, CD8VF>, T_MAP5, XD;
defm VCVT2PH2HF8 : avx512_binop_all<0x18, "vcvt2ph2hf8", SchedWriteCvtPD2PS,
                                    avx512vl_f16_info, avx512vl_i8_info,
                                    X86vcvt2ph2hf8, [HasAVX10_2_512], [HasAVX10_2]>,
                                    EVEX_CD8<16, CD8VF>, T_MAP5, XD;
defm VCVT2PH2HF8S : avx512_binop_all<0x1b, "vcvt2ph2hf8s", SchedWriteCvtPD2PS,
                                     avx512vl_f16_info, avx512vl_i8_info,
                                     X86vcvt2ph2hf8s, [HasAVX10_2_512], [HasAVX10_2]>,
                                     EVEX_CD8<16, CD8VF>, T_MAP5, XD;

//TODO: Merge into avx512_vcvt_fp, difference is one more source register here.
// Maskable conversion with two sources (vt_src1 in a register, vt_src2 in a
// register, full-vector memory or broadcast memory) producing vt_dst.
//   rr  - register/register.
//   rm  - $src2 is a full-vector load (LdDAG / MaskLdDAG).
//   rmb - $src2 is an embedded-broadcast load (EVEX_B).
// The unmasked pattern uses OpNode; the merge- and zero-masked patterns wrap
// MaskOpNode in vselect_mask. Broadcast, MemOp and MaskRC default to the
// values of vt_src2 and can be overridden by the caller.
multiclass avx10_convert_3op_packed<bits<8> OpCode, string OpcodeStr,
           X86VectorVTInfo vt_dst, X86VectorVTInfo vt_src1,
           X86VectorVTInfo vt_src2, SDPatternOperator OpNode,
           SDPatternOperator MaskOpNode, X86FoldableSchedWrite sched,
           string Broadcast = vt_src2.BroadcastStr,
           X86MemOperand MemOp = vt_src2.MemOp,
           RegisterClass MaskRC = vt_src2.KRCWM,
           dag LdDAG = (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1),
                                          (vt_src2.VT (vt_src2.LdFrag addr:$src2)))),
           dag MaskLdDAG = (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
                                                  (vt_src2.VT (vt_src2.LdFrag addr:$src2))))> {
  defm rr : AVX512_maskable_cvt<OpCode, MRMSrcReg, vt_dst, (outs vt_dst.RC:$dst),
                      (ins vt_src1.RC:$src1, vt_src2.RC:$src2),
                      (ins vt_dst.RC:$src0, MaskRC:$mask, vt_src1.RC:$src1, vt_src2.RC:$src2),
                      (ins MaskRC:$mask, vt_src1.RC:$src1, vt_src2.RC:$src2),
                      OpcodeStr, "$src2, $src1", "$src1, $src2",
                      (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1),
                                         (vt_src2.VT vt_src2.RC:$src2))),
                      (vselect_mask MaskRC:$mask,
                        (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
                                               (vt_src2.VT vt_src2.RC:$src2))),
                        vt_dst.RC:$src0),
                      (vselect_mask MaskRC:$mask,
                        (vt_dst.VT (MaskOpNode (vt_src1.VT vt_src1.RC:$src1),
                                               (vt_src2.VT vt_src2.RC:$src2))),
                        vt_dst.ImmAllZerosV)>,
                      EVEX, VVVV, Sched<[sched]>;

  // Memory forms fold a load, so they take the folded scheduling classes
  // (the load-op write plus the read-after-fold for $src1) instead of the
  // register-form class.
  let mayLoad = 1 in
  defm rm : AVX512_maskable_cvt<OpCode, MRMSrcMem, vt_dst, (outs vt_dst.RC:$dst),
                      (ins vt_src1.RC:$src1, MemOp:$src2),
                      (ins vt_dst.RC:$src0, MaskRC:$mask, vt_src1.RC:$src1, MemOp:$src2),
                      (ins MaskRC:$mask, vt_src1.RC:$src1, MemOp:$src2),
                      OpcodeStr, "$src2, $src1", "$src1, $src2",
                      LdDAG,
                      (vselect_mask MaskRC:$mask, MaskLdDAG, vt_dst.RC:$src0),
                      (vselect_mask MaskRC:$mask, MaskLdDAG, vt_dst.ImmAllZerosV)>,
                      EVEX, VVVV, Sched<[sched.Folded, sched.ReadAfterFold]>;

  let mayLoad = 1 in
  defm rmb : AVX512_maskable_cvt<OpCode, MRMSrcMem, vt_dst, (outs vt_dst.RC:$dst),
                      (ins vt_src1.RC:$src1, vt_src2.ScalarMemOp:$src2),
                      (ins vt_dst.RC:$src0, MaskRC:$mask, vt_src1.RC:$src1,
                           vt_src2.ScalarMemOp:$src2),
                      (ins MaskRC:$mask, vt_src1.RC:$src1, vt_src2.ScalarMemOp:$src2),
                      OpcodeStr,
                      "${src2}"#Broadcast#", $src1", "$src1, ${src2}"#Broadcast,
                      (vt_dst.VT (OpNode (vt_src1.VT vt_src1.RC:$src1), (vt_src2.VT
                                         (vt_src2.BroadcastLdFrag addr:$src2)))),
                      (vselect_mask MaskRC:$mask,
                        (vt_dst.VT
                          (MaskOpNode
                            (vt_src1.VT vt_src1.RC:$src1), (vt_src2.VT
                            (vt_src2.BroadcastLdFrag addr:$src2)))),
                        vt_dst.RC:$src0),
                      (vselect_mask MaskRC:$mask,
                        (vt_dst.VT
                          (MaskOpNode
                            (vt_src1.VT vt_src1.RC:$src1),
                            (vt_src2.VT
                              (vt_src2.BroadcastLdFrag addr:$src2)))),
                        vt_dst.ImmAllZerosV)>,
                      EVEX, VVVV, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}

//TODO: Merge into avx512_cvt_trunc
// Instantiates avx10_convert_3op_packed at the three vector widths. The
// destination info is one step narrower than the first source for Z
// (info256 <- info512) and Z256 (info128 <- info256); Z128 uses info128 for
// both. Z and Z256 pass OpNode for both the plain and the masked operator.
// Z128 is instantiated with null_frag and gets explicit patterns that use
// MaskOpNode with the pass-through value and mask as extra operands.
multiclass avx10_convert_3op<bits<8> OpCode, string OpcodeStr,
           AVX512VLVectorVTInfo vt_dst, AVX512VLVectorVTInfo vt_src,
           X86SchedWriteWidths sched,
           SDPatternOperator OpNode,
           SDPatternOperator MaskOpNode,
           PatFrag bcast128 = vt_src.info128.BroadcastLdFrag,
           PatFrag loadVT128 = vt_src.info128.LdFrag,
           RegisterClass maskRC128 = vt_src.info128.KRCWM> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info256,
               vt_dst.info512, vt_src.info512, OpNode, OpNode, sched.ZMM>,
               EVEX_V512, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z256 : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info128,
                    vt_dst.info256, vt_src.info256, OpNode, OpNode, sched.YMM>,
                    EVEX_V256, EVEX_CD8<16, CD8VF>;
    defm Z128 : avx10_convert_3op_packed<OpCode, OpcodeStr, vt_dst.info128,
                    vt_dst.info128, vt_src.info128,
                    null_frag, null_frag, sched.XMM>,
                    EVEX_V128, EVEX_CD8<16, CD8VF>;
    // Special patterns to allow use of MaskOpNode for masking 128 version. Instruction
    // patterns have been disabled with null_frag.

    // Register source.
    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
                                         (vt_src.info128.VT VR128X:$src2))),
              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src1, VR128X:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT VR128X:$src2),
                          (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask,
                                                    VR128X:$src1, VR128X:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT VR128X:$src2),
                          vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask,
                                                     VR128X:$src1, VR128X:$src2)>;

    // Full-vector load source.
    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
                                         (loadVT128 addr:$src2))),
              (!cast<Instruction>(NAME # "Z128rm") VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (loadVT128 addr:$src2),
                          (vt_dst.info128.VT VR128X:$src0),
                          maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask,
                                                    VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (loadVT128 addr:$src2),
                          vt_dst.info128.ImmAllZerosV,
                          maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask,
                                                     VR128X:$src1, addr:$src2)>;

    // Broadcast load source.
    def : Pat<(vt_dst.info128.VT (OpNode (vt_dst.info128.VT VR128X:$src1),
                                         (vt_src.info128.VT (bcast128 addr:$src2)))),
              (!cast<Instruction>(NAME # "Z128rmb") VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT (bcast128 addr:$src2)),
                          (vt_dst.info128.VT VR128X:$src0), maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask,
                                                     VR128X:$src1, addr:$src2)>;
    def : Pat<(MaskOpNode (vt_dst.info128.VT VR128X:$src1),
                          (vt_src.info128.VT (bcast128 addr:$src2)),
                          vt_dst.info128.ImmAllZerosV, maskRC128:$mask),
              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask,
                                                      VR128X:$src1, addr:$src2)>;
  }
}

defm VCVTBIASPH2BF8 : avx10_convert_3op<0x74, "vcvtbiasph2bf8",
                      avx512vl_i8_info, avx512vl_f16_info,
                      SchedWriteCvtPD2PS,
                      X86vcvtbiasph2bf8, X86vmcvtbiasph2bf8>,
                      T8, PS;
defm VCVTBIASPH2BF8S : avx10_convert_3op<0x74, "vcvtbiasph2bf8s",
                       avx512vl_i8_info, avx512vl_f16_info,
                       SchedWriteCvtPD2PS,
                       X86vcvtbiasph2bf8s, X86vmcvtbiasph2bf8s>,
                       T_MAP5, PS;
defm VCVTBIASPH2HF8 : avx10_convert_3op<0x18, "vcvtbiasph2hf8",
                      avx512vl_i8_info, avx512vl_f16_info,
                      SchedWriteCvtPD2PS,
                      X86vcvtbiasph2hf8, X86vmcvtbiasph2hf8>,
                      T_MAP5, PS;
defm VCVTBIASPH2HF8S : avx10_convert_3op<0x1b, "vcvtbiasph2hf8s",
                       avx512vl_i8_info, avx512vl_f16_info,
                       SchedWriteCvtPD2PS,
                       X86vcvtbiasph2hf8s, X86vmcvtbiasph2hf8s>,
                       T_MAP5, PS;

defm VCVTPH2BF8 : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8", avx512vl_i8_info,
                                      avx512vl_f16_info, SchedWriteCvtPD2PS,
                                      X86vcvtph2bf8, X86vmcvtph2bf8,
                                      [HasAVX10_2], [HasAVX10_2_512]>,
                                      T8, XS, EVEX_CD8<16, CD8VF>;

defm VCVTPH2BF8S : avx512_cvt_trunc_ne<0x74, "vcvtph2bf8s", avx512vl_i8_info,
                                       avx512vl_f16_info, SchedWriteCvtPD2PS,
                                       X86vcvtph2bf8s, X86vmcvtph2bf8s,
                                       [HasAVX10_2], [HasAVX10_2_512]>,
                                       T_MAP5, XS, EVEX_CD8<16, CD8VF>;

defm VCVTPH2HF8 : avx512_cvt_trunc_ne<0x18, "vcvtph2hf8", avx512vl_i8_info,
                                      avx512vl_f16_info, SchedWriteCvtPD2PS,
                                      X86vcvtph2hf8, X86vmcvtph2hf8,
                                      [HasAVX10_2], [HasAVX10_2_512]>,
                                      T_MAP5, XS, EVEX_CD8<16, CD8VF>;

defm VCVTPH2HF8S : avx512_cvt_trunc_ne<0x1b, "vcvtph2hf8s", avx512vl_i8_info,
                                       avx512vl_f16_info, SchedWriteCvtPD2PS,
                                       X86vcvtph2hf8s, X86vmcvtph2hf8s,
                                       [HasAVX10_2], [HasAVX10_2_512]>,
                                       T_MAP5, XS, EVEX_CD8<16, CD8VF>;

// Single-source maskable conversion without a broadcast form: rr takes the
// source in a register, rm applies OpNode to ld_dag (a plain `load` of
// $src by default) read through the memory operand x86memop.
multiclass avx10_convert_2op_nomb_packed<bits<8> opc, string OpcodeStr,
                                         X86VectorVTInfo _dest, X86VectorVTInfo _src,
                                         SDNode OpNode, X86MemOperand x86memop,
                                         X86FoldableSchedWrite sched,
                                         dag ld_dag = (load addr:$src)> {
  let ExeDomain = _dest.ExeDomain in {
    defm rr : AVX512_maskable_split<opc, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                                    (ins _src.RC:$src), OpcodeStr, "$src", "$src",
                                    (OpNode (_src.VT _src.RC:$src)),
                                    (OpNode (_src.VT _src.RC:$src))>,
                                    Sched<[sched]>;
    defm rm : AVX512_maskable_split<opc, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                                    (ins x86memop:$src), OpcodeStr, "$src", "$src",
                                    (OpNode (_src.VT ld_dag)),
                                    (OpNode (_src.VT ld_dag))>,
                                    Sched<[sched.Folded]>;
  }
}

// Width expansion of avx10_convert_2op_nomb_packed. Source info and memory
// operand are narrower than the destination: Z reads info256/f256mem,
// Z256 reads info128/f128mem and Z128 reads info128/f64mem. All three widths
// use WriteCvtPH2PSZ.
multiclass avx10_convert_2op_nomb<string OpcodeStr, AVX512VLVectorVTInfo _dest,
                                  AVX512VLVectorVTInfo _src, bits<8> opc, SDNode OpNode> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info512, _src.info256,
                                         OpNode, f256mem, WriteCvtPH2PSZ>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info128, _src.info128,
                                              OpNode, f64mem, WriteCvtPH2PSZ>, EVEX_V128;
    defm Z256 : avx10_convert_2op_nomb_packed<opc, OpcodeStr, _dest.info256, _src.info128,
                                              OpNode, f128mem, WriteCvtPH2PSZ>, EVEX_V256;
  }
}

defm VCVTHF82PH : avx10_convert_2op_nomb<"vcvthf82ph", avx512vl_f16_info,
                                         avx512vl_i8_info, 0x1e, X86vcvthf82ph>,
                                         AVX512XDIi8Base, T_MAP5, EVEX, EVEX_CD8<16, CD8VH>;

//-------------------------------------------------
// AVX10 BF16 instructions
//-------------------------------------------------

// VADDBF16, VSUBBF16, VMULBF16, VDIVBF16, VMAXBF16, VMINBF16
// Intrinsic-based binary op: the selection node for each width is looked up
// by name as int_x86_avx10_<OpcodeStr>bf16{512,128,256} and used for both the
// plain and the masked operator of avx512_fp_packed.
multiclass avx10_fp_binop_int_bf16<bits<8> opc, string OpcodeStr,
                                   X86SchedWriteSizes sched,
                                   bit IsCommutable = 0> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_fp_packed<opc, OpcodeStr,
                            !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16512"),
                            !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16512"),
                            v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
                            T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fp_packed<opc, OpcodeStr,
                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16128"),
                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16128"),
                                 v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fp_packed<opc, OpcodeStr,
                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16256"),
                                 !cast<Intrinsic>("int_x86_avx10_"#OpcodeStr#"bf16256"),
                                 v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  }
}

// Binary op selected through a generic node. MaskOpNode defaults to OpNode.
multiclass avx10_fp_binop_bf16<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                               X86SchedWriteSizes sched,
                               bit IsCommutable = 0,
                               SDPatternOperator MaskOpNode = OpNode> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
                            v32bf16_info, sched.PH.ZMM, IsCommutable>, EVEX_V512,
                            T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
                                 v8bf16x_info, sched.PH.XMM, IsCommutable>, EVEX_V128,
                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode,
                                 v16bf16x_info, sched.PH.YMM, IsCommutable>, EVEX_V256,
                                 T_MAP5, PD, EVEX_CD8<16, CD8VF>;
  }
}

// These records are given an empty Uses list and mayRaiseFPException = 0.
let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VADDBF16 : avx10_fp_binop_bf16<0x58, "vadd", fadd, SchedWriteFAddSizes, 1>;
defm VSUBBF16 : avx10_fp_binop_bf16<0x5C, "vsub", fsub, SchedWriteFAddSizes, 0>;
defm VMULBF16 : avx10_fp_binop_bf16<0x59, "vmul", fmul, SchedWriteFMulSizes, 1>;
defm VDIVBF16 : avx10_fp_binop_bf16<0x5E, "vdiv", fdiv, SchedWriteFDivSizes, 0>;
defm VMINBF16 : avx10_fp_binop_int_bf16<0x5D, "vmin", SchedWriteFCmpSizes, 0>;
defm VMAXBF16 : avx10_fp_binop_int_bf16<0x5F, "vmax", SchedWriteFCmpSizes, 0>;
}

// VCOMISBF16
let Uses = []<Register>, mayRaiseFPException = 0,
  Defs = [EFLAGS], Predicates = [HasAVX10_2] in {
  //TODO: Replace null_frag with X86fcmp to support lowering `fcmp oeq bfloat *`
  //which may require extend supports on BFR16X, loadbf16, ...
  defm VCOMISBF16Z : sse12_ord_cmp<0x2F, FR16X, null_frag, bf16, f16mem, loadf16,
                                   "comisbf16", SSEPackedSingle>, T_MAP5, PD, EVEX,
                                   VEX_LIG, EVEX_CD8<16, CD8VT1>;

  let isCodeGenOnly = 1 in {
    defm VCOMISBF16Z : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8bf16, f16mem,
                                         sse_load_bf16, "comisbf16", SSEPackedSingle>,
                                         T_MAP5, PD, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
  }
}

// VCMPBF16
// Compare-into-mask forms at opcode 0xC2 with an 8-bit condition immediate:
// rri (register; trailing `1` argument as in the original), rmi (full-vector
// load) and rmbi (broadcast load, EVEX_B). The plain pattern uses X86cmpm and
// the second pattern X86cmpm_su.
multiclass avx10_vcmp_common_bf16<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let mayRaiseFPException = 0 in {
    defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, $src2, $src1", "$src1, $src2, $cc",
                     (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                     (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                     1>, Sched<[sched]>;

    defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                     (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, $src2, $src1", "$src1, $src2, $cc",
                     (X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                              timm:$cc),
                     (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
                                 timm:$cc)>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>;

    defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, ${src2}"#_.BroadcastStr#", $src1",
                     "$src1, ${src2}"#_.BroadcastStr#", $cc",
                     (X86cmpm (_.VT _.RC:$src1),
                              (_.VT (_.BroadcastLdFrag addr:$src2)),
                              timm:$cc),
                     (X86cmpm_su (_.VT _.RC:$src1),
                                 (_.VT (_.BroadcastLdFrag addr:$src2)),
                                 timm:$cc)>,
                     EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}

multiclass avx10_vcmp_bf16<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx10_vcmp_common_bf16<sched.ZMM, _.info512>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx10_vcmp_common_bf16<sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx10_vcmp_common_bf16<sched.YMM, _.info256>, EVEX_V256;
  }
}

defm VCMPBF16 : avx10_vcmp_bf16<SchedWriteFCmp, avx512vl_bf16_info>,
                AVX512XDIi8Base, EVEX, VVVV,
                EVEX_CD8<16, CD8VF>, TA;


// VSQRTBF16
// The "bf16" suffix is appended to OpcodeStr here, not by the caller.
multiclass avx10_sqrt_packed_bf16<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "bf16"),
                              sched.PH.ZMM, v32bf16_info>,
                              EVEX_V512, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "bf16"),
                                   sched.PH.XMM, v8bf16x_info>,
                                   EVEX_V128, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "bf16"),
                                   sched.PH.YMM, v16bf16x_info>,
                                   EVEX_V256, PD, T_MAP5, EVEX_CD8<16, CD8VF>;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in
defm VSQRTBF16 : avx10_sqrt_packed_bf16<0x51, "vsqrt", SchedWriteFSqrtSizes>;

// VRSQRTBF16, VRCPBF16, VGETEXPBF16 (VSQRTBF16 is defined above)
// The inner records are named BF16Z*, so callers pass the prefix without
// "BF16" (e.g. `defm VRSQRT`); "bf16" is appended to the mnemonic here.
multiclass avx10_fp14_bf16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
  defm BF16Z : avx512_fp14_p<opc, !strconcat(OpcodeStr, "bf16"),
                             OpNode, sched.ZMM, v32bf16_info>,
                             EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm BF16Z128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "bf16"),
                                  OpNode, sched.XMM, v8bf16x_info>,
                                  EVEX_V128;
    defm BF16Z256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "bf16"),
                                  OpNode, sched.YMM, v16bf16x_info>,
                                  EVEX_V256;
  }
}

defm VRSQRT : avx10_fp14_bf16<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>,
              T_MAP6, PS, EVEX_CD8<16, CD8VF>;
defm VRCP : avx10_fp14_bf16<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>,
            T_MAP6, PS, EVEX_CD8<16, CD8VF>;
defm VGETEXP : avx10_fp14_bf16<0x42, "vgetexp", X86fgetexp, SchedWriteFRnd>,
               T_MAP6, PS, EVEX_CD8<16, CD8VF>;

// VSCALEFBF16
multiclass avx10_fp_scalef_bf16<bits<8> opc, string OpcodeStr,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32bf16_info>,
                              EVEX_V512, T_MAP6, PS, EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8bf16x_info>,
                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
    defm Z256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16bf16x_info>,
                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6, PS;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in
defm VSCALEFBF16 : avx10_fp_scalef_bf16<0x2C, "vscalef", SchedWriteFAdd>;

// VREDUCEBF16, VRNDSCALEBF16, VGETMANTBF16
multiclass avx10_common_unary_fp_packed_imm_bf16<string OpcodeStr,
    AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
    SDPatternOperator MaskOpNode, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                      sched.ZMM, _.info512>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.XMM, _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
                                           sched.YMM, _.info256>, EVEX_V256;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VREDUCEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vreduce", avx512vl_bf16_info, 0x56,
                   X86VReduce, X86VReduce, SchedWriteFRnd>,
                   AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
defm VRNDSCALEBF16 : avx10_common_unary_fp_packed_imm_bf16<"vrndscale", avx512vl_bf16_info, 0x08,
                     X86any_VRndScale, X86VRndScale, SchedWriteFRnd>,
                     AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
defm VGETMANTBF16 : avx10_common_unary_fp_packed_imm_bf16<"vgetmant", avx512vl_bf16_info, 0x26,
                    X86VGetMant, X86VGetMant, SchedWriteFRnd>,
                    AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
}

// VFPCLASSBF16
// Passes the width suffix ("z"/"x"/"y") and an empty register list to
// avx512_vector_fpclass for each vector width.
multiclass avx10_fp_fpclass_bf16<string OpcodeStr, bits<8> opcVec,
                                 X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_vector_fpclass<opcVec, OpcodeStr, sched.ZMM,
                                 avx512vl_bf16_info.info512, "z",
                                 []<Register>>, EVEX_V512;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.XMM,
                                      avx512vl_bf16_info.info128, "x",
                                      []<Register>>, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opcVec, OpcodeStr, sched.YMM,
                                      avx512vl_bf16_info.info256, "y",
                                      []<Register>>, EVEX_V256;
  }
}

defm VFPCLASSBF16 : avx10_fp_fpclass_bf16<"vfpclass", 0x66, SchedWriteFCmp>,
                    AVX512XDIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;

// VF[,N]M[ADD,SUB][132,213,231]BF16
// 213 operand order.
multiclass avx10_fma3p_213_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                               sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
                               EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VFMADD213BF16 : avx10_fma3p_213_bf16<0xA8, "vfmadd213bf16", any_fma,
                                          fma, SchedWriteFMA>;
defm VFMSUB213BF16 : avx10_fma3p_213_bf16<0xAA, "vfmsub213bf16", X86any_Fmsub,
                                          X86Fmsub, SchedWriteFMA>;
defm VFNMADD213BF16 : avx10_fma3p_213_bf16<0xAC, "vfnmadd213bf16", X86any_Fnmadd,
                                           X86Fnmadd, SchedWriteFMA>;
defm VFNMSUB213BF16 : avx10_fma3p_213_bf16<0xAE, "vfnmsub213bf16", X86any_Fnmsub,
                                           X86Fnmsub, SchedWriteFMA>;
}

// 231 operand order.
multiclass avx10_fma3p_231_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                               sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
                               EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VFMADD231BF16 : avx10_fma3p_231_bf16<0xB8, "vfmadd231bf16", any_fma,
                                          fma, SchedWriteFMA>;
defm VFMSUB231BF16 : avx10_fma3p_231_bf16<0xBA, "vfmsub231bf16", X86any_Fmsub,
                                          X86Fmsub, SchedWriteFMA>;
defm VFNMADD231BF16 : avx10_fma3p_231_bf16<0xBC, "vfnmadd231bf16", X86any_Fnmadd,
                                           X86Fnmadd, SchedWriteFMA>;
defm VFNMSUB231BF16 : avx10_fma3p_231_bf16<0xBE, "vfnmsub231bf16", X86any_Fnmsub,
                                           X86Fnmsub, SchedWriteFMA>;
}

// BF16 fused multiply-add, 132 operand order, at the three vector widths.
multiclass avx10_fma3p_132_bf16<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode, SDNode MaskOpNode,
                                X86SchedWriteWidths sched> {
  let Predicates = [HasAVX10_2_512] in
  defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                               sched.ZMM, v32bf16_info>, EVEX_V512, T_MAP6, PS,
                               EVEX_CD8<16, CD8VF>;
  let Predicates = [HasAVX10_2] in {
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.XMM, v8bf16x_info>, EVEX_V128, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
                                    sched.YMM, v16bf16x_info>, EVEX_V256, T_MAP6, PS,
                                    EVEX_CD8<16, CD8VF>;
  }
}

let Uses = []<Register>, mayRaiseFPException = 0 in {
defm VFMADD132BF16 : avx10_fma3p_132_bf16<0x98, "vfmadd132bf16", any_fma,
                                          fma, SchedWriteFMA>;
defm VFMSUB132BF16 : avx10_fma3p_132_bf16<0x9A, "vfmsub132bf16", X86any_Fmsub,
                                          X86Fmsub, SchedWriteFMA>;
defm VFNMADD132BF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132bf16", X86any_Fnmadd,
                                           X86Fnmadd, SchedWriteFMA>;
defm VFNMSUB132BF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132bf16", X86any_Fnmsub,
                                           X86Fnmsub, SchedWriteFMA>;
}

//-------------------------------------------------
// AVX10 COMEF instructions
//-------------------------------------------------
// Scalar compare that sets EFLAGS, on a scalar FP register class. Both forms
// are isCodeGenOnly: rr compares two registers, rm compares a register with
// a value loaded through ld_frag.
multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT,
                        SDPatternOperator OpNode, string OpcodeStr,
                        X86MemOperand x86memop, PatFrag ld_frag,
                        Domain d, X86FoldableSchedWrite sched = WriteFComX>{
  let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in {
    def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                    [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>,
                    EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in {
      def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
                      !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                      [(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>,
                      EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
  }
}

// Vector-register ("_Int") flavour of the EFLAGS compare:
//   rr_Int  - register/register.
//   rm_Int  - register/memory; the pattern loads through _.LdFrag while the
//             operand itself is the scalar memory operand.
//   rrb_Int - "{sae}" form (EVEX_B) with no selection pattern.
multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
                            string OpcodeStr,
                            Domain d,
                            X86FoldableSchedWrite sched = WriteFComX> {
  let ExeDomain = d, mayRaiseFPException = 1 in {
    def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                        !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                        [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>,
                        EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
    let mayLoad = 1 in {
      // Load-folding form: use the folded scheduling classes, matching the
      // rm form of avx10_com_ef, rather than the register-form class.
      def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2),
                          !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                          [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>,
                          EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    }
    def rrb_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                         !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
                         []>,
                         EVEX, EVEX_V128, EVEX_B, Sched<[sched]>, SIMD_EXC;
  }
}

// Each VUCOMX* name is instantiated twice: once with avx10_com_ef (rr/rm) and
// once with avx10_com_ef_int (rr_Int/rm_Int/rrb_Int). VCOMX* only gets the
// _Int forms.
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
  defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512,
                                "vucomxsd", f64mem, loadf64, SSEPackedDouble>,
                                TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512,
                                "vucomxsh", f16mem, loadf16, SSEPackedSingle>,
                                T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
  defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512,
                                "vucomxss", f32mem, loadf32, SSEPackedSingle>,
                                TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
                                   "vcomxsd", SSEPackedDouble>,
                                   TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512,
                                   "vcomxsh", SSEPackedSingle>,
                                   T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
  defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512,
                                   "vcomxss", SSEPackedSingle>,
                                   TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
  defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512,
                                    "vucomxsd", SSEPackedDouble>,
                                    TB, XD, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512,
                                    "vucomxsh", SSEPackedSingle>,
                                    T_MAP5, XS, EVEX_CD8<16, CD8VT1>;
  defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512,
                                    "vucomxss", SSEPackedSingle>,
                                    TB, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
}

//-------------------------------------------------
// AVX10 MOVZXC (COPY) instructions
//-------------------------------------------------
// Only the two *rr records carry a selection pattern (X86vzmovl on v4i32 /
// v8i16). The rm/mr/rr2 records are pattern-less, isCodeGenOnly and
// ForceDisassemble; the rr2 records are reachable from assembly through the
// "vmovd.s"/"vmovw.s" aliases. Each brace-less `let ... in` below applies to
// the single def that follows it.
let Predicates = [HasAVX10_2] in {
  def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                    (ins VR128X:$src),
                                    "vmovd\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst, (v4i32 (X86vzmovl
                                                       (v4i32 VR128X:$src))))]>, EVEX,
                                    Sched<[WriteVecMoveFromGpr]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
  def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                                    (ins i32mem:$src),
                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<32, CD8VT1>,
                                    Sched<[WriteVecLoad]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
  def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
                                    (ins i32mem:$dst, VR128X:$src),
                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<32, CD8VT1>,
                                    Sched<[WriteVecStore]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
                                     (ins VR128X:$src),
                                     "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
                                     Sched<[WriteVecMoveFromGpr]>;
  def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
                  (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;

  def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
                                    (ins VR128X:$src),
                                    "vmovw\t{$src, $dst|$dst, $src}",
                                    [(set VR128X:$dst, (v8i16 (X86vzmovl
                                                       (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
                                    Sched<[WriteVecMoveFromGpr]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
  def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                                    (ins i16mem:$src),
                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
                                    Sched<[WriteVecLoad]>;

  // The vmovw store uses a 16-bit memory operand, consistent with the
  // EVEX_CD8<16, CD8VT1> scaling on this record and with the i16mem operand
  // of the matching load form above.
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
  def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
                                    (ins i16mem:$dst, VR128X:$src),
                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
                                    Sched<[WriteVecStore]>;

  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                     (ins VR128X:$src),
                                     "vmovw\t{$src, $dst|$dst, $src}",
                                     []>, EVEX, T_MAP5,
                                     Sched<[WriteVecMoveFromGpr]>;
  def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
                  (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}

// MOVRS
// Maskable load-only form. The selection node is the intrinsic whose name is
// built as "int_x86_avx10_" # OpStr # <vector size>.
multiclass vmovrs_p<bits<8> opc, string OpStr, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.MemOp:$src), OpStr, "$src", "$src",
                            (_.VT (!cast<Intrinsic>("int_x86_avx10_"#OpStr#_.Size)
                                   addr:$src))>, EVEX;
  }
}

// Width expansion of vmovrs_p; every width additionally requires HasMOVRS
// and In64BitMode.
multiclass vmovrs_p_vl<bits<8> opc, string OpStr, AVX512VLVectorVTInfo _Vec> {
  let Predicates = [HasMOVRS, HasAVX10_2_512, In64BitMode] in
  defm Z : vmovrs_p<opc, OpStr, _Vec.info512>, EVEX_V512;
  let Predicates = [HasMOVRS, HasAVX10_2, In64BitMode] in {
    defm Z128 : vmovrs_p<opc, OpStr, _Vec.info128>, EVEX_V128;
    defm Z256 : vmovrs_p<opc, OpStr, _Vec.info256>, EVEX_V256;
  }
}

defm VMOVRSB : vmovrs_p_vl<0x6f, "vmovrsb", avx512vl_i8_info>,
               T_MAP5, XD, EVEX_CD8<8, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSW : vmovrs_p_vl<0x6f, "vmovrsw", avx512vl_i16_info>,
               T_MAP5, XD, REX_W, EVEX_CD8<16, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
               T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
               T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;

// SM4(EVEX)
// The 128/256-bit records get AddedComplexity = 1; the 512-bit record does
// not.
multiclass avx10_sm4_base<string OpStr> {
  // SM4_Base is in X86InstrSSE.td.
  let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in {
    defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
    defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
  }
  let Predicates = [HasSM4, HasAVX10_2_512] in
  defm Z : SM4_Base<OpStr, VR512, "512", loadv16i32, i512mem>, EVEX_V512;
}

defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;