xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision 8311bc5f17dec348749f763b82dfe2737bc53cd7)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
15// Group template arguments that can be derived from the vector type (EltNum x
16// EltVT).  These are things like the register class for the writemask, etc.
17// The idea is to pass one of these as the template argument rather than the
18// individual arguments.
19// The template is also used for scalar types, in this case numelts is 1.
//
// Template arguments:
//   numelts - number of elements (1 for the scalar instances).
//   eltvt   - element value type.
//   rc      - register class, stored in RC.
//   suffix  - mnemonic suffix, stored in Suffix (defaults to "").
//
// Most fields are resolved with !cast on a name assembled from NumElts and
// EltVT (e.g. "VK" # NumElts, "load" # VTName), so the records they name have
// to be defined elsewhere.  Fields whose !if falls through to ? stay unset.
20class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
21                      string suffix = ""> {
22  RegisterClass RC = rc;
23  ValueType EltVT = eltvt;
24  int NumElts = numelts;
25
26  // Corresponding mask register class.
27  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
28
29  // Corresponding mask register pair class.
  // Left unset (?) when NumElts is greater than 16.
30  RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31                              !cast<RegisterOperand>("VK" # NumElts # "Pair"));
32
33  // Corresponding write-mask register class.
34  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
35
36  // The mask VT.
37  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
38
39  // Suffix used in the instruction mnemonic.
40  string Suffix = suffix;
41
42  // VTName is a string name for vector VT. For vector types it will be
43  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44  // It is a little bit complex for scalar types, where NumElts = 1.
45  // In this case we build the 128-bit vector name from the element size:
  // 8 elements for 16-bit, 4 for 32-bit and 2 for 64-bit elements
  // (e.g. v8f16, v4f32 or v2f64).
46  string VTName = "v" # !if (!eq (NumElts, 1),
47                        !if (!eq (EltVT.Size, 16), 8,
48                        !if (!eq (EltVT.Size, 32), 4,
49                        !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
50
51  // The vector VT.
52  ValueType VT = !cast<ValueType>(VTName);
53
54  string EltTypeName = !cast<string>(EltVT);
55  // Size of the element type in bits, e.g. 32 for v16i32.
  // Kept as a string; produced by deleting the "b", "f" and "i" substrings
  // from the element type name (so "bf16" becomes "16").
56  string EltSizeName = !subst("i", "", !subst("f", "", !subst("b", "", EltTypeName)));
57  int EltSize = EltVT.Size;
58
59  // "i" for integer types and "f" for floating-point types
  // ("bf16" also yields "f" because the "b" is stripped).
60  string TypeVariantName = !subst("b", "", !subst(EltSizeName, "", EltTypeName));
61
62  // Size of RC in bits, e.g. 512 for VR512.
63  int Size = VT.Size;
64
65  // The corresponding memory operand, e.g. i512mem for VR512.
66  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Scalar memory operand named after the element type; "bf16" maps to the
  // same operand name as "f16" because the "b" is stripped.
67  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(!subst("b", "", EltTypeName) # "mem");
68  // FP scalar memory operand for intrinsics - shmem/ssmem/sdmem.
  // f16 and bf16 both select shmem; unset (?) for non-FP element types.
69  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
70                           !if (!eq (EltTypeName, "bf16"), !cast<Operand>("shmem"),
71                           !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
72                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?))));
73
74  // Load patterns
75  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
76
77  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
78
79  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # !subst("b", "", EltTypeName));
80  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
81
  // Scalar-intrinsic load fragments; f16 and bf16 both select sse_load_f16,
  // and the field is unset (?) for non-FP element types.
82  PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
83                               !if (!eq (EltTypeName, "bf16"), !cast<PatFrags>("sse_load_f16"),
84                               !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
85                               !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?))));
86
87  // The string to specify embedded broadcast in assembly.
88  string BroadcastStr = "{1to" # NumElts # "}";
89
90  // 8-bit compressed displacement tuple/subvector format.  This is only
91  // defined for NumElts <= 8.
  // The test below is (NumElts >> 4) == 0, i.e. NumElts < 16; for the
  // power-of-two element counts instantiated in this file that is <= 8.
92  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
93                               !cast<CD8VForm>("CD8VT" # NumElts), ?);
94
  // Sub-register index for 128-bit (sub_xmm) and 256-bit (sub_ymm) types;
  // unset (?) for any other size.
95  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
96                          !if (!eq (Size, 256), sub_ymm, ?));
97
  // Execution domain: f32/f16/bf16 use SSEPackedSingle, f64 uses
  // SSEPackedDouble, everything else SSEPackedInt.
98  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
99                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
100                     !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
101                     !if (!eq (EltTypeName, "bf16"), SSEPackedSingle, // FIXME?
102                     SSEPackedInt))));
103
  // Scalar FP register class.  Note the fall-through: every element type that
  // is not f32/f16/bf16 (including the integer types) gets FR64X.
104  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
105                      !if (!eq (EltTypeName, "f16"), FR16X,
106                      !if (!eq (EltTypeName, "bf16"), FR16X,
107                      FR64X)));
108
  // All-zeros vector of type VT, used as the pass-through of zero-masking
  // patterns.
109  dag ImmAllZerosV = (VT immAllZerosV);
110
  // Record-name suffix by vector size: "Z128", "Z256", or "Z" otherwise.
111  string ZSuffix = !if (!eq (Size, 128), "Z128",
112                   !if (!eq (Size, 256), "Z256", "Z"));
113}
114
// Info records for the vector types whose register class is VR512.
115def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
116def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
117def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
118def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
119def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
120def v32bf16_info: X86VectorVTInfo<32, bf16, VR512, "pbf">;
121def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
122def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
123
124// "x" in v32i8x_info means RC = VR256X
// Info records for the vector types whose register class is VR256X; the
// mnemonic suffixes match those of the VR512 records.
125def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
126def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
127def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
128def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
129def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
130def v16bf16x_info: X86VectorVTInfo<16, bf16, VR256X, "pbf">;
131def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
132def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;
133
// Info records for the vector types whose register class is VR128X.
134def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
135def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
136def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
137def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
138def v8f16x_info  : X86VectorVTInfo<8,  f16, VR128X, "ph">;
139def v8bf16x_info : X86VectorVTInfo<8,  bf16, VR128X, "pbf">;
140def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
141def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
142
143// We map scalar types to the smallest (128-bit) vector type
144// with the appropriate element type. This allows us to use the same masking logic.
// All of these pass numelts = 1.  The integer records use GR32/GR64 as RC,
// while the floating-point records use VR128X.
145def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
146def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
147def f16x_info    : X86VectorVTInfo<1,  f16, VR128X, "sh">;
148def bf16x_info   : X86VectorVTInfo<1,  bf16, VR128X, "sbf">;
149def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
150def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
151
// Bundles three X86VectorVTInfo records so that a single template argument
// can carry all of them.  The arguments are stored unchanged in info512,
// info256 and info128 respectively.
152class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
153                           X86VectorVTInfo i128> {
154  X86VectorVTInfo info512 = i512;
155  X86VectorVTInfo info256 = i256;
156  X86VectorVTInfo info128 = i128;
157}
158
// One AVX512VLVectorVTInfo per element type, each grouping the VR512, VR256X
// and VR128X info records of that element type (in that order).
159def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
160                                             v16i8x_info>;
161def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
162                                             v8i16x_info>;
163def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
164                                             v4i32x_info>;
165def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
166                                             v2i64x_info>;
167def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
168                                             v8f16x_info>;
169def avx512vl_bf16_info : AVX512VLVectorVTInfo<v32bf16_info, v16bf16x_info,
170                                             v8bf16x_info>;
171def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
172                                             v4f32x_info>;
173def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
174                                             v2f64x_info>;
175
// Info record for a mask (vXi1) type: stores the mask register class (KRC),
// the write-mask register class (KRCWM) and the mask value type (KVT) exactly
// as passed in.
176class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
177                       ValueType _vt> {
178  RegisterClass KRC = _krc;
179  RegisterClass KRCWM = _krcwm;
180  ValueType KVT = _vt;
181}
182
// Mask-type info records for 1, 2, 4, 8, 16, 32 and 64 mask bits, each pairing
// VK<N> with VK<N>WM and v<N>i1.
183def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
184def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
185def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
186def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
187def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
188def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
189def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
190
191// Used for matching masked operations. Ensures the operation part only has a
192// single use.
// Wraps vselect with a predicate that returns isProfitableToFormMaskedOp(N);
// that helper is defined outside this file, so the single-use check described
// above lives there.
193def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
194                           (vselect node:$mask, node:$src1, node:$src2), [{
195  return isProfitableToFormMaskedOp(N);
196}]>;
197
// Same shape as the vselect fragment used for masked operations, but wrapping
// X86selects; it is the Select operator passed by the *_scalar multiclasses.
// The predicate defers to isProfitableToFormMaskedOp(N), which is defined
// outside this file.
198def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
199                              (X86selects node:$mask, node:$src1, node:$src2), [{
200  return isProfitableToFormMaskedOp(N);
201}]>;
202
203// This multiclass generates the masking variants from the non-masking
204// variant.  It only provides the assembly pieces for the masking variants.
205// It assumes custom ISel patterns for masking which can be provided as
206// template arguments.
//
// Defines three records: NAME (unmasked), NAME#k (merge-masking, EVEX_K) and
// NAME#kz (zero-masking, EVEX_KZ).
//
// Template arguments:
//   O, F                  - opcode byte and instruction format.
//   Outs                  - output operand dag shared by all three records.
//   Ins / MaskingIns / ZeroMaskingIns
//                         - input operand dags of NAME / NAME#k / NAME#kz.
//   OpcodeStr             - mnemonic.
//   AttSrcAsm/IntelSrcAsm - source operand strings for the two syntaxes; the
//                           $dst and {${mask}} / {z} decorations are added
//                           here.
//   Pattern / MaskingPattern / ZeroMaskingPattern
//                         - ISel patterns of the three records.
//   MaskingConstraint     - constraint string used only by NAME#k.
//   IsCommutable / IsKCommutable / IsKZCommutable
//                         - isCommutable of NAME / NAME#k / NAME#kz;
//                           IsKZCommutable defaults to IsCommutable.
//   ClobberConstraint     - constraint string applied to all three records.
207multiclass AVX512_maskable_custom<bits<8> O, Format F,
208                                  dag Outs,
209                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
210                                  string OpcodeStr,
211                                  string AttSrcAsm, string IntelSrcAsm,
212                                  list<dag> Pattern,
213                                  list<dag> MaskingPattern,
214                                  list<dag> ZeroMaskingPattern,
215                                  string MaskingConstraint = "",
216                                  bit IsCommutable = 0,
217                                  bit IsKCommutable = 0,
218                                  bit IsKZCommutable = IsCommutable,
219                                  string ClobberConstraint = ""> {
220  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
221    def NAME: AVX512<O, F, Outs, Ins,
222                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
223                                     "$dst, "#IntelSrcAsm#"}",
224                       Pattern>;
225
226  // Prefer over VMOV*rrk Pat<>
227  let isCommutable = IsKCommutable in
228    def NAME#k: AVX512<O, F, Outs, MaskingIns,
229                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
230                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
231                       MaskingPattern>,
232              EVEX_K {
233      // In case of the 3src subclass this is overridden with a let.
      // Combines the two constraint strings: whichever one is non-empty is
      // used alone; if both are non-empty they are joined as
      // "ClobberConstraint, MaskingConstraint".
234      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
235                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
236                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
237    }
238
239  // Zero mask does not add any restrictions to commute operands transformation.
240  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  // (The flag actually used is IsKZCommutable, which defaults to IsCommutable.)
241  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
242      Constraints = ClobberConstraint in
243    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
244                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
245                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
246                       ZeroMaskingPattern>,
247              EVEX_KZ;
248}
249
250
251// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists for AVX512_maskable_custom from dags:
//   unmasked     : (set _.RC:$dst, RHS)
//   merge-masked : (set _.RC:$dst, MaskingRHS)   -- MaskingRHS is supplied
//                  whole by the caller, including its select.
//   zero-masked  : (set _.RC:$dst, (Select mask, RHS, all-zeros))
// Select defaults to vselect_mask; all remaining arguments are forwarded
// unchanged.
252multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
253                                  dag Outs,
254                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
255                                  string OpcodeStr,
256                                  string AttSrcAsm, string IntelSrcAsm,
257                                  dag RHS, dag MaskingRHS,
258                                  SDPatternOperator Select = vselect_mask,
259                                  string MaskingConstraint = "",
260                                  bit IsCommutable = 0,
261                                  bit IsKCommutable = 0,
262                                  bit IsKZCommutable = IsCommutable,
263                                  string ClobberConstraint = ""> :
264  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
265                         AttSrcAsm, IntelSrcAsm,
266                         [(set _.RC:$dst, RHS)],
267                         [(set _.RC:$dst, MaskingRHS)],
268                         [(set _.RC:$dst,
269                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
270                         MaskingConstraint, IsCommutable,
271                         IsKCommutable, IsKZCommutable, ClobberConstraint>;
272
273// This multiclass generates the unconditional/non-masking, the masking and
274// the zero-masking variant of the vector instruction.  In the masking case, the
275// preserved vector elements come from a new dummy input operand tied to $dst.
276// This version uses a separate dag for non-masking and masking.
//
// RHS is used for the unmasked pattern and MaskRHS inside both vselect_mask
// patterns.  The masked operand list is (_.RC:$src0, _.KRCWM:$mask) followed
// by Ins; the zero-masked list is (_.KRCWM:$mask) followed by Ins.  $src0 is
// tied to $dst through the "$src0 = $dst" masking constraint.
// Note that ClobberConstraint precedes the commutable flags in this template
// argument list.
277multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
278                           dag Outs, dag Ins, string OpcodeStr,
279                           string AttSrcAsm, string IntelSrcAsm,
280                           dag RHS, dag MaskRHS,
281                           string ClobberConstraint = "",
282                           bit IsCommutable = 0, bit IsKCommutable = 0,
283                           bit IsKZCommutable = IsCommutable> :
284   AVX512_maskable_custom<O, F, Outs, Ins,
285                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
286                          !con((ins _.KRCWM:$mask), Ins),
287                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
288                          [(set _.RC:$dst, RHS)],
289                          [(set _.RC:$dst,
290                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
291                          [(set _.RC:$dst,
292                              (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
293                          "$src0 = $dst", IsCommutable, IsKCommutable,
294                          IsKZCommutable, ClobberConstraint>;
295
296// This multiclass generates the unconditional/non-masking, the masking and
297// the zero-masking variant of the vector instruction.  In the masking case, the
298// preserved vector elements come from a new dummy input operand tied to $dst.
//
// The same RHS dag is used for all three variants.  The masked operand list
// is (_.RC:$src0, _.KRCWM:$mask) followed by Ins, the zero-masked list is
// (_.KRCWM:$mask) followed by Ins, and the merge-masked pattern is
// (Select mask, RHS, $src0) with $src0 tied to $dst.  Select defaults to
// vselect_mask.
299multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
300                           dag Outs, dag Ins, string OpcodeStr,
301                           string AttSrcAsm, string IntelSrcAsm,
302                           dag RHS,
303                           bit IsCommutable = 0, bit IsKCommutable = 0,
304                           bit IsKZCommutable = IsCommutable,
305                           SDPatternOperator Select = vselect_mask,
306                           string ClobberConstraint = ""> :
307   AVX512_maskable_common<O, F, _, Outs, Ins,
308                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
309                          !con((ins _.KRCWM:$mask), Ins),
310                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
311                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
312                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
313                          IsKZCommutable, ClobberConstraint>;
314
315// This multiclass generates the unconditional/non-masking, the masking and
316// the zero-masking variant of the scalar instruction.
// Thin wrapper over AVX512_maskable that passes 0 for all three commutable
// flags and X86selects_mask as the Select operator.
317multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
318                           dag Outs, dag Ins, string OpcodeStr,
319                           string AttSrcAsm, string IntelSrcAsm,
320                           dag RHS> :
321   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
322                   RHS, 0, 0, 0, X86selects_mask>;
323
324// Similar to AVX512_maskable but in this case one of the source operands
325// ($src1) is already tied to $dst so we just use that for the preserved
326// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
327// $src1.
//
// $src1 is prepended to NonTiedIns for every variant; the masked and
// zero-masked operand lists are identical (_.RC:$src1, _.KRCWM:$mask, ...).
// When MaskOnly is set the unmasked pattern is replaced by (null_frag), while
// the merge-masked pattern (Select mask, RHS, $src1) still uses RHS.
// An empty masking constraint is passed, so this multiclass itself does not
// add the $src1/$dst tie.
328multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
329                                dag Outs, dag NonTiedIns, string OpcodeStr,
330                                string AttSrcAsm, string IntelSrcAsm,
331                                dag RHS,
332                                bit IsCommutable = 0,
333                                bit IsKCommutable = 0,
334                                SDPatternOperator Select = vselect_mask,
335                                bit MaskOnly = 0> :
336   AVX512_maskable_common<O, F, _, Outs,
337                          !con((ins _.RC:$src1), NonTiedIns),
338                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
339                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
340                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
341                          !if(MaskOnly, (null_frag), RHS),
342                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
343                          Select, "", IsCommutable, IsKCommutable>;
344
345// Similar to AVX512_maskable_3src but in this case the input VT for the tied
346// operand differs from the output VT. This requires a bitconvert on
347// the preserved vector going into the vselect.
348// NOTE: The unmasked pattern is disabled.
//
// OutVT describes the result; InVT supplies the register class of $src1 and
// the write-mask register class.  (null_frag) is passed as the RHS given to
// AVX512_maskable_common, and the merge-masked pattern selects between RHS
// and (bitconvert $src1) with vselect_mask.
349multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
350                                     X86VectorVTInfo InVT,
351                                     dag Outs, dag NonTiedIns, string OpcodeStr,
352                                     string AttSrcAsm, string IntelSrcAsm,
353                                     dag RHS, bit IsCommutable = 0> :
354   AVX512_maskable_common<O, F, OutVT, Outs,
355                          !con((ins InVT.RC:$src1), NonTiedIns),
356                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
357                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
358                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
359                          (vselect_mask InVT.KRCWM:$mask, RHS,
360                           (bitconvert InVT.RC:$src1)),
361                           vselect_mask, "", IsCommutable>;
362
// Scalar flavour of AVX512_maskable_3src: forwards every argument unchanged
// and passes X86selects_mask as the Select operator.
363multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
364                                     dag Outs, dag NonTiedIns, string OpcodeStr,
365                                     string AttSrcAsm, string IntelSrcAsm,
366                                     dag RHS,
367                                     bit IsCommutable = 0,
368                                     bit IsKCommutable = 0,
369                                     bit MaskOnly = 0> :
370   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
371                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
372                        X86selects_mask, MaskOnly>;
373
// Variant of AVX512_maskable whose masked and zero-masked records get empty
// pattern lists ([]); only the unmasked record receives the caller's Pattern.
// Operand lists and the "$src0 = $dst" tie are built the same way as in
// AVX512_maskable.
374multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
375                                  dag Outs, dag Ins,
376                                  string OpcodeStr,
377                                  string AttSrcAsm, string IntelSrcAsm,
378                                  list<dag> Pattern> :
379   AVX512_maskable_custom<O, F, Outs, Ins,
380                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
381                          !con((ins _.KRCWM:$mask), Ins),
382                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
383                          "$src0 = $dst">;
384
// Variant of AVX512_maskable_3src whose masked and zero-masked records get
// empty pattern lists ([]); only the unmasked record receives the caller's
// Pattern.  $src1 is prepended to NonTiedIns for every variant and an empty
// masking constraint is passed.
385multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
386                                       dag Outs, dag NonTiedIns,
387                                       string OpcodeStr,
388                                       string AttSrcAsm, string IntelSrcAsm,
389                                       list<dag> Pattern> :
390   AVX512_maskable_custom<O, F, Outs,
391                          !con((ins _.RC:$src1), NonTiedIns),
392                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
393                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
394                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
395                          "">;
396
397// Instruction with mask that puts result in mask register,
398// like "compare" and "vptest"
// Defines two records only: NAME (unmasked) and NAME#k (masked, EVEX_K).
// There is no zero-masking record and no constraint string; both records
// share the single IsCommutable flag.
399multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
400                                  dag Outs,
401                                  dag Ins, dag MaskingIns,
402                                  string OpcodeStr,
403                                  string AttSrcAsm, string IntelSrcAsm,
404                                  list<dag> Pattern,
405                                  list<dag> MaskingPattern,
406                                  bit IsCommutable = 0> {
407    let isCommutable = IsCommutable in {
408    def NAME: AVX512<O, F, Outs, Ins,
409                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
410                                     "$dst, "#IntelSrcAsm#"}",
411                       Pattern>;
412
413    def NAME#k: AVX512<O, F, Outs, MaskingIns,
414                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
415                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
416                       MaskingPattern>, EVEX_K;
417    }
418}
419
// Wraps the RHS and MaskingRHS dags into patterns that set _.KRC:$dst (the
// mask register class of the info record) and forwards everything else to
// AVX512_maskable_custom_cmp.
420multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
421                                  dag Outs,
422                                  dag Ins, dag MaskingIns,
423                                  string OpcodeStr,
424                                  string AttSrcAsm, string IntelSrcAsm,
425                                  dag RHS, dag MaskingRHS,
426                                  bit IsCommutable = 0> :
427  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
428                         AttSrcAsm, IntelSrcAsm,
429                         [(set _.KRC:$dst, RHS)],
430                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
431
// Mask-producing instruction with an optional input mask.  The masked operand
// list is (_.KRCWM:$mask) followed by Ins, and the masked pattern is
// (and $mask, RHS_su).  RHS_su is a separate dag from RHS; presumably the
// single-use form of the same operation -- confirm at the call sites.
432multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
433                           dag Outs, dag Ins, string OpcodeStr,
434                           string AttSrcAsm, string IntelSrcAsm,
435                           dag RHS, dag RHS_su, bit IsCommutable = 0> :
436   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
437                          !con((ins _.KRCWM:$mask), Ins),
438                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
439                          (and _.KRCWM:$mask, RHS_su), IsCommutable>;
440
441// Used by conversion instructions.
// The caller supplies all three operand lists and all three right-hand-side
// dags (unmasked, merge-masked, zero-masked); each is wrapped in
// (set _.RC:$dst, ...).  The masking constraint is fixed to "$src0 = $dst",
// so MaskingIns is expected to contain $src0.
442multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
443                                  dag Outs,
444                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
445                                  string OpcodeStr,
446                                  string AttSrcAsm, string IntelSrcAsm,
447                                  dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
448  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
449                         AttSrcAsm, IntelSrcAsm,
450                         [(set _.RC:$dst, RHS)],
451                         [(set _.RC:$dst, MaskingRHS)],
452                         [(set _.RC:$dst, ZeroMaskingRHS)],
453                         "$src0 = $dst">;
454
// Three-source form with separate dags for the unmasked (RHS) and masked
// (MaskingRHS) patterns.  $src1 is prepended to NonTiedIns for every variant;
// the merge-masked pattern keeps $src1 for inactive elements and the
// zero-masked pattern uses the all-zeros vector.  IsCommutable and
// IsKCommutable have no defaults here, and an empty masking constraint is
// passed.
455multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
456                               dag Outs, dag NonTiedIns, string OpcodeStr,
457                               string AttSrcAsm, string IntelSrcAsm,
458                               dag RHS, dag MaskingRHS, bit IsCommutable,
459                               bit IsKCommutable> :
460   AVX512_maskable_custom<O, F, Outs,
461                          !con((ins _.RC:$src1), NonTiedIns),
462                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
463                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
464                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
465                          [(set _.RC:$dst, RHS)],
466                          [(set _.RC:$dst,
467                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
468                          [(set _.RC:$dst,
469                            (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
470                          "", IsCommutable, IsKCommutable>;
471
472// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
473// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
474// swizzled by ExecutionDomainFix to pxor.
475// We set canFoldAsLoad because this can be converted to a constant-pool
476// load of an all-zeros value if folding it would be beneficial.
// Both pseudos below take no inputs, write VR512 and match the v16i32 form of
// the constant: AVX512_512_SET0 for all-zeros, AVX512_512_SETALLONES for
// all-ones.
477let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
478    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
479def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
480               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
481def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
482               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
483}
484
// Map the all-zeros constant of the remaining 512-bit vector types onto
// AVX512_512_SET0 (v16i32 is matched by the pseudo's own pattern).
// NOTE(review): there is no v32bf16 entry here -- confirm whether that is
// intentional.
485let Predicates = [HasAVX512] in {
486def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
487def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
488def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
489def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
490def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
491def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
492}
493
494// Alias instructions that allow VPTERNLOG to be used with a mask to create
495// a mix of all ones and all zeros elements. This is done this way to force
496// the same register to be used as input for all three sources.
// Each pseudo takes only a write-mask and matches a vselect between all-ones
// and all-zeros: the _32 form uses a VK16WM mask over v16i32, the _64 form a
// VK8WM mask over v8i64.
497let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
498def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
499                                (ins VK16WM:$mask), "",
500                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
501                                                      (v16i32 immAllOnesV),
502                                                      (v16i32 immAllZerosV)))]>;
503def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
504                                (ins VK8WM:$mask), "",
505                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
506                                           (v8i64 immAllOnesV),
507                                           (v8i64 immAllZerosV)))]>;
508}
509
// All-zeros pseudos for the 128-bit (VR128X, matched as v4i32) and 256-bit
// (VR256X, matched as v8i32) register classes, with the same flags as the
// 512-bit zeroing pseudo: rematerializable, as cheap as a move and foldable
// as a load.
510let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
511    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
512def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
513               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
514def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
515               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
516}
517
// Map the all-zeros constant of the remaining 128-bit types onto
// AVX512_128_SET0 and of the remaining 256-bit types onto AVX512_256_SET0
// (v4i32 and v8i32 are matched by the pseudos' own patterns).
// NOTE(review): there are no v8bf16 / v16bf16 entries here -- confirm whether
// that is intentional.
518let Predicates = [HasAVX512] in {
519def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
520def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
521def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
522def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
523def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
524def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
525def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
526def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
527def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
528def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
529def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
530def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
531}
532
533// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
534// This is expanded by ExpandPostRAPseudos.
// One pseudo per scalar FP zero constant: fp16imm0 in FR16X, fp32imm0 in
// FR32X, fp64imm0 in FR64X and fp128imm0 in VR128X.
535let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
536    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
537  def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
538                          [(set FR16X:$dst, fp16imm0)]>;
539  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
540                          [(set FR32X:$dst, fp32imm0)]>;
541  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
542                          [(set FR64X:$dst, fp64imm0)]>;
543  def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
544                            [(set VR128X:$dst, fp128imm0)]>;
545}
546
547//===----------------------------------------------------------------------===//
548// AVX-512 - VECTOR INSERT
549//
550
551// Supports two different pattern operators for mask and unmasked ops. Allows
552// null_frag to be passed for one.
//
// Template arguments:
//   Opcode           - opcode shared by the register and memory forms.
//   From             - info record of the subvector being inserted ($src2).
//   To               - info record of the destination vector ($src1, $dst).
//   vinsert_insert   - operator used in the unmasked patterns.
//   vinsert_for_mask - operator used in the masked patterns.
//   sched            - scheduling class; the memory form uses sched.Folded
//                      and sched.ReadAfterFold.
//
// Defines rr (register source) and rm (memory source, loaded through
// From.LdFrag) via AVX512_maskable_split.  The mnemonic is
// "vinsert" # From.EltTypeName # "x" # From.NumElts, e.g. "vinsertf32x4" when
// From is a 4 x f32 record.
553multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
554                                  X86VectorVTInfo To,
555                                  SDPatternOperator vinsert_insert,
556                                  SDPatternOperator vinsert_for_mask,
557                                  X86FoldableSchedWrite sched> {
558  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
559    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
560                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
561                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
562                   "$src3, $src2, $src1", "$src1, $src2, $src3",
563                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
564                                         (From.VT From.RC:$src2),
565                                         (iPTR imm)),
566                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
567                                           (From.VT From.RC:$src2),
568                                           (iPTR imm))>,
569                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
570    let mayLoad = 1 in
571    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
572                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
573                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
574                   "$src3, $src2, $src1", "$src1, $src2, $src3",
575                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
576                               (From.VT (From.LdFrag addr:$src2)),
577                               (iPTR imm)),
578                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
579                               (From.VT (From.LdFrag addr:$src2)),
580                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
581                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
582                   Sched<[sched.Folded, sched.ReadAfterFold]>;
583  }
584}
585
// Convenience form of vinsert_for_size_split for the case where one pattern
// operator serves both the unmasked and the masked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched>
    : vinsert_for_size_split<Opcode, From, To,
                             vinsert_insert, vinsert_insert,
                             sched>;
592
// Selection patterns only (no instructions are defined here): an insert of a
// From.VT subvector into a To.VT vector is mapped onto the existing
// instruction whose name starts with InstrStr. INSERT_get_vinsert_imm is
// applied to the matched insert node ($ins) to build the immediate operand,
// and the patterns are guarded by the predicate list p.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    // Subvector in a register -> InstrStr # "rr".
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT From.RC:$src2),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                         To.RC:$src1, From.RC:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Subvector loaded from memory -> InstrStr # "rm".
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT (From.LdFrag addr:$src2)),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                         To.RC:$src1, addr:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
612
// Instantiates all subvector-insert instructions for one element family.
//   EltVT32 / EltVT64     - the 32-bit and 64-bit element types.
//   Opcode128 / Opcode256 - opcodes for 128-bit and 256-bit subvectors.
//   sched                 - scheduling class forwarded to every instruction.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  // 4 x 32-bit elements into a 256-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256" : vinsert_for_size<
                                 Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>,
                             EVEX_V256;
  }

  // 4 x 32-bit elements into a 512-bit vector.
  defm NAME # "32x4Z" : vinsert_for_size<
                            Opcode128,
                            X86VectorVTInfo< 4, EltVT32, VR128X>,
                            X86VectorVTInfo<16, EltVT32, VR512>,
                            vinsert128_insert, sched>,
                        EVEX_V512;

  // 4 x 64-bit elements into a 512-bit vector.
  defm NAME # "64x4Z" : vinsert_for_size<
                            Opcode256,
                            X86VectorVTInfo< 4, EltVT64, VR256X>,
                            X86VectorVTInfo< 8, EltVT64, VR512>,
                            vinsert256_insert, sched>,
                        REX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked operator.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256" : vinsert_for_size_split<
                                 Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 null_frag, vinsert128_insert, sched>,
                             VEX_W1X, EVEX_V256;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked operator.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<
                              Opcode128,
                              X86VectorVTInfo< 2, EltVT64, VR128X>,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              null_frag, vinsert128_insert, sched>,
                          REX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<
                              Opcode256,
                              X86VectorVTInfo< 8, EltVT32, VR256X>,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              null_frag, vinsert256_insert, sched>,
                          EVEX_V512;
  }
}
656
// Floating-point and integer instantiations of the insert instructions.
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18,
                                 f64, 0x1a,
                                 WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38,
                                 i64, 0x3a,
                                 WriteShuffle256>;
660
// Codegen patterns with the alternative element width: the 32x4 instructions
// are selected for 64-bit element types and the 64x4 instructions for 32-bit
// element types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 128-bit subvector into a 256-bit vector.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v2f64x_info, v4f64x_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v2i64x_info, v4i64x_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasVLX]>;

// 128-bit subvector into a 512-bit vector.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v2f64x_info, v8f64_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v2i64x_info, v8i64_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasAVX512]>;

// 256-bit subvector into a 512-bit vector.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v8f32x_info, v16f32_info, vinsert256_insert,
                                 INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v8i32x_info, v16i32_info, vinsert256_insert,
                                 INSERT_get_vinsert256_imm, [HasAVX512]>;
677
// Codegen patterns for the i16, i8 and f16 element types, which reuse the
// 32x4 / 64x4 instructions.

// Insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v8i16x_info, v16i16x_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v16i8x_info, v32i8x_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v8f16x_info, v16f16x_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasVLX]>;

// Insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v8i16x_info, v32i16_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v16i8x_info, v64i8_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v8f16x_info, v32f16_info, vinsert128_insert,
                                 INSERT_get_vinsert128_imm, [HasAVX512]>;

// Insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v16i16x_info, v32i16_info, vinsert256_insert,
                                 INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v32i8x_info, v64i8_info, vinsert256_insert,
                                 INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v16f16x_info, v32f16_info, vinsert256_insert,
                                 INSERT_get_vinsert256_imm, [HasAVX512]>;
699
700
// Additional patterns for handling a bitcast between the vselect and the
// insert: the insert is done on To.VT (subvector type From.VT) while the
// masking vselect operates on Cast.VT. The masked forms of the instruction
// named InstrStr are selected ("rrk"/"rmk" for merge-masking with $src0,
// "rrkz"/"rmkz" for zero-masking against Cast.ImmAllZerosV).
// INSERT_get_vinsert_imm is applied to the matched insert node ($ins) to
// build the immediate; p is the predicate list guarding the patterns.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masking, subvector in a register.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking, subvector loaded from memory. The load is matched directly
  // as From.VT, the same way the zero-masking load pattern below matches it;
  // no extra bitconvert is expected between the load and the insert.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masking, subvector in a register.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking, subvector loaded from memory.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
752
// Masked 128-bit inserts into 256-bit floating-point vectors where the mask
// element width differs from the insert type.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256",
                             v2f64x_info, v4f64x_info, v8f32x_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256",
                             v4f32x_info, v8f32x_info, v4f64x_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI, HasVLX]>;
759
// Masked 128-bit inserts into 256-bit integer vectors where the mask element
// width differs from the insert type.

// Masked as v8i32: use the 32x4 integer instruction.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;

// Masked as v4i64: use the 64x2 integer instruction (VINSERTI64x2Z256), in
// line with the 512-bit integer patterns that select VINSERTI64x2Z, rather
// than the floating-point VINSERTF64x2Z256.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
778
// Masked inserts into 512-bit vectors where the mask element width differs
// from the insert type.

// 128-bit subvector, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z",
                             v2f64x_info, v8f64_info, v16f32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z",
                             v4f32x_info, v16f32_info, v8f64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// 128-bit subvector, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v2i64x_info, v8i64_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v8i16x_info, v32i16_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v16i8x_info, v64i8_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v4i32x_info, v16i32_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v8i16x_info, v32i16_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v16i8x_info, v64i8_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// 256-bit subvector, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z",
                             v4f64x_info, v8f64_info, v16f32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z",
                             v8f32x_info, v16f32_info, v8f64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;

// 256-bit subvector, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v4i64x_info, v8i64_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v16i16x_info, v32i16_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v32i8x_info, v64i8_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v8i32x_info, v16i32_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v16i16x_info, v32i16_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v32i8x_info, v64i8_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
830
// vinsertps - insert f32 to XMM
// EVEX-encoded register and memory forms, both selected from X86insertps.
let ExeDomain = SSEPackedSingle in {
  // Register form; marked commutable.
  let isCommutable = 1 in
  def VINSERTPSZrr
      : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
                   (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
                   "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   [(set VR128X:$dst,
                         (X86insertps VR128X:$src1, VR128X:$src2,
                                      timm:$src3))]>,
        EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;

  // Memory form: the inserted operand is an f32 load wrapped in
  // scalar_to_vector.
  def VINSERTPSZrm
      : AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
                   (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
                   "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                   [(set VR128X:$dst,
                         (X86insertps VR128X:$src1,
                                      (v4f32 (scalar_to_vector
                                              (loadf32 addr:$src2))),
                                      timm:$src3))]>,
        EVEX_4V, EVEX_CD8<32, CD8VT1>,
        Sched<[SchedWriteFShuffle.XMM.Folded,
               SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
848
849//===----------------------------------------------------------------------===//
850// AVX-512 VECTOR EXTRACT
851//---
852
// Emits the three forms of a subvector extract: register destination ("rr",
// expanded through AVX512_maskable_split), store to memory ("mr") and masked
// store to memory ("mrk", no pattern). The "rr" form is handed one pattern
// operator for the unmasked op and a second one for the masked ops; null_frag
// may be passed for either of them.
//   From    - info for the vector that is extracted from ($src1).
//   To      - info for the extracted subvector ($dst).
//   SchedRR - scheduling class of the register form.
//   SchedMR - scheduling class of both store forms.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let ExeDomain = To.ExeDomain, hasSideEffects = 0 in {
    // Extract into a register.
    defm rr : AVX512_maskable_split<
                  Opcode, MRMDestReg, To, (outs To.RC:$dst),
                  (ins From.RC:$src1, u8imm:$idx),
                  "vextract" # To.EltTypeName # "x" # To.NumElts,
                  "$idx, $src1", "$src1, $idx",
                  (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                  (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
              AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Extract and store to memory.
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                         (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                         "vextract" # To.EltTypeName # "x" # To.NumElts #
                             "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                         [(store (To.VT (vextract_extract:$idx
                                         (From.VT From.RC:$src1), (iPTR imm))),
                                 addr:$dst)]>,
              EVEX, Sched<[SchedMR]>;

    // Masked extract and store. There is no pattern, so mayStore is set
    // explicitly; hasSideEffects = 0 comes from the enclosing let.
    let mayStore = 1 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                         (ins To.MemOp:$dst, To.KRCWM:$mask,
                              From.RC:$src1, u8imm:$idx),
                         "vextract" # To.EltTypeName # "x" # To.NumElts #
                             "\t{$idx, $src1, $dst {${mask}}|"
                             "$dst {${mask}}, $src1, $idx}", []>,
              EVEX_K, EVEX, Sched<[SchedMR]>;
  }
}
889
// Convenience form of vextract_for_size_split for the case where one pattern
// operator serves both the unmasked and the masked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR>
    : vextract_for_size_split<Opcode, From, To,
                              vextract_extract, vextract_extract,
                              SchedRR, SchedMR>;
896
// Codegen patterns for the alternative types. No instructions are defined
// here: an extract of a To.VT subvector from a From.VT vector is mapped onto
// the existing instruction whose name starts with InstrStr.
// EXTRACT_get_vextract_imm is applied to the matched extract node ($ext) to
// build the immediate operand; p is the predicate list guarding the patterns.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    // Result kept in a register -> InstrStr # "rr".
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                         From.RC:$src1,
                         (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    // Result stored to memory -> InstrStr # "mr".
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(InstrStr#"mr")
                   addr:$dst, From.RC:$src1,
                   (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
912
// Instantiates all subvector-extract instructions for one element family.
//   EltVT32 / EltVT64     - the 32-bit and 64-bit element types.
//   Opcode128 / Opcode256 - opcodes for 128-bit and 256-bit subvectors.
//   SchedRR / SchedMR     - scheduling classes for the register and store
//                           forms.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    // 4 x 32-bit elements out of a 512-bit vector.
    defm NAME # "32x4Z" : vextract_for_size<
                              Opcode128,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              X86VectorVTInfo< 4, EltVT32, VR128X>,
                              vextract128_extract, SchedRR, SchedMR>,
                          EVEX_V512, EVEX_CD8<32, CD8VT4>;

    // 4 x 64-bit elements out of a 512-bit vector.
    defm NAME # "64x4Z" : vextract_for_size<
                              Opcode256,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              X86VectorVTInfo< 4, EltVT64, VR256X>,
                              vextract256_extract, SchedRR, SchedMR>,
                          REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }

  // 4 x 32-bit elements out of a 256-bit vector.
  let Predicates = [HasVLX] in {
    defm NAME # "32x4Z256" : vextract_for_size<
                                 Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                             EVEX_V256, EVEX_CD8<32, CD8VT4>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked operator.
  let Predicates = [HasVLX, HasDQI] in {
    defm NAME # "64x2Z256" : vextract_for_size_split<
                                 Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract,
                                 SchedRR, SchedMR>,
                             VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
  }

  // Even with DQI we'd like to only use these instructions for masking, so
  // null_frag is passed as the unmasked operator.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<
                              Opcode128,
                              X86VectorVTInfo< 8, EltVT64, VR512>,
                              X86VectorVTInfo< 2, EltVT64, VR128X>,
                              null_frag, vextract128_extract,
                              SchedRR, SchedMR>,
                          REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;

    defm NAME # "32x8Z" : vextract_for_size_split<
                              Opcode256,
                              X86VectorVTInfo<16, EltVT32, VR512>,
                              X86VectorVTInfo< 8, EltVT32, VR256X>,
                              null_frag, vextract256_extract,
                              SchedRR, SchedMR>,
                          EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
957
// Floating-point and integer instantiations of the extract instructions.
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19,
                                   f64, 0x1b,
                                   WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39,
                                   i64, 0x3b,
                                   WriteShuffle256, WriteVecStore>;
961
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 128-bit subvector from a 512-bit vector, 64-bit elements.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// 256-bit subvector from a 512-bit vector, 32-bit elements.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info, vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info, vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 128-bit subvector from a 256-bit vector, 64-bit elements.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v16f16x_info, v8f16x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v32f16_info, v8f16x_info, vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info, vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info, vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v32f16_info, v16f16x_info, vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
1001
1002
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// Without VLX this is done by taking the sub_ymm subregister of the source
// and extracting its subvector 1 with VEXTRACTI128rr / VEXTRACTF128rr.
let Predicates = [NoVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 16-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
            (v8f16 (VEXTRACTF128rr
                       (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 8-bit elements.
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
1035
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
// With VLX this is done by taking the sub_ymm subregister of the source and
// extracting its subvector 1 with VEXTRACTI32x4Z256rr / VEXTRACTF32x4Z256rr.
let Predicates = [HasVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 16-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
            (v8f16 (VEXTRACTF32x4Z256rr
                       (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 8-bit elements.
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
1068
1069
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector: the extract produces To.VT from a From.VT vector while
// the masking vselect operates on Cast.VT. The masked register forms of the
// instruction named InstrStr are selected ("rrk" for merge-masking with
// $src0, "rrkz" for zero-masking against Cast.ImmAllZerosV).
// EXTRACT_get_vextract_imm is applied to the matched extract node ($ext) to
// build the immediate; p is the predicate list guarding the patterns.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking. The pass-through value is a Cast.VT operand of the
  // vselect, so it is named with Cast.RC on both sides of the pattern.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1097
1098defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1099                              v4f32x_info, vextract128_extract,
1100                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1101defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1102                              v2f64x_info, vextract128_extract,
1103                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1104
1105defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1106                              v4i32x_info, vextract128_extract,
1107                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1108defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1109                              v4i32x_info, vextract128_extract,
1110                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1111defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1112                              v4i32x_info, vextract128_extract,
1113                              EXTRACT_get_vextract128_imm, [HasVLX]>;
1114defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1115                              v2i64x_info, vextract128_extract,
1116                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1117defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1118                              v2i64x_info, vextract128_extract,
1119                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1120defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1121                              v2i64x_info, vextract128_extract,
1122                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1123
// Instantiations of vextract_for_mask_cast for 128-bit extracts from 512-bit
// vectors (vextract128_extract / EXTRACT_get_vextract128_imm).
// As in the other instantiation groups, the third type argument differs in
// element width from the first two and matches the element width in the
// instruction name.
// The 32x4 forms are predicated on HasAVX512; the 64x2 forms on HasDQI.
1124defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1125                              v4f32x_info, vextract128_extract,
1126                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1127defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1128                              v2f64x_info, vextract128_extract,
1129                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1130
1131defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1132                              v4i32x_info, vextract128_extract,
1133                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1134defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1135                              v4i32x_info, vextract128_extract,
1136                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1137defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1138                              v4i32x_info, vextract128_extract,
1139                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
1140defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1141                              v2i64x_info, vextract128_extract,
1142                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1143defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1144                              v2i64x_info, vextract128_extract,
1145                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1146defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1147                              v2i64x_info, vextract128_extract,
1148                              EXTRACT_get_vextract128_imm, [HasDQI]>;
1149
// Instantiations of vextract_for_mask_cast for 256-bit extracts from 512-bit
// vectors (vextract256_extract / EXTRACT_get_vextract256_imm).
// The third type argument differs in element width from the first two and
// matches the element width in the instruction name.
// Note the predicate split is the reverse of the 128-bit groups: here the
// 32x8 forms are predicated on HasDQI and the 64x4 forms on HasAVX512.
1150defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1151                              v8f32x_info, vextract256_extract,
1152                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1153defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1154                              v4f64x_info, vextract256_extract,
1155                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1156
1157defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1158                              v8i32x_info, vextract256_extract,
1159                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1160defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1161                              v8i32x_info, vextract256_extract,
1162                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1163defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1164                              v8i32x_info, vextract256_extract,
1165                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1166defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1167                              v4i64x_info, vextract256_extract,
1168                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1169defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1170                              v4i64x_info, vextract256_extract,
1171                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1172defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1173                              v4i64x_info, vextract256_extract,
1174                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1175
1176// vextractps - extract 32 bits from XMM
// Register-destination form (opcode 0x17, MRMDestReg).
// The pattern views the v4f32 in $src1 as v4i32 via bc_v4i32 and writes the
// element selected by the immediate $src2 to a GR32orGR64 destination.
1177def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1178      (ins VR128X:$src1, u8imm:$src2),
1179      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1180      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1181      EVEX, WIG, Sched<[WriteVecExtract]>;
1182
// Memory-destination form (opcode 0x17, MRMDestMem); it has no register
// outputs.
// The pattern views the v4f32 in $src1 as v4i32 via bc_v4i32 and stores the
// element selected by the immediate $src2 to addr:$dst. The record is tagged
// with EVEX_CD8<32, CD8VT1> and scheduled as WriteVecExtractSt.
1183def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1184      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1185      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1186      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1187                          addr:$dst)]>,
1188      EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1189
1190//===---------------------------------------------------------------------===//
1191// AVX-512 BROADCAST
1192//---
1193// broadcast with a scalar argument.
// Selection patterns for X86VBroadcast of a scalar held in SrcInfo.FRC.
//   Name     - instruction name prefix; the selected records are
//              Name # DestInfo.ZSuffix # {rr, rrk, rrkz}.
//   DestInfo - type info of the broadcast result (VT, RC, KRCWM,
//              ImmAllZerosV, ZSuffix are used).
//   SrcInfo  - type info of the source (FRC, RC and VT are used).
// In every pattern the scalar is first moved into SrcInfo.RC with
// COPY_TO_REGCLASS so it can feed the register-form broadcast.
// Three patterns are emitted, in this order: unmasked, merge-masked
// (vselect_mask with $src0 as the false operand) and zero-masked
// (vselect_mask with DestInfo.ImmAllZerosV as the false operand).
1194multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1195                                   X86VectorVTInfo SrcInfo> {
1196  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1197            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1198             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1199  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1200                                       (X86VBroadcast SrcInfo.FRC:$src),
1201                                       DestInfo.RC:$src0)),
1202            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1203             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1204             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1205  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1206                                       (X86VBroadcast SrcInfo.FRC:$src),
1207                                       DestInfo.ImmAllZerosV)),
1208            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1209             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1210}
1211
1212// Split version to allow mask and broadcast node to be different types. This
1213// helps support the 32x2 broadcasts.
// Defines six broadcast records: rr, rrkz, rrk (register source) and
// rm, rmkz, rmk (scalar memory source).
//   opc, OpcodeStr   - opcode byte and mnemonic shared by all six records.
//   SchedRR, SchedRM - scheduling classes for the register and memory forms.
//   MaskInfo         - supplies the destination register class, the
//                      write-mask class and the VT of the final result.
//   DestInfo         - supplies the VT the broadcast node produces before it
//                      is bitconverted to MaskInfo.VT, and the ExeDomain.
//   SrcInfo          - supplies the source register class/VT, ScalarMemOp,
//                      BroadcastLdFrag and the EltSize used in EVEX_CD8.
//   IsConvertibleToThreeAddress - applied to the rmk record only.
//   UnmaskedOp       - operator used in the rr pattern only.
//   UnmaskedBcastOp  - operator used in the rm pattern only.
// The masked records always match X86VBroadcast / SrcInfo.BroadcastLdFrag,
// regardless of UnmaskedOp / UnmaskedBcastOp.
// NOTE(review): rr and rm set hasSideEffects/mayLoad explicitly, presumably
// because callers may pass null_frag for the unmasked operators, leaving no
// pattern to infer the flags from - confirm.
1214multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1215                                     SchedWrite SchedRR, SchedWrite SchedRM,
1216                                     X86VectorVTInfo MaskInfo,
1217                                     X86VectorVTInfo DestInfo,
1218                                     X86VectorVTInfo SrcInfo,
1219                                     bit IsConvertibleToThreeAddress,
1220                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1221                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Unmasked register form; its pattern uses UnmaskedOp.
1222  let hasSideEffects = 0 in
1223  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1224                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1225                    [(set MaskInfo.RC:$dst,
1226                      (MaskInfo.VT
1227                       (bitconvert
1228                        (DestInfo.VT
1229                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1230                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  // Zero-masked register form; always matches X86VBroadcast.
1231  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1232                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1233                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1234                       "${dst} {${mask}} {z}, $src}"),
1235                       [(set MaskInfo.RC:$dst,
1236                         (vselect_mask MaskInfo.KRCWM:$mask,
1237                          (MaskInfo.VT
1238                           (bitconvert
1239                            (DestInfo.VT
1240                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1241                          MaskInfo.ImmAllZerosV))],
1242                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  // Merge-masked register form; $src0 is tied to $dst.
1243  let Constraints = "$src0 = $dst" in
1244  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1245                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1246                          SrcInfo.RC:$src),
1247                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1248                     "${dst} {${mask}}, $src}"),
1249                     [(set MaskInfo.RC:$dst,
1250                       (vselect_mask MaskInfo.KRCWM:$mask,
1251                        (MaskInfo.VT
1252                         (bitconvert
1253                          (DestInfo.VT
1254                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1255                        MaskInfo.RC:$src0))],
1256                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1257
  // Unmasked load form; its pattern uses UnmaskedBcastOp.
1258  let hasSideEffects = 0, mayLoad = 1 in
1259  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1260                    (ins SrcInfo.ScalarMemOp:$src),
1261                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1262                    [(set MaskInfo.RC:$dst,
1263                      (MaskInfo.VT
1264                       (bitconvert
1265                        (DestInfo.VT
1266                         (UnmaskedBcastOp addr:$src)))))],
1267                    DestInfo.ExeDomain>, T8PD, EVEX,
1268                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1269
  // Zero-masked load form; always matches SrcInfo.BroadcastLdFrag.
1270  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1271                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1272                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1273                       "${dst} {${mask}} {z}, $src}"),
1274                       [(set MaskInfo.RC:$dst,
1275                         (vselect_mask MaskInfo.KRCWM:$mask,
1276                          (MaskInfo.VT
1277                           (bitconvert
1278                            (DestInfo.VT
1279                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1280                          MaskInfo.ImmAllZerosV))],
1281                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1282                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1283
  // Merge-masked load form; $src0 is tied to $dst. This is the only record
  // that receives IsConvertibleToThreeAddress.
1284  let Constraints = "$src0 = $dst",
1285      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1286  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1287                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1288                          SrcInfo.ScalarMemOp:$src),
1289                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1290                     "${dst} {${mask}}, $src}"),
1291                     [(set MaskInfo.RC:$dst,
1292                       (vselect_mask MaskInfo.KRCWM:$mask,
1293                        (MaskInfo.VT
1294                         (bitconvert
1295                          (DestInfo.VT
1296                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1297                        MaskInfo.RC:$src0))],
1298                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1299                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1300}
1301
1302// Helper class to force mask and broadcast result to same type.
// Thin wrapper over avx512_broadcast_rm_split: DestInfo is passed for both
// the MaskInfo and DestInfo parameters, and UnmaskedOp / UnmaskedBcastOp are
// left at their defaults. All other arguments are forwarded unchanged.
1303multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1304                               SchedWrite SchedRR, SchedWrite SchedRM,
1305                               X86VectorVTInfo DestInfo,
1306                               X86VectorVTInfo SrcInfo,
1307                               bit IsConvertibleToThreeAddress> :
1308  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1309                            DestInfo, DestInfo, SrcInfo,
1310                            IsConvertibleToThreeAddress>;
1311
// Scalar FP broadcast with Z (512-bit, HasAVX512) and Z256 (256-bit, HasVLX)
// variants; unlike avx512_fp_broadcast_ss, no Z128 variant is defined here.
// Each variant combines avx512_broadcast_rm (the instruction records) with
// avx512_broadcast_scalar<NAME, ...> (the patterns for a scalar-register
// source). The source type info is always _.info128, and
// IsConvertibleToThreeAddress is passed as 1.
1312multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1313                                  AVX512VLVectorVTInfo _> {
1314  let Predicates = [HasAVX512] in {
1315    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1316                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1317              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1318              EVEX_V512;
1319  }
1320
1321  let Predicates = [HasVLX] in {
1322    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1323                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1324                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1325                 EVEX_V256;
1326  }
1327}
1328
// Scalar FP broadcast with Z (512-bit, HasAVX512), Z256 and Z128 (HasVLX)
// variants.
// Each variant combines avx512_broadcast_rm (the instruction records) with
// avx512_broadcast_scalar<NAME, ...> (the patterns for a scalar-register
// source). The source type info is always _.info128, and
// IsConvertibleToThreeAddress is passed as 1.
// All three variants, including Z128, use the WriteFShuffle256 /
// WriteFShuffle256Ld scheduling classes.
1329multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1330                                  AVX512VLVectorVTInfo _> {
1331  let Predicates = [HasAVX512] in {
1332    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1333                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1334              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1335              EVEX_V512;
1336  }
1337
1338  let Predicates = [HasVLX] in {
1339    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1340                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1341                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1342                 EVEX_V256;
1343    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1344                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
1345                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1346                 EVEX_V128;
1347  }
1348}
// Scalar FP broadcast instructions: vbroadcastss (opcode 0x18, f32 type
// info) and vbroadcastsd (opcode 0x19, f64 type info, tagged VEX_W1X).
1349defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1350                                       avx512vl_f32_info>;
1351defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1352                                       avx512vl_f64_info>, VEX_W1X;
1353
// Integer broadcast from a general-purpose register of class SrcRC.
//   opc     - opcode byte.
//   SchedRR - scheduling class.
//   _       - vector type info of the result; also supplies the mnemonic
//             suffix ("vpbroadcast" # _.Suffix) and the ExeDomain.
//   OpNode  - operator matched as (_.VT (OpNode SrcRC:$src)).
// The records are produced through AVX512_maskable under the base name rr,
// with vselect passed as the trailing argument (see the comment below).
// NOTE(review): AVX512_maskable is defined outside this excerpt; presumably
// it also emits the masked variants - confirm.
1354multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1355                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1356                                    RegisterClass SrcRC> {
1357  // Fold with a mask even if it has multiple uses since it is cheap.
1358  let ExeDomain = _.ExeDomain in
1359  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1360                          (ins SrcRC:$src),
1361                          "vpbroadcast"#_.Suffix, "$src", "$src",
1362                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1363                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1364                          T8PD, EVEX, Sched<[SchedRR]>;
1365}
1366
// Integer broadcast from a GPR narrower than 32 bits (SrcRC / Subreg).
//   opc     - opcode byte.
//   Name    - full record name prefix, used to look up Name#rr, Name#rrk and
//             Name#rrkz in the patterns below.
//   SchedRR - scheduling class.
//   _       - vector type info of the result.
//   OpNode  - operator matched by the patterns.
//   SrcRC, Subreg - source register class and the sub-register index at
//             which it is inserted into a 32-bit register.
// The instruction itself always takes a GR32 operand and is declared with
// empty pattern lists, so hasSideEffects is set explicitly. Selection is done
// by the three separate Pats, which place the SrcRC value into an
// IMPLICIT_DEF i32 with INSERT_SUBREG before feeding the instruction.
1367multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1368                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1369                                    RegisterClass SrcRC, SubRegIndex Subreg> {
1370  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1371  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1372                         (outs _.RC:$dst), (ins GR32:$src),
1373                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1374                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1375                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1376                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1377
  // Unmasked broadcast.
1378  def : Pat <(_.VT (OpNode SrcRC:$src)),
1379             (!cast<Instruction>(Name#rr)
1380              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1381
1382  // Fold with a mask even if it has multiple uses since it is cheap.
1383  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1384             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1385              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1386
  // Zero-masked broadcast.
1387  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1388             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1389              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1390}
1391
// Instantiates avx512_int_broadcastbw_reg for all three vector lengths:
// Z (info512) under [prd], and Z256 / Z128 under [prd, HasVLX].
// Name is extended with the matching Z / Z256 / Z128 suffix before being
// forwarded, so the inner patterns can look up the records by full name.
// Z and Z256 use WriteShuffle256; Z128 uses WriteShuffle.
1392multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1393                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1394                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1395  let Predicates = [prd] in
1396    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1397              OpNode, SrcRC, Subreg>, EVEX_V512;
1398  let Predicates = [prd, HasVLX] in {
1399    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1400              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1401    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1402              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1403  }
1404}
1405
// Instantiates avx512_int_broadcast_reg for all three vector lengths:
// Z (info512) under [prd], and Z256 / Z128 under [prd, HasVLX].
// Z and Z256 use WriteShuffle256; Z128 uses WriteShuffle.
1406multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1407                                       SDPatternOperator OpNode,
1408                                       RegisterClass SrcRC, Predicate prd> {
1409  let Predicates = [prd] in
1410    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1411                                      SrcRC>, EVEX_V512;
1412  let Predicates = [prd, HasVLX] in {
1413    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1414                                         SrcRC>, EVEX_V256;
1415    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1416                                         SrcRC>, EVEX_V128;
1417  }
1418}
1419
// Integer broadcasts from general-purpose registers.
// The byte (0x7A, GR8/sub_8bit) and word (0x7B, GR16/sub_16bit) forms go
// through the "bw" multiclass and are predicated on HasBWI.
// The dword and qword forms share opcode 0x7C and are predicated on
// HasAVX512; the qword form (GR64) is additionally tagged REX_W.
1420defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1421                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1422defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1423                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1424                       HasBWI>;
1425defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1426                                                 X86VBroadcast, GR32, HasAVX512>;
1427defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1428                                                 X86VBroadcast, GR64, HasAVX512>, REX_W;
1429
// Instantiates avx512_broadcast_rm for all three vector lengths:
// Z (info512) under [prd], and Z256 / Z128 under [prd, HasVLX].
// The source type info is always _.info128. IsConvertibleToThreeAddress is
// forwarded unchanged to every variant.
// Z and Z256 use WriteShuffle256 / WriteShuffle256Ld; Z128 uses
// WriteShuffle / WriteShuffleXLd.
1430multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1431                                      AVX512VLVectorVTInfo _, Predicate prd,
1432                                      bit IsConvertibleToThreeAddress> {
1433  let Predicates = [prd] in {
1434    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1435                                   WriteShuffle256Ld, _.info512, _.info128,
1436                                   IsConvertibleToThreeAddress>,
1437                                  EVEX_V512;
1438  }
1439  let Predicates = [prd, HasVLX] in {
1440    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1441                                    WriteShuffle256Ld, _.info256, _.info128,
1442                                    IsConvertibleToThreeAddress>,
1443                                 EVEX_V256;
1444    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1445                                    WriteShuffleXLd, _.info128, _.info128,
1446                                    IsConvertibleToThreeAddress>,
1447                                 EVEX_V128;
1448  }
1449}
1450
// Integer broadcasts from a vector register or scalar memory operand.
// Byte (0x78) and word (0x79) forms are predicated on HasBWI and pass 0 for
// IsConvertibleToThreeAddress; dword (0x58) and qword (0x59) forms are
// predicated on HasAVX512 and pass 1. The qword form is tagged VEX_W1X.
1451defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1452                                           avx512vl_i8_info, HasBWI, 0>;
1453defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1454                                           avx512vl_i16_info, HasBWI, 0>;
1455defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1456                                           avx512vl_i32_info, HasAVX512, 1>;
1457defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1458                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1459
// Subvector broadcast from memory.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - load-broadcast operator matched as
//                    (_Dst.VT (OpNode addr:$src)).
//   _Dst           - type info of the broadcast result.
//   _Src           - type info of the loaded subvector; only its MemOp is
//                    used.
// The records are produced through AVX512_maskable under the base name rm;
// later patterns in this file refer to the resulting rm, rmk and rmkz
// records. The scheduling class is SchedWriteShuffle.YMM.Folded regardless of
// _Dst.
1460multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1461                                      SDPatternOperator OpNode,
1462                                      X86VectorVTInfo _Dst,
1463                                      X86VectorVTInfo _Src> {
1464  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1465                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1466                           (_Dst.VT (OpNode addr:$src))>,
1467                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1468                           AVX5128IBase, EVEX;
1469}
1470
1471// This should be used for the AVX512DQ broadcast instructions. It disables
1472// the unmasked patterns so that we only use the DQ instructions when masking
1473//  is requested.
// Same parameters as avx512_subvec_broadcast_rm, but built on
// AVX512_maskable_split: null_frag is passed as the first pattern argument
// and (_Dst.VT (OpNode addr:$src)) as the second.
// hasSideEffects and mayLoad are set explicitly, presumably because the
// null_frag form leaves no pattern to infer them from - confirm.
1474multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1475                                         SDPatternOperator OpNode,
1476                                         X86VectorVTInfo _Dst,
1477                                         X86VectorVTInfo _Src> {
1478  let hasSideEffects = 0, mayLoad = 1 in
1479  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1480                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1481                           (null_frag),
1482                           (_Dst.VT (OpNode addr:$src))>,
1483                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1484                           AVX5128IBase, EVEX;
1485}
// 512-bit f16 broadcasts (v32f16) are selected to the 16-bit integer
// broadcast VPBROADCASTWZ: the rm form for a 16-bit load-broadcast, and the
// rr form for a v8f16 register or an f16 scalar. The scalar in FR16X is
// first moved to VR128X with COPY_TO_REGCLASS.
1486let Predicates = [HasBWI] in {
1487  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1488            (VPBROADCASTWZrm addr:$src)>;
1489
1490  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1491            (VPBROADCASTWZrr VR128X:$src)>;
1492  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1493            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1494}
// 128-bit (v8f16) and 256-bit (v16f16) f16 broadcasts are selected to
// VPBROADCASTWZ128 / VPBROADCASTWZ256: the rm form for a 16-bit
// load-broadcast, and the rr form for a v8f16 register or an f16 scalar. The
// scalar in FR16X is first moved to VR128X with COPY_TO_REGCLASS.
1495let Predicates = [HasVLX, HasBWI] in {
1496  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1497            (VPBROADCASTWZ128rm addr:$src)>;
1498  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1499            (VPBROADCASTWZ256rm addr:$src)>;
1500
1501  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1502            (VPBROADCASTWZ128rr VR128X:$src)>;
1503  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1504            (VPBROADCASTWZ256rr VR128X:$src)>;
1505
1506  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1507            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1508  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1509            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1510}
1511
1512//===----------------------------------------------------------------------===//
1513// AVX-512 BROADCAST SUBVECTORS
1514//
1515
// 512-bit subvector broadcasts from memory, all tagged EVEX_V512.
// The 32x4 forms (opcodes 0x5a integer, 0x1a FP) match X86SubVBroadcastld128
// and use EVEX_CD8<32, CD8VT4>. The 64x4 forms (opcodes 0x5b integer, 0x1b
// FP) match X86SubVBroadcastld256, are tagged REX_W and use
// EVEX_CD8<64, CD8VT4>.
1516defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1517                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1518                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1519defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1520                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1521                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1522defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1523                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
1524                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1525defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1526                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
1527                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1528
// Selection patterns for 512-bit subvector load-broadcasts, available with
// HasAVX512 alone.
// Unmasked: every 512-bit result type is mapped to one instruction per
// subvector width - FP types to the F variant and integer types to the I
// variant (64X4 for 256-bit loads, 32X4 for 128-bit loads).
// Masked: selects whose broadcast operand is bitcast from the other element
// width are mapped to the rmk / rmkz record whose mask granularity matches
// the select (VK16WM with 32X4, VK8WM with 64X4).
1529let Predicates = [HasAVX512] in {
// Unmasked 256-bit subvector load-broadcasts.
1530def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1531          (VBROADCASTF64X4rm addr:$src)>;
1532def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1533          (VBROADCASTF64X4rm addr:$src)>;
1534def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1535          (VBROADCASTF64X4rm addr:$src)>;
1536def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1537          (VBROADCASTI64X4rm addr:$src)>;
1538def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1539          (VBROADCASTI64X4rm addr:$src)>;
1540def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1541          (VBROADCASTI64X4rm addr:$src)>;
1542def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1543          (VBROADCASTI64X4rm addr:$src)>;
1544
// Unmasked 128-bit subvector load-broadcasts.
1545def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1546          (VBROADCASTF32X4rm addr:$src)>;
1547def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1548          (VBROADCASTF32X4rm addr:$src)>;
1549def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1550          (VBROADCASTF32X4rm addr:$src)>;
1551def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1552          (VBROADCASTI32X4rm addr:$src)>;
1553def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1554          (VBROADCASTI32X4rm addr:$src)>;
1555def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1556          (VBROADCASTI32X4rm addr:$src)>;
1557def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1558          (VBROADCASTI32X4rm addr:$src)>;
1559
1560// Patterns for selects of bitcasted operations.
1561def : Pat<(vselect_mask VK16WM:$mask,
1562                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1563                        (v16f32 immAllZerosV)),
1564          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1565def : Pat<(vselect_mask VK16WM:$mask,
1566                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1567                        VR512:$src0),
1568          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1569def : Pat<(vselect_mask VK16WM:$mask,
1570                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1571                        (v16i32 immAllZerosV)),
1572          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1573def : Pat<(vselect_mask VK16WM:$mask,
1574                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1575                        VR512:$src0),
1576          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1577
1578def : Pat<(vselect_mask VK8WM:$mask,
1579                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1580                        (v8f64 immAllZerosV)),
1581          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1582def : Pat<(vselect_mask VK8WM:$mask,
1583                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1584                        VR512:$src0),
1585          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1586def : Pat<(vselect_mask VK8WM:$mask,
1587                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1588                        (v8i64 immAllZerosV)),
1589          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1590def : Pat<(vselect_mask VK8WM:$mask,
1591                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1592                        VR512:$src0),
1593          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1594}
1595
// 256-bit subvector broadcasts of a 128-bit load, available with HasVLX.
// Defines VBROADCAST{I,F}32X4Z256 (EVEX_V256, EVEX_CD8<32, CD8VT4>) and maps
// every 256-bit result type of X86SubVBroadcastld128 onto them: FP types to
// the F variant, integer types to the I variant.
// The masked patterns at the end cover selects with a VK8WM mask whose
// broadcast operand is bitcast from the 64-bit-element type.
1596let Predicates = [HasVLX] in {
1597defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1598                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1599                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1600defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1601                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1602                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1603
// Unmasked 128-bit subvector load-broadcasts for every 256-bit type.
1604def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1605          (VBROADCASTF32X4Z256rm addr:$src)>;
1606def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1607          (VBROADCASTF32X4Z256rm addr:$src)>;
1608def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1609          (VBROADCASTF32X4Z256rm addr:$src)>;
1610def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1611          (VBROADCASTI32X4Z256rm addr:$src)>;
1612def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1613          (VBROADCASTI32X4Z256rm addr:$src)>;
1614def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1615          (VBROADCASTI32X4Z256rm addr:$src)>;
1616def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1617          (VBROADCASTI32X4Z256rm addr:$src)>;
1618
1619// Patterns for selects of bitcasted operations.
1620def : Pat<(vselect_mask VK8WM:$mask,
1621                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1622                        (v8f32 immAllZerosV)),
1623          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1624def : Pat<(vselect_mask VK8WM:$mask,
1625                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1626                        VR256X:$src0),
1627          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1628def : Pat<(vselect_mask VK8WM:$mask,
1629                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1630                        (v8i32 immAllZerosV)),
1631          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1632def : Pat<(vselect_mask VK8WM:$mask,
1633                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1634                        VR256X:$src0),
1635          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1636}
1637
// 64x2 subvector broadcasts into 256-bit vectors, requiring HasVLX and
// HasDQI. They are built with avx512_subvec_broadcast_rm_dq, so only masked
// forms carry a selection pattern.
// NOTE(review): the records carry a Z128 suffix although they are declared
// with EVEX_V256 and 256-bit destination type info (v4i64x_info /
// v4f64x_info). The names are left untouched here because they may be
// referenced from outside this excerpt.
// The masked patterns cover selects with a VK4WM mask whose broadcast operand
// is bitcast from the 32-bit-element type.
1638let Predicates = [HasVLX, HasDQI] in {
1639defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1640                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
1641                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1642defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1643                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
1644                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1645
1646// Patterns for selects of bitcasted operations.
1647def : Pat<(vselect_mask VK4WM:$mask,
1648                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1649                        (v4f64 immAllZerosV)),
1650          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1651def : Pat<(vselect_mask VK4WM:$mask,
1652                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1653                        VR256X:$src0),
1654          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1655def : Pat<(vselect_mask VK4WM:$mask,
1656                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1657                        (v4i64 immAllZerosV)),
1658          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1659def : Pat<(vselect_mask VK4WM:$mask,
1660                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1661                        VR256X:$src0),
1662          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1663}
1664
// 512-bit subvector broadcasts that require HasDQI: the 64x2 forms (128-bit
// load, REX_W, EVEX_CD8<64, CD8VT2>) and the 32x8 forms (256-bit load,
// EVEX_CD8<32, CD8VT8>). They are built with avx512_subvec_broadcast_rm_dq,
// so only masked forms carry a selection pattern.
// The masked patterns cover selects whose broadcast operand is bitcast from
// the other element width: VK16WM selects map to the 32X8 records and VK8WM
// selects map to the 64X2 records.
1665let Predicates = [HasDQI] in {
1666defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1667                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
1668                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1669defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1670                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1671                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1672defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1673                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
1674                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1675defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1676                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1677                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1678
1679// Patterns for selects of bitcasted operations.
1680def : Pat<(vselect_mask VK16WM:$mask,
1681                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1682                        (v16f32 immAllZerosV)),
1683          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1684def : Pat<(vselect_mask VK16WM:$mask,
1685                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1686                        VR512:$src0),
1687          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1688def : Pat<(vselect_mask VK16WM:$mask,
1689                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1690                        (v16i32 immAllZerosV)),
1691          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1692def : Pat<(vselect_mask VK16WM:$mask,
1693                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1694                        VR512:$src0),
1695          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1696
1697def : Pat<(vselect_mask VK8WM:$mask,
1698                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1699                        (v8f64 immAllZerosV)),
1700          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1701def : Pat<(vselect_mask VK8WM:$mask,
1702                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1703                        VR512:$src0),
1704          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1705def : Pat<(vselect_mask VK8WM:$mask,
1706                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1707                        (v8i64 immAllZerosV)),
1708          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1709def : Pat<(vselect_mask VK8WM:$mask,
1710                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1711                        VR512:$src0),
1712          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1713}
1714
// Instantiates the 512-bit (Z, HasDQI) and 256-bit (Z256, HasDQI + HasVLX)
// variants of a 32x2 broadcast through avx512_broadcast_rm_split. Both
// variants pass _Src.info128 as the last VT-info argument and use the
// WriteShuffle256 / WriteShuffle256Ld scheduling classes. No 128-bit variant
// is defined by this multiclass.
// NOTE(review): the meaning of the trailing `0, null_frag, null_frag`
// arguments is defined by avx512_broadcast_rm_split, which is outside this
// excerpt - confirm there.
1715multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1716                                        AVX512VLVectorVTInfo _Dst,
1717                                        AVX512VLVectorVTInfo _Src> {
1718  let Predicates = [HasDQI] in
1719    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1720                                          WriteShuffle256Ld, _Dst.info512,
1721                                          _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1722                                          EVEX_V512;
1723  let Predicates = [HasDQI, HasVLX] in
1724    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1725                                          WriteShuffle256Ld, _Dst.info256,
1726                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
1727                                          EVEX_V256;
1728}
1729
// Extends avx512_common_broadcast_32x2 (Z and Z256) with a 128-bit Z128
// variant. The Z128 variant is guarded by HasDQI + HasVLX and uses the
// WriteShuffle / WriteShuffleXLd scheduling classes.
1730multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1731                                         AVX512VLVectorVTInfo _Dst,
1732                                         AVX512VLVectorVTInfo _Src> :
1733  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1734
1735  let Predicates = [HasDQI, HasVLX] in
1736    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1737                                          WriteShuffleXLd, _Dst.info128,
1738                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
1739                                          EVEX_V128;
1740}
1741
// VBROADCASTI32X2 is built from the i32x2 multiclass and so gets Z, Z256 and
// Z128 variants. VBROADCASTF32X2 is built from the base multiclass and so
// gets only Z and Z256.
1742defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1743                                          avx512vl_i32_info, avx512vl_i64_info>;
1744defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1745                                          avx512vl_f32_info, avx512vl_f64_info>;
1746
1747//===----------------------------------------------------------------------===//
1748// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1749//---
// Defines the single register form `rr`, which takes a mask register
// (KRC:$src) and writes a vector register (_.RC:$dst). It is selected from
// X86VBroadcastm and scheduled as WriteShuffle.
1750multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1751                                  X86VectorVTInfo _, RegisterClass KRC> {
1752  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1753                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1754                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1755                  EVEX, Sched<[WriteShuffle]>;
1756}
1757
// Instantiates avx512_mask_broadcastm for all three vector widths of VTInfo.
// The 512-bit variant (Z) is guarded by HasCDI; the 256- and 128-bit variants
// (Z256, Z128) are guarded by HasCDI + HasVLX. The same mask register class
// KRC is used for every width.
1758multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1759                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1760  let Predicates = [HasCDI] in
1761    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1762  let Predicates = [HasCDI, HasVLX] in {
1763    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1764    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1765  }
1766}
1767
// VPBROADCASTMW2D reads a VK16 mask and produces i32-element vectors.
// VPBROADCASTMB2Q reads a VK8 mask, produces i64-element vectors, and
// additionally sets REX_W.
1768defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1769                                               avx512vl_i32_info, VK16>;
1770defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1771                                               avx512vl_i64_info, VK8>, REX_W;
1772
1773//===----------------------------------------------------------------------===//
1774// -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector memory (rm) forms of a VPERMI2-style
// instruction. $src1 is tied to $dst and appears in the selection pattern as
// the IdxVT-typed middle operand of X86VPermt2; $src2 and $src3 (register, or
// a load through _.LdFrag for rm) are the first and last operands.
// The forms are built with AVX512_maskable_3src_cast, which takes both the
// result type info (_) and the tied operand's type info (IdxVT).
1775multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1776                         X86FoldableSchedWrite sched,
1777                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1778let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1779    hasSideEffects = 0 in {
1780  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1781          (ins _.RC:$src2, _.RC:$src3),
1782          OpcodeStr, "$src3, $src2", "$src2, $src3",
1783          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1784          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1785
  // mayLoad is set explicitly for the memory form only.
1786  let mayLoad = 1 in
1787  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1788            (ins _.RC:$src2, _.MemOp:$src3),
1789            OpcodeStr, "$src3, $src2", "$src2, $src3",
1790            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1791                   (_.VT (_.LdFrag addr:$src3)))), 1>,
1792            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1793  }
1794}
1795
// Broadcast-memory (rmb) form of a VPERMI2-style instruction. $src3 is a
// scalar memory operand loaded through _.BroadcastLdFrag, the assembly string
// appends _.BroadcastStr to it, and the record is marked EVEX_B. As in
// avx512_perm_i, $src1 is tied to $dst and is the IdxVT-typed middle operand
// of X86VPermt2.
1796multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1797                            X86FoldableSchedWrite sched,
1798                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1799  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1800      hasSideEffects = 0, mayLoad = 1 in
1801  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1802              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1803              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1804              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1805              (_.VT (X86VPermt2 _.RC:$src2,
1806               IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1807              AVX5128IBase, EVEX_4V, EVEX_B,
1808              Sched<[sched.Folded, sched.ReadAfterFold]>;
1809}
1810
// Instantiates avx512_perm_i (rr/rm) together with avx512_perm_i_mb (rmb) for
// all three vector widths. The 512-bit records use the bare NAME; the 128- and
// 256-bit records are named NAME#128 / NAME#256 and are guarded by HasVLX.
// This multiclass sets no predicate of its own for the 512-bit records.
1811multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1812                               X86FoldableSchedWrite sched,
1813                               AVX512VLVectorVTInfo VTInfo,
1814                               AVX512VLVectorVTInfo ShuffleMask> {
1815  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1816                           ShuffleMask.info512>,
1817            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1818                             ShuffleMask.info512>, EVEX_V512;
1819  let Predicates = [HasVLX] in {
1820  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1821                               ShuffleMask.info128>,
1822                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1823                                  ShuffleMask.info128>, EVEX_V128;
1824  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1825                               ShuffleMask.info256>,
1826                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1827                                  ShuffleMask.info256>, EVEX_V256;
1828  }
1829}
1830
// Like avx512_perm_i_sizes, but instantiates only avx512_perm_i (rr/rm), with
// no broadcast-memory form, and guards every width with the caller-supplied
// predicate Prd. The 128/256-bit records additionally require HasVLX.
1831multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1832                                  X86FoldableSchedWrite sched,
1833                                  AVX512VLVectorVTInfo VTInfo,
1834                                  AVX512VLVectorVTInfo Idx,
1835                                  Predicate Prd> {
1836  let Predicates = [Prd] in
1837  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1838                           Idx.info512>, EVEX_V512;
1839  let Predicates = [Prd, HasVLX] in {
1840  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1841                               Idx.info128>, EVEX_V128;
1842  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1843                               Idx.info256>,  EVEX_V256;
1844  }
1845}
1846
// VPERMI2 instantiations. D/Q/PS/PD use avx512_perm_i_sizes and therefore
// include broadcast-memory forms. W and B use avx512_perm_i_sizes_bw, gated on
// HasBWI and HasVBMI respectively. The fp variants (PS/PD) pair an fp data
// type with an integer index type of the same element width.
1847defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1848                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1849defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1850                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1851defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1852                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1853                  REX_W, EVEX_CD8<16, CD8VF>;
1854defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1855                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1856                  EVEX_CD8<8, CD8VF>;
1857defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1858                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1859defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1860                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1861
1862// Extra patterns to deal with extra bitcasts due to passthru and index being
1863// different types on the fp versions.
// Each pattern matches a masked select whose true operand is an X86VPermt2
// and whose false (pass-through) operand is $src1 bitcast from CastVT to the
// result type, where the permute's index operand is the same $src1 bitcast
// from CastVT to IdxVT. The three patterns cover a register, a full-vector
// load and a broadcast load as the last permute operand; they select the
// merge-masked rrk, rmk and rmbk records of InstrStr respectively.
1864multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1865                                  X86VectorVTInfo IdxVT,
1866                                  X86VectorVTInfo CastVT> {
1867  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1868                                (X86VPermt2 (_.VT _.RC:$src2),
1869                                            (IdxVT.VT (bitconvert
1870                                                       (CastVT.VT _.RC:$src1))),
1871                                            _.RC:$src3),
1872                                (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1873            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1874                                                _.RC:$src2, _.RC:$src3)>;
1875  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1876                                (X86VPermt2 _.RC:$src2,
1877                                            (IdxVT.VT (bitconvert
1878                                                       (CastVT.VT _.RC:$src1))),
1879                                            (_.LdFrag addr:$src3)),
1880                                (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1881            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1882                                                _.RC:$src2, addr:$src3)>;
1883  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
1884                                 (X86VPermt2 _.RC:$src2,
1885                                             (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1886                                             (_.BroadcastLdFrag addr:$src3)),
1887                                 (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1888            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1889                                                 _.RC:$src2, addr:$src3)>;
1890}
1891
1892// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only VPERMI2PS is covered here, with a vXi64 cast type for each of the
// 512-, 256- and 128-bit widths.
1893defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1894defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1895defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1896
1897// VPERMT2
// Register (rr) and full-vector memory (rm) forms of a VPERMT2-style
// instruction. $src1 is tied to $dst and is the first (data) operand of
// X86VPermt2; the index is the explicit input $src2 in register class
// IdxVT.RC; $src3 is the last operand (register, or a load through
// _.LdFrag for rm). Unlike avx512_perm_i, these use AVX512_maskable_3src.
1898multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1899                         X86FoldableSchedWrite sched,
1900                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1901let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1902  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1903          (ins IdxVT.RC:$src2, _.RC:$src3),
1904          OpcodeStr, "$src3, $src2", "$src2, $src3",
1905          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1906          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1907
1908  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1909            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1910            OpcodeStr, "$src3, $src2", "$src2, $src3",
1911            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1912                   (_.LdFrag addr:$src3))), 1>,
1913            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
1914  }
1915}
// Broadcast-memory (rmb) form of a VPERMT2-style instruction. $src3 is a
// scalar memory operand loaded through _.BroadcastLdFrag, the assembly string
// appends _.BroadcastStr to it, and the record is marked EVEX_B. $src1 is
// tied to $dst and $src2 (IdxVT.RC) is the index, as in avx512_perm_t.
1916multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1917                            X86FoldableSchedWrite sched,
1918                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1919  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1920  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1921              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1922              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1923              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1924              (_.VT (X86VPermt2 _.RC:$src1,
1925               IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1926              AVX5128IBase, EVEX_4V, EVEX_B,
1927              Sched<[sched.Folded, sched.ReadAfterFold]>;
1928}
1929
// Instantiates avx512_perm_t (rr/rm) together with avx512_perm_t_mb (rmb) for
// all three vector widths. The 512-bit records use the bare NAME; the 128- and
// 256-bit records are named NAME#128 / NAME#256 and are guarded by HasVLX.
// This multiclass sets no predicate of its own for the 512-bit records.
1930multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1931                               X86FoldableSchedWrite sched,
1932                               AVX512VLVectorVTInfo VTInfo,
1933                               AVX512VLVectorVTInfo ShuffleMask> {
1934  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1935                              ShuffleMask.info512>,
1936            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1937                              ShuffleMask.info512>, EVEX_V512;
1938  let Predicates = [HasVLX] in {
1939  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1940                              ShuffleMask.info128>,
1941                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1942                              ShuffleMask.info128>, EVEX_V128;
1943  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1944                              ShuffleMask.info256>,
1945                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1946                              ShuffleMask.info256>, EVEX_V256;
1947  }
1948}
1949
// Like avx512_perm_t_sizes, but instantiates only avx512_perm_t (rr/rm), with
// no broadcast-memory form, and guards every width with the caller-supplied
// predicate Prd. The 128/256-bit records additionally require HasVLX.
1950multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1951                                  X86FoldableSchedWrite sched,
1952                                  AVX512VLVectorVTInfo VTInfo,
1953                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
1954  let Predicates = [Prd] in
1955  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1956                           Idx.info512>, EVEX_V512;
1957  let Predicates = [Prd, HasVLX] in {
1958  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1959                               Idx.info128>, EVEX_V128;
1960  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1961                               Idx.info256>, EVEX_V256;
1962  }
1963}
1964
// VPERMT2 instantiations. D/Q/PS/PD use avx512_perm_t_sizes and therefore
// include broadcast-memory forms. W and B use avx512_perm_t_sizes_bw, gated on
// HasBWI and HasVBMI respectively. The fp variants (PS/PD) pair an fp data
// type with an integer index type of the same element width.
1965defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1966                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1967defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1968                  avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1969defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1970                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1971                  REX_W, EVEX_CD8<16, CD8VF>;
1972defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1973                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1974                  EVEX_CD8<8, CD8VF>;
1975defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1976                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1977defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1978                  avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
1979
1980//===----------------------------------------------------------------------===//
1981// AVX-512 - BLEND using mask
1982//
1983
// Register and full-vector memory forms of a mask-blend instruction:
// unmasked (rr/rm), merge-masked (rrk/rmk, EVEX_K) and zero-masked
// (rrkz/rmkz, EVEX_KZ). All six records have an empty selection pattern, so
// hasSideEffects (and mayLoad for the memory forms) are set explicitly.
// NOTE(review): the multiclass name looks like an accidental rename (it is
// used by blendmask_dq / blendmask_bw and also for integer blends) - confirm
// before relying on the "WriteFVar" prefix meaning anything.
1984multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1985                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1986  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1987  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1988             (ins _.RC:$src1, _.RC:$src2),
1989             !strconcat(OpcodeStr,
1990             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1991             EVEX_4V, Sched<[sched]>;
1992  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1993             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1994             !strconcat(OpcodeStr,
1995             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1996             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1997  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1998             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1999             !strconcat(OpcodeStr,
2000             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2001             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>;
  // Memory forms: $src2 is a full-vector memory operand (_.MemOp) and each
  // record carries EVEX_CD8<_.EltSize, CD8VF>.
2002  let mayLoad = 1 in {
2003  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2004             (ins _.RC:$src1, _.MemOp:$src2),
2005             !strconcat(OpcodeStr,
2006             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
2007             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
2008             Sched<[sched.Folded, sched.ReadAfterFold]>;
2009  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2010             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2011             !strconcat(OpcodeStr,
2012             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
2013             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
2014             Sched<[sched.Folded, sched.ReadAfterFold]>;
2015  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2016             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2017             !strconcat(OpcodeStr,
2018             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
2019             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
2020             Sched<[sched.Folded, sched.ReadAfterFold]>;
2021  }
2022  }
2023}
// Broadcast-memory forms of a mask-blend instruction: merge-masked (rmbk),
// zero-masked (rmbkz) and unmasked (rmb). $src2 is a scalar memory operand
// (_.ScalarMemOp), the assembly string appends _.BroadcastStr to it, and each
// record is marked EVEX_B. All three have an empty selection pattern, so
// mayLoad and hasSideEffects are set explicitly.
2024multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
2025                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2026  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
2027  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2028      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2029       !strconcat(OpcodeStr,
2030            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2031            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2032      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2033      Sched<[sched.Folded, sched.ReadAfterFold]>;
2034
2035  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2036      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2037       !strconcat(OpcodeStr,
2038            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2039            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2040      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2041      Sched<[sched.Folded, sched.ReadAfterFold]>;
2042
2043  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2044      (ins _.RC:$src1, _.ScalarMemOp:$src2),
2045       !strconcat(OpcodeStr,
2046            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2047            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2048      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2049      Sched<[sched.Folded, sched.ReadAfterFold]>;
2050  }
2051}
2052
// Instantiates the blend forms (WriteFVarBlendask) together with the
// broadcast-memory forms (WriteFVarBlendask_rmb) for all three vector widths,
// picking the ZMM/YMM/XMM member of `sched` for each. Z256 and Z128 are
// guarded by HasVLX; this multiclass sets no predicate of its own for Z.
2053multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2054                        AVX512VLVectorVTInfo VTInfo> {
2055  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2056           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2057                                 EVEX_V512;
2058
2059  let Predicates = [HasVLX] in {
2060    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2061                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2062                                      EVEX_V256;
2063    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2064                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2065                                      EVEX_V128;
2066  }
2067}
2068
// Like blendmask_dq, but instantiates only WriteFVarBlendask, with no
// broadcast-memory forms, and guards every width with HasBWI. Z256 and Z128
// additionally require HasVLX.
2069multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2070                        AVX512VLVectorVTInfo VTInfo> {
2071  let Predicates = [HasBWI] in
2072    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2073                               EVEX_V512;
2074
2075  let Predicates = [HasBWI, HasVLX] in {
2076    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2077                                  EVEX_V256;
2078    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2079                                  EVEX_V128;
2080  }
2081}
2082
// Blend instantiations. The fp blends (PS/PD) use SchedWriteFVarBlend and the
// integer blends use SchedWriteVarBlend. The 32/64-bit element variants go
// through blendmask_dq (with broadcast forms); the 8/16-bit element variants
// go through blendmask_bw. The 64-bit and 16-bit element variants set REX_W.
2083defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2084                              avx512vl_f32_info>;
2085defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2086                              avx512vl_f64_info>, REX_W;
2087defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2088                              avx512vl_i32_info>;
2089defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2090                              avx512vl_i64_info>, REX_W;
2091defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2092                              avx512vl_i8_info>;
2093defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2094                              avx512vl_i16_info>, REX_W;
2095
2096//===----------------------------------------------------------------------===//
2097// Compare Instructions
2098//===----------------------------------------------------------------------===//
2099
2100// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2101
// Scalar fp compare into a mask register (opcode 0xC2, mnemonic
// "vcmp" # _.Suffix, condition code in the u8imm $cc operand).
//   rr_Int / rm_Int : vector-register operands (_.RC), built with
//                     AVX512_maskable_cmp; OpNode is the unmasked pattern and
//                     OpNode_su is the second pattern fragment passed to it.
//   rrb_Int         : {sae} register form (EVEX_B) using OpNodeSAE /
//                     OpNodeSAE_su. It is the only form here without
//                     SIMD_EXC; it declares Uses = [MXCSR] instead.
//   rr / rm         : isCodeGenOnly forms on the scalar register class
//                     (_.FRC), selected directly from OpNode.
2102multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2103                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2104                             X86FoldableSchedWrite sched> {
2105  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2106                      (outs _.KRC:$dst),
2107                      (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2108                      "vcmp"#_.Suffix,
2109                      "$cc, $src2, $src1", "$src1, $src2, $cc",
2110                      (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2111                      (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2112                                 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2113  let mayLoad = 1 in
2114  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2115                    (outs _.KRC:$dst),
2116                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2117                    "vcmp"#_.Suffix,
2118                    "$cc, $src2, $src1", "$src1, $src2, $cc",
2119                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2120                        timm:$cc),
2121                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
2122                        timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2123                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2124
2125  let Uses = [MXCSR] in
2126  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2127                     (outs _.KRC:$dst),
2128                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2129                     "vcmp"#_.Suffix,
2130                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2131                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2132                                timm:$cc),
2133                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2134                                   timm:$cc)>,
2135                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2136
2137  let isCodeGenOnly = 1 in {
    // The brace-less `let` below applies to `rr` only; `rm` is not marked
    // commutable.
2138    let isCommutable = 1 in
2139    def rr : AVX512Ii8<0xC2, MRMSrcReg,
2140                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2141                !strconcat("vcmp", _.Suffix,
2142                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2143                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2144                                          _.FRC:$src2,
2145                                          timm:$cc))]>,
2146                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2147    def rm : AVX512Ii8<0xC2, MRMSrcMem,
2148              (outs _.KRC:$dst),
2149              (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2150              !strconcat("vcmp", _.Suffix,
2151                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2152              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2153                                        (_.ScalarLdFrag addr:$src2),
2154                                        timm:$cc))]>,
2155              EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2156              Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2157  }
2158}
2159
// Single-use variants of X86cmpms and X86cmpmsSAE: each fragment matches the
// underlying node only when that node has exactly one use
// (N->hasOneUse()).
2160def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2161                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
2162  return N->hasOneUse();
2163}]>;
2164def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2165                          (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2166  return N->hasOneUse();
2167}]>;
2168
// Scalar compare instantiations. All three use X86cmpms / X86cmpmsSAE, their
// single-use fragments, and SchedWriteFCmp.Scl. VCMPSSZ and VCMPSDZ are
// guarded by HasAVX512; VCMPSHZ is guarded by HasFP16.
// NOTE(review): VCMPSHZ is placed in ExeDomain SSEPackedSingle, the same as
// VCMPSSZ - presumably intentional for f16; confirm.
2169let Predicates = [HasAVX512] in {
2170  let ExeDomain = SSEPackedSingle in
2171  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2172                                   X86cmpms_su, X86cmpmsSAE_su,
2173                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2174  let ExeDomain = SSEPackedDouble in
2175  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2176                                   X86cmpms_su, X86cmpmsSAE_su,
2177                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
2178}
2179let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
2180  defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
2181                                   X86cmpms_su, X86cmpmsSAE_su,
2182                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
2183
// Packed integer compare into a mask register: unmasked (rr/rm) and
// write-masked (rrk/rmk, EVEX_K) forms with register or full-vector memory
// $src2. All four records have an empty selection pattern, so hasSideEffects
// (and mayLoad for the memory forms) are set explicitly. IsCommutable is
// applied to the register forms (rr, rrk) only.
2184multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2185                              X86FoldableSchedWrite sched,
2186                              X86VectorVTInfo _, bit IsCommutable> {
2187  let isCommutable = IsCommutable, hasSideEffects = 0 in
2188  def rr : AVX512BI<opc, MRMSrcReg,
2189             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2190             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2191             []>, EVEX_4V, Sched<[sched]>;
2192  let mayLoad = 1, hasSideEffects = 0 in
2193  def rm : AVX512BI<opc, MRMSrcMem,
2194             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2195             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2196             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2197  let isCommutable = IsCommutable, hasSideEffects = 0 in
2198  def rrk : AVX512BI<opc, MRMSrcReg,
2199              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2200              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2201                          "$dst {${mask}}, $src1, $src2}"),
2202              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2203  let mayLoad = 1, hasSideEffects = 0 in
2204  def rmk : AVX512BI<opc, MRMSrcMem,
2205              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2206              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2207                          "$dst {${mask}}, $src1, $src2}"),
2208              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2209}
2210
// Extends avx512_icmp_packed (rr/rm/rrk/rmk) with broadcast-memory forms:
// unmasked rmb and write-masked rmbk. $src2 is a scalar memory operand
// (_.ScalarMemOp), the assembly string appends _.BroadcastStr to it, and both
// records are marked EVEX_B. Both have an empty selection pattern.
2211multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2212                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2213                                  bit IsCommutable> :
2214           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2215  let mayLoad = 1, hasSideEffects = 0 in {
2216  def rmb : AVX512BI<opc, MRMSrcMem,
2217              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2218              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2219                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2220              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2221  def rmbk : AVX512BI<opc, MRMSrcMem,
2222               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2223                                       _.ScalarMemOp:$src2),
2224               !strconcat(OpcodeStr,
2225                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2226                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2227               []>, EVEX_4V, EVEX_K, EVEX_B,
2228               Sched<[sched.Folded, sched.ReadAfterFold]>;
2229  }
2230}
2231
// Instantiates avx512_icmp_packed for all three vector widths, picking the
// ZMM/YMM/XMM member of `sched` for each. Z is guarded by `prd`; Z256 and
// Z128 by `prd` + HasVLX. IsCommutable defaults to 0 and is forwarded
// unchanged.
2232multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2233                                 X86SchedWriteWidths sched,
2234                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2235                                 bit IsCommutable = 0> {
2236  let Predicates = [prd] in
2237  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2238                              VTInfo.info512, IsCommutable>, EVEX_V512;
2239
2240  let Predicates = [prd, HasVLX] in {
2241    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2242                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2243    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2244                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2245  }
2246}
2247
// Same as avx512_icmp_packed_vl, but instantiates avx512_icmp_packed_rmb so
// that every width also gets the broadcast-memory forms (rmb, rmbk).
2248multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2249                                     X86SchedWriteWidths sched,
2250                                     AVX512VLVectorVTInfo VTInfo,
2251                                     Predicate prd, bit IsCommutable = 0> {
2252  let Predicates = [prd] in
2253  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2254                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2255
2256  let Predicates = [prd, HasVLX] in {
2257    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2258                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2259    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2260                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2261  }
2262}
2263
2264// This node is ISD::SETCC declared with SDNPCommutative (note: it wraps
2265// setcc, not X86cmpm) to help match loads in both operands for PCMPEQ.
2266def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// Two-operand fragment matching a setcc whose condition code is SETGT. It is
// built on the plain setcc node rather than on X86setcc_commute.
2267def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2268                         (setcc node:$src1, node:$src2, SETGT)>;
2269
2270// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2271// increase the pattern complexity the way an immediate would.
//
// Layout of the definitions below:
//   - Byte/word forms use avx512_icmp_packed_vl with HasBWI.
//   - Dword/qword forms use avx512_icmp_packed_rmb_vl with HasAVX512.
//   - The VPCMPEQ* forms pass IsCommutable = 1; the VPCMPGT* forms leave it at
//     its default of 0.
2272let AddedComplexity = 2 in {
2273// FIXME: Is there a better scheduler class for VPCMP?
2274defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2275                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2276                EVEX_CD8<8, CD8VF>, WIG;
2277
2278defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2279                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2280                EVEX_CD8<16, CD8VF>, WIG;
2281
2282defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2283                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2284                EVEX_CD8<32, CD8VF>;
2285
2286defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2287                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2288                T8PD, REX_W, EVEX_CD8<64, CD8VF>;
2289
2290defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2291                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2292                EVEX_CD8<8, CD8VF>, WIG;
2293
2294defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2295                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2296                EVEX_CD8<16, CD8VF>, WIG;
2297
2298defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2299                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2300                EVEX_CD8<32, CD8VF>;
2301
2302defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2303                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2304                T8PD, REX_W, EVEX_CD8<64, CD8VF>;
2305}
2306
// Node transform applied to a setcc: reads the condition code from operand 2,
// maps it through X86::getVPCMPImmForCond and returns the result as an i8
// immediate.
2307def X86pcmpm_imm : SDNodeXForm<setcc, [{
2308  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2309  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2310  return getI8Imm(SSECC, SDLoc(N));
2311}]>;
2312
2313// Swapped operand version of X86pcmpm_imm: the immediate obtained from
// X86::getVPCMPImmForCond is additionally passed through
// X86::getSwappedVPCMPImm before being returned as an i8 immediate.
2314def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2315  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2316  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2317  SSECC = X86::getSwappedVPCMPImm(SSECC);
2318  return getI8Imm(SSECC, SDLoc(N));
2319}]>;
2320
// Integer compares taking an immediate condition code ("vpcmp" # Suffix) and
// writing a mask register (_.KRC):
//   rri  - register, register            (isCommutable = 1)
//   rmi  - register, full-vector load    (_.LdFrag)
//   rrik - like rri, pattern is (and $mask, compare), EVEX_K
//   rmik - like rmi, pattern is (and $mask, compare), EVEX_K
// Frag is used by the unmasked patterns and Frag_su by the masked ones.
// Name is concatenated with _.ZSuffix to look up the rmi/rmik records for the
// load-in-first-operand patterns at the end of the multiclass.
2321multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2322                          PatFrag Frag_su,
2323                          X86FoldableSchedWrite sched,
2324                          X86VectorVTInfo _, string Name> {
2325  let isCommutable = 1 in
2326  def rri : AVX512AIi8<opc, MRMSrcReg,
2327             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2328             !strconcat("vpcmp", Suffix,
2329                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2330             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2331                                                (_.VT _.RC:$src2),
2332                                                cond)))]>,
2333             EVEX_4V, Sched<[sched]>;
2334  def rmi : AVX512AIi8<opc, MRMSrcMem,
2335             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2336             !strconcat("vpcmp", Suffix,
2337                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2338             [(set _.KRC:$dst, (_.KVT
2339                                (Frag:$cc
2340                                 (_.VT _.RC:$src1),
2341                                 (_.VT (_.LdFrag addr:$src2)),
2342                                 cond)))]>,
2343             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2344  let isCommutable = 1 in
2345  def rrik : AVX512AIi8<opc, MRMSrcReg,
2346              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2347                                      u8imm:$cc),
2348              !strconcat("vpcmp", Suffix,
2349                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2350                         "$dst {${mask}}, $src1, $src2, $cc}"),
2351              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2352                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2353                                                         (_.VT _.RC:$src2),
2354                                                         cond))))]>,
2355              EVEX_4V, EVEX_K, Sched<[sched]>;
2356  def rmik : AVX512AIi8<opc, MRMSrcMem,
2357              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2358                                    u8imm:$cc),
2359              !strconcat("vpcmp", Suffix,
2360                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2361                         "$dst {${mask}}, $src1, $src2, $cc}"),
2362              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2363                                     (_.KVT
2364                                      (Frag_su:$cc
2365                                       (_.VT _.RC:$src1),
2366                                       (_.VT (_.LdFrag addr:$src2)),
2367                                       cond))))]>,
2368              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2369
  // Load in the first operand (unmasked): select rmi with the register as
  // $src1 and rewrite the condition code with X86pcmpm_imm_commute.
2370  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2371                             (_.VT _.RC:$src1), cond)),
2372            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2373             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2374
  // Load in the first operand (masked): same as above, selecting rmik.
2375  def : Pat<(and _.KRCWM:$mask,
2376                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2377                                     (_.VT _.RC:$src1), cond))),
2378            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2379             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2380             (X86pcmpm_imm_commute $cc))>;
2381}
2382
// Extends avx512_icmp_cc (inherited with the same arguments) with the
// broadcast-memory forms:
//   rmib  - register, broadcast load (_.BroadcastLdFrag, _.ScalarMemOp), EVEX_B
//   rmibk - like rmib, pattern is (and $mask, compare), EVEX_K
// and with patterns that select rmib/rmibk when the broadcast load is the
// first operand, rewriting the condition code with X86pcmpm_imm_commute.
2383multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2384                              PatFrag Frag_su, X86FoldableSchedWrite sched,
2385                              X86VectorVTInfo _, string Name> :
2386           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2387  def rmib : AVX512AIi8<opc, MRMSrcMem,
2388             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2389                                     u8imm:$cc),
2390             !strconcat("vpcmp", Suffix,
2391                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2392                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2393             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2394                                       (_.VT _.RC:$src1),
2395                                       (_.BroadcastLdFrag addr:$src2),
2396                                       cond)))]>,
2397             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2398  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2399              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2400                                       _.ScalarMemOp:$src2, u8imm:$cc),
2401              !strconcat("vpcmp", Suffix,
2402                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2403                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2404              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2405                                     (_.KVT (Frag_su:$cc
2406                                             (_.VT _.RC:$src1),
2407                                             (_.BroadcastLdFrag addr:$src2),
2408                                             cond))))]>,
2409              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2410
  // Broadcast load in the first operand (unmasked).
2411  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2412                    (_.VT _.RC:$src1), cond)),
2413            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2414             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2415
  // Broadcast load in the first operand (masked).
2416  def : Pat<(and _.KRCWM:$mask,
2417                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2418                                     (_.VT _.RC:$src1), cond))),
2419            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2420             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2421             (X86pcmpm_imm_commute $cc))>;
2422}
2423
// Vector-length wrapper around avx512_icmp_cc.
//   Z    : VTInfo.info512 / sched.ZMM, EVEX_V512, Predicates = [prd]
//   Z256 : VTInfo.info256 / sched.YMM, EVEX_V256, Predicates = [prd, HasVLX]
//   Z128 : VTInfo.info128 / sched.XMM, EVEX_V128, Predicates = [prd, HasVLX]
// NAME is passed as the Name argument of avx512_icmp_cc, which uses it to
// look up instruction records by name.
2424multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2425                             PatFrag Frag_su, X86SchedWriteWidths sched,
2426                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2427  let Predicates = [prd] in
2428  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2429                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2430
2431  let Predicates = [prd, HasVLX] in {
2432    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2433                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2434    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2435                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2436  }
2437}
2438
// Vector-length wrapper around avx512_icmp_cc_rmb.
//   Z    : VTInfo.info512 / sched.ZMM, EVEX_V512, Predicates = [prd]
//   Z256 : VTInfo.info256 / sched.YMM, EVEX_V256, Predicates = [prd, HasVLX]
//   Z128 : VTInfo.info128 / sched.XMM, EVEX_V128, Predicates = [prd, HasVLX]
// NAME is passed as the Name argument of avx512_icmp_cc_rmb, which uses it to
// look up instruction records by name.
2439multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2440                                 PatFrag Frag_su, X86SchedWriteWidths sched,
2441                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2442  let Predicates = [prd] in
2443  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2444                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2445
2446  let Predicates = [prd, HasVLX] in {
2447    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2448                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2449    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2450                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2451  }
2452}
2453
// setcc fragment that matches only when ISD::isUnsignedIntSetCC returns false
// for the condition code. X86pcmpm_imm is attached as the operand transform.
2454def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2455                       (setcc node:$src1, node:$src2, node:$cc), [{
2456  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2457  return !ISD::isUnsignedIntSetCC(CC);
2458}], X86pcmpm_imm>;
2459
// Single-use variant of X86pcmpm: additionally requires N->hasOneUse().
// X86pcmpm_imm is attached as the operand transform.
2460def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2461                          (setcc node:$src1, node:$src2, node:$cc), [{
2462  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2463  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2464}], X86pcmpm_imm>;
2465
// setcc fragment that matches only when ISD::isUnsignedIntSetCC returns true
// for the condition code. X86pcmpm_imm is attached as the operand transform.
2466def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2467                        (setcc node:$src1, node:$src2, node:$cc), [{
2468  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2469  return ISD::isUnsignedIntSetCC(CC);
2470}], X86pcmpm_imm>;
2471
// Single-use variant of X86pcmpum: additionally requires N->hasOneUse().
// X86pcmpm_imm is attached as the operand transform.
2472def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2473                           (setcc node:$src1, node:$src2, node:$cc), [{
2474  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2475  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2476}], X86pcmpm_imm>;
2477
2478// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
//
// Layout of the definitions below:
//   - VPCMP{B,W,D,Q} use X86pcmpm/X86pcmpm_su (condition codes that are not
//     unsigned integer comparisons) with opcode 0x3F (B/W) or 0x1F (D/Q).
//   - VPCMPU{B,W,D,Q} use X86pcmpum/X86pcmpum_su (unsigned integer condition
//     codes) with opcode 0x3E (B/W) or 0x1E (D/Q).
//   - Byte/word forms use avx512_icmp_cc_vl with HasBWI; dword/qword forms
//     use avx512_icmp_cc_rmb_vl with HasAVX512.
//   - The word and qword forms carry REX_W.
2479defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2480                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2481                                EVEX_CD8<8, CD8VF>;
2482defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2483                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2484                                 EVEX_CD8<8, CD8VF>;
2485
2486defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2487                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2488                                REX_W, EVEX_CD8<16, CD8VF>;
2489defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2490                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2491                                 REX_W, EVEX_CD8<16, CD8VF>;
2492
2493defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2494                                    SchedWriteVecALU, avx512vl_i32_info,
2495                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2496defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2497                                     SchedWriteVecALU, avx512vl_i32_info,
2498                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2499
2500defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2501                                    SchedWriteVecALU, avx512vl_i64_info,
2502                                    HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2503defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2504                                     SchedWriteVecALU, avx512vl_i64_info,
2505                                     HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
2506
// X86cmpm fragment that matches only when the node has a single use.
2507def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2508                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
2509  return N->hasOneUse();
2510}]>;
2511
// Node transform applied to a timm: keeps the low 5 bits of the value, passes
// them through X86::getSwappedVCMPImm and returns the result as an i8
// immediate. Used by the patterns that swap the compare operands.
2512def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2513  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2514  return getI8Imm(Imm, SDLoc(N));
2515}]>;
2516
// Packed floating-point compare into a mask register ("vcmp" # _.Suffix,
// opcode 0xC2), built with AVX512_maskable_cmp:
//   rri  - register, register (last template argument is 1)
//   rmi  - register, full-vector load (_.LdFrag)
//   rmbi - register, broadcast load (_.BroadcastLdFrag), EVEX_B
// All three are defined with Uses = [MXCSR] and mayRaiseFPException = 1.
// The unmasked pattern uses X86any_cmpm and the masked one X86cmpm_su.
//
// The patterns after the instruction definitions also reference rrik, rmik
// and rmbik records; these are presumably the masked variants produced by
// AVX512_maskable_cmp, which is defined outside this section.
//
// Name is concatenated with _.ZSuffix to look up instruction records.
2517multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2518                              string Name> {
2519let Uses = [MXCSR], mayRaiseFPException = 1 in {
2520  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2521                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2522                   "vcmp"#_.Suffix,
2523                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2524                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2525                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2526                   1>, Sched<[sched]>;
2527
2528  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2529                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2530                "vcmp"#_.Suffix,
2531                "$cc, $src2, $src1", "$src1, $src2, $cc",
2532                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2533                             timm:$cc),
2534                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2535                            timm:$cc)>,
2536                Sched<[sched.Folded, sched.ReadAfterFold]>;
2537
2538  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2539                (outs _.KRC:$dst),
2540                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2541                "vcmp"#_.Suffix,
2542                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2543                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2544                (X86any_cmpm (_.VT _.RC:$src1),
2545                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2546                             timm:$cc),
2547                (X86cmpm_su (_.VT _.RC:$src1),
2548                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2549                            timm:$cc)>,
2550                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2551  }
2552
2553  // Patterns for selecting with loads in other operand.
  // The register becomes $src1 of the memory form and the condition code is
  // rewritten with X86cmpm_imm_commute.
2554  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2555                         timm:$cc),
2556            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2557                                                      (X86cmpm_imm_commute timm:$cc))>;
2558
2559  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2560                                            (_.VT _.RC:$src1),
2561                                            timm:$cc)),
2562            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2563                                                       _.RC:$src1, addr:$src2,
2564                                                       (X86cmpm_imm_commute timm:$cc))>;
2565
2566  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2567                         (_.VT _.RC:$src1), timm:$cc),
2568            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2569                                                       (X86cmpm_imm_commute timm:$cc))>;
2570
2571  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2572                                            (_.VT _.RC:$src1),
2573                                            timm:$cc)),
2574            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2575                                                        _.RC:$src1, addr:$src2,
2576                                                        (X86cmpm_imm_commute timm:$cc))>;
2577
2578  // Patterns for mask intrinsics.
  // X86cmpmm carries the mask as its fourth operand: an all-ones mask selects
  // the unmasked instruction, any other mask selects the "k" form.
2579  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2580                      (_.KVT immAllOnesV)),
2581            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2582
2583  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2584            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2585                                                       _.RC:$src2, timm:$cc)>;
2586
2587  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2588                      (_.KVT immAllOnesV)),
2589            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2590
2591  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2592                      _.KRCWM:$mask),
2593            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2594                                                       addr:$src2, timm:$cc)>;
2595
2596  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2597                      (_.KVT immAllOnesV)),
2598            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2599
2600  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2601                      _.KRCWM:$mask),
2602            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2603                                                        addr:$src2, timm:$cc)>;
2604
2605  // Patterns for mask intrinsics with loads in other operand.
2606  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2607                      (_.KVT immAllOnesV)),
2608            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2609                                                      (X86cmpm_imm_commute timm:$cc))>;
2610
2611  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2612                      _.KRCWM:$mask),
2613            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2614                                                       _.RC:$src1, addr:$src2,
2615                                                       (X86cmpm_imm_commute timm:$cc))>;
2616
2617  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2618                      (_.KVT immAllOnesV)),
2619            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2620                                                       (X86cmpm_imm_commute timm:$cc))>;
2621
2622  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2623                      _.KRCWM:$mask),
2624            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2625                                                        _.RC:$src1, addr:$src2,
2626                                                        (X86cmpm_imm_commute  timm:$cc))>;
2627}
2628
// Register-register "vcmp" # _.Suffix form with {sae} in the assembly string
// (rrib, EVEX_B), built with AVX512_maskable_custom_cmp. The first pattern
// matches X86cmpmmSAE with an all-ones mask and the second matches it with
// $mask. Only Uses = [MXCSR] is set here; mayRaiseFPException is not.
2629multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2630  // comparison code form (VCMP[EQ/LT/LE/...])
2631  let Uses = [MXCSR] in
2632  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2633                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2634                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2635                     "vcmp"#_.Suffix,
2636                     "$cc, {sae}, $src2, $src1",
2637                     "$src1, $src2, {sae}, $cc",
2638                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2639                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2640                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2641                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2642                     EVEX_B, Sched<[sched]>;
2643}
2644
// Vector-length wrapper for the packed fp compares.
//   Z    : avx512_vcmp_common + avx512_vcmp_sae on _.info512 / sched.ZMM,
//          EVEX_V512, Predicates = [Pred]
//   Z128 : avx512_vcmp_common on _.info128 / sched.XMM, EVEX_V128,
//          Predicates = [Pred, HasVLX]
//   Z256 : avx512_vcmp_common on _.info256 / sched.YMM, EVEX_V256,
//          Predicates = [Pred, HasVLX]
// Only the Z variant also inherits avx512_vcmp_sae. Pred defaults to HasAVX512.
2645multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2646                       Predicate Pred = HasAVX512> {
2647  let Predicates = [Pred] in {
2648    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2649                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2650
2651  }
2652  let Predicates = [Pred,HasVLX] in {
2653   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2654   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2655  }
2656}
2657
// Packed fp compare instantiations for f64, f32 and f16 element types.
// VCMPPD and VCMPPS use the default predicate of avx512_vcmp (HasAVX512);
// VCMPPH passes HasFP16.
2658defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2659                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
2660defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2661                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2662defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2663                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
2664
2665// Patterns to select fp compares with load as first operand.
// The scalar load is folded into the "rm" form with the register as $src1,
// and the condition code is rewritten with X86cmpm_imm_commute.
2666let Predicates = [HasAVX512] in {
2667  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2668            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2669
2670  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2671            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2672}
2673
// Same as above for the f16 scalar compare, which requires HasFP16.
2674let Predicates = [HasFP16] in {
2675  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2676            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2677}
2678
2679// ----------------------------------------------------------------
2680// FPClass
2681
// X86Vfpclasss fragment that matches only when the node has a single use.
2682def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2683                              (X86Vfpclasss node:$src1, node:$src2), [{
2684  return N->hasOneUse();
2685}]>;
2686
// X86Vfpclass fragment that matches only when the node has a single use.
2687def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2688                             (X86Vfpclass node:$src1, node:$src2), [{
2689  return N->hasOneUse();
2690}]>;
2691
2692// Handle scalar fpclass instructions: mask = op(reg_scalar, imm)
2693//                                     mask = op(mem_scalar, imm)
// Defines four records, all under Predicates = [prd], ExeDomain = _.ExeDomain
// and Uses = [MXCSR]:
//   rr  - register source
//   rrk - register source, pattern is (and $mask, X86Vfpclasss_su ...), EVEX_K
//   rm  - memory source (_.IntScalarMemOp, matched via _.ScalarIntMemFrags)
//   rmk - memory source, pattern is (and $mask, X86Vfpclasss_su ...), EVEX_K
// $src2 is the i32u8imm operand, matched as (i32 timm).
2694multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2695                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2696                                 Predicate prd> {
2697  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2698      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2699                      (ins _.RC:$src1, i32u8imm:$src2),
2700                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2701                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2702                              (i32 timm:$src2)))]>,
2703                      Sched<[sched]>;
2704      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2705                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2706                      OpcodeStr#_.Suffix#
2707                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2708                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2709                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2710                                      (i32 timm:$src2))))]>,
2711                      EVEX_K, Sched<[sched]>;
2712    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2713                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2714                    OpcodeStr#_.Suffix#
2715                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2716                    [(set _.KRC:$dst,
2717                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2718                                        (i32 timm:$src2)))]>,
2719                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2720    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2721                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2722                    OpcodeStr#_.Suffix#
2723                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2724                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2725                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2726                            (i32 timm:$src2))))]>,
2727                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2728  }
2729}
2730
2731// Handle vector fpclass instructions: mask = fpclass(reg_vec, imm)
2732//                                     mask = fpclass(mem_vec, imm)
2733//                                     mask = fpclass(broadcast(eltVt), imm)
// Defines, under ExeDomain = _.ExeDomain and Uses = [MXCSR]:
//   rr / rrk   - register source, unmasked / masked (EVEX_K)
//   rm / rmk   - full-vector load; the mnemonic carries "{" # mem # "}"
//   rmb / rmbk - broadcast load (_.BroadcastLdFrag, _.ScalarMemOp), EVEX_B
// Masked patterns are (and $mask, X86Vfpclass_su ...). The mem string is the
// suffix appended to the mnemonic of the rm/rmk forms and accepted by the
// aliases at the end for the register and broadcast forms.
2734multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2735                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2736                                 string mem>{
2737  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2738  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2739                      (ins _.RC:$src1, i32u8imm:$src2),
2740                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2741                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2742                                       (i32 timm:$src2)))]>,
2743                      Sched<[sched]>;
2744  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2745                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2746                      OpcodeStr#_.Suffix#
2747                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2748                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2749                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2750                                       (i32 timm:$src2))))]>,
2751                      EVEX_K, Sched<[sched]>;
2752  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2753                    (ins _.MemOp:$src1, i32u8imm:$src2),
2754                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2755                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2756                    [(set _.KRC:$dst,(X86Vfpclass
2757                                     (_.VT (_.LdFrag addr:$src1)),
2758                                     (i32 timm:$src2)))]>,
2759                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2760  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2761                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2762                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2763                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2764                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2765                                  (_.VT (_.LdFrag addr:$src1)),
2766                                  (i32 timm:$src2))))]>,
2767                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2768  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2769                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2770                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2771                                      _.BroadcastStr#", $dst|$dst, ${src1}"
2772                                                  #_.BroadcastStr#", $src2}",
2773                    [(set _.KRC:$dst,(X86Vfpclass
2774                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2775                                     (i32 timm:$src2)))]>,
2776                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2777  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2778                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2779                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2780                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2781                                                   _.BroadcastStr#", $src2}",
2782                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2783                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2784                                     (i32 timm:$src2))))]>,
2785                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2786  }
2787
2788  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2789  // the memory form.
  // Each alias below is restricted to the "att" assembler variant and maps
  // the suffixed mnemonic onto rr, rrk, rmb and rmbk respectively.
2790  def : InstAlias<OpcodeStr#_.Suffix#mem#
2791                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2792                  (!cast<Instruction>(NAME#"rr")
2793                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2794  def : InstAlias<OpcodeStr#_.Suffix#mem#
2795                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2796                  (!cast<Instruction>(NAME#"rrk")
2797                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2798  def : InstAlias<OpcodeStr#_.Suffix#mem#
2799                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2800                  _.BroadcastStr#", $src2}",
2801                  (!cast<Instruction>(NAME#"rmb")
2802                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2803  def : InstAlias<OpcodeStr#_.Suffix#mem#
2804                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2805                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2806                  (!cast<Instruction>(NAME#"rmbk")
2807                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2808}
2809
// Vector-length wrapper around avx512_vector_fpclass.
//   Z    : _.info512 / sched.ZMM, mem suffix "z", EVEX_V512, [prd]
//   Z128 : _.info128 / sched.XMM, mem suffix "x", EVEX_V128, [prd, HasVLX]
//   Z256 : _.info256 / sched.YMM, mem suffix "y", EVEX_V256, [prd, HasVLX]
2810multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2811                                     bits<8> opc, X86SchedWriteWidths sched,
2812                                     Predicate prd>{
2813  let Predicates = [prd] in {
2814    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2815                                      _.info512, "z">, EVEX_V512;
2816  }
2817  let Predicates = [prd, HasVLX] in {
2818    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2819                                      _.info128, "x">, EVEX_V128;
2820    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2821                                      _.info256, "y">, EVEX_V256;
2822  }
2823}
2824
// Instantiates every fpclass variant for one mnemonic:
//   PH, PS, PD    - packed forms via avx512_vector_fpclass_all (opcode opcVec)
//   SHZ, SSZ, SDZ - scalar forms via avx512_scalar_fpclass (opcode opcScalar,
//                   sched.Scl)
// The f16 variants (PH, SHZ) use HasFP16, AVX512PSIi8Base and TA; the f32/f64
// variants use HasDQI and AVX512AIi8Base, with REX_W on the f64 ones.
// VEX_LIG is applied to SSZ and SDZ.
2825multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2826                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
2827  defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2828                                      sched, HasFP16>,
2829                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2830  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2831                                   sched.Scl, f16x_info, HasFP16>,
2832                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2833  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2834                                      sched, HasDQI>,
2835                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2836  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2837                                      sched, HasDQI>,
2838                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
2839  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2840                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2841                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2842  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2843                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2844                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
2845}
2846
// All VFPCLASS variants: opcode 0x66 for the packed forms, 0x67 for the scalar
// forms, scheduled with SchedWriteFCmp.
2847defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2848
2849//-----------------------------------------------------------------
2850// Mask register copy, including
2851// - copy between mask registers
2852// - load/store mask registers
2853// - copy from GPR to mask register and vice versa
2854//
// Defines the three non-GPR forms of a mask-register move:
//   kk - mask register to mask register (opc_kk); no selection pattern.
//   km - load from x86memop into a mask register (opc_km); selected for a
//        load producing `vvt`.
//   mk - store a mask register to x86memop (opc_mk); selected for a store of
//        a KRC value.
2855multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2856                         string OpcodeStr, RegisterClass KRC,
2857                         ValueType vvt, X86MemOperand x86memop> {
2858  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2859  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2860             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2861             Sched<[WriteMove]>;
2862  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2863             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2864             [(set KRC:$dst, (vvt (load addr:$src)))]>,
2865             Sched<[WriteLoad]>;
2866  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2867             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2868             [(store KRC:$src, addr:$dst)]>,
2869             Sched<[WriteStore]>;
2870}
2871
// Defines the GPR <-> mask-register move forms:
//   kr - general-purpose register (GRC) to mask register (KRC), opc_kr.
//   rk - mask register (KRC) to general-purpose register (GRC), opc_rk.
// Neither form has a selection pattern; both are marked hasSideEffects = 0.
2872multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2873                             string OpcodeStr,
2874                             RegisterClass KRC, RegisterClass GRC> {
2875  let hasSideEffects = 0 in {
2876    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2877               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2878               Sched<[WriteMove]>;
2879    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2880               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2881               Sched<[WriteMove]>;
2882  }
2883}
2884
// KMOV{B,W,D,Q} instantiations.
//   KMOVB - 8-bit masks (VK8),   requires HasDQI.
//   KMOVW - 16-bit masks (VK16), requires HasAVX512.
//   KMOVD - 32-bit masks (VK32), requires HasBWI.
//   KMOVQ - 64-bit masks (VK64), requires HasBWI.
// KMOVB and KMOVW move to/from GR32. KMOVD and KMOVQ use separate defm
// statements for the mask/memory forms and the GPR forms, since the two groups
// are given different encoding classes (PD/PS vs. XD) below.
2885let Predicates = [HasDQI] in
2886  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2887               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2888               VEX, PD;
2889
2890let Predicates = [HasAVX512] in
2891  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2892               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2893               VEX, PS;
2894
2895let Predicates = [HasBWI] in {
2896  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2897               VEX, PD, REX_W;
2898  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2899               VEX, XD;
2900  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2901               VEX, PS, REX_W;
2902  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2903               VEX, XD, REX_W;
2904}
2905
2906// GR from/to mask register
// i16 <-> v16i1: the value travels through a 32-bit GPR. The GR16 source is
// inserted at sub_16bit of an IMPLICIT_DEF i32 before the copy to VK16; in the
// other direction VK16 is copied to GR32 and sub_16bit (or sub_8bit for the
// truncating form) is extracted.
2907def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2908          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2909def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2910          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2911def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2912          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2913
// i8 <-> v8i1: same scheme as above, using sub_8bit.
2914def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2915          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2916def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2917          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2918
// Extending a v16i1 mask to i32/i64: zext is selected as KMOVWrk (wrapped in
// SUBREG_TO_REG for i64), while anyext is a plain register-class copy (placed
// at sub_32bit of an IMPLICIT_DEF for i64).
2919def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2920          (KMOVWrk VK16:$src)>;
2921def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2922          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2923def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2924          (COPY_TO_REGCLASS VK16:$src, GR32)>;
2925def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2926          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2927
// Extending a v8i1 mask to i32/i64: the zext forms use KMOVBrk and therefore
// require HasDQI; the anyext forms are plain copies and carry no such
// requirement.
2928def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2929          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2930def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2931          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2932def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2933          (COPY_TO_REGCLASS VK8:$src, GR32)>;
2934def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2935          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2936
// i32 <-> v32i1 and i64 <-> v64i1: direct register-class copies.
2937def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2938          (COPY_TO_REGCLASS GR32:$src, VK32)>;
2939def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2940          (COPY_TO_REGCLASS VK32:$src, GR32)>;
2941def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2942          (COPY_TO_REGCLASS GR64:$src, VK64)>;
2943def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2944          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2945
2946// Load/store kreg
// With DQI, loads of v1i1/v2i1/v4i1 are selected as a KMOVBkm load whose
// result is copied to the narrower mask register class.
2947let Predicates = [HasDQI] in {
2948  def : Pat<(v1i1 (load addr:$src)),
2949            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2950  def : Pat<(v2i1 (load addr:$src)),
2951            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2952  def : Pat<(v4i1 (load addr:$src)),
2953            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2954}
2955
// With plain AVX512, a v8i1 bitconverted from an i8 load goes through a
// zero-extending GPR load (MOVZX32rm8) followed by a copy to VK8, while a
// v16i1 bitconverted from an i16 load is selected directly as KMOVWkm.
2956let Predicates = [HasAVX512] in {
2957  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2958            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2959  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2960            (KMOVWkm addr:$src)>;
2961}
2962
// ISD::EXTRACT_VECTOR_ELT constrained to: an i8 result, a first operand that
// is a vector of i1 elements, and a pointer-typed index operand.
2963def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2964                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2965                                              SDTCVecEltisVT<1, i1>,
2966                                              SDTCisPtrTy<2>]>>;
2967
// Scalar <-> mask-register lowering, instantiated for every mask width from
// v1i1 to v64i1.
2968let Predicates = [HasAVX512] in {
  // For one mask register class / value type pair, lowers:
  //   - scalar_to_vector from GR32 as a register-class copy,
  //   - scalar_to_vector from GR8 by first inserting the byte at sub_8bit of
  //     an IMPLICIT_DEF i32,
  //   - extraction of element 0 as i8 by copying to GR32 and taking sub_8bit,
  //   - the anyext-to-i32 form of that extraction as just the copy to GR32.
2969  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2970    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2971              (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2972
2973    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2974              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2975
2976    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2977              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2978
2979    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2980              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2981  }
2982
2983  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2984  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2985  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2986  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2987  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2988  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2989  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2990
  // Inserting a single i1 (from GR8) at index 0 of an all-zeros v16i1: the
  // byte is placed in a 32-bit register, ANDed with 1, and moved into a mask
  // register with KMOVWkr.
2991  def : Pat<(insert_subvector (v16i1 immAllZerosV),
2992                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2993            (KMOVWkr (AND32ri
2994                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2995                      (i32 1)))>;
2996}
2997
2998// Mask unary operation
2999// - KNOT
// Defines the register-register form (rr) of a one-operand mask instruction
// on KRC, selected for `OpNode KRC:$src` and available under predicate `prd`.
3000multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
3001                            RegisterClass KRC, SDPatternOperator OpNode,
3002                            X86FoldableSchedWrite sched, Predicate prd> {
3003  let Predicates = [prd] in
3004    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
3005               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3006               [(set KRC:$dst, (OpNode KRC:$src))]>,
3007               Sched<[sched]>;
3008}
3009
// Instantiates avx512_mask_unop for all four mask widths, appending the width
// letter to the mnemonic:
//   B - VK8,  HasDQI      W - VK16, HasAVX512
//   D - VK32, HasBWI      Q - VK64, HasBWI
3010multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
3011                                SDPatternOperator OpNode,
3012                                X86FoldableSchedWrite sched> {
3013  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3014                            sched, HasDQI>, VEX, PD;
3015  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3016                            sched, HasAVX512>, VEX, PS;
3017  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3018                            sched, HasBWI>, VEX, PD, REX_W;
3019  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3020                            sched, HasBWI>, VEX, PS, REX_W;
3021}
3022
3023// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KNOT{B,W,D,Q}: opcode 0x44, selected for `vnot` on a mask register.
3024defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
3025
// KNOTB is only defined under HasDQI, so without DQI (e.g. on KNL) an 8-bit
// mask is promoted to 16 bits: copy to VK16, apply KNOTWrr, copy back to VK8.
3027let Predicates = [HasAVX512, NoDQI] in
3028def : Pat<(vnot VK8:$src),
3029          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
3030
// Masks narrower than 8 bits have no KNOT form of their own and always go
// through KNOTWrr. Each result is copied back to the same register class as
// its source operand.
3031def : Pat<(vnot VK4:$src),
3032          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
3033def : Pat<(vnot VK2:$src),
3034          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
3035def : Pat<(vnot VK1:$src),
3036          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
3037
3038// Mask binary operation
3039// - KAND, KANDN, KOR, KXNOR, KXOR
// Defines the register-register form (rr) of a two-operand mask instruction
// on KRC, selected for `OpNode KRC:$src1, KRC:$src2` under predicate `prd`.
// IsCommutable is forwarded to the instruction's isCommutable flag.
3040multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
3041                           RegisterClass KRC, SDPatternOperator OpNode,
3042                           X86FoldableSchedWrite sched, Predicate prd,
3043                           bit IsCommutable> {
3044  let Predicates = [prd], isCommutable = IsCommutable in
3045    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
3046               !strconcat(OpcodeStr,
3047                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3048               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
3049               Sched<[sched]>;
3050}
3051
// Instantiates avx512_mask_binop for all four mask widths, appending the width
// letter to the mnemonic:
//   B - VK8,  HasDQI      W - VK16, prdW (HasAVX512 unless overridden)
//   D - VK32, HasBWI      Q - VK64, HasBWI
3052multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
3053                                 SDPatternOperator OpNode,
3054                                 X86FoldableSchedWrite sched, bit IsCommutable,
3055                                 Predicate prdW = HasAVX512> {
3056  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3057                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3058  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3059                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3060  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3061                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PD;
3062  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3063                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PS;
3064}
3065
3066// These nodes use 'vnot' instead of 'not' to support vectors.
// vandn(i0, i1) = (vnot i0) and i1 -- only the first operand is inverted.
3067def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
// vxnor(i0, i1) = vnot (i0 xor i1).
3068def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3069
3070// TODO - do we need a X86SchedWriteWidths::KMASK type?
// Mask binary instructions. The final bit argument is IsCommutable: KANDN is
// the only non-commutable one (its vandn fragment inverts only the first
// operand). KADD overrides prdW, so its W form requires HasDQI instead of
// HasAVX512.
3071defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3072defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3073defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3074defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3075defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3076defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3077
// Lowers a binary mask operation on narrow mask types by promoting to VK16:
// both operands are copied to VK16, `Inst` (a 16-bit mask instruction) is
// applied, and the result is copied back to the original register class.
// The VK8 pattern applies only under NoDQI; the VK1/VK2/VK4 patterns are
// unconditional.
3078multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3079                            Instruction Inst> {
3080  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3081  // for the DQI set, this type is legal and KxxxB instruction is used
3082  let Predicates = [NoDQI] in
3083  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3084            (COPY_TO_REGCLASS
3085              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3086                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3087
3088  // All types smaller than 8 bits require conversion anyway
3089  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3090        (COPY_TO_REGCLASS (Inst
3091                           (COPY_TO_REGCLASS VK1:$src1, VK16),
3092                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3093  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3094        (COPY_TO_REGCLASS (Inst
3095                           (COPY_TO_REGCLASS VK2:$src1, VK16),
3096                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3097  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3098        (COPY_TO_REGCLASS (Inst
3099                           (COPY_TO_REGCLASS VK4:$src1, VK16),
3100                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3101}
3102
// Narrow-mask promotion patterns for each logical operation, using the W
// (16-bit) instruction variants.
3103defm : avx512_binop_pat<and,   KANDWrr>;
3104defm : avx512_binop_pat<vandn, KANDNWrr>;
3105defm : avx512_binop_pat<or,    KORWrr>;
3106defm : avx512_binop_pat<vxnor, KXNORWrr>;
3107defm : avx512_binop_pat<xor,   KXORWrr>;
3108
3109// Mask unpacking
// Defines a kunpck<Suffix> instruction (opcode 0x4b) that takes two Src-width
// mask registers and produces one Dst-width mask register, plus the pattern
// that selects it for concat_vectors.
3110multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3111                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3112                             Predicate prd> {
3113  let Predicates = [prd] in {
3114    let hasSideEffects = 0 in
3115    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3116               (ins Src.KRC:$src1, Src.KRC:$src2),
3117               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3118               VEX_4V, VEX_L, Sched<[sched]>;
3119
    // Note the operand swap: concat_vectors (src1, src2) is emitted as
    // rr src2, src1. NOTE(review): presumably because the instruction places
    // its first source in the upper half of the result -- confirm against the
    // ISA reference.
3120    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3121              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3122  }
3123}
3124
// Mask unpack instantiations: two v8i1 -> v16i1 (HasAVX512), two v16i1 ->
// v32i1 (HasBWI), and two v32i1 -> v64i1 (HasBWI).
3125defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3126defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3127defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, REX_W;
3128
3129// Mask bit testing
// Defines the register-register form (rr) of a mask test instruction. It has
// no register output: its only result is EFLAGS, set from
// `OpNode KRC:$src1, KRC:$src2`.
3130multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3131                              SDNode OpNode, X86FoldableSchedWrite sched,
3132                              Predicate prd> {
3133  let Predicates = [prd], Defs = [EFLAGS] in
3134    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3135               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3136               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3137               Sched<[sched]>;
3138}
3139
// Instantiates avx512_mask_testop for all four mask widths:
//   B - VK8,  HasDQI      W - VK16, prdW (HasAVX512 unless overridden)
//   Q - VK64, HasBWI      D - VK32, HasBWI
3140multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3141                                X86FoldableSchedWrite sched,
3142                                Predicate prdW = HasAVX512> {
3143  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3144                                                                VEX, PD;
3145  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3146                                                                VEX, PS;
3147  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3148                                                                VEX, PS, REX_W;
3149  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3150                                                                VEX, PD, REX_W;
3151}
3152
3153// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KORTEST keeps the default W predicate (HasAVX512); KTEST overrides it, so
// its W form requires HasDQI.
3154defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3155defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3156
3157// Mask shift
// Defines the register + 8-bit-immediate form (ri) of a mask shift, selected
// for `OpNode KRC:$src, (i8 timm:$imm)`. The predicate set here is HasAVX512.
3158multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3159                               SDNode OpNode, X86FoldableSchedWrite sched> {
3160  let Predicates = [HasAVX512] in
3161    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3162                 !strconcat(OpcodeStr,
3163                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3164                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3165                 Sched<[sched]>;
3166}
3167
// Instantiates avx512_mask_shiftop for all four mask widths. The W and B
// forms use opc1; the Q and D forms use opc2. Within each opcode pair, the
// two widths are told apart by REX_W (set on W and Q, absent on B and D).
// NOTE(review): the `let Predicates` wrappers around B, Q and D are expected
// to take precedence over the HasAVX512 default set inside
// avx512_mask_shiftop -- confirm TableGen's let-override ordering.
3168multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3169                                 SDNode OpNode, X86FoldableSchedWrite sched> {
3170  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3171                               sched>, VEX, TAPD, REX_W;
3172  let Predicates = [HasDQI] in
3173  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3174                               sched>, VEX, TAPD;
3175  let Predicates = [HasBWI] in {
3176  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3177                               sched>, VEX, TAPD, REX_W;
3178  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3179                               sched>, VEX, TAPD;
3180  }
3181}
3182
// Mask shift left / right. The first opcode serves the W/B forms, the second
// the Q/D forms.
3183defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3184defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3185
3186// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Register-register forms. Each Narrow source is inserted at Narrow.SubRegIdx
// of an IMPLICIT_DEF Wide register, the Wide compare (InstStr#"Zrri", or
// "Zrrik" for the masked form) is emitted, and the resulting mask is copied to
// Narrow.KRC. The masked pattern matches `and $mask, (Frag_su ...)` and passes
// $mask, copied to Wide.KRC, as the instruction's mask operand.
3187multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3188                                                 string InstStr,
3189                                                 X86VectorVTInfo Narrow,
3190                                                 X86VectorVTInfo Wide> {
3191def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3192                                (Narrow.VT Narrow.RC:$src2), cond)),
3193          (COPY_TO_REGCLASS
3194           (!cast<Instruction>(InstStr#"Zrri")
3195            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3196            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3197            (X86pcmpm_imm $cc)), Narrow.KRC)>;
3198
3199def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3200                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3201                                                    (Narrow.VT Narrow.RC:$src2),
3202                                                    cond)))),
3203          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3204           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3205           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3206           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3207           (X86pcmpm_imm $cc)), Narrow.KRC)>;
3208}
3209
// Broadcast-load variants of the Narrow-via-Wide integer compare lowering.
// The register operand is widened with INSERT_SUBREG into IMPLICIT_DEF, the
// broadcast operand is passed as an address to InstStr#"Zrmib" (or "Zrmibk"
// for the masked form), and the result mask is copied to Narrow.KRC.
// The commuted patterns match the broadcast load as the first compare operand
// and use X86pcmpm_imm_commute instead of X86pcmpm_imm for the immediate
// (presumably to adjust the condition code for the swapped operand order).
3210multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3211                                                     string InstStr,
3212                                                     X86VectorVTInfo Narrow,
3213                                                     X86VectorVTInfo Wide> {
3214// Broadcast load.
3215def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3216                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
3217          (COPY_TO_REGCLASS
3218           (!cast<Instruction>(InstStr#"Zrmib")
3219            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3220            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3221
3222def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3223                           (Narrow.KVT
3224                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3225                                         (Narrow.BroadcastLdFrag addr:$src2),
3226                                         cond)))),
3227          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3228           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3229           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3230           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3231
3232// Commuted with broadcast load.
3233def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3234                                (Narrow.VT Narrow.RC:$src1),
3235                                cond)),
3236          (COPY_TO_REGCLASS
3237           (!cast<Instruction>(InstStr#"Zrmib")
3238            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3239            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3240
3241def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3242                           (Narrow.KVT
3243                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3244                                         (Narrow.VT Narrow.RC:$src1),
3245                                         cond)))),
3246          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3247           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3248           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3249           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3250}
3251
3252// Same as above, but for fp types which don't use PatFrags.
// Matches X86cmpm / X86cmpm_su directly with a timm:$cc immediate. Three
// groups of patterns, each with an unmasked and an `and $mask, ...` variant:
//   - register/register      -> InstStr#"Zrri"  / "Zrrik"
//   - register/broadcast     -> InstStr#"Zrmbi" / "Zrmbik"
//   - broadcast/register     -> same instructions, with the immediate passed
//                               through X86cmpm_imm_commute
// Narrow register operands are widened with INSERT_SUBREG into IMPLICIT_DEF,
// and every result mask is copied to Narrow.KRC.
3253multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3254                                                X86VectorVTInfo Narrow,
3255                                                X86VectorVTInfo Wide> {
3256def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3257                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3258          (COPY_TO_REGCLASS
3259           (!cast<Instruction>(InstStr#"Zrri")
3260            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3261            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3262            timm:$cc), Narrow.KRC)>;
3263
3264def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3265                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3266                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3267          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3268           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3269           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3270           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3271           timm:$cc), Narrow.KRC)>;
3272
3273// Broadcast load.
3274def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3275                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3276          (COPY_TO_REGCLASS
3277           (!cast<Instruction>(InstStr#"Zrmbi")
3278            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3279            addr:$src2, timm:$cc), Narrow.KRC)>;
3280
3281def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3282                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3283                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3284          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3285           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3286           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3287           addr:$src2, timm:$cc), Narrow.KRC)>;
3288
3289// Commuted with broadcast load.
3290def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3291                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3292          (COPY_TO_REGCLASS
3293           (!cast<Instruction>(InstStr#"Zrmbi")
3294            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3295            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3296
3297def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3298                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3299                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3300          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3301           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3302           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3303           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3304}
3305
// AVX512 without VLX: 128/256-bit compares on 32- and 64-bit elements are
// lowered through the 512-bit instructions. Signed (X86pcmpm -> VPCMPD/Q) and
// unsigned (X86pcmpum -> VPCMPUD/UQ) integer compares get register and
// broadcast-load patterns; the FP compares (VCMPPS/VCMPPD) get their patterns
// from axv512_cmp_packed_cc_no_vlx_lowering.
3306let Predicates = [HasAVX512, NoVLX] in {
3307  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3308  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3309
3310  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3311  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3312
3313  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3314  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3315
3316  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3317  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3318
3319  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3320  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3321
3322  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3323  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3324
3325  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3326  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3327
3328  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3329  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3330
3331  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3332  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3333  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3334  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3335}
3336
// BWI without VLX: 128/256-bit byte and word integer compares are lowered
// through the 512-bit VPCMP{B,UB,W,UW} instructions. Only the
// register-register patterns are instantiated here; there are no
// broadcast-load instantiations for these element types.
3337let Predicates = [HasBWI, NoVLX] in {
3338  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3339  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3340
3341  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3342  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3343
3344  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3345  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3346
3347  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3348  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3349}
3350
3351// Mask setting all 0s or 1s
// Defines a pseudo instruction (no operands, empty asm string) that
// materializes the constant `Val` of type VT into a KRC register. It is
// marked rematerializable and as cheap as a move, and requires HasAVX512.
3352multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3353  let Predicates = [HasAVX512] in
3354    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3355        SchedRW = [WriteZero] in
3356      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3357                     [(set KRC:$dst, (VT Val))]>;
3358}
3359
// Instantiates avx512_mask_setop for the 16-, 32- and 64-bit mask types
// (W, D, Q). No 8-bit variant is defined here.
3360multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3361  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3362  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3363  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3364}
3365
// KSET0{W,D,Q} materialize an all-zeros mask; KSET1{W,D,Q} an all-ones mask.
3366defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3367defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3368
3369// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros / all-ones constants for the v8i1, v4i1, v2i1 and v1i1 mask types
// are produced by the 16-bit KSET0W / KSET1W pseudo and copied to the
// narrower register class.
3370let Predicates = [HasAVX512] in {
3371  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3372  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3373  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3374  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3375  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3376  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3377  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3378  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3379}
3380
3381// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// For a (sub-mask, mask) type pair, lowers both of these to a plain
// register-class copy:
//   - extract_subvector at index 0 (RC -> subRC),
//   - insert_subvector into undef at index 0 (subRC -> RC).
3382multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3383                                             RegisterClass RC, ValueType VT> {
3384  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3385            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3386
3387  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3388            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3389}
// Instantiate the index-0 subvector patterns for every pair of mask types
// (sub, full) with sub strictly narrower than full, from v1i1 up to v64i1.
3390defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3391defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3392defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3393defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3394defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3395defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3396
3397defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3398defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3399defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3400defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3401defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3402
3403defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3404defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3405defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3406defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3407
3408defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3409defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3410defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3411
3412defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3413defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3414
3415defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3416
3417//===----------------------------------------------------------------------===//
3418// AVX-512 - Aligned and unaligned load and store
3419//
3420
// Defines the source-operand (MRMSrcReg / MRMSrcMem) forms of one AVX-512
// vector move for a single vector type `_`:
//   rr    - unmasked register move (no selection pattern)
//   rrkz  - zero-masked register move
//   rm    - unmasked load; its pattern list is empty when NoRMPattern is set
//   rrk   - merge-masked register move, $src0 tied to $dst
//   rmk   - merge-masked load, $src0 tied to $dst
//   rmkz  - zero-masked load
// followed by patterns that select the masked-load fragment `mload` to
// rmk / rmkz.
//
// Parameters:
//   opc          - opcode byte shared by all forms.
//   OpcodeStr    - assembly mnemonic.
//   Name         - prefix used (with _.ZSuffix) to look up the generated
//                  rmk / rmkz records in the mload patterns.
//   ld_frag      - load fragment matched by rm / rmk / rmkz.
//   mload        - masked-load fragment matched by the trailing patterns.
//   Sched        - supplies the RR and RM scheduling classes.
//   EVEX2VEXOvrd - prefix handed to EVEX2VEXOverride for the rr and rm forms.
//   SelectOprr   - select operator used in the rrk / rrkz patterns only; the
//                  memory forms always use vselect_mask.
3421multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3422                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3423                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3424                       bit NoRMPattern = 0,
3425                       SDPatternOperator SelectOprr = vselect> {
3426  let hasSideEffects = 0 in {
3427  let isMoveReg = 1 in
3428  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3429                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3430                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3431                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3432  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3433                      (ins _.KRCWM:$mask,  _.RC:$src),
3434                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3435                       "${dst} {${mask}} {z}, $src}"),
3436                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3437                                           (_.VT _.RC:$src),
3438                                           _.ImmAllZerosV)))], _.ExeDomain>,
3439                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3440
3441  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3442  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3443                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3444                    !if(NoRMPattern, [],
3445                        [(set _.RC:$dst,
3446                          (_.VT (ld_frag addr:$src)))]),
3447                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3448                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3449
  // Merge-masking forms: the pass-through value $src0 is tied to $dst.
3450  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3451    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3452                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3453                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3454                      "${dst} {${mask}}, $src1}"),
3455                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3456                                          (_.VT _.RC:$src1),
3457                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
3458                       EVEX, EVEX_K, Sched<[Sched.RR]>;
3459    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3460                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3461                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3462                      "${dst} {${mask}}, $src1}"),
3463                     [(set _.RC:$dst, (_.VT
3464                         (vselect_mask _.KRCWM:$mask,
3465                          (_.VT (ld_frag addr:$src1)),
3466                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3467                     EVEX, EVEX_K, Sched<[Sched.RM]>;
3468  }
3469  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3470                  (ins _.KRCWM:$mask, _.MemOp:$src),
3471                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3472                                "${dst} {${mask}} {z}, $src}",
3473                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3474                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3475                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3476  }
  // Masked-load selection: an undef or all-zeros pass-through operand selects
  // the zero-masking form (rmkz); a register pass-through selects the
  // merge-masking form (rmk).
3477  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3478            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3479
3480  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3481            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3482
3483  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3484            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3485             _.KRCWM:$mask, addr:$ptr)>;
3486}
3487
// Instantiates avx512_load for the 512-, 256- and 128-bit members of the type
// group `_`, using each member's AlignedLdFrag and the masked_load_aligned
// fragment.
//   Z    - requires `prd`; passes an empty EVEX2VEX override prefix.
//   Z256 - requires `prd` and HasVLX; override prefix is EVEX2VEXOvrd # "Y".
//   Z128 - requires `prd` and HasVLX; override prefix is EVEX2VEXOvrd.
// SelectOprr is not forwarded, so avx512_load's default (vselect) is used.
3488multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3489                                 AVX512VLVectorVTInfo _, Predicate prd,
3490                                 X86SchedWriteMoveLSWidths Sched,
3491                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3492  let Predicates = [prd] in
3493  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3494                       _.info512.AlignedLdFrag, masked_load_aligned,
3495                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3496
3497  let Predicates = [prd, HasVLX] in {
3498  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3499                          _.info256.AlignedLdFrag, masked_load_aligned,
3500                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3501  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3502                          _.info128.AlignedLdFrag, masked_load_aligned,
3503                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3504  }
3505}
3506
// Instantiates avx512_load for the 512-, 256- and 128-bit members of the type
// group `_`, using each member's LdFrag and the masked_load fragment.
//   Z    - requires `prd`; passes an empty EVEX2VEX override prefix.
//   Z256 - requires `prd` and HasVLX; override prefix is EVEX2VEXOvrd # "Y".
//   Z128 - requires `prd` and HasVLX; override prefix is EVEX2VEXOvrd.
// NoRMPattern and SelectOprr are forwarded unchanged to every width.
3507multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3508                          AVX512VLVectorVTInfo _, Predicate prd,
3509                          X86SchedWriteMoveLSWidths Sched,
3510                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
3511                          SDPatternOperator SelectOprr = vselect> {
3512  let Predicates = [prd] in
3513  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3514                       masked_load, Sched.ZMM, "",
3515                       NoRMPattern, SelectOprr>, EVEX_V512;
3516
3517  let Predicates = [prd, HasVLX] in {
3518  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3519                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3520                         NoRMPattern, SelectOprr>, EVEX_V256;
3521  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3522                         masked_load, Sched.XMM, EVEX2VEXOvrd,
3523                         NoRMPattern, SelectOprr>, EVEX_V128;
3524  }
3525}
3526
// Defines the destination-operand (MRMDestReg / MRMDestMem) forms of one
// AVX-512 vector move for a single vector type `_`:
//   rr_REV / rrk_REV / rrkz_REV - register-to-register forms using the store
//       opcode; pattern-less, marked isCodeGenOnly and ForceDisassemble, and
//       reachable from assembly through the ".s" aliases at the end.
//   mr   - unmasked store; its pattern list is empty when NoMRPattern is set.
//   mrk  - masked store; selected through the separate `mstore` pattern.
//
// Parameters:
//   opc          - opcode byte shared by all forms.
//   OpcodeStr    - assembly mnemonic.
//   BaseName     - prefix used (with _.ZSuffix) to look up generated records.
//   st_frag      - store fragment matched by mr.
//   mstore       - masked-store fragment matched to mrk.
//   Sched        - supplies the RR and MR scheduling classes.
//   EVEX2VEXOvrd - prefix handed to EVEX2VEXOverride for rr_REV and mr.
3527multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3528                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3529                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3530                        bit NoMRPattern = 0> {
3531  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3532  let isMoveReg = 1 in
3533  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3534                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3535                         [], _.ExeDomain>, EVEX,
3536                         Sched<[Sched.RR]>,
3537                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3538  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3539                         (ins _.KRCWM:$mask, _.RC:$src),
3540                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3541                         "${dst} {${mask}}, $src}",
3542                         [], _.ExeDomain>,  EVEX, EVEX_K,
3543                         Sched<[Sched.RR]>;
3544  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3545                          (ins _.KRCWM:$mask, _.RC:$src),
3546                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3547                          "${dst} {${mask}} {z}, $src}",
3548                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3549                          Sched<[Sched.RR]>;
3550  }
3551
  // NOTE(review): the `let` below has no braces, so it applies to `mr` only.
  // `mrk` carries an empty pattern list and is outside that `let`; confirm
  // its mayStore/hasSideEffects flags are derived as intended (presumably
  // from the mstore Pat that follows).
3552  let hasSideEffects = 0, mayStore = 1 in
3553  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3554                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3555                    !if(NoMRPattern, [],
3556                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3557                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3558                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3559  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3560                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3561              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3562               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
3563
  // Masked store selects the mrk form.
3564  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3565           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3566                                                        _.KRCWM:$mask, _.RC:$src)>;
3567
  // "<mnemonic>.s" assembly aliases map to the three _REV register forms.
3568  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3569                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3570                   _.RC:$dst, _.RC:$src), 0>;
3571  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3572                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3573                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3574  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3575                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3576                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3577}
3578
// Instantiates avx512_store for the 512-, 256- and 128-bit members of the
// type group `_`, using the `store` and `masked_store` fragments.
//   Z    - requires `prd`; passes an empty EVEX2VEX override prefix.
//   Z256 - requires `prd` and HasVLX; override prefix is EVEX2VEXOvrd # "Y".
//   Z128 - requires `prd` and HasVLX; override prefix is EVEX2VEXOvrd.
3579multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3580                            AVX512VLVectorVTInfo _, Predicate prd,
3581                            X86SchedWriteMoveLSWidths Sched,
3582                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3583  let Predicates = [prd] in
3584  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3585                        masked_store, Sched.ZMM, "",
3586                        NoMRPattern>, EVEX_V512;
3587  let Predicates = [prd, HasVLX] in {
3588    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3589                             masked_store, Sched.YMM,
3590                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3591    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3592                             masked_store, Sched.XMM, EVEX2VEXOvrd,
3593                             NoMRPattern>, EVEX_V128;
3594  }
3595}
3596
// Instantiates avx512_store for the 512-, 256- and 128-bit members of the
// type group `_`, using the `alignedstore` and `masked_store_aligned`
// fragments.
//   Z    - requires `prd`; passes an empty EVEX2VEX override prefix.
//   Z256 - requires `prd` and HasVLX; override prefix is EVEX2VEXOvrd # "Y".
//   Z128 - requires `prd` and HasVLX; override prefix is EVEX2VEXOvrd.
3597multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3598                                  AVX512VLVectorVTInfo _, Predicate prd,
3599                                  X86SchedWriteMoveLSWidths Sched,
3600                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3601  let Predicates = [prd] in
3602  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3603                        masked_store_aligned, Sched.ZMM, "",
3604                        NoMRPattern>, EVEX_V512;
3605
3606  let Predicates = [prd, HasVLX] in {
3607    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3608                             masked_store_aligned, Sched.YMM,
3609                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3610    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3611                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3612                             NoMRPattern>, EVEX_V128;
3613  }
3614}
3615
// vmovaps: aligned load forms (opcode 0x28) and aligned store forms (opcode
// 0x29) over avx512vl_f32_info, gated on HasAVX512.
3616defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3617                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3618               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3619                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3620               PS, EVEX_CD8<32, CD8VF>;
3621
// vmovapd: aligned load forms (opcode 0x28) and aligned store forms (opcode
// 0x29) over avx512vl_f64_info, gated on HasAVX512.
3622defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3623                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3624               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3625                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3626               PD, REX_W, EVEX_CD8<64, CD8VF>;
3627
// vmovups: load forms (opcode 0x10) and store forms (opcode 0x11) over
// avx512vl_f32_info, gated on HasAVX512. null_frag is passed as SelectOprr,
// the operator used by the rrk / rrkz register-move patterns.
3628defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3629                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3630               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3631                               SchedWriteFMoveLS, "VMOVUPS">,
3632                               PS, EVEX_CD8<32, CD8VF>;
3633
// vmovupd: load forms (opcode 0x10) and store forms (opcode 0x11) over
// avx512vl_f64_info, gated on HasAVX512. null_frag is passed as SelectOprr,
// the operator used by the rrk / rrkz register-move patterns.
3634defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3635                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3636               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3637                               SchedWriteFMoveLS, "VMOVUPD">,
3638               PD, REX_W, EVEX_CD8<64, CD8VF>;
3639
// vmovdqa32: aligned load forms (opcode 0x6F) and aligned store forms (opcode
// 0x7F) over avx512vl_i32_info, gated on HasAVX512. NoRMPattern and
// NoMRPattern are both 1, so the unmasked rm / mr forms get empty pattern
// lists.
3640defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3641                                       HasAVX512, SchedWriteVecMoveLS,
3642                                       "VMOVDQA", 1>,
3643                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3644                                        HasAVX512, SchedWriteVecMoveLS,
3645                                        "VMOVDQA", 1>,
3646                 PD, EVEX_CD8<32, CD8VF>;
3647
// vmovdqa64: aligned load forms (opcode 0x6F) and aligned store forms (opcode
// 0x7F) over avx512vl_i64_info, gated on HasAVX512. The unmasked rm / mr
// forms keep their patterns (NoRMPattern / NoMRPattern left at 0).
3648defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3649                                       HasAVX512, SchedWriteVecMoveLS,
3650                                       "VMOVDQA">,
3651                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3652                                        HasAVX512, SchedWriteVecMoveLS,
3653                                        "VMOVDQA">,
3654                 PD, REX_W, EVEX_CD8<64, CD8VF>;
3655
// vmovdqu8: load forms (opcode 0x6F) and store forms (opcode 0x7F) over
// avx512vl_i8_info, gated on HasBWI. The unmasked rm / mr forms get empty
// pattern lists (NoRMPattern / NoMRPattern = 1).
3656defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3657                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
3658                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3659                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3660                XD, EVEX_CD8<8, CD8VF>;
3661
// vmovdqu16: load forms (opcode 0x6F) and store forms (opcode 0x7F) over
// avx512vl_i16_info, gated on HasBWI. The unmasked rm / mr forms get empty
// pattern lists (NoRMPattern / NoMRPattern = 1).
3662defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3663                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3664                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3665                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3666                 XD, REX_W, EVEX_CD8<16, CD8VF>;
3667
// vmovdqu32: load forms (opcode 0x6F) and store forms (opcode 0x7F) over
// avx512vl_i32_info, gated on HasAVX512. The unmasked rm / mr forms get empty
// pattern lists, and null_frag is passed as SelectOprr, the operator used by
// the rrk / rrkz register-move patterns.
3668defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3669                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3670                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3671                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3672                 XS, EVEX_CD8<32, CD8VF>;
3673
// vmovdqu64: load forms (opcode 0x6F) and store forms (opcode 0x7F) over
// avx512vl_i64_info, gated on HasAVX512. The unmasked rm / mr forms keep
// their patterns; null_frag is passed as SelectOprr, the operator used by the
// rrk / rrkz register-move patterns.
3674defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3675                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3676                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3677                                 SchedWriteVecMoveLS, "VMOVDQU">,
3678                 XS, REX_W, EVEX_CD8<64, CD8VF>;
3679
3680// Special instructions to help with spilling when we don't have VLX. We need
3681// to load or store from a ZMM register instead. These are converted in
3682// expandPostRAPseudos.
// Load pseudos: 128-bit (VR128X / f128mem) and 256-bit (VR256X / f256mem)
// variants of the VMOVAPS and VMOVUPS loads. Each has opcode 0, an empty
// assembly string and no selection pattern.
3683let isReMaterializable = 1, canFoldAsLoad = 1,
3684    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3685def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3686                            "", []>, Sched<[WriteFLoadX]>;
3687def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3688                            "", []>, Sched<[WriteFLoadY]>;
3689def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3690                            "", []>, Sched<[WriteFLoadX]>;
3691def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3692                            "", []>, Sched<[WriteFLoadY]>;
3693}
3694
// Store pseudos for the no-VLX case: 128-bit (VR128X / f128mem) and 256-bit
// (VR256X / f256mem) variants of the VMOVAPS and VMOVUPS stores. Each has
// opcode 0, an empty assembly string and no selection pattern.
3695let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3696def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3697                            "", []>, Sched<[WriteFStoreX]>;
3698def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3699                            "", []>, Sched<[WriteFStoreY]>;
3700def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3701                            "", []>, Sched<[WriteFStoreX]>;
3702def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3703                            "", []>, Sched<[WriteFStoreY]>;
3704}
3705
// vselect whose "true" operand is all-zeros: the result keeps $src where the
// mask bit is clear and is zero where it is set. This is selected as a
// zero-masked register move (rrkz) under the inverted mask.
// For v8i64 the 8-bit mask is copied to VK16 so KNOTWrr can invert it, and
// the result is copied back to VK8.
3705def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3706                          (v8i64 VR512:$src))),
3707   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3708                                              VK8), VR512:$src)>;
3709
// For v16i32 the mask is already 16 bits wide, so KNOTWrr applies directly.
3710def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3711                           (v16i32 VR512:$src))),
3712                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3714
3714// These patterns exist to prevent the above patterns from introducing a second
3715// mask inversion when one already exists.
// When the select condition is itself (vnot $mask), the zero-masked move is
// emitted with the un-inverted $mask and no KNOT instruction.
3716def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3717                          (v8i64 immAllZerosV),
3718                          (v8i64 VR512:$src))),
3719                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3720def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3721                           (v16i32 immAllZerosV),
3722                           (v16i32 VR512:$src))),
3723                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3725
// Lowers a masked select on a Narrow vector type through the masked move of a
// Wide vector type. InstrStr is the wide instruction's name without the
// rrk / rrkz suffix.
//
// Both patterns follow the same recipe:
//   1. place each Narrow register operand into a Wide register with
//      INSERT_SUBREG over IMPLICIT_DEF at Narrow.SubRegIdx,
//   2. copy the Narrow write-mask into the Wide write-mask class,
//   3. run the Wide masked move,
//   4. take the Narrow result back out with EXTRACT_SUBREG.
// The first pattern (register false operand) uses the merge-masking "rrk"
// form; the second (all-zeros false operand) uses the zero-masking "rrkz"
// form.
3725multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3726                              X86VectorVTInfo Wide> {
3727 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3728                               Narrow.RC:$src1, Narrow.RC:$src0)),
3729           (EXTRACT_SUBREG
3730            (Wide.VT
3731             (!cast<Instruction>(InstrStr#"rrk")
3732              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3733              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3734              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3735            Narrow.SubRegIdx)>;
3736
3737 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3738                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3739           (EXTRACT_SUBREG
3740            (Wide.VT
3741             (!cast<Instruction>(InstrStr#"rrkz")
3742              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3743              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3744            Narrow.SubRegIdx)>;
3745}
3747
3747// Patterns for handling masked selects of 128-bit and 256-bit vectors when
3748// VLX isn't available. Use a 512-bit operation and extract.
// 32-bit element types widen to the 16-element 512-bit type; 64-bit element
// types widen to the 8-element 512-bit type.
3749let Predicates = [HasAVX512, NoVLX] in {
3750  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3751  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3752  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3753  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3754
3755  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3756  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3757  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3758  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3759}
3761
// Same widening of masked selects for byte and word element types when BWI is
// available but VLX is not. i8 vectors go through VMOVDQU8Z; i16, f16 and
// bf16 vectors all go through VMOVDQU16Z.
3761let Predicates = [HasBWI, NoVLX] in {
3762  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3763  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3764
3765  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3766  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3767
3768  defm : mask_move_lowering<"VMOVDQU16Z", v8f16x_info, v32f16_info>;
3769  defm : mask_move_lowering<"VMOVDQU16Z", v16f16x_info, v32f16_info>;
3770
3771  defm : mask_move_lowering<"VMOVDQU16Z", v8bf16x_info, v32bf16_info>;
3772  defm : mask_move_lowering<"VMOVDQU16Z", v16bf16x_info, v32bf16_info>;
3773}
3775
// Unmasked 512-bit load and store selection for v16i32, v32i16, v64i8, v32f16
// and v32bf16.
//   - Integer types use VMOVDQA64Z (aligned) / VMOVDQU64Z (unaligned).
//   - f16 and bf16 types use VMOVAPSZ (aligned) / VMOVUPSZ (unaligned).
3775let Predicates = [HasAVX512] in {
3776  // 512-bit load.
3777  def : Pat<(alignedloadv16i32 addr:$src),
3778            (VMOVDQA64Zrm addr:$src)>;
3779  def : Pat<(alignedloadv32i16 addr:$src),
3780            (VMOVDQA64Zrm addr:$src)>;
3781  def : Pat<(alignedloadv32f16 addr:$src),
3782            (VMOVAPSZrm addr:$src)>;
3783  def : Pat<(alignedloadv32bf16 addr:$src),
3784            (VMOVAPSZrm addr:$src)>;
3785  def : Pat<(alignedloadv64i8 addr:$src),
3786            (VMOVDQA64Zrm addr:$src)>;
3787  def : Pat<(loadv16i32 addr:$src),
3788            (VMOVDQU64Zrm addr:$src)>;
3789  def : Pat<(loadv32i16 addr:$src),
3790            (VMOVDQU64Zrm addr:$src)>;
3791  def : Pat<(loadv32f16 addr:$src),
3792            (VMOVUPSZrm addr:$src)>;
3793  def : Pat<(loadv32bf16 addr:$src),
3794            (VMOVUPSZrm addr:$src)>;
3795  def : Pat<(loadv64i8 addr:$src),
3796            (VMOVDQU64Zrm addr:$src)>;
3797
3798  // 512-bit store.
3799  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3800            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3801  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3802            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3803  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3804            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3805  def : Pat<(alignedstore (v32bf16 VR512:$src), addr:$dst),
3806            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3807  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3808            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3809  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3810            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3811  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3812            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3813  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3814            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3815  def : Pat<(store (v32bf16 VR512:$src), addr:$dst),
3816            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3817  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3818            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3819}
3821
// Unmasked 128-bit and 256-bit load and store selection when VLX is
// available, for the i32, i16, i8, f16 and bf16 element types.
//   - Integer types use VMOVDQA64Z128/256 (aligned) and VMOVDQU64Z128/256
//     (unaligned).
//   - f16 and bf16 types use VMOVAPSZ128/256 (aligned) and VMOVUPSZ128/256
//     (unaligned).
3821let Predicates = [HasVLX] in {
3822  // 128-bit load.
3823  def : Pat<(alignedloadv4i32 addr:$src),
3824            (VMOVDQA64Z128rm addr:$src)>;
3825  def : Pat<(alignedloadv8i16 addr:$src),
3826            (VMOVDQA64Z128rm addr:$src)>;
3827  def : Pat<(alignedloadv8f16 addr:$src),
3828            (VMOVAPSZ128rm addr:$src)>;
3829  def : Pat<(alignedloadv8bf16 addr:$src),
3830            (VMOVAPSZ128rm addr:$src)>;
3831  def : Pat<(alignedloadv16i8 addr:$src),
3832            (VMOVDQA64Z128rm addr:$src)>;
3833  def : Pat<(loadv4i32 addr:$src),
3834            (VMOVDQU64Z128rm addr:$src)>;
3835  def : Pat<(loadv8i16 addr:$src),
3836            (VMOVDQU64Z128rm addr:$src)>;
3837  def : Pat<(loadv8f16 addr:$src),
3838            (VMOVUPSZ128rm addr:$src)>;
3839  def : Pat<(loadv8bf16 addr:$src),
3840            (VMOVUPSZ128rm addr:$src)>;
3841  def : Pat<(loadv16i8 addr:$src),
3842            (VMOVDQU64Z128rm addr:$src)>;
3843
3844  // 128-bit store.
3845  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3846            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3847  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3848            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3849  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3850            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3851  def : Pat<(alignedstore (v8bf16 VR128X:$src), addr:$dst),
3852            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3853  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3854            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3855  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3856            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3857  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3858            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3859  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3860            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3861  def : Pat<(store (v8bf16 VR128X:$src), addr:$dst),
3862            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3863  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3864            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3865
3866  // 256-bit load.
3867  def : Pat<(alignedloadv8i32 addr:$src),
3868            (VMOVDQA64Z256rm addr:$src)>;
3869  def : Pat<(alignedloadv16i16 addr:$src),
3870            (VMOVDQA64Z256rm addr:$src)>;
3871  def : Pat<(alignedloadv16f16 addr:$src),
3872            (VMOVAPSZ256rm addr:$src)>;
3873  def : Pat<(alignedloadv16bf16 addr:$src),
3874            (VMOVAPSZ256rm addr:$src)>;
3875  def : Pat<(alignedloadv32i8 addr:$src),
3876            (VMOVDQA64Z256rm addr:$src)>;
3877  def : Pat<(loadv8i32 addr:$src),
3878            (VMOVDQU64Z256rm addr:$src)>;
3879  def : Pat<(loadv16i16 addr:$src),
3880            (VMOVDQU64Z256rm addr:$src)>;
3881  def : Pat<(loadv16f16 addr:$src),
3882            (VMOVUPSZ256rm addr:$src)>;
3883  def : Pat<(loadv16bf16 addr:$src),
3884            (VMOVUPSZ256rm addr:$src)>;
3885  def : Pat<(loadv32i8 addr:$src),
3886            (VMOVDQU64Z256rm addr:$src)>;
3887
3888  // 256-bit store.
3889  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3890            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3891  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3892            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3893  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3894            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3895  def : Pat<(alignedstore (v16bf16 VR256X:$src), addr:$dst),
3896            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3897  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3898            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3899  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3900            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3901  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3902            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3903  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3904            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3905  def : Pat<(store (v16bf16 VR256X:$src), addr:$dst),
3906            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3907  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3908            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3909}
3911
// Masked select, masked load and masked store selection for the type group
// `_`, implemented with the VMOVDQU16 instructions at each vector width.
//
// Each width gets the same ten patterns:
//   vselect reg, reg            -> rrk   (merge) 
//   vselect reg, zeros          -> rrkz  (zero)
//   vselect aligned-load, reg   -> rmk
//   vselect aligned-load, zeros -> rmkz
//   vselect load, reg           -> rmk
//   vselect load, zeros         -> rmkz
//   masked_load with reg / undef / zeros pass-through -> rmk / rmkz / rmkz
//   masked_store                -> mrk
//
// The 512-bit patterns (VK32WM mask) require HasBWI; the 256-bit (VK16WM) and
// 128-bit (VK8WM) patterns require HasBWI and HasVLX.
3912multiclass mask_move_lowering_f16_bf16<AVX512VLVectorVTInfo _> {
3913let Predicates = [HasBWI] in {
3914  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), (_.info512.VT VR512:$src0))),
3915            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3916  def : Pat<(_.info512.VT (vselect VK32WM:$mask, (_.info512.VT VR512:$src1), _.info512.ImmAllZerosV)),
3917            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
3918  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3919                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3920            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3921  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3922                     (_.info512.VT (_.info512.AlignedLdFrag addr:$src)), _.info512.ImmAllZerosV)),
3923            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3924  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3925                     (_.info512.VT (_.info512.LdFrag addr:$src)), (_.info512.VT VR512:$src0))),
3926            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3927  def : Pat<(_.info512.VT (vselect VK32WM:$mask,
3928                     (_.info512.VT (_.info512.LdFrag addr:$src)), _.info512.ImmAllZerosV)),
3929            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3930  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, (_.info512.VT VR512:$src0))),
3931            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3932  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, undef)),
3933            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3934  def : Pat<(_.info512.VT (masked_load addr:$src, VK32WM:$mask, _.info512.ImmAllZerosV)),
3935            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3936
3937  def : Pat<(masked_store (_.info512.VT VR512:$src), addr:$dst, VK32WM:$mask),
3938            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3939}
3940let Predicates = [HasBWI, HasVLX] in {
3941  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src0))),
3942            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3943  def : Pat<(_.info256.VT (vselect VK16WM:$mask, (_.info256.VT VR256X:$src1), _.info256.ImmAllZerosV)),
3944            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
3945  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3946                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3947            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3948  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3949                     (_.info256.VT (_.info256.AlignedLdFrag addr:$src)), _.info256.ImmAllZerosV)),
3950            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3951  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3952                     (_.info256.VT (_.info256.LdFrag addr:$src)), (_.info256.VT VR256X:$src0))),
3953            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3954  def : Pat<(_.info256.VT (vselect VK16WM:$mask,
3955                     (_.info256.VT (_.info256.LdFrag addr:$src)), _.info256.ImmAllZerosV)),
3956            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3957  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, (_.info256.VT VR256X:$src0))),
3958            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3959  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, undef)),
3960            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3961  def : Pat<(_.info256.VT (masked_load addr:$src, VK16WM:$mask, _.info256.ImmAllZerosV)),
3962            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3963
3964  def : Pat<(masked_store (_.info256.VT VR256X:$src), addr:$dst, VK16WM:$mask),
3965            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3966
3967  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), (_.info128.VT VR128X:$src0))),
3968            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3969  def : Pat<(_.info128.VT (vselect VK8WM:$mask, (_.info128.VT VR128X:$src1), _.info128.ImmAllZerosV)),
3970            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
3971  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3972                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3973            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3974  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3975                     (_.info128.VT (_.info128.AlignedLdFrag addr:$src)), _.info128.ImmAllZerosV)),
3976            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3977  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3978                     (_.info128.VT (_.info128.LdFrag addr:$src)), (_.info128.VT VR128X:$src0))),
3979            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3980  def : Pat<(_.info128.VT (vselect VK8WM:$mask,
3981                     (_.info128.VT (_.info128.LdFrag addr:$src)), _.info128.ImmAllZerosV)),
3982            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3983  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, (_.info128.VT VR128X:$src0))),
3984            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3985  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, undef)),
3986            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3987  def : Pat<(_.info128.VT (masked_load addr:$src, VK8WM:$mask, _.info128.ImmAllZerosV)),
3988            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3989
3990  def : Pat<(masked_store (_.info128.VT VR128X:$src), addr:$dst, VK8WM:$mask),
3991            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3992}
3993}
3994
// Instantiate the mask_move_lowering_f16_bf16 patterns twice: once with the
// avx512vl_f16_info type bundle and once with avx512vl_bf16_info.
3995defm : mask_move_lowering_f16_bf16<avx512vl_f16_info>;
3996defm : mask_move_lowering_f16_bf16<avx512vl_bf16_info>;
3997
3998// Move Int Doubleword to Packed Double Int
3999//
4000let ExeDomain = SSEPackedInt in {
// "vmovd" GR32 -> VR128X (opcode 0x6E, register source). Selected for
// scalar_to_vector of a GR32 producing v4i32.
4001def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
4002                      "vmovd\t{$src, $dst|$dst, $src}",
4003                      [(set VR128X:$dst,
4004                        (v4i32 (scalar_to_vector GR32:$src)))]>,
4005                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// "vmovd" i32mem -> VR128X (opcode 0x6E, memory source). Selected for
// scalar_to_vector of a loadi32 producing v4i32.
4006def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
4007                      "vmovd\t{$src, $dst|$dst, $src}",
4008                      [(set VR128X:$dst,
4009                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
4010                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// "vmovq" GR64 -> VR128X (opcode 0x6E with REX_W). Selected for
// scalar_to_vector of a GR64 producing v2i64.
4011def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
4012                      "vmovq\t{$src, $dst|$dst, $src}",
4013                        [(set VR128X:$dst,
4014                          (v2i64 (scalar_to_vector GR64:$src)))]>,
4015                      EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory-source form of the 0x6E/REX_W "vmovq". It carries no selection
// pattern ([]), is codegen-only with ForceDisassemble set, and states mayLoad
// and hasSideEffects explicitly.
4016let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
4017def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
4018                      (ins i64mem:$src),
4019                      "vmovq\t{$src, $dst|$dst, $src}", []>,
4020                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Codegen-only variants operating on the FR64X scalar register class; both are
// selected for a bitconvert between GR64 and FR64X.
4021let isCodeGenOnly = 1 in {
// GR64 -> FR64X (opcode 0x6E).
4022def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
4023                       "vmovq\t{$src, $dst|$dst, $src}",
4024                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
4025                       EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
// FR64X -> GR64 (opcode 0x7E, register destination).
// NOTE(review): this GPR-destination move uses Sched<[WriteVecMoveFromGpr]>,
// whereas VMOVPQIto64Zrr below uses WriteVecMoveToGpr — confirm intended.
4026def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
4027                         "vmovq\t{$src, $dst|$dst, $src}",
4028                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
4029                         EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
4030}
4031} // ExeDomain = SSEPackedInt
4032
4033// Move Int Doubleword to Single Scalar
4034//
4035let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// Codegen-only "vmovd" GR32 -> FR32X (opcode 0x6E), selected for a bitconvert
// of a GR32 into the FR32X scalar register class.
4036def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
4037                      "vmovd\t{$src, $dst|$dst, $src}",
4038                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
4039                      EVEX, Sched<[WriteVecMoveFromGpr]>;
4040} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4041
4042// Move doubleword from xmm register to r/m32
4043//
4044let ExeDomain = SSEPackedInt in {
// "vmovd" VR128X -> GR32 (opcode 0x7E, register destination). Selected for
// extractelt of element 0 from a v4i32.
4045def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
4046                       "vmovd\t{$src, $dst|$dst, $src}",
4047                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
4048                                        (iPTR 0)))]>,
4049                       EVEX, Sched<[WriteVecMoveToGpr]>;
// "vmovd" VR128X -> i32mem (opcode 0x7E, memory destination). Selected for a
// store of element 0 extracted from a v4i32.
4050def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
4051                       (ins i32mem:$dst, VR128X:$src),
4052                       "vmovd\t{$src, $dst|$dst, $src}",
4053                       [(store (i32 (extractelt (v4i32 VR128X:$src),
4054                                     (iPTR 0))), addr:$dst)]>,
4055                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
4056} // ExeDomain = SSEPackedInt
4057
4058// Move quadword from xmm1 register to r/m64
4059//
4060let ExeDomain = SSEPackedInt in {
// "vmovq" VR128X -> GR64 (opcode 0x7E with PD and REX_W). Selected for
// extractelt of element 0 from a v2i64. Requires HasAVX512.
4061def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
4062                      "vmovq\t{$src, $dst|$dst, $src}",
4063                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
4064                                                   (iPTR 0)))]>,
4065                      PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
4066                      Requires<[HasAVX512]>;
4067
// Memory-destination form of the 0x7E "vmovq". It has no selection pattern
// ([]), is codegen-only with ForceDisassemble set, states mayStore and
// hasSideEffects explicitly, and additionally requires In64BitMode.
4068let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4069def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
4070                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
4071                      EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
4072                      Requires<[HasAVX512, In64BitMode]>;
4073
// "vmovq" VR128X -> i64mem using opcode 0xD6. This is the form that carries
// the store pattern: a store of element 0 extracted from a v2i64.
4074def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
4075                      (ins i64mem:$dst, VR128X:$src),
4076                      "vmovq\t{$src, $dst|$dst, $src}",
4077                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
4078                              addr:$dst)]>,
4079                      EVEX, PD, REX_W, EVEX_CD8<64, CD8VT1>,
4080                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
4081
// Register-to-register 0xD6 "vmovq" (MRMDestReg). Pattern-less and
// codegen-only; ForceDisassemble is set so the encoding still appears in the
// disassembler tables.
4082let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
4083def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
4084                             (ins VR128X:$src),
4085                             "vmovq\t{$src, $dst|$dst, $src}", []>,
4086                             EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
4087} // ExeDomain = SSEPackedInt
4088
// Assembler alias: the "vmovq.s" spelling maps to VMOVPQI2QIZrr, the 0xD6
// MRMDestReg register-to-register encoding.
// NOTE(review): the trailing 0 is presumably the InstAlias "emit" flag
// (accepted by the parser, not used when printing) — confirm.
4089def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4090                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
4091
4092let Predicates = [HasAVX512] in {
  // Select an X86vextractstore64 of a v2i64 register to VMOVPQI2QIZmr (the
  // 0xD6 "vmovq" store to i64mem).
4093  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
4094            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
4095}
4096
4097// Move Scalar Single to Double Int
4098//
4099let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// Codegen-only "vmovd" FR32X -> GR32 (opcode 0x7E, register destination),
// selected for a bitconvert of an FR32X value into a GR32.
4100def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
4101                      (ins FR32X:$src),
4102                      "vmovd\t{$src, $dst|$dst, $src}",
4103                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
4104                      EVEX, Sched<[WriteVecMoveToGpr]>;
4105} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4106
4107// Move Quadword Int to Packed Quadword Int
4108//
4109let ExeDomain = SSEPackedInt in {
// "vmovq" i64mem -> VR128X using the XS-prefixed 0x7E load form. Selected for
// scalar_to_vector of a loadi64 producing v2i64.
// NOTE(review): the compressed-displacement tuple is written as
// EVEX_CD8<8, CD8VT8> here, while the other 64-bit memory forms in this file
// use EVEX_CD8<64, CD8VT1> — presumably both scale by 8 bytes; confirm.
4110def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
4111                      (ins i64mem:$src),
4112                      "vmovq\t{$src, $dst|$dst, $src}",
4113                      [(set VR128X:$dst,
4114                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4115                      EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
4116} // ExeDomain = SSEPackedInt
4117
4118// Allow "vmovd" but print "vmovq".
// Both directions of the 64-bit GPR <-> XMM move get the alias: GR64 -> VR128X
// (VMOV64toPQIZrr) and VR128X -> GR64 (VMOVPQIto64Zrr).
4119def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4120                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
4121def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4122                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
4123
4124// Conversions between masks and scalar fp.
// Each bitconvert is emitted as two instructions, passing through a GR32
// (32-bit case) or GR64 (64-bit case) value: VMOVSS2DIZrr / VMOVSDto64Zrr
// produce the GPR from the FP register, and VMOVDI2SSZrr / VMOV64toSDZrr
// consume it. The KMOV* instructions are defined outside this section;
// presumably they move between the GPR and the mask register — confirm.
4125def : Pat<(v32i1 (bitconvert FR32X:$src)),
4126          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
4127def : Pat<(f32 (bitconvert VK32:$src)),
4128          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
4129
4130def : Pat<(v64i1 (bitconvert FR64X:$src)),
4131          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
4132def : Pat<(f64 (bitconvert VK64:$src)),
4133          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
4134
4135//===----------------------------------------------------------------------===//
4136// AVX-512  MOVSH, MOVSS, MOVSD
4137//===----------------------------------------------------------------------===//
4138
// Defines the full set of scalar-move records for one mnemonic: rr, rrkz, rrk,
// rm, rm_alt, rmk, rmkz, mr and mrk.
//   asm         - mnemonic string prepended to every assembly string.
//   OpNode      - SDNode matched by the register-register forms.
//   vzload_frag - PatFrag matched by the plain `rm` load form.
//   _           - X86VectorVTInfo supplying RC, FRC, KRCWM, VT, ScalarMemOp,
//                 ScalarLdFrag, ImmAllZerosV and ExeDomain.
//   prd         - predicate guarding the records (defaults to HasAVX512).
// Loads use opcode 0x10 and stores use opcode 0x11.
4139multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
4140                              X86VectorVTInfo _, Predicate prd = HasAVX512> {
  // Unmasked register form. Its predicate list is [HasFP16] when prd is
  // HasFP16, otherwise [prd, OptForSize].
4141  let Predicates = !if (!eq (prd, HasFP16), [HasFP16], [prd, OptForSize]) in
4142  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4143             (ins _.RC:$src1, _.RC:$src2),
4144             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4145             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
4146             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  // Every remaining record is guarded by [prd] alone.
4147  let Predicates = [prd] in {
  // Zero-masked register form: X86selects between the OpNode result and
  // ImmAllZerosV.
4148  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4149              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4150              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
4151              "$dst {${mask}} {z}, $src1, $src2}"),
4152              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4153                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4154                                      _.ImmAllZerosV)))],
4155              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masked register form: X86selects between the OpNode result and
  // $src0, which is tied to $dst.
4156  let Constraints = "$src0 = $dst"  in
4157  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4158             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4159             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
4160             "$dst {${mask}}, $src1, $src2}"),
4161             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4162                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4163                                     (_.VT _.RC:$src0))))],
4164             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  // Unmasked loads, marked foldable and rematerializable. `rm` matches
  // vzload_frag into the vector class RC.
4165  let canFoldAsLoad = 1, isReMaterializable = 1 in {
4166  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
4167             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4168             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
4169             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4170  // _alt version uses FR32/FR64 register class.
4171  let isCodeGenOnly = 1 in
4172  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
4173                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4174                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
4175                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4176  }
  // Masked loads carry no patterns here ([]), so mayLoad and hasSideEffects
  // are given explicitly.
4177  let mayLoad = 1, hasSideEffects = 0 in {
4178    let Constraints = "$src0 = $dst" in
4179    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4180               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
4181               !strconcat(asm, "\t{$src, $dst {${mask}}|",
4182               "$dst {${mask}}, $src}"),
4183               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
4184    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4185               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
4186               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
4187               "$dst {${mask}} {z}, $src}"),
4188               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
4189  }
  // Unmasked store of the scalar FRC register.
4190  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4191             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4192             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
4193             EVEX, Sched<[WriteFStore]>;
  // Masked store: pattern-less, takes the vector class RC as source and names
  // the mask class VK1WM directly rather than _.KRCWM.
4194  let mayStore = 1, hasSideEffects = 0 in
4195  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4196              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4197              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4198              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
4199  }
4200}
4201
// Instantiate avx512_move_scalar for three element widths. Each instantiation
// adds its own prefix/map and compressed-displacement mixins; only the FP16
// variant overrides the default predicate (HasFP16 instead of HasAVX512).
4202defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4203                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4204
4205defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4206                                  VEX_LIG, XD, REX_W, EVEX_CD8<64, CD8VT1>;
4207
4208defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
4209                                  HasFP16>,
4210                                  VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
4211
// Patterns that fold a scalar X86selects feeding OpNode into the masked
// register forms of the move instruction.
//   InstrStr - name prefix of the instruction; "rrk" / "rrkz" are appended.
//   OpNode   - the scalar-move SDNode being matched.
//   ZeroFP   - PatLeaf matching the floating-point zero of the element type.
//   _        - X86VectorVTInfo supplying VT, EltVT, RC and FRC.
4212multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4213                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
4214
// Select between two scalar registers: $src2 (the false value) becomes the
// tied pass-through operand of rrk, $src1 (the true value) the moved scalar.
4215def : Pat<(_.VT (OpNode _.RC:$src0,
4216                        (_.VT (scalar_to_vector
4217                                  (_.EltVT (X86selects VK1WM:$mask,
4218                                                       (_.EltVT _.FRC:$src1),
4219                                                       (_.EltVT _.FRC:$src2))))))),
4220          (!cast<Instruction>(InstrStr#rrk)
4221                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4222                        VK1WM:$mask,
4223                        (_.VT _.RC:$src0),
4224                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4225
// Select between a scalar register and FP zero: maps to the zero-masking rrkz
// form, so no pass-through operand is needed.
4226def : Pat<(_.VT (OpNode _.RC:$src0,
4227                        (_.VT (scalar_to_vector
4228                                  (_.EltVT (X86selects VK1WM:$mask,
4229                                                       (_.EltVT _.FRC:$src1),
4230                                                       (_.EltVT ZeroFP))))))),
4231          (!cast<Instruction>(InstrStr#rrkz)
4232                        VK1WM:$mask,
4233                        (_.VT _.RC:$src0),
4234                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4235}
4236
// Lowers a 512-bit masked_store whose stored value is a 128-bit register
// inserted at index 0 of an undef vector to the scalar masked store
// (InstrStr#mrk).
//   InstrStr - name prefix of the instruction; "mrk" is appended.
//   _        - AVX512VLVectorVTInfo supplying info512 / info128.
//   Mask     - dag the masked_store's mask operand must match; it is expected
//              to bind $mask (see the instantiations).
//   MaskRC   - register class of $mask, copied directly to VK1WM.
4237multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4238                                        dag Mask, RegisterClass MaskRC> {
4239
4240def : Pat<(masked_store
4241             (_.info512.VT (insert_subvector undef,
4242                               (_.info128.VT _.info128.RC:$src),
4243                               (iPTR 0))), addr:$dst, Mask),
4244          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4245                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4246                      _.info128.RC:$src)>;
4247
4248}
4249
// Same source pattern as avx512_store_scalar_lowering, but $mask is first
// placed into an i32 IMPLICIT_DEF via INSERT_SUBREG at `subreg` before being
// copied to VK1WM.
//   InstrStr - name prefix of the instruction; "mrk" is appended.
//   _        - AVX512VLVectorVTInfo supplying info512 / info128.
//   Mask     - dag the masked_store's mask operand must match (binds $mask).
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask is inserted into the i32.
4250multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4251                                               AVX512VLVectorVTInfo _,
4252                                               dag Mask, RegisterClass MaskRC,
4253                                               SubRegIndex subreg> {
4254
4255def : Pat<(masked_store
4256             (_.info512.VT (insert_subvector undef,
4257                               (_.info128.VT _.info128.RC:$src),
4258                               (iPTR 0))), addr:$dst, Mask),
4259          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4260                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4261                      _.info128.RC:$src)>;
4262
4263}
4264
4265// This matches the more recent codegen from clang that avoids emitting a 512
4266// bit masked store directly. Codegen will widen 128-bit masked store to 512
4267// bits on AVX512F only targets.
// Emits two store patterns that both select InstrStr#mrk with the same
// INSERT_SUBREG-based mask conversion:
//   Mask512 - mask dag matched on the widened 512-bit masked_store.
//   Mask128 - mask dag matched on the native 128-bit masked_store.
//   MaskRC / subreg - register class of $mask and the sub-register index used
//                     when inserting it into an i32 IMPLICIT_DEF.
4268multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4269                                               AVX512VLVectorVTInfo _,
4270                                               dag Mask512, dag Mask128,
4271                                               RegisterClass MaskRC,
4272                                               SubRegIndex subreg> {
4273
4274// AVX512F pattern.
4275def : Pat<(masked_store
4276             (_.info512.VT (insert_subvector undef,
4277                               (_.info128.VT _.info128.RC:$src),
4278                               (iPTR 0))), addr:$dst, Mask512),
4279          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4280                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4281                      _.info128.RC:$src)>;
4282
4283// AVX512VL pattern.
4284def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4285          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4286                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4287                      _.info128.RC:$src)>;
4288}
4289
// Lowers the low 128 bits (extract_subvector at index 0) of a 512-bit
// masked_load to the scalar masked loads InstrStr#rmkz / InstrStr#rmk.
//   InstrStr - name prefix of the instruction; "rmkz" / "rmk" are appended.
//   _        - AVX512VLVectorVTInfo supplying info512 / info128.
//   Mask     - dag the masked_load's mask operand must match (binds $mask).
//   MaskRC   - register class of $mask, copied directly to VK1WM.
4290multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4291                                       dag Mask, RegisterClass MaskRC> {
4292
// Pass-through is all zeros: use the zero-masking load.
4293def : Pat<(_.info128.VT (extract_subvector
4294                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4295                                        _.info512.ImmAllZerosV)),
4296                           (iPTR 0))),
4297          (!cast<Instruction>(InstrStr#rmkz)
4298                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4299                      addr:$srcAddr)>;
4300
// Pass-through is X86vzmovl of $src inserted at index 0 of an undef 512-bit
// vector: use the merge-masking load with $src as the tied operand.
4301def : Pat<(_.info128.VT (extract_subvector
4302                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4303                      (_.info512.VT (insert_subvector undef,
4304                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4305                            (iPTR 0))))),
4306                (iPTR 0))),
4307          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4308                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4309                      addr:$srcAddr)>;
4310
4311}
4312
// Same two source patterns as avx512_load_scalar_lowering, but $mask is first
// placed into an i32 IMPLICIT_DEF via INSERT_SUBREG at `subreg` before being
// copied to VK1WM.
//   InstrStr - name prefix of the instruction; "rmkz" / "rmk" are appended.
//   _        - AVX512VLVectorVTInfo supplying info512 / info128.
//   Mask     - dag the masked_load's mask operand must match (binds $mask).
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index at which $mask is inserted into the i32.
4313multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4314                                              AVX512VLVectorVTInfo _,
4315                                              dag Mask, RegisterClass MaskRC,
4316                                              SubRegIndex subreg> {
4317
// Zero pass-through -> rmkz.
4318def : Pat<(_.info128.VT (extract_subvector
4319                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4320                                        _.info512.ImmAllZerosV)),
4321                           (iPTR 0))),
4322          (!cast<Instruction>(InstrStr#rmkz)
4323                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4324                      addr:$srcAddr)>;
4325
// X86vzmovl($src) pass-through -> rmk with $src as the tied operand.
4326def : Pat<(_.info128.VT (extract_subvector
4327                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4328                      (_.info512.VT (insert_subvector undef,
4329                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4330                            (iPTR 0))))),
4331                (iPTR 0))),
4332          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4333                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4334                      addr:$srcAddr)>;
4335
4336}
4337
4338// This matches the more recent codegen from clang that avoids emitting a 512
4339// bit masked load directly. Codegen will widen 128-bit masked load to 512
4340// bits on AVX512F only targets.
// Emits four load patterns selecting InstrStr#rmkz / InstrStr#rmk: a
// zero-pass-through and an X86vzmovl-pass-through variant for the widened
// 512-bit masked_load (mask dag Mask512) and the same two for the native
// 128-bit masked_load (mask dag Mask128). All use the INSERT_SUBREG-based
// conversion of MaskRC:$mask at `subreg` followed by a copy to VK1WM.
4341multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4342                                              AVX512VLVectorVTInfo _,
4343                                              dag Mask512, dag Mask128,
4344                                              RegisterClass MaskRC,
4345                                              SubRegIndex subreg> {
4346// AVX512F patterns.
4347def : Pat<(_.info128.VT (extract_subvector
4348                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4349                                        _.info512.ImmAllZerosV)),
4350                           (iPTR 0))),
4351          (!cast<Instruction>(InstrStr#rmkz)
4352                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4353                      addr:$srcAddr)>;
4354
4355def : Pat<(_.info128.VT (extract_subvector
4356                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4357                      (_.info512.VT (insert_subvector undef,
4358                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4359                            (iPTR 0))))),
4360                (iPTR 0))),
4361          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4362                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4363                      addr:$srcAddr)>;
4364
4365// AVX512VL patterns.
4366def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4367                         _.info128.ImmAllZerosV)),
4368          (!cast<Instruction>(InstrStr#rmkz)
4369                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4370                      addr:$srcAddr)>;
4371
4372def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4373                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4374          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4375                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4376                      addr:$srcAddr)>;
4377}
4378
// Fold scalar selects into the masked register forms of VMOVSSZ / VMOVSDZ.
4379defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4380defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4381
// Masked scalar stores. In every mask dag the GPR $mask is ANDed with 1 and
// bitconverted to the mask vector type; the variants differ in the width of
// the GPR the mask arrives in (GR32 truncated to i16, GR16, GR8).
4382defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4383                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4384defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4385                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4386defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4387                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4388
4389let Predicates = [HasFP16] in {
// FP16 counterparts of the VMOVSSZ / VMOVSDZ lowering instantiations, all
// targeting VMOVSHZ.
4390defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
// NOTE(review): the next two instantiations pass the identical mask dag
// (GR32 ANDed with 1, bitconverted to v32i1) and so produce patterns with the
// same source; they differ only in how $mask is converted to VK1WM. Confirm
// both are needed.
4391defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4392                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4393defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4394                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
// Mask arriving in a GR8: matched both as a v8i1 inserted at index 0 of an
// all-zeros v32i1 (512-bit store) and as the bare v8i1 (128-bit store).
4395defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4396                   (v32i1 (insert_subvector
4397                           (v32i1 immAllZerosV),
4398                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4399                           (iPTR 0))),
4400                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4401                   GR8, sub_8bit>;
4402
// Load-side instantiations mirroring the three store-side ones above,
// including the shared GR32 mask dag of the first two.
4403defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4404                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4405defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4406                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4407defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4408                   (v32i1 (insert_subvector
4409                           (v32i1 immAllZerosV),
4410                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4411                           (iPTR 0))),
4412                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4413                   GR8, sub_8bit>;
4414
// Plain f16 select between two FR16X registers: both operands are copied to
// VR128X, VMOVSHZrrk merges them ($src2 is the tied pass-through, $src1 the
// moved scalar, and the first vector source is IMPLICIT_DEF), and the result
// is copied back to FR16X.
4415def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4416          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4417           (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4418           VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4419           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4420
// f16 select against FP zero: uses the zero-masking VMOVSHZrrkz form.
4421def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4422          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4423           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4424}
4425
// Store-side subreg2 instantiations for a mask arriving in a GR8. For each,
// the first dag is the mask as it appears on the widened 512-bit masked_store
// and the second is the mask on the native 128-bit masked_store.
// VMOVSSZ: the low v4i1 of the v8i1 bitconvert, inserted into an all-zeros
// v16i1 for the 512-bit form.
4426defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4427                   (v16i1 (insert_subvector
4428                           (v16i1 immAllZerosV),
4429                           (v4i1 (extract_subvector
4430                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4431                                  (iPTR 0))),
4432                           (iPTR 0))),
4433                   (v4i1 (extract_subvector
4434                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4435                          (iPTR 0))), GR8, sub_8bit>;
// VMOVSDZ: the low v2i1 of the v8i1 bitconvert; the 512-bit form inserts it
// into an all-zeros v16i1 and then extracts the low v8i1.
4436defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4437                   (v8i1
4438                    (extract_subvector
4439                     (v16i1
4440                      (insert_subvector
4441                       (v16i1 immAllZerosV),
4442                       (v2i1 (extract_subvector
4443                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4444                              (iPTR 0))),
4445                       (iPTR 0))),
4446                     (iPTR 0))),
4447                   (v2i1 (extract_subvector
4448                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4449                          (iPTR 0))), GR8, sub_8bit>;
4450
// Load-side instantiations with masks arriving in GR32 (truncated to i16),
// GR16 and GR8.
4451defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4452                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4453defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4454                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4455defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4456                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4457
// Load-side subreg2 instantiations; the mask dags are the same shapes as in
// the two store-side subreg2 instantiations.
4458defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4459                   (v16i1 (insert_subvector
4460                           (v16i1 immAllZerosV),
4461                           (v4i1 (extract_subvector
4462                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4463                                  (iPTR 0))),
4464                           (iPTR 0))),
4465                   (v4i1 (extract_subvector
4466                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4467                          (iPTR 0))), GR8, sub_8bit>;
4468defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4469                   (v8i1
4470                    (extract_subvector
4471                     (v16i1
4472                      (insert_subvector
4473                       (v16i1 immAllZerosV),
4474                       (v2i1 (extract_subvector
4475                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4476                              (iPTR 0))),
4477                       (iPTR 0))),
4478                     (iPTR 0))),
4479                   (v2i1 (extract_subvector
4480                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4481                          (iPTR 0))), GR8, sub_8bit>;
4482
// Scalar f32 select between two FR32X registers: operands are copied to
// VR128X, VMOVSSZrrk merges them ($src2 is the tied pass-through, $src1 the
// moved scalar, first vector source IMPLICIT_DEF), and the result is copied
// back to FR32X.
4483def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4484          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4485           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4486           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4487           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4488
// f32 select against FP zero: zero-masking register form.
4489def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4490          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4491           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4492
// f32 select whose true value is a load: folded into the masked loads
// (merge-masking with $src0 as pass-through, or zero-masking against FP zero).
4493def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4494          (COPY_TO_REGCLASS
4495           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4496                                                       VK1WM:$mask, addr:$src)),
4497           FR32X)>;
4498def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4499          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4500
// f64 versions of the four f32 patterns, using FR64X and VMOVSDZ*.
4501def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4502          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4503           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4504           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4505           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4506
4507def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4508          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4509           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4510
4511def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4512          (COPY_TO_REGCLASS
4513           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4514                                                       VK1WM:$mask, addr:$src)),
4515           FR64X)>;
4516def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4517          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4518
4519
// X86selects on whole v4f32 / v2f64 registers: $src2 is the tied pass-through
// and $src1 is supplied as both vector source operands of the rrk form.
4520def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4521          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4522def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4523          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4524
// Same, selecting against an all-zeros vector: zero-masking rrkz form.
4525def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4526          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4527def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4528          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4529
4530let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4531  let Predicates = [HasFP16] in {
    // Pattern-less register-register "vmovsh" records using opcode 0x11 in
    // MRMDestReg form, whereas the VMOVSHZ rr/rrk/rrkz records from
    // avx512_move_scalar use opcode 0x10 in MRMSrcReg form. The three
    // variants are unmasked, merge-masked ($src0 tied to $dst) and
    // zero-masked.
4532    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4533        (ins VR128X:$src1, VR128X:$src2),
4534        "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4535        []>, T_MAP5XS, EVEX_4V, VEX_LIG,
4536        Sched<[SchedWriteFShuffle.XMM]>;
4537
4538    let Constraints = "$src0 = $dst" in
4539    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4540        (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4541         VR128X:$src1, VR128X:$src2),
4542        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4543          "$dst {${mask}}, $src1, $src2}",
4544        []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
4545        Sched<[SchedWriteFShuffle.XMM]>;
4546
4547    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4548        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4549        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4550          "$dst {${mask}} {z}, $src1, $src2}",
4551        []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
4552        Sched<[SchedWriteFShuffle.XMM]>;
4553  }
// Pattern-less register-register "vmovss" records using opcode 0x11 in
// MRMDestReg form (the VMOVSSZ rr/rrk/rrkz records from avx512_move_scalar use
// opcode 0x10 in MRMSrcReg form): unmasked, merge-masked and zero-masked.
4554  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4555                           (ins VR128X:$src1, VR128X:$src2),
4556                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4557                           []>, XS, EVEX_4V, VEX_LIG,
4558                           Sched<[SchedWriteFShuffle.XMM]>;
4559
  // Merge-masked: $src0 is tied to $dst.
4560  let Constraints = "$src0 = $dst" in
4561  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4562                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4563                                                   VR128X:$src1, VR128X:$src2),
4564                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4565                                        "$dst {${mask}}, $src1, $src2}",
4566                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4567                             Sched<[SchedWriteFShuffle.XMM]>;
4568
  // Zero-masked.
4569  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4570                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4571                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4572                                    "$dst {${mask}} {z}, $src1, $src2}",
4573                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4574                         Sched<[SchedWriteFShuffle.XMM]>;
4575
  // "vmovsd" counterparts of the 0x11 records: same shape, with the XD prefix
  // and REX_W.
4576  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4577                           (ins VR128X:$src1, VR128X:$src2),
4578                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4579                           []>, XD, EVEX_4V, VEX_LIG, REX_W,
4580                           Sched<[SchedWriteFShuffle.XMM]>;
4581
  // Merge-masked: $src0 is tied to $dst.
4582  let Constraints = "$src0 = $dst" in
4583  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4584                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4585                                                   VR128X:$src1, VR128X:$src2),
4586                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4587                                        "$dst {${mask}}, $src1, $src2}",
4588                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4589                             REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4590
4591  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4592                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4593                                                          VR128X:$src2),
4594                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4595                                         "$dst {${mask}} {z}, $src1, $src2}",
4596                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4597                              REX_W, Sched<[SchedWriteFShuffle.XMM]>;
4598}
4599
// ".s"-suffixed mnemonics (vmovsh.s / vmovss.s / vmovsd.s) that map onto the
// corresponding _REV definitions, in unmasked, merge-masked and zero-masked
// forms. The tied $src0 operand of the rrk_REV instructions is not listed in
// the alias result dags.
// NOTE(review): the trailing 0 is presumably InstAlias's Emit flag (alias is
// accepted by the parser but never used when printing) -- confirm against the
// InstAlias class definition.
4600def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4601                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4602def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4603                             "$dst {${mask}}, $src1, $src2}",
4604                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4605                                VR128X:$src1, VR128X:$src2), 0>;
4606def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4607                             "$dst {${mask}} {z}, $src1, $src2}",
4608                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4609                                 VR128X:$src1, VR128X:$src2), 0>;
4610def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4611                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4612def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4613                             "$dst {${mask}}, $src1, $src2}",
4614                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4615                                VR128X:$src1, VR128X:$src2), 0>;
4616def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4617                             "$dst {${mask}} {z}, $src1, $src2}",
4618                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4619                                 VR128X:$src1, VR128X:$src2), 0>;
4620def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4621                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4622def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4623                             "$dst {${mask}}, $src1, $src2}",
4624                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4625                                VR128X:$src1, VR128X:$src2), 0>;
4626def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4627                             "$dst {${mask}} {z}, $src1, $src2}",
4628                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4629                                 VR128X:$src1, VR128X:$src2), 0>;
4630
// Under OptForSize, select X86vzmovl on vectors with 32-bit elements (both the
// f32 and i32 flavours) to VMOVSSZrr with AVX512_128_SET0 as its first source.
// For 256- and 512-bit types the pattern extracts the xmm subregister, applies
// the 128-bit instruction and wraps the result in SUBREG_TO_REG.
4631let Predicates = [HasAVX512, OptForSize] in {
4632  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4633            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4634  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4635            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4636
4637  // Move low f32 and clear high bits.
4638  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4639            (SUBREG_TO_REG (i32 0),
4640             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4641              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4642  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4643            (SUBREG_TO_REG (i32 0),
4644             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4645              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4646
  // Same lowering for the 512-bit types.
4647  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4648            (SUBREG_TO_REG (i32 0),
4649             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4650              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4651  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4652            (SUBREG_TO_REG (i32 0),
4653             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4654              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4655}
4656
4657// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4658// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Only the 512-bit X86vzmovl cases are handled in this block. The f32 form
// blends with VBLENDPSrri and immediate 1; the i32 form uses VPBLENDWrri with
// immediate 3. Both take V_SET0 as the first blend source and operate on the
// extracted xmm subregister.
// NOTE(review): immediate 3 presumably selects the two low 16-bit words (one
// 32-bit element) -- confirm against the VPBLENDW immediate semantics.
4659let Predicates = [HasAVX512, OptForSpeed] in {
4660  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4661            (SUBREG_TO_REG (i32 0),
4662             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4663                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4664                          (i8 1))), sub_xmm)>;
4665  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4666            (SUBREG_TO_REG (i32 0),
4667             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4668                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4669                          (i8 3))), sub_xmm)>;
4670}
4671
// Scalar f32/f64 loads fed into scalar_to_vector select to VMOVSSZrm /
// VMOVSDZrm. The X86vzload32/X86vzload64 forms for 256- and 512-bit result
// types reuse the same 128-bit load instructions and widen the result with
// SUBREG_TO_REG on sub_xmm.
4672let Predicates = [HasAVX512] in {
4673  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4674            (VMOVSSZrm addr:$src)>;
4675  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4676            (VMOVSDZrm addr:$src)>;
4677
4678  // Represent the same patterns above but in the form they appear for
4679  // 256-bit types
4680  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4681            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4682  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4683            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4684
4685  // Represent the same patterns above but in the form they appear for
4686  // 512-bit types
4687  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4688            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4689  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4690            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4691}
// HasFP16 counterparts for 16-bit elements: X86vzmovl on f16/i16 vectors
// selects to VMOVSHZrr with AVX512_128_SET0 as the first source, and
// X86vzload16 selects to VMOVSHZrm. As with the wider-element patterns, the
// 256-/512-bit cases operate on the xmm subregister and widen the result with
// SUBREG_TO_REG. Unlike the 32-bit-element X86vzmovl patterns, these are not
// split by OptForSize/OptForSpeed.
4692let Predicates = [HasFP16] in {
4693  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4694            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4695  def : Pat<(v8i16 (X86vzmovl (v8i16 VR128X:$src))),
4696            (VMOVSHZrr (v8i16 (AVX512_128_SET0)), VR128X:$src)>;
4697
4698  // FIXME we need better canonicalization in dag combine
4699  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4700            (SUBREG_TO_REG (i32 0),
4701             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4702              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4703  def : Pat<(v16i16 (X86vzmovl (v16i16 VR256X:$src))),
4704            (SUBREG_TO_REG (i32 0),
4705             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4706              (v8i16 (EXTRACT_SUBREG (v16i16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4707
4708  // FIXME we need better canonicalization in dag combine
4709  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4710            (SUBREG_TO_REG (i32 0),
4711             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4712              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4713  def : Pat<(v32i16 (X86vzmovl (v32i16 VR512:$src))),
4714            (SUBREG_TO_REG (i32 0),
4715             (v8i16 (VMOVSHZrr (v8i16 (AVX512_128_SET0)),
4716              (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm)))), sub_xmm)>;
4717
  // Zero-extending 16-bit loads; only the f16 result types are covered here.
4718  def : Pat<(v8f16 (X86vzload16 addr:$src)),
4719            (VMOVSHZrm addr:$src)>;
4720
4721  def : Pat<(v16f16 (X86vzload16 addr:$src)),
4722            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4723
4724  def : Pat<(v32f16 (X86vzload16 addr:$src)),
4725            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4726}
4727
// Register-to-register "vmovq" (opcode 0x7E, MRMSrcReg, EVEX with REX_W) that
// implements X86vzmovl on v2i64. It is placed in the SSEPackedInt execution
// domain and scheduled as SchedWriteVecLogic.XMM.
4728let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4729def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4730                                (ins VR128X:$src),
4731                                "vmovq\t{$src, $dst|$dst, $src}",
4732                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4733                                                   (v2i64 VR128X:$src))))]>,
4734                                EVEX, REX_W;
4735}
4736
// Integer movd/movq selection patterns for AVX-512:
//  - GPR -> vector (scalar_to_vector, optionally under X86vzmovl) via
//    VMOVDI2PDIZrr / VMOV64toPQIZrr;
//  - zero-extending loads (X86vzload32/64) via VMOVDI2PDIZrm / VMOVQI2PQIZrm;
//  - X86vzmovl on 64-bit-element vectors via VMOVZPQILo2PQIZrr.
// 256- and 512-bit result types reuse the 128-bit instruction and widen the
// result with SUBREG_TO_REG on sub_xmm.
4737let Predicates = [HasAVX512] in {
  // An any-extended GR8 is first inserted into an undefined i32
  // (IMPLICIT_DEF) at sub_8bit, then moved with VMOVDI2PDIZrr.
4738  def : Pat<(v4i32 (scalar_to_vector (i32 (anyext GR8:$src)))),
4739            (VMOVDI2PDIZrr (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
4740                                              GR8:$src, sub_8bit)))>;
4741  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4742            (VMOVDI2PDIZrr GR32:$src)>;
4743
4744  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4745            (VMOV64toPQIZrr GR64:$src)>;
4746
4747  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4748  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4749            (VMOVDI2PDIZrm addr:$src)>;
4750  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4751            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4752  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4753            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4754  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4755            (VMOVQI2PQIZrm addr:$src)>;
4756  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4757            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4758
4759  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4760  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4761            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4762  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4763            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4764
  // X86vzmovl on 256-bit vectors with 64-bit elements: apply the 128-bit
  // vmovq to the extracted xmm subregister.
4765  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4766            (SUBREG_TO_REG (i32 0),
4767             (v2f64 (VMOVZPQILo2PQIZrr
4768                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4769             sub_xmm)>;
4770  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4771            (SUBREG_TO_REG (i32 0),
4772             (v2i64 (VMOVZPQILo2PQIZrr
4773                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4774             sub_xmm)>;
4775
  // Same for the 512-bit types.
4776  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4777            (SUBREG_TO_REG (i32 0),
4778             (v2f64 (VMOVZPQILo2PQIZrr
4779                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4780             sub_xmm)>;
4781  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4782            (SUBREG_TO_REG (i32 0),
4783             (v2i64 (VMOVZPQILo2PQIZrr
4784                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4785             sub_xmm)>;
4786}
4787
4788//===----------------------------------------------------------------------===//
4789// AVX-512 - Non-temporals
4790//===----------------------------------------------------------------------===//
4791
// 512-bit "vmovntdqa" load (opcode 0x2A, MRMSrcMem, T8PD, EVEX_V512). The
// definition carries no selection pattern ([]); it is selected through
// separate Pat records on alignednontemporalload.
4792def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4793                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4794                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4795                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4796
// 256- and 128-bit "vmovntdqa" loads, gated on HasVLX. Same opcode and
// prefixes as the 512-bit form; only the register class, memory operand,
// scheduling width and EVEX vector-length mixin differ. No selection pattern
// is attached here.
4797let Predicates = [HasVLX] in {
4798  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4799                       (ins i256mem:$src),
4800                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4801                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4802                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4803
4804  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4805                      (ins i128mem:$src),
4806                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4807                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4808                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4809}
4810
// Defines the "mr" (register -> memory) form of a non-temporal store.
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic.
//   _          - vector type info supplying MemOp, RC, VT, ExeDomain, EltSize.
//   Sched      - move/load/store scheduling info; its MR entry is used.
//   st_frag    - store fragment to match (default: alignednontemporalstore).
// The definition is given AddedComplexity = 400.
// NOTE(review): presumably so it is preferred over ordinary store patterns --
// confirm.
4811multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4812                        X86SchedWriteMoveLS Sched,
4813                        PatFrag st_frag = alignednontemporalstore> {
4814  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4815  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4816                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4817                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4818                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4819}
4820
// Instantiates avx512_movnt at all three vector lengths: Z (512-bit) under
// HasAVX512, and Z256/Z128 under HasAVX512 + HasVLX. VTInfo supplies the
// per-width type info and Sched the per-width scheduling info. The st_frag
// argument of avx512_movnt is left at its default.
4821multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4822                           AVX512VLVectorVTInfo VTInfo,
4823                           X86SchedWriteMoveLSWidths Sched> {
4824  let Predicates = [HasAVX512] in
4825    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4826
4827  let Predicates = [HasAVX512, HasVLX] in {
4828    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4829    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4830  }
4831}
4832
// Non-temporal store instructions. VMOVNTDQ is instantiated with the i64
// type info, VMOVNTPD with f64 (plus REX_W) and VMOVNTPS with f32. The packed
// FP stores share opcode 0x2B and differ by prefix (PD vs PS).
4833defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4834                                SchedWriteVecMoveLSNT>, PD;
4835defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4836                                SchedWriteFMoveLSNT>, PD, REX_W;
4837defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4838                                SchedWriteFMoveLSNT>, PS;
4839
// 512-bit non-temporal patterns. The VMOVNTDQ instantiation uses the i64 type
// info, so the remaining integer element types (i32/i16/i8) are mapped onto
// VMOVNTDQZmr here. All aligned non-temporal 512-bit loads, integer and
// floating point alike, select to VMOVNTDQAZrm.
4840let Predicates = [HasAVX512], AddedComplexity = 400 in {
4841  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4842            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4843  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4844            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4845  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4846            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4847
4848  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4849            (VMOVNTDQAZrm addr:$src)>;
4850  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4851            (VMOVNTDQAZrm addr:$src)>;
4852  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4853            (VMOVNTDQAZrm addr:$src)>;
4854  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4855            (VMOVNTDQAZrm addr:$src)>;
4856  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4857            (VMOVNTDQAZrm addr:$src)>;
4858  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4859            (VMOVNTDQAZrm addr:$src)>;
4860}
4861
// 256- and 128-bit non-temporal patterns, gated on HasVLX. They mirror the
// 512-bit set: i32/i16/i8 aligned non-temporal stores map onto
// VMOVNTDQZ256mr / VMOVNTDQZ128mr, and every aligned non-temporal load of the
// given width selects to VMOVNTDQAZ256rm / VMOVNTDQAZ128rm.
4862let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores and loads.
4863  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4864            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4865  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4866            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4867  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4868            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4869
4870  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4871            (VMOVNTDQAZ256rm addr:$src)>;
4872  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4873            (VMOVNTDQAZ256rm addr:$src)>;
4874  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4875            (VMOVNTDQAZ256rm addr:$src)>;
4876  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4877            (VMOVNTDQAZ256rm addr:$src)>;
4878  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4879            (VMOVNTDQAZ256rm addr:$src)>;
4880  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4881            (VMOVNTDQAZ256rm addr:$src)>;
4882
  // 128-bit stores and loads.
4883  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4884            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4885  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4886            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4887  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4888            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4889
4890  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4891            (VMOVNTDQAZ128rm addr:$src)>;
4892  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4893            (VMOVNTDQAZ128rm addr:$src)>;
4894  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4895            (VMOVNTDQAZ128rm addr:$src)>;
4896  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4897            (VMOVNTDQAZ128rm addr:$src)>;
4898  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4899            (VMOVNTDQAZ128rm addr:$src)>;
4900  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4901            (VMOVNTDQAZ128rm addr:$src)>;
4902}
4903
4904//===----------------------------------------------------------------------===//
4905// AVX-512 - Integer arithmetic
4906//
// Maskable two-operand integer instruction in register (rr) and full-vector
// memory (rm) forms.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SDNode matched by the selection pattern.
//   _            - vector type info (RC, VT, MemOp, LdFrag).
//   sched        - scheduling class; rm uses sched.Folded and
//                  sched.ReadAfterFold.
//   IsCommutable - forwarded to both trailing AVX512_maskable arguments of the
//                  rr form only; the rm form leaves them at their defaults.
4907multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4908                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4909                           bit IsCommutable = 0> {
4910  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4911                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4912                    "$src2, $src1", "$src1, $src2",
4913                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4914                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4915                    Sched<[sched]>;
4916
4917  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4918                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4919                  "$src2, $src1", "$src1, $src2",
4920                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4921                  AVX512BIBase, EVEX_4V,
4922                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4923}
4924
// Extends avx512_binop_rm (inherited, so rr and rm are also defined) with an
// rmb form whose second operand is a scalar memory operand matched through
// _.BroadcastLdFrag. The assembly string appends _.BroadcastStr to $src2 and
// the instruction is tagged EVEX_B. Parameters are the same as
// avx512_binop_rm.
4925multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4926                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4927                            bit IsCommutable = 0> :
4928           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4929  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4930                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4931                  "${src2}"#_.BroadcastStr#", $src1",
4932                  "$src1, ${src2}"#_.BroadcastStr,
4933                  (_.VT (OpNode _.RC:$src1,
4934                                (_.BroadcastLdFrag addr:$src2)))>,
4935                  AVX512BIBase, EVEX_4V, EVEX_B,
4936                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4937}
4938
// Instantiates avx512_binop_rm (no broadcast form) at all three vector
// lengths: Z under [prd], Z256 and Z128 under [prd, HasVLX].
//   VTInfo - per-width vector type info (info512 / info256 / info128).
//   sched  - per-width scheduling classes (ZMM / YMM / XMM).
//   prd    - feature predicate required by every width.
4939multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4940                              AVX512VLVectorVTInfo VTInfo,
4941                              X86SchedWriteWidths sched, Predicate prd,
4942                              bit IsCommutable = 0> {
4943  let Predicates = [prd] in
4944    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4945                             IsCommutable>, EVEX_V512;
4946
4947  let Predicates = [prd, HasVLX] in {
4948    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4949                                sched.YMM, IsCommutable>, EVEX_V256;
4950    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4951                                sched.XMM, IsCommutable>, EVEX_V128;
4952  }
4953}
4954
// Same as avx512_binop_rm_vl but built on avx512_binop_rmb, so every width
// also gets the broadcast (rmb) form. Z is defined under [prd]; Z256 and Z128
// under [prd, HasVLX].
4955multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4956                               AVX512VLVectorVTInfo VTInfo,
4957                               X86SchedWriteWidths sched, Predicate prd,
4958                               bit IsCommutable = 0> {
4959  let Predicates = [prd] in
4960    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4961                             IsCommutable>, EVEX_V512;
4962
4963  let Predicates = [prd, HasVLX] in {
4964    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4965                                 sched.YMM, IsCommutable>, EVEX_V256;
4966    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4967                                 sched.XMM, IsCommutable>, EVEX_V128;
4968  }
4969}
4970
// Qword (i64 element) wrapper: avx512_binop_rmb_vl over avx512vl_i64_info,
// tagged REX_W and EVEX_CD8<64, CD8VF>. Includes the broadcast form.
4971multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4972                                X86SchedWriteWidths sched, Predicate prd,
4973                                bit IsCommutable = 0> {
4974  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4975                                  sched, prd, IsCommutable>,
4976                                  REX_W, EVEX_CD8<64, CD8VF>;
4977}
4978
// Dword (i32 element) wrapper: avx512_binop_rmb_vl over avx512vl_i32_info,
// tagged EVEX_CD8<32, CD8VF>. Includes the broadcast form; unlike the qword
// wrapper, no REX_W is applied.
4979multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4980                                X86SchedWriteWidths sched, Predicate prd,
4981                                bit IsCommutable = 0> {
4982  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4983                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4984}
4985
// Word (i16 element) wrapper: avx512_binop_rm_vl over avx512vl_i16_info,
// tagged EVEX_CD8<16, CD8VF> and WIG. Built on the non-broadcast multiclass,
// so no rmb form is generated.
4986multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4987                                X86SchedWriteWidths sched, Predicate prd,
4988                                bit IsCommutable = 0> {
4989  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4990                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4991                                 WIG;
4992}
4993
// Byte (i8 element) wrapper: avx512_binop_rm_vl over avx512vl_i8_info, tagged
// EVEX_CD8<8, CD8VF> and WIG. Built on the non-broadcast multiclass, so no rmb
// form is generated.
4994multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4995                                X86SchedWriteWidths sched, Predicate prd,
4996                                bit IsCommutable = 0> {
4997  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4998                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4999                                 WIG;
5000}
5001
// Defines both the qword (Q) and dword (D) variants of an operation. The
// mnemonic is OpcodeStr with "q" / "d" appended; opc_q and opc_d are the
// respective opcode bytes. Note the parameter order is (opc_d, opc_q).
5002multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
5003                                 SDNode OpNode, X86SchedWriteWidths sched,
5004                                 Predicate prd, bit IsCommutable = 0> {
5005  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
5006                                   IsCommutable>;
5007
5008  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
5009                                   IsCommutable>;
5010}
5011
// Defines both the word (W) and byte (B) variants of an operation. The
// mnemonic is OpcodeStr with "w" / "b" appended; opc_w and opc_b are the
// respective opcode bytes. Note the parameter order is (opc_b, opc_w).
5012multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
5013                                 SDNode OpNode, X86SchedWriteWidths sched,
5014                                 Predicate prd, bit IsCommutable = 0> {
5015  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
5016                                   IsCommutable>;
5017
5018  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
5019                                   IsCommutable>;
5020}
5021
// Defines an operation for all four integer element sizes. The dword/qword
// variants are predicated on HasAVX512 and the byte/word variants on HasBWI.
// Opcode parameters are given in the order (byte, word, dword, qword).
5022multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
5023                                  bits<8> opc_d, bits<8> opc_q,
5024                                  string OpcodeStr, SDNode OpNode,
5025                                  X86SchedWriteWidths sched,
5026                                  bit IsCommutable = 0> {
5027  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
5028                                    sched, HasAVX512, IsCommutable>,
5029              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
5030                                    sched, HasBWI, IsCommutable>;
5031}
5032
// Maskable binary operation whose source and destination vector types may
// differ. Defines rr, rm and rmb forms.
//   _Src   - type info for both source operands.
//   _Dst   - type info for the result (also the info handed to
//            AVX512_maskable).
//   _Brdct - type info used for the broadcast memory operand of rmb; the
//            broadcast-loaded value is bitconverted before being fed to
//            OpNode.
//   IsCommutable - forwarded once to AVX512_maskable for the rr form only.
5033multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
5034                            X86FoldableSchedWrite sched,
5035                            SDNode OpNode,X86VectorVTInfo _Src,
5036                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
5037                            bit IsCommutable = 0> {
5038  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5039                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5040                            "$src2, $src1","$src1, $src2",
5041                            (_Dst.VT (OpNode
5042                                         (_Src.VT _Src.RC:$src1),
5043                                         (_Src.VT _Src.RC:$src2))),
5044                            IsCommutable>,
5045                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
5046  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5047                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5048                        "$src2, $src1", "$src1, $src2",
5049                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5050                                      (_Src.LdFrag addr:$src2)))>,
5051                        AVX512BIBase, EVEX_4V,
5052                        Sched<[sched.Folded, sched.ReadAfterFold]>;
5053
5054  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5055                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
5056                    OpcodeStr,
5057                    "${src2}"#_Brdct.BroadcastStr#", $src1",
5058                     "$src1, ${src2}"#_Brdct.BroadcastStr,
5059                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5060                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
5061                    AVX512BIBase, EVEX_4V, EVEX_B,
5062                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5063}
5064
// Integer arithmetic instruction instantiations.
// For avx512_binop_rm_vl_all the opcode arguments are (byte, word, dword,
// qword); for avx512_binop_rm_vl_bw they are (byte, word). In every
// instantiation the final 1/0 argument is IsCommutable: 1 for the add,
// multiply and average operations, 0 for the subtractions.
5065defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
5066                                    SchedWriteVecALU, 1>;
5067defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
5068                                    SchedWriteVecALU, 0>;
5069defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
5070                                    SchedWriteVecALU, HasBWI, 1>;
5071defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
5072                                    SchedWriteVecALU, HasBWI, 0>;
5073defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
5074                                     SchedWriteVecALU, HasBWI, 1>;
5075defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
5076                                     SchedWriteVecALU, HasBWI, 0>;
5077defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
5078                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
5079defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
5080                                    SchedWriteVecIMul, HasBWI, 1>;
// VPMULLQ shares opcode 0x40 with VPMULLD (the qword wrapper adds REX_W); it
// requires HasDQI and is marked NotEVEX2VEXConvertible.
5081defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
5082                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
5083                                    NotEVEX2VEXConvertible;
5084defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
5085                                    HasBWI, 1>;
5086defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
5087                                     HasBWI, 1>;
5088defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
5089                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
5090defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", avgceilu,
5091                                   SchedWriteVecALU, HasBWI, 1>;
5092defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
5093                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
5094defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
5095                                     SchedWriteVecIMul, HasAVX512, 1>;
5096
// Instantiates avx512_binop_rm2 at all three vector lengths with separate
// source and destination type infos. Regardless of _SrcVTInfo/_DstVTInfo, the
// broadcast type info is always the i64 one for the given width (v8i64_info,
// v4i64x_info, v2i64x_info), and every width is tagged EVEX_CD8<64, CD8VF>
// and REX_W. Z requires [prd]; Z256/Z128 require [HasVLX, prd].
5097multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
5098                            X86SchedWriteWidths sched,
5099                            AVX512VLVectorVTInfo _SrcVTInfo,
5100                            AVX512VLVectorVTInfo _DstVTInfo,
5101                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
5102  let Predicates = [prd] in
5103    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
5104                                 _SrcVTInfo.info512, _DstVTInfo.info512,
5105                                 v8i64_info, IsCommutable>,
5106                                  EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
5107  let Predicates = [HasVLX, prd] in {
5108    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
5109                                      _SrcVTInfo.info256, _DstVTInfo.info256,
5110                                      v4i64x_info, IsCommutable>,
5111                                      EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
5112    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
5113                                      _SrcVTInfo.info128, _DstVTInfo.info128,
5114                                      v2i64x_info, IsCommutable>,
5115                                     EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
5116  }
5117}
5118
// vpmultishiftqb: source and destination both use the i8 type info, matched
// on X86multishift, gated on HasVBMI, not commutable. Through
// avx512_binop_all the broadcast form uses the i64 type info.
5119defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
5120                                avx512vl_i8_info, avx512vl_i8_info,
5121                                X86multishift, HasVBMI, 0>, T8PD;
5122
// Broadcast-memory (rmb) form for pack-style operations whose result type
// (_Dst) differs from the source type (_Src). The broadcast operand uses the
// source type's ScalarMemOp / BroadcastLdFrag / BroadcastStr, and the
// compressed-displacement scale comes from _Src.EltSize.
5123multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5124                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
5125                            X86FoldableSchedWrite sched> {
5126  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5127                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
5128                    OpcodeStr,
5129                    "${src2}"#_Src.BroadcastStr#", $src1",
5130                     "$src1, ${src2}"#_Src.BroadcastStr,
5131                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5132                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
5133                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
5134                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5135}
5136
// Register (rr) and full-vector memory (rm) forms for pack-style operations
// with distinct source (_Src) and destination (_Dst) type infos.
// IsCommutable is forwarded to both trailing AVX512_maskable arguments of the
// rr form only. EVEX_CD8 is derived from _Src.EltSize for both forms.
5137multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
5138                            SDNode OpNode,X86VectorVTInfo _Src,
5139                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
5140                            bit IsCommutable = 0> {
5141  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5142                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5143                            "$src2, $src1","$src1, $src2",
5144                            (_Dst.VT (OpNode
5145                                         (_Src.VT _Src.RC:$src1),
5146                                         (_Src.VT _Src.RC:$src2))),
5147                            IsCommutable, IsCommutable>,
5148                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
5149  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5150                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5151                        "$src2, $src1", "$src1, $src2",
5152                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5153                                      (_Src.LdFrag addr:$src2)))>,
5154                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
5155                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5156}
5157
// Instantiates an i32-source / i16-result op at all three vector widths.
// Every width gets the rr/rm forms (avx512_packs_rm) plus the broadcast rmb
// form (avx512_packs_rmb), scheduled with SchedWriteShuffle.
//   NAME#Z    - v16i32_info -> v32i16_info, EVEX_V512, requires HasBWI.
//   NAME#Z256 - v8i32x_info -> v16i16x_info, EVEX_V256, requires HasBWI+HasVLX.
//   NAME#Z128 - v4i32x_info -> v8i16x_info,  EVEX_V128, requires HasBWI+HasVLX.
5158multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
5159                                    SDNode OpNode> {
5160  let Predicates = [HasBWI] in
5161  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
5162                                 v32i16_info, SchedWriteShuffle.ZMM>,
5163                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
5164                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
5165  let Predicates = [HasBWI, HasVLX] in {
5166    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
5167                                     v16i16x_info, SchedWriteShuffle.YMM>,
5168                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
5169                                      v16i16x_info, SchedWriteShuffle.YMM>,
5170                                      EVEX_V256;
5171    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
5172                                     v8i16x_info, SchedWriteShuffle.XMM>,
5173                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
5174                                      v8i16x_info, SchedWriteShuffle.XMM>,
5175                                      EVEX_V128;
5176  }
5177}
// Instantiates an i16-source / i8-result op at all three vector widths.
// Unlike the i32->i16 variant, only the rr/rm forms (avx512_packs_rm) are
// created -- no broadcast form -- and every record is additionally tagged WIG.
//   NAME#Z    - v32i16_info  -> v64i8_info,  EVEX_V512, requires HasBWI.
//   NAME#Z256 - v16i16x_info -> v32i8x_info, EVEX_V256, requires HasBWI+HasVLX.
//   NAME#Z128 - v8i16x_info  -> v16i8x_info, EVEX_V128, requires HasBWI+HasVLX.
5178multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
5179                            SDNode OpNode> {
5180  let Predicates = [HasBWI] in
5181  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
5182                                SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
5183  let Predicates = [HasBWI, HasVLX] in {
5184    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
5185                                     v32i8x_info, SchedWriteShuffle.YMM>,
5186                                     EVEX_V256, WIG;
5187    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
5188                                     v16i8x_info, SchedWriteShuffle.XMM>,
5189                                     EVEX_V128, WIG;
5190  }
5191}
5192
// Instantiates a multiply-add style op at all three vector widths by reusing
// avx512_packs_rm (rr/rm forms) with SchedWriteVecIMul scheduling.
//   _Src / _Dst  - per-width type info bundles for the operands / the result;
//                  info512, info256 and info128 are picked for Z, Z256, Z128.
//   IsCommutable - forwarded to avx512_packs_rm for every width.
// The 512-bit form requires HasBWI; the 256/128-bit forms also need HasVLX.
5193multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
5194                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
5195                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
5196  let Predicates = [HasBWI] in
5197  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
5198                                _Dst.info512, SchedWriteVecIMul.ZMM,
5199                                IsCommutable>, EVEX_V512;
5200  let Predicates = [HasBWI, HasVLX] in {
5201    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
5202                                     _Dst.info256, SchedWriteVecIMul.YMM,
5203                                     IsCommutable>, EVEX_V256;
5204    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
5205                                     _Dst.info128, SchedWriteVecIMul.XMM,
5206                                     IsCommutable>, EVEX_V128;
5207  }
5208}
5209
// Pack instructions. The *DW forms use the i32->i16 multiclass, the *WB forms
// the i16->i8 multiclass; X86Packss / X86Packus select the matched node.
// Note that VPACKUSDW alone uses the AVX5128IBase format class; the other
// three use AVX512BIBase.
5210defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
5211defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
5212defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
5213defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
5214
// Multiply-add instructions built on avx512_vpmadd.
// VPMADDUBSW: i8 sources, i16 result, IsCommutable left at its default (0).
// VPMADDWD:   i16 sources, i32 result, IsCommutable = 1.
5215defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
5216                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, WIG;
5217defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
5218                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
5219
// Integer min/max instructions, four groups of b/w/d/q element sizes, mapped
// onto the generic smax/umax/smin/umin nodes with SchedWriteVecALU.
// The byte and word forms are gated on HasBWI, the dword and qword forms on
// HasAVX512. Every qword form is tagged NotEVEX2VEXConvertible.
// NOTE(review): the trailing 1 is presumably the IsCommutable flag of the
// avx512_binop_rm_vl_* multiclasses (defined outside this section) -- confirm.

// Signed maximum.
5220defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
5221                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5222defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
5223                                    SchedWriteVecALU, HasBWI, 1>;
5224defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
5225                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5226defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
5227                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5228                                    NotEVEX2VEXConvertible;
5229
// Unsigned maximum.
5230defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
5231                                    SchedWriteVecALU, HasBWI, 1>;
5232defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
5233                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5234defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
5235                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5236defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
5237                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5238                                    NotEVEX2VEXConvertible;
5239
// Signed minimum.
5240defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
5241                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5242defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
5243                                    SchedWriteVecALU, HasBWI, 1>;
5244defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5245                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5246defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5247                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5248                                    NotEVEX2VEXConvertible;
5249
// Unsigned minimum.
5250defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5251                                    SchedWriteVecALU, HasBWI, 1>;
5252defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5253                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5254defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5255                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5256defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5257                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5258                                    NotEVEX2VEXConvertible;
5259
5260// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern places the narrow register operand(s) into the low part of an
// otherwise undefined v8i64 value (INSERT_SUBREG over IMPLICIT_DEF), runs the
// 512-bit VPMULLQZ instruction and extracts the low ymm/xmm subregister.
// The broadcast-load variants pass the address straight to VPMULLQZrmb.
5261let Predicates = [HasDQI, NoVLX] in {
  // 256-bit (v4i64) multiply: register-register, then register-broadcast.
5262  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5263            (EXTRACT_SUBREG
5264                (VPMULLQZrr
5265                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5266                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5267             sub_ymm)>;
5268  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5269            (EXTRACT_SUBREG
5270                (VPMULLQZrmb
5271                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5272                    addr:$src2),
5273             sub_ymm)>;
5274
  // 128-bit (v2i64) multiply: register-register, then register-broadcast.
5275  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5276            (EXTRACT_SUBREG
5277                (VPMULLQZrr
5278                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5279                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5280             sub_xmm)>;
5281  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5282            (EXTRACT_SUBREG
5283                (VPMULLQZrmb
5284                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5285                    addr:$src2),
5286             sub_xmm)>;
5287}
5288
// Emits patterns that implement a 128/256-bit i64 binary op with the 512-bit
// instruction: operands are inserted into an undefined v8i64 value, the wide
// instruction is run, and the low ymm/xmm subregister is extracted.
//   Instr  - base name of the 512-bit instruction; "rr" and "rmb" are appended
//            to select the register and broadcast-memory forms.
//   OpNode - SelectionDAG node to match.
// This multiclass sets no predicates itself; the user is expected to wrap
// the defm in a suitable `let Predicates`.
5289multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // 256-bit (v4i64): register-register, then register-broadcast.
5290  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5291            (EXTRACT_SUBREG
5292                (!cast<Instruction>(Instr#"rr")
5293                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5294                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5295             sub_ymm)>;
5296  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5297            (EXTRACT_SUBREG
5298                (!cast<Instruction>(Instr#"rmb")
5299                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5300                    addr:$src2),
5301             sub_ymm)>;
5302
  // 128-bit (v2i64): register-register, then register-broadcast.
5303  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5304            (EXTRACT_SUBREG
5305                (!cast<Instruction>(Instr#"rr")
5306                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5307                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5308             sub_xmm)>;
5309  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5310            (EXTRACT_SUBREG
5311                (!cast<Instruction>(Instr#"rmb")
5312                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5313                    addr:$src2),
5314             sub_xmm)>;
5315}
5316
// Without VLX, lower 128/256-bit i64 min/max through the 512-bit qword
// instructions using the widening patterns of avx512_min_max_lowering.
5317let Predicates = [HasAVX512, NoVLX] in {
5318  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5319  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5320  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5321  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5322}
5323
5324//===----------------------------------------------------------------------===//
5325// AVX-512  Logical Instructions
5326//===----------------------------------------------------------------------===//
5327
// Bitwise logic instructions, each instantiated through
// avx512_binop_rm_vl_dq with the same opcode given twice (presumably one per
// D and Q element-size variant -- confirm against that multiclass).
// VPAND/VPOR/VPXOR pass a trailing 1; VPANDN omits it.
// NOTE(review): that argument is presumably the IsCommutable flag.
5328defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5329                                   SchedWriteVecLogic, HasAVX512, 1>;
5330defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5331                                  SchedWriteVecLogic, HasAVX512, 1>;
5332defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5333                                   SchedWriteVecLogic, HasAVX512, 1>;
5334defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5335                                    SchedWriteVecLogic, HasAVX512>;
5336
// With VLX: select the Q-suffixed 128/256-bit logic instructions for vectors
// of byte and word elements (v16i8, v8i16, v32i8, v16i16). Each width has
// register-register patterns followed by register-memory patterns that fold a
// full-vector load of the matching type.
5337let Predicates = [HasVLX] in {
  // 128-bit, register-register.
5338  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5339            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5340  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5341            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5342
5343  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5344            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5345  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5346            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5347
5348  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5349            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5350  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5351            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5352
5353  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5354            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5355  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5356            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5357
  // 128-bit, register-memory.
5358  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5359            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5360  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5361            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5362
5363  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5364            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5365  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5366            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5367
5368  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5369            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5370  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5371            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5372
5373  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5374            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5375  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5376            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5377
  // 256-bit, register-register.
5378  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5379            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5380  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5381            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5382
5383  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5384            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5385  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5386            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5387
5388  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5389            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5390  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5391            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5392
5393  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5394            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5395  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5396            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5397
  // 256-bit, register-memory.
5398  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5399            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5400  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5401            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5402
5403  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5404            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5405  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5406            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5407
5408  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5409            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5410  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5411            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5412
5413  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5414            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5415  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5416            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5417}
5418
// 512-bit counterpart of the byte/word logic patterns: v64i8 and v32i16
// and/or/xor/andnp are selected onto the Q-suffixed 512-bit instructions,
// first in register-register form, then with a folded full-vector load.
5419let Predicates = [HasAVX512] in {
  // Register-register.
5420  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5421            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5422  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5423            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5424
5425  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5426            (VPORQZrr VR512:$src1, VR512:$src2)>;
5427  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5428            (VPORQZrr VR512:$src1, VR512:$src2)>;
5429
5430  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5431            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5432  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5433            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5434
5435  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5436            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5437  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5438            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5439
  // Register-memory.
5440  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5441            (VPANDQZrm VR512:$src1, addr:$src2)>;
5442  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5443            (VPANDQZrm VR512:$src1, addr:$src2)>;
5444
5445  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5446            (VPORQZrm VR512:$src1, addr:$src2)>;
5447  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5448            (VPORQZrm VR512:$src1, addr:$src2)>;
5449
5450  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5451            (VPXORQZrm VR512:$src1, addr:$src2)>;
5452  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5453            (VPXORQZrm VR512:$src1, addr:$src2)>;
5454
5455  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5456            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5457  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5458            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5459}
5460
5461// Patterns to catch vselect with different type than logic op.
// The logic op is performed in IntInfo.VT, bitconverted, and then selected
// under a mask in _.VT. Four patterns are emitted per instantiation:
//   rrk  / rmk  - merge-masking: unselected lanes come from $src0.
//   rrkz / rmkz - zero-masking: the false operand is _.ImmAllZerosV.
//   InstrStr - instruction base name the rrk/rrkz/rmk/rmkz suffix is added to.
//   OpNode   - logic SelectionDAG node to match.
//   _        - type info of the vselect (mask class, register class, VT).
//   IntInfo  - type info of the inner logic operation.
5462multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5463                                    X86VectorVTInfo _,
5464                                    X86VectorVTInfo IntInfo> {
5465  // Masked register-register logical operations.
5466  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5467                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5468                   _.RC:$src0)),
5469            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5470             _.RC:$src1, _.RC:$src2)>;
5471
5472  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5473                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5474                   _.ImmAllZerosV)),
5475            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5476             _.RC:$src2)>;
5477
5478  // Masked register-memory logical operations.
5479  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5480                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5481                                            (load addr:$src2)))),
5482                   _.RC:$src0)),
5483            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5484             _.RC:$src1, addr:$src2)>;
5485  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5486                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5487                                            (load addr:$src2)))),
5488                   _.ImmAllZerosV)),
5489            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5490             addr:$src2)>;
5491}
5492
// Broadcast-memory counterpart of the masked logic lowering: matches a masked
// select (in _.VT) of a bitconverted logic op (in IntInfo.VT) whose second
// operand is a broadcast load (IntInfo.BroadcastLdFrag), and selects the
// rmbk (merge-masking, passthrough $src0) or rmbkz (zero-masking) instruction.
// Parameters have the same meaning as in avx512_logical_lowering.
5493multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5494                                         X86VectorVTInfo _,
5495                                         X86VectorVTInfo IntInfo> {
5496  // Register-broadcast logical operations.
5497  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5498                   (bitconvert
5499                    (IntInfo.VT (OpNode _.RC:$src1,
5500                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5501                   _.RC:$src0)),
5502            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5503             _.RC:$src1, addr:$src2)>;
5504  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5505                   (bitconvert
5506                    (IntInfo.VT (OpNode _.RC:$src1,
5507                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5508                   _.ImmAllZerosV)),
5509            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5510             _.RC:$src1, addr:$src2)>;
5511}
5512
// Expands avx512_logical_lowering over the three vector widths, appending
// "Z128"/"Z256"/"Z" to InstrStr and picking the matching info128/256/512.
// The 128/256-bit patterns require HasVLX; the 512-bit ones HasAVX512.
//   SelectInfo - per-width type info for the vselect.
//   IntInfo    - per-width type info for the logic op.
5513multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5514                                         AVX512VLVectorVTInfo SelectInfo,
5515                                         AVX512VLVectorVTInfo IntInfo> {
5516let Predicates = [HasVLX] in {
5517  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5518                                 IntInfo.info128>;
5519  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5520                                 IntInfo.info256>;
5521}
5522let Predicates = [HasAVX512] in {
5523  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5524                                 IntInfo.info512>;
5525}
5526}
5527
// Expands avx512_logical_lowering_bcast over the three vector widths,
// appending "Z128"/"Z256"/"Z" to InstrStr and picking info128/256/512.
// The 128/256-bit patterns require HasVLX; the 512-bit ones HasAVX512.
5528multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5529                                               AVX512VLVectorVTInfo SelectInfo,
5530                                               AVX512VLVectorVTInfo IntInfo> {
5531let Predicates = [HasVLX] in {
5532  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5533                                       SelectInfo.info128, IntInfo.info128>;
5534  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5535                                       SelectInfo.info256, IntInfo.info256>;
5536}
5537let Predicates = [HasAVX512] in {
5538  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5539                                       SelectInfo.info512, IntInfo.info512>;
5540}
5541}
5542
// Emits the masked-logic lowering patterns for every combination of vselect
// element type and logic-op element type handled here. InstrStr gets a "Q"
// suffix when the select type has 64-bit elements (i64/f64) and a "D" suffix
// when it has 32-bit elements (i32/f32). Same-type integer combinations
// (i64/i64, i32/i32) are not listed in this multiclass.
5543multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5544  // i64 vselect with i32/i16/i8 logic op
5545  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5546                                       avx512vl_i32_info>;
5547  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5548                                       avx512vl_i16_info>;
5549  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5550                                       avx512vl_i8_info>;
5551
5552  // i32 vselect with i64/i16/i8 logic op
5553  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5554                                       avx512vl_i64_info>;
5555  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5556                                       avx512vl_i16_info>;
5557  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5558                                       avx512vl_i8_info>;
5559
5560  // f32 vselect with i64/i32/i16/i8 logic op
5561  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5562                                       avx512vl_i64_info>;
5563  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5564                                       avx512vl_i32_info>;
5565  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5566                                       avx512vl_i16_info>;
5567  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5568                                       avx512vl_i8_info>;
5569
5570  // f64 vselect with i64/i32/i16/i8 logic op
5571  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5572                                       avx512vl_i64_info>;
5573  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5574                                       avx512vl_i32_info>;
5575  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5576                                       avx512vl_i16_info>;
5577  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5578                                       avx512vl_i8_info>;
5579
  // Broadcast-load patterns: only f32 select with i32 logic op and f64 select
  // with i64 logic op, i.e. where select and logic element sizes agree.
5580  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5581                                             avx512vl_f32_info,
5582                                             avx512vl_i32_info>;
5583  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5584                                             avx512vl_f64_info,
5585                                             avx512vl_i64_info>;
5586}
5587
// Emit the masked-logic lowering patterns for each of the four logic ops.
5588defm : avx512_logical_lowering_types<"VPAND", and>;
5589defm : avx512_logical_lowering_types<"VPOR",  or>;
5590defm : avx512_logical_lowering_types<"VPXOR", xor>;
5591defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5592
5593//===----------------------------------------------------------------------===//
5594// AVX-512  FP arithmetic
5595//===----------------------------------------------------------------------===//
5596
// Defines the scalar floating-point forms of a binary op. Every record is
// marked as using MXCSR and as possibly raising an FP exception.
//   rr_Int / rm_Int - operate on the vector register class (_.RC), matched
//                     through VecNode, built with AVX512_maskable_scalar.
//   rr / rm         - isCodeGenOnly forms on the scalar FP register class
//                     (_.FRC), matched through OpNode, gated on HasAVX512.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   _            - type info supplying register classes, operands, fragments.
//   sched        - scheduling info (sched for rr, sched.Folded for rm).
//   IsCommutable - applied to the codegen-only rr form only.
5597multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5598                            SDPatternOperator OpNode, SDNode VecNode,
5599                            X86FoldableSchedWrite sched, bit IsCommutable> {
5600  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5601  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5602                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5603                           "$src2, $src1", "$src1, $src2",
5604                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5605                           Sched<[sched]>;
5606
5607  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5608                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5609                         "$src2, $src1", "$src1, $src2",
5610                         (_.VT (VecNode _.RC:$src1,
5611                                        (_.ScalarIntMemFrags addr:$src2)))>,
5612                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Codegen-only forms that work directly on scalar FP registers.
5613  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5614  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5615                         (ins _.FRC:$src1, _.FRC:$src2),
5616                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5617                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5618                          Sched<[sched]> {
5619    let isCommutable = IsCommutable;
5620  }
5621  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5622                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5623                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5624                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5625                         (_.ScalarLdFrag addr:$src2)))]>,
5626                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5627  }
5628  }
5629}
5630
// Defines the rrb_Int form of a scalar FP binary op: a register-register
// variant that takes an extra AVX512RC:$rc operand, passed to VecNode as an
// i32 target immediate. The record is tagged EVEX_B and EVEX_RC and uses
// MXCSR; mayRaiseFPException is not set in this multiclass.
5631multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5632                                  SDNode VecNode, X86FoldableSchedWrite sched> {
5633  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5634  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5635                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5636                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5637                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5638                          (i32 timm:$rc))>,
5639                          EVEX_B, EVEX_RC, Sched<[sched]>;
5640}
// Defines the scalar FP forms of a binary op that has a "{sae}" variant
// instead of a rounding-control variant.
//   rr_Int / rm_Int - vector-register forms matched through VecNode, tagged
//                     SIMD_EXC.
//   rr / rm         - isCodeGenOnly forms on _.FRC matched through OpNode;
//                     they use MXCSR, may raise FP exceptions, and carry an
//                     EVEX2VEXOverride built from EVEX2VexOvrd # "rr"/"rm".
//   rrb_Int         - "{sae}" form matched through SaeNode, tagged EVEX_B;
//                     uses MXCSR but mayRaiseFPException is not set on it here.
//   IsCommutable    - applied to the codegen-only rr form only.
5641multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5642                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5643                                X86FoldableSchedWrite sched, bit IsCommutable,
5644                                string EVEX2VexOvrd> {
5645  let ExeDomain = _.ExeDomain in {
5646  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5647                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5648                           "$src2, $src1", "$src1, $src2",
5649                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5650                           Sched<[sched]>, SIMD_EXC;
5651
5652  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5653                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5654                         "$src2, $src1", "$src1, $src2",
5655                         (_.VT (VecNode _.RC:$src1,
5656                                        (_.ScalarIntMemFrags addr:$src2)))>,
5657                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5658
  // Codegen-only forms that work directly on scalar FP registers.
5659  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5660      Uses = [MXCSR], mayRaiseFPException = 1 in {
5661  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5662                         (ins _.FRC:$src1, _.FRC:$src2),
5663                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5664                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5665                          Sched<[sched]>,
5666                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5667    let isCommutable = IsCommutable;
5668  }
5669  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5670                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5671                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5672                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5673                         (_.ScalarLdFrag addr:$src2)))]>,
5674                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5675                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5676  }
5677
  // "{sae}" register-register form.
5678  let Uses = [MXCSR] in
5679  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5680                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5681                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5682                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5683                            EVEX_B, Sched<[sched]>;
5684  }
5685}
5686
// Instantiates a scalar FP binary op with a rounding-control variant for
// three element types, combining avx512_fp_scalar and avx512_fp_scalar_round:
//   SSZ - f32x_info, mnemonic suffix "ss", XS prefix, EVEX_CD8<32, CD8VT1>.
//   SDZ - f64x_info, mnemonic suffix "sd", XD prefix, REX_W,
//         EVEX_CD8<64, CD8VT1>.
//   SHZ - f16x_info, mnemonic suffix "sh", T_MAP5XS, EVEX_CD8<16, CD8VT1>;
//         additionally gated on HasFP16.
//   OpNode / VecNode - nodes for the FRC and vector-register forms.
//   RndNode          - node for the rounding-control (rrb_Int) form.
//   sched            - per-type scheduling bundle (PS/PD/PH .Scl are used).
5687multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5688                                SDNode VecNode, SDNode RndNode,
5689                                X86SchedWriteSizes sched, bit IsCommutable> {
5690  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5691                              sched.PS.Scl, IsCommutable>,
5692             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5693                              sched.PS.Scl>,
5694                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5695  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5696                              sched.PD.Scl, IsCommutable>,
5697             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5698                              sched.PD.Scl>,
5699                              XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5700  let Predicates = [HasFP16] in
5701    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5702                                VecNode, sched.PH.Scl, IsCommutable>,
5703               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5704                                sched.PH.Scl>,
5705                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5706}
5707
// Instantiates a scalar FP binary op with a "{sae}" variant for three element
// types through avx512_fp_scalar_sae:
//   SSZ - f32x_info, suffix "ss", EVEX2VEX override name NAME#"SS".
//   SDZ - f64x_info, suffix "sd", REX_W, override name NAME#"SD".
//   SHZ - f16x_info, suffix "sh", gated on HasFP16, override name NAME#"SH",
//         and additionally tagged NotEVEX2VEXConvertible.
//   OpNode / VecNode - nodes for the FRC and vector-register forms.
//   SaeNode          - node for the "{sae}" (rrb_Int) form.
5708multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5709                              SDNode VecNode, SDNode SaeNode,
5710                              X86SchedWriteSizes sched, bit IsCommutable> {
5711  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5712                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5713                              NAME#"SS">,
5714                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5715  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5716                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5717                              NAME#"SD">,
5718                              XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
5719  let Predicates = [HasFP16] in {
5720    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5721                                VecNode, SaeNode, sched.PH.Scl, IsCommutable,
5722                                NAME#"SH">,
5723                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
5724                                NotEVEX2VEXConvertible;
5725  }
5726}
// Scalar FP arithmetic instantiations.
// vadd/vmul/vsub/vdiv use avx512_binop_s_round (with the X86f*Rnds nodes);
// vmin/vmax use avx512_binop_s_sae (with the X86f*SAEs nodes).
// The last argument is IsCommutable: 1 for vadd and vmul, 0 for the rest.
5727defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5728                                 SchedWriteFAddSizes, 1>;
5729defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5730                                 SchedWriteFMulSizes, 1>;
5731defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5732                                 SchedWriteFAddSizes, 0>;
5733defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5734                                 SchedWriteFDivSizes, 0>;
5735defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5736                               SchedWriteFCmpSizes, 0>;
5737defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5738                               SchedWriteFCmpSizes, 0>;
5739
5740// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5741// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
//
// This multiclass emits two codegen-only (isCodeGenOnly = 1) scalar records
// under HasAVX512, both operating on the scalar register class _.FRC:
//   rr - register/register form; explicitly marked isCommutable = 1.
//   rm - register/memory form; $src2 is loaded through _.ScalarLdFrag and the
//        record uses the folded scheduling classes.
// EVEX2VEXOvrd is a name prefix: "rr"/"rm" is appended to it and the result is
// handed to EVEX2VEXOverride.
5742multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5743                                    X86VectorVTInfo _, SDNode OpNode,
5744                                    X86FoldableSchedWrite sched,
5745                                    string EVEX2VEXOvrd> {
5746  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5747  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5748                         (ins _.FRC:$src1, _.FRC:$src2),
5749                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5750                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5751                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5752    let isCommutable = 1;
5753  }
5754  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5755                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5756                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5757                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5758                         (_.ScalarLdFrag addr:$src2)))]>,
5759                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5760                         EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5761  }
5762}
// Commutable scalar min/max instantiations (X86fminc / X86fmaxc).
// They reuse the vmin/vmax opcodes (0x5D / 0x5F) and mnemonics, and all use
// SchedWriteFCmp.Scl and SIMD_EXC. Per element type:
//   SS - f32x_info, XS,             EVEX_CD8<32, CD8VT1>
//   SD - f64x_info, XD + REX_W,     EVEX_CD8<64, CD8VT1>
//   SH - f16x_info, T_MAP5XS,       EVEX_CD8<16, CD8VT1>, NotEVEX2VEXConvertible
// The string argument ("VMINCSS", ...) is the EVEX2VEXOvrd name prefix.
5763defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5764                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
5765                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5766
5767defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5768                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
5769                                         REX_W, EVEX_4V, VEX_LIG,
5770                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5771
5772defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5773                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5774                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5775
5776defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5777                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5778                                         REX_W, EVEX_4V, VEX_LIG,
5779                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5780
5781defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5782                                         SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
5783                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5784                                         NotEVEX2VEXConvertible;
5785defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5786                                         SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
5787                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5788                                         NotEVEX2VEXConvertible;
5789
// Packed FP binary op for a single vector type `_`. Emits three
// AVX512_maskable_split instantiations:
//   rr  - register/register.
//   rm  - register/memory, full-vector load via _.LdFrag.
//   rmb - register/memory with a broadcast load via _.BroadcastLdFrag; tagged
//         EVEX_B and printed with _.BroadcastStr.
// OpNode and MaskOpNode are passed as the two pattern arguments of
// AVX512_maskable_split (presumably the unmasked and masked selection
// patterns -- confirm against AVX512_maskable_split).
// Defaults: IsKCommutable = IsCommutable, suffix = _.Suffix,
// ClobberConstraint = "", MayRaiseFPException = 1.
// Only the rr form receives the commutable flags; rm/rmb stop at
// ClobberConstraint. All three are wrapped in Uses = [MXCSR] with
// mayRaiseFPException taken from the MayRaiseFPException parameter.
5790multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5791                            SDPatternOperator MaskOpNode,
5792                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5793                            bit IsCommutable,
5794                            bit IsKCommutable = IsCommutable,
5795                            string suffix = _.Suffix,
5796                            string ClobberConstraint = "",
5797                            bit MayRaiseFPException = 1> {
5798  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5799      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5800  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5801                                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5802                                 "$src2, $src1", "$src1, $src2",
5803                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5804                                 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5805                                 IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
5806  let mayLoad = 1 in {
5807    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5808                                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5809                                   "$src2, $src1", "$src1, $src2",
5810                                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5811                                   (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5812                                   ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5813    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5814                                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5815                                    "${src2}"#_.BroadcastStr#", $src1",
5816                                    "$src1, ${src2}"#_.BroadcastStr,
5817                                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5818                                    (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5819                                    ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5820    }
5821  }
5822}
5823
// Packed FP binary op with an explicit rounding-control operand.
// Emits a single register/register record, rrb, whose inputs are $src1, $src2
// and AVX512RC:$rc; the pattern matches OpNodeRnd with $rc as (i32 timm).
// The record is tagged EVEX_B and EVEX_RC and uses MXCSR.
// suffix defaults to _.Suffix and ClobberConstraint to "".
// NOTE(review): the positional "0, 0, 0, vselect_mask" arguments are
// interpreted by AVX512_maskable, which is outside this chunk.
5824multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5825                                  SDPatternOperator OpNodeRnd,
5826                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
5827                                  string suffix = _.Suffix,
5828                                  string ClobberConstraint = ""> {
5829  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5830  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5831                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5832                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5833                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5834                  0, 0, 0, vselect_mask, ClobberConstraint>,
5835                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5836}
5837
// Packed FP binary op in its "{sae}" assembly form.
// Emits a single register/register record, rrb, with the literal "{sae}"
// token in both assembly operand strings and no extra input operand. The
// pattern matches OpNodeSAE on ($src1, $src2). Tagged EVEX_B; uses MXCSR.
5838multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5839                                SDPatternOperator OpNodeSAE,
5840                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5841  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5842  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5843                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5844                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5845                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5846                  EVEX_4V, EVEX_B, Sched<[sched]>;
5847}
5848
// Packed single/double FP binary op across all vector widths, built on
// avx512_fp_packed:
//   PSZ / PDZ                          - 512-bit, guarded by [prd].
//   PSZ128, PSZ256, PDZ128, PDZ256     - 128/256-bit, guarded by [prd, HasVLX].
// PS forms use the PS prefix and EVEX_CD8<32, CD8VF>; PD forms use PD + REX_W
// and EVEX_CD8<64, CD8VF>.
// IsCommutable defaults to 0 and IsPD128Commutable defaults to IsCommutable.
// PDZ128 is the only instantiation that differs: it passes IsPD128Commutable
// as avx512_fp_packed's IsCommutable and IsCommutable as its IsKCommutable.
5849multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5850                             SDPatternOperator MaskOpNode,
5851                             Predicate prd, X86SchedWriteSizes sched,
5852                             bit IsCommutable = 0,
5853                             bit IsPD128Commutable = IsCommutable> {
5854  let Predicates = [prd] in {
5855  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5856                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5857                              EVEX_CD8<32, CD8VF>;
5858  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5859                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, REX_W,
5860                              EVEX_CD8<64, CD8VF>;
5861  }
5862
5863    // Define only if AVX512VL feature is present.
5864  let Predicates = [prd, HasVLX] in {
5865    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5866                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5867                                   EVEX_CD8<32, CD8VF>;
5868    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5869                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5870                                   EVEX_CD8<32, CD8VF>;
5871    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5872                                   sched.PD.XMM, IsPD128Commutable,
5873                                   IsCommutable>, EVEX_V128, PD, REX_W,
5874                                   EVEX_CD8<64, CD8VF>;
5875    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5876                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, REX_W,
5877                                   EVEX_CD8<64, CD8VF>;
5878  }
5879}
5880
// Half-precision (f16) counterpart of the packed binary-op family, built on
// avx512_fp_packed:
//   PHZ            - v32f16_info, 512-bit, guarded by [HasFP16].
//   PHZ128, PHZ256 - v8f16x_info / v16f16x_info, guarded by [HasVLX, HasFP16].
// All forms use T_MAP5PS and EVEX_CD8<16, CD8VF>. IsCommutable defaults to 0.
5881multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5882                              SDPatternOperator MaskOpNode,
5883                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
5884  let Predicates = [HasFP16] in {
5885    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5886                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
5887                                EVEX_CD8<16, CD8VF>;
5888  }
5889  let Predicates = [HasVLX, HasFP16] in {
5890    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5891                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
5892                                   EVEX_CD8<16, CD8VF>;
5893    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5894                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
5895                                   EVEX_CD8<16, CD8VF>;
5896  }
5897}
5898
// Rounding-control (rrb) forms of a packed FP binary op, via
// avx512_fp_round_packed. Only 512-bit types are instantiated here:
//   PHZ - v32f16_info, T_MAP5PS, guarded by [HasFP16].
//   PSZ - v16f32_info, PS.
//   PDZ - v8f64_info,  PD + REX_W.
// No Predicates are set in this multiclass for PSZ/PDZ.
5899let Uses = [MXCSR] in
5900multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5901                                   X86SchedWriteSizes sched> {
5902  let Predicates = [HasFP16] in {
5903    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5904                                      v32f16_info>,
5905                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5906  }
5907  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5908                                    v16f32_info>,
5909                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5910  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5911                                    v8f64_info>,
5912                                    EVEX_V512, PD, REX_W,EVEX_CD8<64, CD8VF>;
5913}
5914
// "{sae}" (rrb) forms of a packed FP binary op, via avx512_fp_sae_packed.
// Only 512-bit types are instantiated here:
//   PHZ - v32f16_info, T_MAP5PS, guarded by [HasFP16].
//   PSZ - v16f32_info, PS.
//   PDZ - v8f64_info,  PD + REX_W.
// Note: the parameter is named OpNodeRnd but is forwarded as the OpNodeSAE
// argument of avx512_fp_sae_packed.
5915let Uses = [MXCSR] in
5916multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5917                                 X86SchedWriteSizes sched> {
5918  let Predicates = [HasFP16] in {
5919    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5920                                    v32f16_info>,
5921                                    EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5922  }
5923  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5924                                  v16f32_info>,
5925                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5926  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5927                                  v8f64_info>,
5928                                  EVEX_V512, PD, REX_W,EVEX_CD8<64, CD8VF>;
5929}
5930
// Packed FP arithmetic instantiations. Each defm combines three families:
//   avx512_fp_binop_p   - PS/PD forms under HasAVX512,
//   avx512_fp_binop_ph  - PH (f16) forms,
//   avx512_fp_binop_p_round (add/mul/sub/div) or
//   avx512_fp_binop_p_sae   (min/max) - 512-bit rrb forms.
// IsCommutable is 1 for vadd/vmul, left at its default (0) for vsub/vdiv, and
// passed explicitly as 0 for vmin/vmax.
5931defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5932                              SchedWriteFAddSizes, 1>,
5933            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5934            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5935defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5936                              SchedWriteFMulSizes, 1>,
5937            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5938            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5939defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5940                              SchedWriteFAddSizes>,
5941            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5942            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5943defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5944                              SchedWriteFDivSizes>,
5945            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5946            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5947defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5948                              SchedWriteFCmpSizes, 0>,
5949            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5950            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5951defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5952                              SchedWriteFCmpSizes, 0>,
5953            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5954            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Codegen-only commutable packed min/max (X86fminc / X86fmaxc).
// Same opcodes and mnemonics as VMIN/VMAX, but IsCommutable is 1 and no
// round/sae family is attached.
5955let isCodeGenOnly = 1 in {
5956  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5957                                 SchedWriteFCmpSizes, 1>,
5958               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5959                                 SchedWriteFCmpSizes, 1>;
5960  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5961                                 SchedWriteFCmpSizes, 1>,
5962               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5963                                 SchedWriteFCmpSizes, 1>;
5964}
// FP-domain logic ops (vand/vandn/vor/vxor), PS/PD forms only, guarded by
// HasDQI. Both pattern operators are null_frag, so avx512_fp_packed is given
// no selection pattern operator here. The enclosing let sets Uses to an empty
// register list and mayRaiseFPException to 0.
// NOTE(review): presumably this is meant to cancel the MXCSR / FP-exception
// settings applied inside avx512_fp_packed -- confirm the let precedence.
// IsCommutable is 1 for vand/vor/vxor and 0 for vandn.
5965let Uses = []<Register>, mayRaiseFPException = 0 in {
5966defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5967                               SchedWriteFLogicSizes, 1>;
5968defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5969                               SchedWriteFLogicSizes, 0>;
5970defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5971                               SchedWriteFLogicSizes, 1>;
5972defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5973                               SchedWriteFLogicSizes, 1>;
5974}
5975
// Packed scalef-style op for a single vector type `_`, using AVX512_maskable:
//   rr  - register/register.
//   rm  - register/memory, full-vector load via _.LdFrag.
//   rmb - register/memory broadcast load via _.BroadcastLdFrag; tagged EVEX_B.
// All three use MXCSR and are marked mayRaiseFPException = 1. No commutable
// flags are passed.
5976multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5977                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5978  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5979  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5980                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5981                  "$src2, $src1", "$src1, $src2",
5982                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5983                  EVEX_4V, Sched<[sched]>;
5984  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5985                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5986                  "$src2, $src1", "$src1, $src2",
5987                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5988                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5989  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5990                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5991                   "${src2}"#_.BroadcastStr#", $src1",
5992                   "$src1, ${src2}"#_.BroadcastStr,
5993                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5994                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5995  }
5996}
5997
// Scalar scalef-style op for a single type `_`, using AVX512_maskable_scalar:
//   rr - register/register on _.RC.
//   rm - register/memory; $src2 is an _.IntScalarMemOp matched through
//        _.ScalarIntMemFrags.
// Both use MXCSR and are marked mayRaiseFPException = 1. EVEX_4V is not
// applied here; it is supplied at the instantiation site.
5998multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5999                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6000  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
6001  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6002                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
6003                  "$src2, $src1", "$src1, $src2",
6004                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
6005                  Sched<[sched]>;
6006  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6007                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
6008                  "$src2, $src1", "$src1, $src2",
6009                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
6010                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6011  }
6012}
6013
// Full scalef family for one mnemonic. `opc` is used for the packed forms and
// `opcScaler` for the scalar forms.
//   512-bit packed (PHZ/PSZ/PDZ): avx512_fp_scalef_p plus the rounding form
//     from avx512_fp_round_packed (X86scalefRnd).
//   Scalar (SHZ/SSZ/SDZ): avx512_fp_scalef_scalar plus avx512_fp_scalar_round
//     (X86scalefsRnd). The scalar_round part is passed
//     OpcodeStr#"sh"/"ss"/"sd"; the scalef_scalar part is passed the bare
//     OpcodeStr and appends _.Suffix itself.
//   128/256-bit packed: avx512_fp_scalef_p only (no rounding form), under
//     [HasVLX], or [HasFP16, HasVLX] for the f16 types.
// f16 forms use T_MAP6PD and are guarded by HasFP16; f32/f64 forms use T8PD
// (f64 adds REX_W). No Predicates are set in this multiclass for
// PSZ/PDZ/SSZ/SDZ.
6014multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
6015                                X86SchedWriteWidths sched> {
6016  let Predicates = [HasFP16] in {
6017    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
6018               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
6019                                EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
6020    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
6021               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
6022                             EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
6023  }
6024  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
6025             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
6026                              EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
6027  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
6028             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
6029                              EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
6030  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
6031             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
6032                                    X86scalefsRnd, sched.Scl>,
6033                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
6034  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
6035             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
6036                                    X86scalefsRnd, sched.Scl>,
6037                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8PD;
6038
6039  // Define only if AVX512VL feature is present.
6040  let Predicates = [HasVLX] in {
6041    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
6042                                   EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
6043    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
6044                                   EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
6045    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
6046                                   EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
6047    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
6048                                   EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
6049  }
6050
6051  let Predicates = [HasFP16, HasVLX] in {
6052    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
6053                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6054    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
6055                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6056  }
6057}
// vscalef: packed opcode 0x2C, scalar opcode 0x2D, scheduled as SchedWriteFAdd.
// Every generated record is tagged NotEVEX2VEXConvertible.
6058defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
6059                                    SchedWriteFAdd>, NotEVEX2VEXConvertible;
6060
6061//===----------------------------------------------------------------------===//
6062// AVX-512  VPTESTM instructions
6063//===----------------------------------------------------------------------===//
6064
// Vector test producing a mask register (_.KRC) for a single vector type `_`.
//   rr - register/register; the trailing 1 is passed to AVX512_maskable_cmp
//        (presumably its IsCommutable flag -- confirm against that multiclass).
//   rm - register/memory (mayLoad = 1), with EVEX_CD8<_.EltSize, CD8VF>.
// Both forms pass null_frag for their patterns; see the NOTE below.
6065multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
6066                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6067  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
6068  // There are just too many permutations due to commutability and bitcasts.
6069  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6070  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
6071                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6072                      "$src2, $src1", "$src1, $src2",
6073                   (null_frag), (null_frag), 1>,
6074                   EVEX_4V, Sched<[sched]>;
6075  let mayLoad = 1 in
6076  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6077                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6078                       "$src2, $src1", "$src1, $src2",
6079                   (null_frag), (null_frag)>,
6080                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6081                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6082  }
6083}
6084
// Broadcast-memory form (rmb) of the vector test for a single vector type `_`.
// $src2 is an _.ScalarMemOp printed with _.BroadcastStr; the record is tagged
// EVEX_B and EVEX_CD8<_.EltSize, CD8VF>. Patterns are null_frag, as in
// avx512_vptest.
6085multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
6086                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6087  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
6088  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6089                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6090                    "${src2}"#_.BroadcastStr#", $src1",
6091                    "$src1, ${src2}"#_.BroadcastStr,
6092                    (null_frag), (null_frag)>,
6093                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6094                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6095}
6096
// Instantiates avx512_vptest and avx512_vptest_mb for the three vector widths
// of an AVX512VLVectorVTInfo:
//   Z    - info512, guarded by [HasAVX512].
//   Z256 - info256, guarded by [HasAVX512, HasVLX].
//   Z128 - info128, guarded by [HasAVX512, HasVLX].
6097multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
6098                                  X86SchedWriteWidths sched,
6099                                  AVX512VLVectorVTInfo _> {
6100  let Predicates  = [HasAVX512] in
6101  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
6102           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
6103
6104  let Predicates = [HasAVX512, HasVLX] in {
6105  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
6106              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
6107  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
6108              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
6109  }
6110}
6111
// Dword/qword variants of the vector test: D appends "d" and uses
// avx512vl_i32_info; Q appends "q", uses avx512vl_i64_info and adds REX_W.
6112multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
6113                            X86SchedWriteWidths sched> {
6114  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
6115                                 avx512vl_i32_info>;
6116  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
6117                                 avx512vl_i64_info>, REX_W;
6118}
6119
// Word/byte variants of the vector test. Only avx512_vptest (rr/rm) is
// instantiated; no broadcast (rmb) form is defined here.
//   WZ / BZ           - 512-bit, guarded by [HasBWI].
//   W/B Z256 and Z128 - guarded by [HasVLX, HasBWI].
// Word forms append "w" and add REX_W; byte forms append "b".
6120multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
6121                            X86SchedWriteWidths sched> {
6122  let Predicates = [HasBWI] in {
6123  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
6124                            v32i16_info>, EVEX_V512, REX_W;
6125  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
6126                            v64i8_info>, EVEX_V512;
6127  }
6128
6129  let Predicates = [HasVLX, HasBWI] in {
6130  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
6131                            v16i16x_info>, EVEX_V256, REX_W;
6132  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
6133                            v8i16x_info>, EVEX_V128, REX_W;
6134  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
6135                            v32i8x_info>, EVEX_V256;
6136  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
6137                            v16i8x_info>, EVEX_V128;
6138  }
6139}
6140
// Combines the word/byte forms (opcode opc_wb) and the dword/qword forms
// (opcode opc_dq) of a vector test under one name by inheriting from both
// multiclasses.
6141multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
6142                                   X86SchedWriteWidths sched> :
6143  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
6144  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
6145
// vptestm and vptestnm share the same opcodes (0x26 for w/b, 0x27 for d/q) and
// scheduling class; they differ only in the prefix/map mix-in: T8PD vs T8XS.
6146defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
6147                                         SchedWriteVecLogic>, T8PD;
6148defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
6149                                         SchedWriteVecLogic>, T8XS;
6150
6151//===----------------------------------------------------------------------===//
6152// AVX-512  Shift instructions
6153//===----------------------------------------------------------------------===//
6154
// Shift-by-immediate for a single vector type `_`:
//   ri - register source, format ImmFormR.
//   mi - memory source loaded via _.LdFrag, format ImmFormM; scheduled with
//        sched.Folded only.
// The shift count is the u8imm operand $src2, matched as (i8 timm).
6155multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
6156                            string OpcodeStr, SDNode OpNode,
6157                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6158  let ExeDomain = _.ExeDomain in {
6159  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
6160                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
6161                      "$src2, $src1", "$src1, $src2",
6162                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
6163                   Sched<[sched]>;
6164  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6165                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
6166                       "$src2, $src1", "$src1, $src2",
6167                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
6168                          (i8 timm:$src2)))>,
6169                   Sched<[sched.Folded]>;
6170  }
6171}
6172
// Shift-by-immediate with a broadcast memory source (mbi) for a single vector
// type `_`. $src1 is an _.ScalarMemOp loaded via _.BroadcastLdFrag and printed
// with _.BroadcastStr; the record is tagged EVEX_B.
6173multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
6174                             string OpcodeStr, SDNode OpNode,
6175                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6176  let ExeDomain = _.ExeDomain in
6177  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6178                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
6179      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
6180     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
6181     EVEX_B, Sched<[sched.Folded]>;
6182}
6183
// Shift whose count comes from a vector operand, for a single vector type `_`:
//   rr - count in a VR128X register.
//   rm - count loaded from i128mem.
// The count operand is typed as SrcVT in both patterns, independent of the
// width of `_`. Both records use AVX512BIBase and EVEX_4V.
6184multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6185                            X86FoldableSchedWrite sched, ValueType SrcVT,
6186                            X86VectorVTInfo _> {
6187   // src2 is always 128-bit
6188  let ExeDomain = _.ExeDomain in {
6189  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6190                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
6191                      "$src2, $src1", "$src1, $src2",
6192                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
6193                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
6194  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6195                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
6196                       "$src2, $src1", "$src1, $src2",
6197                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
6198                   AVX512BIBase,
6199                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6200  }
6201}
6202
// Instantiates avx512_shift_rrm for the three vector widths of VTInfo:
//   Z    - info512, guarded by [prd],          EVEX_CD8<EltSize, CD8VQ>.
//   Z256 - info256, guarded by [prd, HasVLX],  EVEX_CD8<EltSize, CD8VH>.
//   Z128 - info128, guarded by [prd, HasVLX],  EVEX_CD8<EltSize, CD8VF>.
// The CD8 tuple kind differs per width while the count operand stays 128-bit
// (see avx512_shift_rrm).
6203multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6204                              X86SchedWriteWidths sched, ValueType SrcVT,
6205                              AVX512VLVectorVTInfo VTInfo,
6206                              Predicate prd> {
6207  let Predicates = [prd] in
6208  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
6209                               VTInfo.info512>, EVEX_V512,
6210                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
6211  let Predicates = [prd, HasVLX] in {
6212  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
6213                               VTInfo.info256>, EVEX_V256,
6214                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
6215  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
6216                               VTInfo.info128>, EVEX_V128,
6217                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
6218  }
6219}
6220
// Vector-count shift for the three element sizes, each with its own opcode:
//   D - opcd, "d" suffix, count type v4i32, avx512vl_i32_info, HasAVX512.
//   Q - opcq, "q" suffix, count type v2i64, avx512vl_i64_info, HasAVX512,
//       REX_W; notEVEX2VEXConvertible is set from NotEVEX2VEXConvertibleQ
//       (default 0).
//   W - opcw, "w" suffix, count type v8i16, avx512vl_i16_info, HasBWI.
6221multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
6222                              string OpcodeStr, SDNode OpNode,
6223                              X86SchedWriteWidths sched,
6224                              bit NotEVEX2VEXConvertibleQ = 0> {
6225  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
6226                              avx512vl_i32_info, HasAVX512>;
6227  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6228  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
6229                              avx512vl_i64_info, HasAVX512>, REX_W;
6230  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
6231                              avx512vl_i16_info, HasBWI>;
6232}
6233
// Shift-by-immediate across the three vector widths of VTInfo. Each width
// combines avx512_shift_rmi (ri/mi) with avx512_shift_rmbi (mbi):
//   Z    - info512, guarded by [HasAVX512].
//   Z256 - info256, guarded by [HasAVX512, HasVLX].
//   Z128 - info128, guarded by [HasAVX512, HasVLX].
6234multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6235                                  string OpcodeStr, SDNode OpNode,
6236                                  X86SchedWriteWidths sched,
6237                                  AVX512VLVectorVTInfo VTInfo> {
6238  let Predicates = [HasAVX512] in
6239  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6240                              sched.ZMM, VTInfo.info512>,
6241             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
6242                               VTInfo.info512>, EVEX_V512;
6243  let Predicates = [HasAVX512, HasVLX] in {
6244  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6245                              sched.YMM, VTInfo.info256>,
6246             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
6247                               VTInfo.info256>, EVEX_V256;
6248  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6249                              sched.XMM, VTInfo.info128>,
6250             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6251                               VTInfo.info128>, EVEX_V128;
6252  }
6253}
6254
// Word-element counterpart of avx512_shift_rmi_sizes: instantiates
// avx512_shift_rmi for v32i16_info, v16i16x_info and v8i16x_info, each tagged
// WIG.  Unlike avx512_shift_rmi_sizes, no avx512_shift_rmbi (broadcast) form is
// instantiated here.
// The 512-bit records require HasBWI; the 256/128-bit records require
// HasVLX and HasBWI.
6255multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6256                              string OpcodeStr, SDNode OpNode,
6257                              X86SchedWriteWidths sched> {
6258  let Predicates = [HasBWI] in
6259  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6260                               sched.ZMM, v32i16_info>, EVEX_V512, WIG;
6261  let Predicates = [HasVLX, HasBWI] in {
6262  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6263                               sched.YMM, v16i16x_info>, EVEX_V256, WIG;
6264  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6265                               sched.XMM, v8i16x_info>, EVEX_V128, WIG;
6266  }
6267}
6268
// Instantiates the dword (D) and qword (Q) immediate forms through
// avx512_shift_rmi_sizes, appending "d"/"q" to OpcodeStr and attaching
// EVEX_CD8<32, CD8VF> / EVEX_CD8<64, CD8VF> respectively.
//   opcd/opcq               - opcode byte for the D/Q variant.
//   NotEVEX2VEXConvertibleQ - value assigned to notEVEX2VEXConvertible on the
//                             Q variant only; the Q variant also gets REX_W.
6269multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6270                               Format ImmFormR, Format ImmFormM,
6271                               string OpcodeStr, SDNode OpNode,
6272                               X86SchedWriteWidths sched,
6273                               bit NotEVEX2VEXConvertibleQ = 0> {
6274  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6275                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6276  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6277  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6278                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
6279}
6280
// Shift/rotate-by-immediate instructions.  D and Q forms come from
// avx512_shift_rmi_dq; the shifts also get W forms from avx512_shift_rmi_w.
// Several records share an opcode byte (0x71/0x72/0x73), so the MRMnr/MRMnm
// format is what tells the operations apart:
//   MRM2 = vpsrl, MRM6 = vpsll, MRM4 = vpsra, MRM0 = vpror, MRM1 = vprol.
6281defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6282                                 SchedWriteVecShiftImm>,
6283             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6284                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6285
6286defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6287                                 SchedWriteVecShiftImm>,
6288             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6289                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6290
// VPSRA uses 0x72 for both D and Q and passes 1 for NotEVEX2VEXConvertibleQ,
// so its Q records are marked notEVEX2VEXConvertible.
// NOTE(review): presumably because vpsraq has no VEX-encoded counterpart -
// confirm.
6291defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6292                                 SchedWriteVecShiftImm, 1>,
6293             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6294                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6295
// Rotates are instantiated for D and Q only (no avx512_shift_rmi_w).
6296defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6297                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6298defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6299                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6300
// Shift forms built from avx512_shift_types (non-immediate X86vshl/X86vsra/
// X86vsrl nodes).  These reuse the VPSLL/VPSRA/VPSRL defm prefixes of the
// immediate forms above.  Template arguments are the D, Q and W opcode bytes in
// that order; VPSRA reuses 0xE2 for both D and Q and passes 1 for
// NotEVEX2VEXConvertibleQ.
6301defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6302                                SchedWriteVecShift>;
6303defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6304                                SchedWriteVecShift, 1>;
6305defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6306                                SchedWriteVecShift>;
6307
6308// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern below places the 128/256-bit source in the low sub-register of
// an otherwise undefined (IMPLICIT_DEF) v8i64, runs the 512-bit VPSRAQZrr or
// VPSRAQZri on it, and extracts the same low sub-register from the result.
// The shift operand ($src2) is passed through unchanged: a VR128X count for
// the X86vsra patterns and a timm immediate for the X86vsrai patterns.
6309let Predicates = [HasAVX512, NoVLX] in {
6310  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6311            (EXTRACT_SUBREG (v8i64
6312              (VPSRAQZrr
6313                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6314                 VR128X:$src2)), sub_ymm)>;
6315
6316  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6317            (EXTRACT_SUBREG (v8i64
6318              (VPSRAQZrr
6319                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6320                 VR128X:$src2)), sub_xmm)>;
6321
6322  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6323            (EXTRACT_SUBREG (v8i64
6324              (VPSRAQZri
6325                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6326                 timm:$src2)), sub_ymm)>;
6327
6328  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6329            (EXTRACT_SUBREG (v8i64
6330              (VPSRAQZri
6331                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6332                 timm:$src2)), sub_xmm)>;
6333}
6334
6335//===-------------------------------------------------------------------===//
6336// Variable Bit Shifts
6337//===-------------------------------------------------------------------===//
6338
// Defines the register-register (rr) and register-memory (rm) forms of a
// two-source vector operation through AVX512_maskable.  Despite the name, this
// multiclass is also reused further down for VPERM* and VPSHUFB.
//   _ - vector type info supplying RC, VT, MemOp, LdFrag, EltSize, ExeDomain.
// Both forms select (OpNode $src1, $src2); the rm form takes $src2 from a
// full-vector load (_.LdFrag), carries EVEX_CD8<_.EltSize, CD8VF>, and uses the
// folded scheduling classes.
// NOTE(review): the two operand strings are presumably the AT&T and Intel
// orderings - confirm against AVX512_maskable.
6339multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6340                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6341  let ExeDomain = _.ExeDomain in {
6342  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6343                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6344                      "$src2, $src1", "$src1, $src2",
6345                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6346                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
6347  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6348                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6349                       "$src2, $src1", "$src1, $src2",
6350                   (_.VT (OpNode _.RC:$src1,
6351                   (_.VT (_.LdFrag addr:$src2))))>,
6352                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6353                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6354  }
6355}
6356
// Defines the broadcast-memory (rmb) companion to avx512_var_shift: $src2 is a
// scalar memory operand (_.ScalarMemOp) matched through _.BroadcastLdFrag, and
// the record is tagged EVEX_B.  _.BroadcastStr is appended to the $src2
// operand in both assembly strings.
6357multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6358                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6359  let ExeDomain = _.ExeDomain in
6360  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6361                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6362                    "${src2}"#_.BroadcastStr#", $src1",
6363                    "$src1, ${src2}"#_.BroadcastStr,
6364                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6365                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6366                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6367}
6368
// Instantiates avx512_var_shift plus avx512_var_shift_mb at 512, 256 and 128
// bits (Z, Z256, Z128), using the matching ZMM/YMM/XMM entry of `sched`.
// The 512-bit records require HasAVX512; the narrower ones also need HasVLX.
6369multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6370                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6371  let Predicates  = [HasAVX512] in
6372  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6373           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6374
6375  let Predicates = [HasAVX512, HasVLX] in {
6376  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6377              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6378  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6379              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6380  }
6381}
6382
// Instantiates the dword (D) and qword (Q) variants of a variable shift or
// rotate.  Both variants use the same opcode byte `opc`; the Q variant is
// tagged REX_W.  "d"/"q" is appended to OpcodeStr for the mnemonic.
6383multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6384                                  SDNode OpNode, X86SchedWriteWidths sched> {
6385  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6386                                 avx512vl_i32_info>;
6387  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6388                                 avx512vl_i64_info>, REX_W;
6389}
6390
6391// Use 512bit version to implement 128/256 bit in case NoVLX.
// Emits two selection patterns, guarded by predicate list `p`, that implement
// the 256-bit and 128-bit forms of OpNode with the 512-bit register-register
// instruction named OpcodeStr#"Zrr" (looked up with !cast<Instruction>).
// Both operands are inserted into the low sub-register of an IMPLICIT_DEF
// 512-bit value, and the low sub-register of the result is extracted.
//   _         - supplies the 128/256/512-bit value types and register classes.
//   OpcodeStr - instruction record prefix (e.g. "VPSRAVQ"), not the mnemonic.
6392multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6393                                     SDNode OpNode, list<Predicate> p> {
6394  let Predicates = p in {
6395  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6396                                  (_.info256.VT _.info256.RC:$src2))),
6397            (EXTRACT_SUBREG
6398                (!cast<Instruction>(OpcodeStr#"Zrr")
6399                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6400                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6401             sub_ymm)>;
6402
6403  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6404                                  (_.info128.VT _.info128.RC:$src2))),
6405            (EXTRACT_SUBREG
6406                (!cast<Instruction>(OpcodeStr#"Zrr")
6407                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6408                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6409             sub_xmm)>;
6410  }
6411}
// Word-element variable shifts: instantiates avx512_var_shift (rr/rm only, no
// avx512_var_shift_mb broadcast form) for v32i16_info, v16i16x_info and
// v8i16x_info, each tagged REX_W.
// The 512-bit records require HasBWI; the 256/128-bit records require
// HasVLX and HasBWI.
6412multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6413                              SDNode OpNode, X86SchedWriteWidths sched> {
6414  let Predicates = [HasBWI] in
6415  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6416              EVEX_V512, REX_W;
6417  let Predicates = [HasVLX, HasBWI] in {
6418
6419  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6420              EVEX_V256, REX_W;
6421  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6422              EVEX_V128, REX_W;
6423  }
6424}
6425
// Per-element variable shifts: D/Q forms from avx512_var_shift_types plus
// W forms (separate opcode byte) from avx512_var_shift_w.
6426defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6427              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6428
6429defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6430              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6431
6432defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6433              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6434
// Per-element variable rotates select the generic rotr/rotl nodes and are
// instantiated for D and Q only.
6435defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6436defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6437
// Without VLX, lower the 128/256-bit forms of these nodes to the 512-bit
// instruction whose record name starts with the given string.
6438defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6439defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6440defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6441defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6442
6443
6444// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Layout of this block: four variable-count patterns (rotl -> VPROLVQZrr /
// VPROLVDZrr) followed by four immediate patterns (X86vrotli -> VPROLQZri /
// VPROLDZri).  Every register source is inserted into the low sub-register of
// an IMPLICIT_DEF 512-bit value, and the same low sub-register (sub_xmm for
// VR128X, sub_ymm for VR256X) is extracted from the 512-bit result.
6445let Predicates = [HasAVX512, NoVLX] in {
6446  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6447            (EXTRACT_SUBREG (v8i64
6448              (VPROLVQZrr
6449                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6450                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6451                       sub_xmm)>;
6452  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6453            (EXTRACT_SUBREG (v8i64
6454              (VPROLVQZrr
6455                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6456                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6457                       sub_ymm)>;
6458
6459  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6460            (EXTRACT_SUBREG (v16i32
6461              (VPROLVDZrr
6462                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6463                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6464                        sub_xmm)>;
6465  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6466            (EXTRACT_SUBREG (v16i32
6467              (VPROLVDZrr
6468                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6469                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6470                        sub_ymm)>;
6471
6472  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6473            (EXTRACT_SUBREG (v8i64
6474              (VPROLQZri
6475                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6476                        timm:$src2)), sub_xmm)>;
6477  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6478            (EXTRACT_SUBREG (v8i64
6479              (VPROLQZri
6480                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6481                       timm:$src2)), sub_ymm)>;
6482
6483  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6484            (EXTRACT_SUBREG (v16i32
6485              (VPROLDZri
6486                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6487                        timm:$src2)), sub_xmm)>;
6488  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6489            (EXTRACT_SUBREG (v16i32
6490              (VPROLDZri
6491                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6492                        timm:$src2)), sub_ymm)>;
6493}
6494
6495// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Layout of this block: four variable-count patterns (rotr -> VPRORVQZrr /
// VPRORVDZrr) followed by four immediate patterns (X86vrotri -> VPRORQZri /
// VPRORDZri).  Every register source is inserted into the low sub-register of
// an IMPLICIT_DEF 512-bit value, and the same low sub-register (sub_xmm for
// VR128X, sub_ymm for VR256X) is extracted from the 512-bit result.
6496let Predicates = [HasAVX512, NoVLX] in {
6497  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6498            (EXTRACT_SUBREG (v8i64
6499              (VPRORVQZrr
6500                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6501                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6502                       sub_xmm)>;
6503  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6504            (EXTRACT_SUBREG (v8i64
6505              (VPRORVQZrr
6506                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6507                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6508                       sub_ymm)>;
6509
6510  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6511            (EXTRACT_SUBREG (v16i32
6512              (VPRORVDZrr
6513                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6514                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6515                        sub_xmm)>;
6516  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6517            (EXTRACT_SUBREG (v16i32
6518              (VPRORVDZrr
6519                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6520                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6521                        sub_ymm)>;
6522
6523  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6524            (EXTRACT_SUBREG (v8i64
6525              (VPRORQZri
6526                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6527                        timm:$src2)), sub_xmm)>;
6528  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6529            (EXTRACT_SUBREG (v8i64
6530              (VPRORQZri
6531                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6532                       timm:$src2)), sub_ymm)>;
6533
6534  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6535            (EXTRACT_SUBREG (v16i32
6536              (VPRORDZri
6537                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6538                        timm:$src2)), sub_xmm)>;
6539  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6540            (EXTRACT_SUBREG (v16i32
6541              (VPRORDZri
6542                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6543                        timm:$src2)), sub_ymm)>;
6544}
6545
6546//===-------------------------------------------------------------------===//
6547// 1-src variable permutation VPERMW/D/Q
6548//===-------------------------------------------------------------------===//
6549
// Variable-permute forms for dword/qword-sized elements.  Reuses
// avx512_var_shift (rr/rm) and avx512_var_shift_mb (rmb) with a permute
// OpNode.  Only 512-bit (Z) and 256-bit (Z256) records are produced; no
// 128-bit form is instantiated.
// Unlike the shift "_sizes" multiclasses, `sched` is a single
// X86FoldableSchedWrite that is used for both widths.
6550multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6551                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6552  let Predicates  = [HasAVX512] in
6553  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6554           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6555
6556  let Predicates = [HasAVX512, HasVLX] in
6557  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6558              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6559}
6560
// Immediate-controlled permute forms.  Reuses avx512_shift_rmi and
// avx512_shift_rmbi with a permute OpNode.  Only 512-bit (Z) and 256-bit
// (Z256) records are produced; no 128-bit form is instantiated.
// `sched` is a single X86FoldableSchedWrite that is used for both widths.
6561multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6562                                 string OpcodeStr, SDNode OpNode,
6563                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6564  let Predicates = [HasAVX512] in
6565  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6566                              sched, VTInfo.info512>,
6567             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6568                               sched, VTInfo.info512>, EVEX_V512;
6569  let Predicates = [HasAVX512, HasVLX] in
6570  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6571                              sched, VTInfo.info256>,
6572             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6573                               sched, VTInfo.info256>, EVEX_V256;
6574}
6575
// Variable-permute forms for byte/word-sized elements.  Instantiates
// avx512_var_shift (rr/rm only, no broadcast form) at 512, 256 and 128 bits.
//   prd - feature predicate for the 512-bit records; the 256/128-bit records
//         require HasVLX in addition to prd.
// `sched` is a single X86FoldableSchedWrite that is used for all three widths.
6576multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6577                              Predicate prd, SDNode OpNode,
6578                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6579  let Predicates = [prd] in
6580  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6581              EVEX_V512 ;
6582  let Predicates = [HasVLX, prd] in {
6583  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6584              EVEX_V256 ;
6585  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6586              EVEX_V128 ;
6587  }
6588}
6589
// Byte/word variable permutes.  VPERMW and VPERMB share opcode 0x8D; they
// differ in the feature predicate (HasBWI vs HasVBMI), the element type info,
// and REX_W, which is set on VPERMW only.
6590defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6591                               WriteVarShuffle256, avx512vl_i16_info>, REX_W;
6592defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6593                               WriteVarShuffle256, avx512vl_i8_info>;
6594
// Dword/qword variable permutes (X86VPermv).  Each D/Q or PS/PD pair shares an
// opcode byte and is separated by REX_W on the 64-bit-element variant.
6595defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6596                                    WriteVarShuffle256, avx512vl_i32_info>;
6597defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6598                                    WriteVarShuffle256, avx512vl_i64_info>, REX_W;
6599defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6600                                     WriteFVarShuffle256, avx512vl_f32_info>;
6601defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6602                                     WriteFVarShuffle256, avx512vl_f64_info>, REX_W;
6603
// Immediate-controlled qword permutes (X86VPermi).  These reuse the VPERMQ and
// VPERMPD defm prefixes of the variable forms above.
6604defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6605                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6606                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6607defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6608                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6609                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
6610
6611//===----------------------------------------------------------------------===//
6612// AVX-512 - VPERMIL
6613//===----------------------------------------------------------------------===//
6614
// Defines the variable-control VPERMIL forms (rr, rm, rmb) for one vector
// width through AVX512_maskable.
//   _    - type info of the data operand ($src1) and of the result.
//   Ctrl - type info of the control operand ($src2): its register class,
//          memory operand, load fragment and broadcast-load fragment.
// NOTE(review): the rmb form takes its memory operand and broadcast string
// from `_` (_.ScalarMemOp, _.BroadcastStr) but its pattern from Ctrl
// (Ctrl.BroadcastLdFrag); this presumably relies on `_` and Ctrl having the
// same element width - confirm.
6615multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6616                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6617                             X86VectorVTInfo Ctrl> {
6618  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6619                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6620                  "$src2, $src1", "$src1, $src2",
6621                  (_.VT (OpNode _.RC:$src1,
6622                               (Ctrl.VT Ctrl.RC:$src2)))>,
6623                  T8PD, EVEX_4V, Sched<[sched]>;
6624  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6625                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6626                  "$src2, $src1", "$src1, $src2",
6627                  (_.VT (OpNode
6628                           _.RC:$src1,
6629                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6630                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6631                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6632  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6633                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6634                   "${src2}"#_.BroadcastStr#", $src1",
6635                   "$src1, ${src2}"#_.BroadcastStr,
6636                   (_.VT (OpNode
6637                            _.RC:$src1,
6638                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6639                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6640                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6641}
6642
// Instantiates avx512_permil_vec with the X86VPermilpv node at 512, 128 and
// 256 bits, pairing each width of `_` with the same width of Ctrl.
// The 512-bit records require HasAVX512; the narrower ones also need HasVLX.
6643multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6644                                    X86SchedWriteWidths sched,
6645                                    AVX512VLVectorVTInfo _,
6646                                    AVX512VLVectorVTInfo Ctrl> {
6647  let Predicates = [HasAVX512] in {
6648    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6649                                  _.info512, Ctrl.info512>, EVEX_V512;
6650  }
6651  let Predicates = [HasAVX512, HasVLX] in {
6652    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6653                                  _.info128, Ctrl.info128>, EVEX_V128;
6654    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6655                                  _.info256, Ctrl.info256>, EVEX_V256;
6656  }
6657}
6658
// Combines both VPERMIL flavours under the caller's NAME:
//   - variable-control forms (opcode OpcVar, SchedWriteFVarShuffle) via
//     avx512_permil_vec_common, and
//   - immediate-control forms (opcode OpcImm, X86VPermilpi,
//     SchedWriteFShuffle) via avx512_shift_rmi_sizes.
// Note the template argument order is OpcImm first, then OpcVar.
6659multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6660                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6661  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6662                                      _, Ctrl>;
6663  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6664                                    X86VPermilpi, SchedWriteFShuffle, _>,
6665                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6666}
6667
// VPERMILPS/VPERMILPD: float data permuted under an integer control vector of
// the same element count (f32 with i32, f64 with i64).  Opcode arguments are
// the immediate opcode followed by the variable opcode.
6668let ExeDomain = SSEPackedSingle in
6669defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6670                               avx512vl_i32_info>;
6671let ExeDomain = SSEPackedDouble in
6672defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6673                               avx512vl_i64_info>, VEX_W1X;
6674
6675//===----------------------------------------------------------------------===//
6676// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6677//===----------------------------------------------------------------------===//
6678
// The immediate shuffles reuse the shift-by-immediate multiclasses with
// MRMSrcReg/MRMSrcMem formats.  All three share opcode 0x70 and differ in
// their base class: AVX512BIi8Base (vpshufd), AVX512XSIi8Base (vpshufhw) and
// AVX512XDIi8Base (vpshuflw).
// The defm names VPSHUFH/VPSHUFL omit the trailing 'W'; it is supplied by the
// WZ/WZ256/WZ128 names inside avx512_shift_rmi_w.
6679defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6680                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6681                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6682defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6683                                  X86PShufhw, SchedWriteShuffle>,
6684                                  EVEX, AVX512XSIi8Base;
6685defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6686                                  X86PShuflw, SchedWriteShuffle>,
6687                                  EVEX, AVX512XDIi8Base;
6688
6689//===----------------------------------------------------------------------===//
6690// AVX-512 - VPSHUFB
6691//===----------------------------------------------------------------------===//
6692
// Instantiates avx512_var_shift (rr/rm only, no broadcast form) for the byte
// vector types v64i8_info, v32i8x_info and v16i8x_info, using the matching
// ZMM/YMM/XMM entry of `sched`.
// The 512-bit records require HasBWI; the 256/128-bit records require
// HasVLX and HasBWI.
6693multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6694                               X86SchedWriteWidths sched> {
6695  let Predicates = [HasBWI] in
6696  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6697                              EVEX_V512;
6698
6699  let Predicates = [HasVLX, HasBWI] in {
6700  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6701                              EVEX_V256;
6702  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6703                              EVEX_V128;
6704  }
6705}
6706
// VPSHUFB at all three vector widths: opcode 0x00, X86pshufb node, tagged WIG.
6707defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6708                                  SchedWriteVarShuffle>, WIG;
6709
6710//===----------------------------------------------------------------------===//
6711// Move Low to High and High to Low packed FP Instructions
6712//===----------------------------------------------------------------------===//
6713
// EVEX-encoded 128-bit register-register VMOVLHPS (opcode 0x16) and VMOVHLPS
// (opcode 0x12), selected from the X86Movlhps / X86Movhlps nodes on v4f32.
6714def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6715          (ins VR128X:$src1, VR128X:$src2),
6716          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6717          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6718          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// Only VMOVHLPSZrr is marked commutable (the `let` covers just this def).
// NOTE(review): swapping the operands of X86Movhlps is not an identity, so the
// commuted form is presumably produced by target-specific commute code -
// confirm in X86InstrInfo.
6719let isCommutable = 1 in
6720def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6721          (ins VR128X:$src1, VR128X:$src2),
6722          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6723          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6724          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6725
6726//===----------------------------------------------------------------------===//
6727// VMOVHPS/PD VMOVLPS Instructions
6728// All patterns were taken from the SSE implementation.
6729//===----------------------------------------------------------------------===//
6730
// Defines the load form (rm) of a VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD-style
// instruction: $src1 is a vector register and $src2 is a 64-bit memory
// operand (f64mem).
//   OpNode - node combining $src1 with the loaded value; callers pass
//            null_frag when they do not want this pattern to be selected.
//   _      - vector type info supplying RC, VT and ExeDomain.
// The pattern loads an f64, wraps it as v2f64 with scalar_to_vector, and
// bitconverts it to _.VT before applying OpNode.
6731multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6732                                  SDPatternOperator OpNode,
6733                                  X86VectorVTInfo _> {
6734  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6735  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6736                  (ins _.RC:$src1, f64mem:$src2),
6737                  !strconcat(OpcodeStr,
6738                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6739                  [(set _.RC:$dst,
6740                     (OpNode _.RC:$src1,
6741                       (_.VT (bitconvert
6742                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6743                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6744}
6745
6746// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6747// SSE1. And MOVLPS pattern is even more complex.
// Load forms.  Opcode 0x16 is the "H" pair and 0x12 the "L" pair.  The PS
// variants pass null_frag and use EVEX_CD8<32, CD8VT2>; the PD variants select
// X86Unpckl / X86Movsd, use EVEX_CD8<64, CD8VT1>, and are tagged REX_W.
6748defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6749                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6750defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6751                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
6752defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6753                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6754defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6755                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
6756
// Extra load-folding patterns: when the second operand of X86Unpckl/X86Movsd
// is an X86vzload64 node, select the same rm instructions as the
// scalar_to_vector(loadf64) patterns generated by avx512_mov_hilo_packed.
6757let Predicates = [HasAVX512] in {
6758  // VMOVHPD patterns
6759  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6760            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6761
6762  // VMOVLPD patterns
6763  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6764            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6765}
6766
// Store forms (MRMDestMem, f64mem destination), all scheduled as WriteFStore.
// The PS stores have empty pattern lists, so mayStore/hasSideEffects are set
// explicitly on them; each brace-less `let` covers only the def that follows.
// The PD stores carry a selection pattern instead.
6767let SchedRW = [WriteFStore] in {
6768let mayStore = 1, hasSideEffects = 0 in
6769def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6770                       (ins f64mem:$dst, VR128X:$src),
6771                       "vmovhps\t{$src, $dst|$dst, $src}",
6772                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Matches a store of element 0 of (X86Unpckh $src, $src).
6773def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6774                       (ins f64mem:$dst, VR128X:$src),
6775                       "vmovhpd\t{$src, $dst|$dst, $src}",
6776                       [(store (f64 (extractelt
6777                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6778                                     (iPTR 0))), addr:$dst)]>,
6779                       EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6780let mayStore = 1, hasSideEffects = 0 in
6781def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6782                       (ins f64mem:$dst, VR128X:$src),
6783                       "vmovlps\t{$src, $dst|$dst, $src}",
6784                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Matches a store of element 0 of $src.
6785def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6786                       (ins f64mem:$dst, VR128X:$src),
6787                       "vmovlpd\t{$src, $dst|$dst, $src}",
6788                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6789                                     (iPTR 0))), addr:$dst)]>,
6790                       EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
6791} // SchedRW
6792
// Additional store pattern: a store of element 0 of
// (X86VPermilpi $src, 1) is also selected as VMOVHPDZ128mr, alongside the
// X86Unpckh-based pattern on the instruction definition itself.
6793let Predicates = [HasAVX512] in {
6794  // VMOVHPD patterns
6795  def : Pat<(store (f64 (extractelt
6796                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6797                           (iPTR 0))), addr:$dst),
6798           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6799}
6800//===----------------------------------------------------------------------===//
6801// FMA - Fused Multiply Operations
6802//
6803
// Defines the packed "213" FMA forms for one vector width through
// AVX512_maskable_fma: register (r), full-vector memory (m) and broadcast
// memory (mb).
//   OpNode     - node used for the first pattern dag.
//   MaskOpNode - node used for the second pattern dag.
//   _          - vector type info (RC, VT, MemOp, LdFrag, BroadcastLdFrag...).
// $src1 is tied to $dst and therefore does not appear in the `ins` lists.
// The node operands are given in the order ($src2, $src1, $src3).  All forms
// read MXCSR and are marked mayRaiseFPException.
// NOTE(review): the trailing "1, 1" / "1, 0" arguments are presumably
// commutability flags, with the second cleared on the memory forms - confirm
// against AVX512_maskable_fma.
6804multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6805                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6806                               X86VectorVTInfo _> {
6807  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6808      Uses = [MXCSR], mayRaiseFPException = 1 in {
6809  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6810          (ins _.RC:$src2, _.RC:$src3),
6811          OpcodeStr, "$src3, $src2", "$src2, $src3",
6812          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6813          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6814          EVEX_4V, Sched<[sched]>;
6815
6816  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6817          (ins _.RC:$src2, _.MemOp:$src3),
6818          OpcodeStr, "$src3, $src2", "$src2, $src3",
6819          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6820          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6821          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
6822                          sched.ReadAfterFold]>;
6823
6824  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6825            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6826            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6827            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6828            (OpNode _.RC:$src2,
6829             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6830            (MaskOpNode _.RC:$src2,
6831             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6832            EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
6833                                    sched.ReadAfterFold]>;
6834  }
6835}
6836
// "213" form with an explicit rounding-control operand (rb).  Register-only:
// takes AVX512RC:$rc and passes it to OpNode as (i32 timm:$rc).  The same dag
// is supplied for both pattern arguments of AVX512_maskable_fma, and the
// encoding adds EVEX_B and EVEX_RC.
// Unlike avx512_fma3p_213_rm, this let does not set mayRaiseFPException
// (presumably because embedded rounding suppresses FP exceptions - confirm
// against the ISA manual).
6837multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6838                                 X86FoldableSchedWrite sched,
6839                                 X86VectorVTInfo _> {
6840  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6841      Uses = [MXCSR] in
6842  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6843          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6844          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6845          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6846          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6847          EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6848}
6849
// Instantiates the "213" packed form once per vector width of the given
// AVX512VLVectorVTInfo:
//   Z    - _.info512 under [prd]; combines the r/m/mb variants with the
//          rounding-control (rb) variant.
//   Z256 - _.info256 under [HasVLX, prd]; r/m/mb only.
//   Z128 - _.info128 under [HasVLX, prd]; r/m/mb only.
// Each width selects the matching sched.ZMM/YMM/XMM entry, EVEX_V* class and
// EVEX_CD8<EltSize, CD8VF>.  prd defaults to HasAVX512.
6850multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6851                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6852                                   X86SchedWriteWidths sched,
6853                                   AVX512VLVectorVTInfo _,
6854                                   Predicate prd = HasAVX512> {
6855  let Predicates = [prd] in {
6856    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6857                                      sched.ZMM, _.info512>,
6858                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6859                                        _.info512>,
6860                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6861  }
6862  let Predicates = [HasVLX, prd] in {
6863    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6864                                    sched.YMM, _.info256>,
6865                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6866    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6867                                    sched.XMM, _.info128>,
6868                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6869  }
6870}
6871
// Expands one "213" FMA opcode into PH, PS and PD flavours by appending
// "ph"/"ps"/"pd" to OpcodeStr.  All three use SchedWriteFMA.
//   PH - avx512vl_f16_info, predicate HasFP16, T_MAP6PD.
//   PS - avx512vl_f32_info, default predicate, T8PD.
//   PD - avx512vl_f64_info, default predicate, T8PD plus REX_W.
6872multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6873                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6874    defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6875                                      OpNodeRnd, SchedWriteFMA,
6876                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
6877    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6878                                      OpNodeRnd, SchedWriteFMA,
6879                                      avx512vl_f32_info>, T8PD;
6880    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6881                                      OpNodeRnd, SchedWriteFMA,
6882                                      avx512vl_f64_info>, T8PD, REX_W;
6883}
6884
// "213" packed FMA instruction families.  Template arguments, in order:
// opcode, mnemonic stem, OpNode, MaskOpNode, OpNodeRnd.  The ADDSUB/SUBADD
// families pass the same node for OpNode and MaskOpNode.
6885defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6886                                       fma, X86FmaddRnd>;
6887defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6888                                       X86Fmsub, X86FmsubRnd>;
6889defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6890                                       X86Fmaddsub, X86FmaddsubRnd>;
6891defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6892                                       X86Fmsubadd, X86FmsubaddRnd>;
6893defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6894                                       X86Fnmadd, X86FnmaddRnd>;
6895defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6896                                       X86Fnmsub, X86FnmsubRnd>;
6897
6898
// Packed FMA, "231" operand form.  Defines the register (r), full-vector load
// (m) and broadcast-load (mb) variants through AVX512_maskable_fma.
// Parameters are the same as for the other *_rm multiclasses: opcode,
// mnemonic, OpNode (first pattern), MaskOpNode (second pattern), scheduling
// class and vector type info.
// $src1 is tied to $dst, and every pattern here is (Op $src2, $src3, $src1).
// NOTE(review): the trailing `1, 1` / `1, 0` template arguments are defined by
// AVX512_maskable_fma, which is not visible here; presumably commutability
// flags - confirm.
6899multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6900                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6901                               X86VectorVTInfo _> {
6902  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6903      Uses = [MXCSR], mayRaiseFPException = 1 in {
      // Register form.  The first pattern is null_frag, so only the MaskOpNode
      // pattern is supplied (presumably the unmasked register case is left to
      // the 213 form - confirm).
6904  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6905          (ins _.RC:$src2, _.RC:$src3),
6906          OpcodeStr, "$src3, $src2", "$src2, $src3",
6907          (null_frag),
6908          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6909          EVEX_4V, Sched<[sched]>;
6910
      // Memory form: $src3 is loaded with _.LdFrag.
6911  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6912          (ins _.RC:$src2, _.MemOp:$src3),
6913          OpcodeStr, "$src3, $src2", "$src2, $src3",
6914          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6915          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6916          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
6917                          sched.ReadAfterFold]>;
6918
      // Broadcast form: $src3 is a scalar memory operand loaded with
      // _.BroadcastLdFrag; the encoding adds EVEX_B.
6919  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6920         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6921         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6922         "$src2, ${src3}"#_.BroadcastStr,
6923         (_.VT (OpNode _.RC:$src2,
6924                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6925                      _.RC:$src1)),
6926         (_.VT (MaskOpNode _.RC:$src2,
6927                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6928                           _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
6929         Sched<[sched.Folded, sched.ReadAfterFold,
6930                sched.ReadAfterFold]>;
6931  }
6932}
6933
// "231" form with an explicit rounding-control operand (rb).  Register-only:
// takes AVX512RC:$rc and passes it to OpNode as (i32 timm:$rc).  The first
// pattern argument is null_frag; only the second pattern,
// (OpNode $src2, $src3, $src1, $rc), is supplied.  The encoding adds EVEX_B
// and EVEX_RC.
// Unlike avx512_fma3p_231_rm, this let does not set mayRaiseFPException.
6934multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6935                                 X86FoldableSchedWrite sched,
6936                                 X86VectorVTInfo _> {
6937  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6938      Uses = [MXCSR] in
6939  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6940          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6941          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6942          (null_frag),
6943          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6944          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6945}
6946
// Instantiates the "231" packed form once per vector width of the given
// AVX512VLVectorVTInfo:
//   Z    - _.info512 under [prd]; combines the r/m/mb variants with the
//          rounding-control (rb) variant.
//   Z256 - _.info256 under [HasVLX, prd]; r/m/mb only.
//   Z128 - _.info128 under [HasVLX, prd]; r/m/mb only.
// Each width selects the matching sched.ZMM/YMM/XMM entry, EVEX_V* class and
// EVEX_CD8<EltSize, CD8VF>.  prd defaults to HasAVX512.
6947multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6948                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6949                                   X86SchedWriteWidths sched,
6950                                   AVX512VLVectorVTInfo _,
6951                                   Predicate prd = HasAVX512> {
6952  let Predicates = [prd] in {
6953    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6954                                      sched.ZMM, _.info512>,
6955                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6956                                        _.info512>,
6957                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6958  }
6959  let Predicates = [HasVLX, prd] in {
6960    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6961                                    sched.YMM, _.info256>,
6962                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6963    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6964                                    sched.XMM, _.info128>,
6965                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6966  }
6967}
6968
// Expands one "231" FMA opcode into PH, PS and PD flavours by appending
// "ph"/"ps"/"pd" to OpcodeStr.  All three use SchedWriteFMA.
//   PH - avx512vl_f16_info, predicate HasFP16, T_MAP6PD.
//   PS - avx512vl_f32_info, default predicate, T8PD.
//   PD - avx512vl_f64_info, default predicate, T8PD plus REX_W.
6969multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6970                              SDNode MaskOpNode, SDNode OpNodeRnd > {
6971    defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6972                                      OpNodeRnd, SchedWriteFMA,
6973                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
6974    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6975                                      OpNodeRnd, SchedWriteFMA,
6976                                      avx512vl_f32_info>, T8PD;
6977    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6978                                      OpNodeRnd, SchedWriteFMA,
6979                                      avx512vl_f64_info>, T8PD, REX_W;
6980}
6981
// "231" packed FMA instruction families.  Template arguments, in order:
// opcode, mnemonic stem, OpNode, MaskOpNode, OpNodeRnd.  The ADDSUB/SUBADD
// families pass the same node for OpNode and MaskOpNode.
6982defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6983                                       fma, X86FmaddRnd>;
6984defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6985                                       X86Fmsub, X86FmsubRnd>;
6986defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6987                                       X86Fmaddsub, X86FmaddsubRnd>;
6988defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6989                                       X86Fmsubadd, X86FmsubaddRnd>;
6990defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6991                                       X86Fnmadd, X86FnmaddRnd>;
6992defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6993                                       X86Fnmsub, X86FnmsubRnd>;
6994
// Packed FMA, "132" operand form.  Defines the register (r), full-vector load
// (m) and broadcast-load (mb) variants through AVX512_maskable_fma.
// Parameters are the same as for the other *_rm multiclasses: opcode,
// mnemonic, OpNode (first pattern), MaskOpNode (second pattern), scheduling
// class and vector type info.
// $src1 is tied to $dst.  The register pattern is (Op $src1, $src3, $src2);
// the memory patterns are written as (Op load($src3), $src1, $src2).
// NOTE(review): the trailing `1, 1` / `1, 0` template arguments are defined by
// AVX512_maskable_fma, which is not visible here; presumably commutability
// flags - confirm.
6995multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6996                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6997                               X86VectorVTInfo _> {
6998  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6999      Uses = [MXCSR], mayRaiseFPException = 1 in {
      // Register form.  The first pattern is null_frag, so only the MaskOpNode
      // pattern is supplied.
7000  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
7001          (ins _.RC:$src2, _.RC:$src3),
7002          OpcodeStr, "$src3, $src2", "$src2, $src3",
7003          (null_frag),
7004          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
7005          EVEX_4V, Sched<[sched]>;
7006
7007  // Pattern is in 312 order so that the load is in a different place from the
7008  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7009  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
7010          (ins _.RC:$src2, _.MemOp:$src3),
7011          OpcodeStr, "$src3, $src2", "$src2, $src3",
7012          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
7013          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
7014          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
7015                          sched.ReadAfterFold]>;
7016
7017  // Pattern is in 312 order so that the load is in a different place from the
7018  // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7019  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
7020         (ins _.RC:$src2, _.ScalarMemOp:$src3),
7021         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
7022         "$src2, ${src3}"#_.BroadcastStr,
7023         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
7024                       _.RC:$src1, _.RC:$src2)),
7025         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
7026                           _.RC:$src1, _.RC:$src2)), 1, 0>,
7027         EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7028                                 sched.ReadAfterFold]>;
7029  }
7030}
7031
// "132" form with an explicit rounding-control operand (rb).  Register-only:
// takes AVX512RC:$rc and passes it to OpNode as (i32 timm:$rc).  The first
// pattern argument is null_frag; only the second pattern,
// (OpNode $src1, $src3, $src2, $rc), is supplied.  The encoding adds EVEX_B
// and EVEX_RC.
// Unlike avx512_fma3p_132_rm, this let does not set mayRaiseFPException.
7032multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
7033                                 X86FoldableSchedWrite sched,
7034                                 X86VectorVTInfo _> {
7035  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
7036      Uses = [MXCSR] in
7037  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
7038          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7039          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
7040          (null_frag),
7041          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
7042          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
7043}
7044
// Instantiates the "132" packed form once per vector width of the given
// AVX512VLVectorVTInfo:
//   Z    - _.info512 under [prd]; combines the r/m/mb variants with the
//          rounding-control (rb) variant.
//   Z256 - _.info256 under [HasVLX, prd]; r/m/mb only.
//   Z128 - _.info128 under [HasVLX, prd]; r/m/mb only.
// Each width selects the matching sched.ZMM/YMM/XMM entry, EVEX_V* class and
// EVEX_CD8<EltSize, CD8VF>.  prd defaults to HasAVX512.
7045multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7046                                   SDNode MaskOpNode, SDNode OpNodeRnd,
7047                                   X86SchedWriteWidths sched,
7048                                   AVX512VLVectorVTInfo _,
7049                                   Predicate prd = HasAVX512> {
7050  let Predicates = [prd] in {
7051    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7052                                      sched.ZMM, _.info512>,
7053                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
7054                                        _.info512>,
7055                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7056  }
7057  let Predicates = [HasVLX, prd] in {
7058    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7059                                    sched.YMM, _.info256>,
7060                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7061    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7062                                    sched.XMM, _.info128>,
7063                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7064  }
7065}
7066
// Expands one "132" FMA opcode into PH, PS and PD flavours by appending
// "ph"/"ps"/"pd" to OpcodeStr.  All three use SchedWriteFMA.
//   PH - avx512vl_f16_info, predicate HasFP16, T_MAP6PD.
//   PS - avx512vl_f32_info, default predicate, T8PD.
//   PD - avx512vl_f64_info, default predicate, T8PD plus REX_W.
7067multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7068                              SDNode MaskOpNode, SDNode OpNodeRnd > {
7069    defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
7070                                      OpNodeRnd, SchedWriteFMA,
7071                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
7072    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
7073                                      OpNodeRnd, SchedWriteFMA,
7074                                      avx512vl_f32_info>, T8PD;
7075    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
7076                                      OpNodeRnd, SchedWriteFMA,
7077                                      avx512vl_f64_info>, T8PD, REX_W;
7078}
7079
// "132" packed FMA instruction families.  Template arguments, in order:
// opcode, mnemonic stem, OpNode, MaskOpNode, OpNodeRnd.  The ADDSUB/SUBADD
// families pass the same node for OpNode and MaskOpNode.
7080defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
7081                                       fma, X86FmaddRnd>;
7082defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
7083                                       X86Fmsub, X86FmsubRnd>;
7084defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
7085                                       X86Fmaddsub, X86FmaddsubRnd>;
7086defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
7087                                       X86Fmsubadd, X86FmsubaddRnd>;
7088defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
7089                                       X86Fnmadd, X86FnmaddRnd>;
7090defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
7091                                       X86Fnmsub, X86FnmsubRnd>;
7092
7093// Scalar FMA
// Common body for one scalar FMA operand form.  With $src1 tied to $dst it
// defines two groups of records:
//   r_Int / m_Int / rb_Int - operands in _.RC, built through
//       AVX512_maskable_3src_scalar with no selection pattern (null_frag).
//   r / m / rb - isCodeGenOnly, isCommutable records on _.FRC whose patterns
//       are the caller-supplied RHS_r / RHS_m / RHS_b dags.
// When MaskOnlyReg is set, the r and rb records get an empty pattern list;
// the m record always uses RHS_m.
// The rb_Int and rb records take AVX512RC:$rc, add EVEX_B/EVEX_RC and use
// MXCSR explicitly; the other records carry SIMD_EXC instead.
7094multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7095                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
7096let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
7097  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7098          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
7099          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7100          EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7101
      // No pattern is attached (null_frag), so mayLoad is stated explicitly.
7102  let mayLoad = 1 in
7103  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7104          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
7105          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7106          EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
7107                          SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7108
7109  let Uses = [MXCSR] in
7110  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7111         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7112         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
7113         EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
7114
7115  let isCodeGenOnly = 1, isCommutable = 1 in {
7116    def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7117                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
7118                     !strconcat(OpcodeStr,
7119                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7120                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
7121    def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
7122                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
7123                    !strconcat(OpcodeStr,
7124                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7125                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
7126                                     SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
7127
7128    let Uses = [MXCSR] in
7129    def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7130                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
7131                     !strconcat(OpcodeStr,
7132                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
7133                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
7134                     Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
7135  }// isCodeGenOnly = 1
7136}// Constraints = "$src1 = $dst"
7137}
7138
// Defines the 213, 231 and 132 scalar forms of one FMA operation for a single
// element type, named NAME # {213,231,132} # SUFF # "Z".  For each form it
// passes avx512_fma3s_common three `set _.FRC:$dst` dags (register, memory via
// _.ScalarLdFrag, and rounding via OpNodeRnd with timm:$rc) plus MaskOnlyReg.
// Operand orders used in the patterns:
//   213: (Op $src2, $src1, $src3)           MaskOnlyReg = 0
//   231: (Op $src2, $src3, $src1)           MaskOnlyReg = 1
//   132: (Op $src1, $src3, $src2), with the
//        memory pattern written as (Op load($src3), $src1, $src2)
//                                           MaskOnlyReg = 1
// So only the 213 form attaches its register and rounding patterns.
7139multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7140                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
7141                            X86VectorVTInfo _, string SUFF> {
7142  let ExeDomain = _.ExeDomain in {
7143  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
7144                // Operands for intrinsic are in 123 order to preserve passthru
7145                // semantics.
7146                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7147                         _.FRC:$src3))),
7148                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7149                         (_.ScalarLdFrag addr:$src3)))),
7150                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
7151                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
7152
7153  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
7154                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
7155                                          _.FRC:$src1))),
7156                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
7157                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
7158                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
7159                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
7160
7161  // One pattern is in 312 order so that the load is in a different place from
7162  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7163  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
7164                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
7165                         _.FRC:$src2))),
7166                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
7167                                 _.FRC:$src1, _.FRC:$src2))),
7168                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
7169                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
7170  }
7171}
7172
// Instantiates all three scalar operand forms of one FMA operation for each
// scalar element type:
//   SS - f32x_info, under HasAVX512, EVEX_CD8<32, CD8VT1>, T8PD.
//   SD - f64x_info, under HasAVX512, EVEX_CD8<64, CD8VT1>, REX_W, T8PD.
//   SH - f16x_info, under HasFP16,   EVEX_CD8<16, CD8VT1>, T_MAP6PD.
// All three are marked VEX_LIG.
7173multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7174                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
7175  let Predicates = [HasAVX512] in {
7176    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7177                                 OpNodeRnd, f32x_info, "SS">,
7178                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
7179    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7180                                 OpNodeRnd, f64x_info, "SD">,
7181                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD;
7182  }
7183  let Predicates = [HasFP16] in {
7184    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7185                                 OpNodeRnd, f16x_info, "SH">,
7186                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
7187  }
7188}
7189
// Scalar FMA instruction families.  Template arguments, in order: the 213,
// 231 and 132 opcodes, mnemonic stem, OpNode, OpNodeRnd.
7190defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
7191defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
7192defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
7193defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
7194
// Selection patterns that map a scalar FMA whose result is inserted into
// element 0 of $src1 (Move of $src1 and a scalar_to_vector of the FMA) onto
// the *_Int scalar FMA instructions named
// Prefix # {213,231,132} # Suffix # "Z" # {r,m,rb} # "_Int" # {"", "k", "kz"}.
//   Op       - node matched by the unmasked patterns.
//   MaskedOp - node matched under X86selects_mask.
//   RndOp    - node matched by the rounding-mode patterns (takes timm:$rc).
//   Move     - node that merges the scalar result into $src1.
//   _        - vector type info for the VR128X operands.
//   ZeroFP   - FP zero leaf used as the false value of the zero-masked forms.
//   prd      - predicate guarding every pattern (defaults to HasAVX512).
// The position of the element-0 extract of $src1 selects the operand form:
//   213: (Op $src2, $src1[0], $src3)
//   231: (Op $src2, $src3, $src1[0])
//   132: (Op $src1[0], $src3, $src2)    (memory forms only)
// "k" patterns select $src1[0] when the mask bit is clear; "kz" patterns
// select ZeroFP.  Scalar register operands are moved into VR128X with
// COPY_TO_REGCLASS.
7195multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
7196                                      SDNode RndOp, string Prefix,
7197                                      string Suffix, SDNode Move,
7198                                      X86VectorVTInfo _, PatLeaf ZeroFP,
7199                                      Predicate prd = HasAVX512> {
7200  let Predicates = [prd] in {
        // Unmasked patterns.
7201    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7202                (Op _.FRC:$src2,
7203                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7204                    _.FRC:$src3))))),
7205              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
7206               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7207               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7208
7209    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7210                (Op _.FRC:$src2, _.FRC:$src3,
7211                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7212              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
7213               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7214               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7215
7216    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7217                (Op _.FRC:$src2,
7218                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7219                    (_.ScalarLdFrag addr:$src3)))))),
7220              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
7221               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7222               addr:$src3)>;
7223
7224    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7225                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7226                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
7227              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
7228               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7229               addr:$src3)>;
7230
7231    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7232                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7233                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7234              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
7235               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7236               addr:$src3)>;
7237
        // Merge-masked patterns: the false value is element 0 of $src1.
7238    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7239               (X86selects_mask VK1WM:$mask,
7240                (MaskedOp _.FRC:$src2,
7241                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7242                    _.FRC:$src3),
7243                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7244              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
7245               VR128X:$src1, VK1WM:$mask,
7246               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7247               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7248
7249    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7250               (X86selects_mask VK1WM:$mask,
7251                (MaskedOp _.FRC:$src2,
7252                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7253                    (_.ScalarLdFrag addr:$src3)),
7254                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7255              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
7256               VR128X:$src1, VK1WM:$mask,
7257               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7258
7259    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7260               (X86selects_mask VK1WM:$mask,
7261                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7262                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7263                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7264              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7265               VR128X:$src1, VK1WM:$mask,
7266               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7267
7268    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7269               (X86selects_mask VK1WM:$mask,
7270                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7271                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7272                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7273              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7274               VR128X:$src1, VK1WM:$mask,
7275               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7276               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7277
7278    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7279               (X86selects_mask VK1WM:$mask,
7280                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7281                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7282                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7283              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7284               VR128X:$src1, VK1WM:$mask,
7285               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7286
        // Zero-masked patterns: the false value is ZeroFP.
7287    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7288               (X86selects_mask VK1WM:$mask,
7289                (MaskedOp _.FRC:$src2,
7290                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7291                          _.FRC:$src3),
7292                (_.EltVT ZeroFP)))))),
7293              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7294               VR128X:$src1, VK1WM:$mask,
7295               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7296               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7297
7298    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7299               (X86selects_mask VK1WM:$mask,
7300                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7301                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7302                (_.EltVT ZeroFP)))))),
7303              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7304               VR128X:$src1, VK1WM:$mask,
7305               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7306               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7307
7308    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7309               (X86selects_mask VK1WM:$mask,
7310                (MaskedOp _.FRC:$src2,
7311                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7312                          (_.ScalarLdFrag addr:$src3)),
7313                (_.EltVT ZeroFP)))))),
7314              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7315               VR128X:$src1, VK1WM:$mask,
7316               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7317
        // 132 form: the multiplicands are $src1[0] and the load, and $src2 is
        // the addend, exactly as in the Zm_Int and Zm_Intk 132 patterns above.
        // Putting the load last would instead describe the 213 computation.
7318    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7319               (X86selects_mask VK1WM:$mask,
7320                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7321                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7322                (_.EltVT ZeroFP)))))),
7323              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7324               VR128X:$src1, VK1WM:$mask,
7325               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7326
7327    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7328               (X86selects_mask VK1WM:$mask,
7329                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7330                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7331                (_.EltVT ZeroFP)))))),
7332              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7333               VR128X:$src1, VK1WM:$mask,
7334               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7335
7336    // Patterns with rounding mode.
7337    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7338                (RndOp _.FRC:$src2,
7339                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7340                       _.FRC:$src3, (i32 timm:$rc)))))),
7341              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7342               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7343               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7344
7345    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7346                (RndOp _.FRC:$src2, _.FRC:$src3,
7347                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7348                       (i32 timm:$rc)))))),
7349              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7350               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7351               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7352
7353    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7354               (X86selects_mask VK1WM:$mask,
7355                (RndOp _.FRC:$src2,
7356                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7357                       _.FRC:$src3, (i32 timm:$rc)),
7358                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7359              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7360               VR128X:$src1, VK1WM:$mask,
7361               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7362               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7363
7364    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7365               (X86selects_mask VK1WM:$mask,
7366                (RndOp _.FRC:$src2, _.FRC:$src3,
7367                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7368                       (i32 timm:$rc)),
7369                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7370              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7371               VR128X:$src1, VK1WM:$mask,
7372               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7373               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7374
7375    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7376               (X86selects_mask VK1WM:$mask,
7377                (RndOp _.FRC:$src2,
7378                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7379                       _.FRC:$src3, (i32 timm:$rc)),
7380                (_.EltVT ZeroFP)))))),
7381              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7382               VR128X:$src1, VK1WM:$mask,
7383               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7384               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7385
7386    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7387               (X86selects_mask VK1WM:$mask,
7388                (RndOp _.FRC:$src2, _.FRC:$src3,
7389                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7390                       (i32 timm:$rc)),
7391                (_.EltVT ZeroFP)))))),
7392              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7393               VR128X:$src1, VK1WM:$mask,
7394               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7395               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7396  }
7397}
// Instantiate the scalar FMA selection patterns for every combination of
// FMA flavor (VFMADD, VFMSUB, VFNMADD, VFNMSUB) and scalar suffix (SH, SS, SD).
// Each instantiation supplies the any_*/plain/rounding nodes, the mnemonic
// prefix and suffix, the matching X86Movs* node, the 128-bit vector info and
// the floating-point zero immediate for that element type.
//
// FP16 ("SH") variants: these pass HasFP16 as an explicit trailing argument.
7398defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7399                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7400defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7401                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7402defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7403                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7404defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7405                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7406
// Single-precision ("SS") variants.  The trailing predicate argument is
// omitted here (presumably falling back to the multiclass default -- confirm
// against the multiclass header).
7407defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7408                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7409defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7410                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7411defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7412                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7413defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7414                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7415
// Double-precision ("SD") variants; same argument shape as the "SS" group.
7416defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7417                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7418defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7419                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7420defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7421                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7422defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7423                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7424
7425//===----------------------------------------------------------------------===//
7426// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7427//===----------------------------------------------------------------------===//
// Every form defined in this scope ties its $src1 input to $dst.
7428let Constraints = "$src1 = $dst" in {
// Defines the three operand forms of one 52-bit multiply-add instruction for
// a single vector type `_`, all built through AVX512_maskable_3src:
//   r  - $src2 and $src3 in registers (MRMSrcReg),
//   m  - $src3 loaded through _.LdFrag (MRMSrcMem),
//   mb - $src3 loaded through _.BroadcastLdFrag (MRMSrcMem, EVEX_B), with
//        _.BroadcastStr appended to the memory operand in the asm string.
// Parameters:
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - selection node matched as (OpNode $src2, $src3, $src1).
//   sched     - scheduling class; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info (register class, memory operands, fragments).
7429multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7430                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7431  // NOTE: The SDNode have the multiply operands first with the add last.
7432  // This enables commuted load patterns to be autogenerated by tablegen.
7433  let ExeDomain = _.ExeDomain in {
  // NOTE(review): only the register form passes the trailing `1, 1` to
  // AVX512_maskable_3src; presumably commutability flags -- confirm against
  // that multiclass.
7434  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7435          (ins _.RC:$src2, _.RC:$src3),
7436          OpcodeStr, "$src3, $src2", "$src2, $src3",
7437          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7438          T8PD, EVEX_4V, Sched<[sched]>;
7439
  // Full-width memory operand form.
7440  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7441          (ins _.RC:$src2, _.MemOp:$src3),
7442          OpcodeStr, "$src3, $src2", "$src2, $src3",
7443          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7444          T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
7445                                sched.ReadAfterFold]>;
7446
  // Broadcast form: the memory operand is a scalar (_.ScalarMemOp) that is
  // matched through the broadcast-load fragment.
7447  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7448            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7449            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7450            !strconcat("$src2, ${src3}", _.BroadcastStr ),
7451            (OpNode _.RC:$src2,
7452                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7453                    _.RC:$src1)>,
7454            T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
7455                                          sched.ReadAfterFold]>;
7456  }
7457}
7458} // Constraints = "$src1 = $dst"
7459
// Instantiates avx512_pmadd52_rm at all three vector widths of `_`:
//   Z    - 512-bit, requires HasIFMA,
//   Z256 - 256-bit, requires HasVLX and HasIFMA,
//   Z128 - 128-bit, requires HasVLX and HasIFMA.
// Each width picks the matching member of `sched` (ZMM/YMM/XMM) and sets
// EVEX_CD8 from that width's element size with CD8VF.
7460multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7461                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7462  let Predicates = [HasIFMA] in {
7463    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7464                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7465  }
7466  let Predicates = [HasVLX, HasIFMA] in {
7467    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7468                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7469    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7470                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7471  }
7472}
7473
// The two IFMA instructions: vpmadd52luq (opcode 0xb4, node x86vpmadd52l) and
// vpmadd52huq (opcode 0xb5, node x86vpmadd52h).  Both operate on the i64
// vector types, use the SchedWriteVecIMul scheduling classes and set REX_W.
7474defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7475                                         SchedWriteVecIMul, avx512vl_i64_info>,
7476                                         REX_W;
7477defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7478                                         SchedWriteVecIMul, avx512vl_i64_info>,
7479                                         REX_W;
7480
7481//===----------------------------------------------------------------------===//
7482// AVX-512  Scalar convert from signed/unsigned integer to float/double
7483//===----------------------------------------------------------------------===//
7484
// Non-rounding forms of a scalar integer-to-floating-point conversion.
// Defines four instructions plus one assembler alias:
//   rr, rm         - isCodeGenOnly forms on the scalar FP register class
//                    (DstVT.FRC); they carry no selection pattern and are
//                    marked hasSideEffects = 0 (rm additionally mayLoad = 1).
//   rr_Int, rm_Int - forms on the vector register class (DstVT.RC) that match
//                    (OpNode $src1, $src2), with $src2 either an integer
//                    register or a load through ld_frag.
// Parameters:
//   opc       - opcode byte.
//   OpNode    - node matched by the *_Int forms.
//   sched     - scheduling class.
//   SrcRC     - integer source register class.
//   DstVT     - destination vector type info.
//   x86memop  - memory operand type of the integer source.
//   ld_frag   - load fragment used by rm_Int.
//   asm       - mnemonic text used to build the asm strings.
//   mem       - size suffix; emitted as "{mem}" in the memory-form asm strings
//               and appended unbraced in the alias mnemonic.
//   _Uses     - implicit register uses for all four instructions
//               (default [MXCSR]).
//   _mayRaiseFPException - value of mayRaiseFPException for all four
//               instructions (default 1).
7485multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7486                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7487                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7488                    string mem, list<Register> _Uses = [MXCSR],
7489                    bit _mayRaiseFPException = 1> {
7490let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7491    mayRaiseFPException = _mayRaiseFPException in {
7492  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7493    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7494              (ins DstVT.FRC:$src1, SrcRC:$src),
7495              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7496              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7497    let mayLoad = 1 in
7498      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7499              (ins DstVT.FRC:$src1, x86memop:$src),
7500              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7501              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7502  } // hasSideEffects = 0
7503  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7504                (ins DstVT.RC:$src1, SrcRC:$src2),
7505                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7506                [(set DstVT.RC:$dst,
7507                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7508               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7509
7510  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7511                (ins DstVT.RC:$src1, x86memop:$src2),
7512                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7513                [(set DstVT.RC:$dst,
7514                      (OpNode (DstVT.VT DstVT.RC:$src1),
7515                               (ld_frag addr:$src2)))]>,
7516                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7517}
  // AT&T-variant alias: the mnemonic with the explicit size suffix
  // ("v" # asm # mem) maps to the register form rr_Int.
7518  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7519                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7520                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7521}
7522
// Rounding-control form of a scalar integer-to-floating-point conversion.
// Defines rrb_Int, a register-only instruction that takes an extra
// AVX512RC:$rc operand, is encoded with EVEX_B and EVEX_RC, uses MXCSR, and
// matches (OpNode $src1, $src2, (i32 timm:$rc)).  Also defines an AT&T-variant
// alias that accepts the mnemonic with the explicit size suffix
// ("v" # asm # mem) for rrb_Int.
7523multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7524                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7525                               X86VectorVTInfo DstVT, string asm,
7526                               string mem> {
7527  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7528  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7529              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7530              !strconcat(asm,
7531                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7532              [(set DstVT.RC:$dst,
7533                    (OpNode (DstVT.VT DstVT.RC:$src1),
7534                             SrcRC:$src2,
7535                             (i32 timm:$rc)))]>,
7536              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7537  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7538                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7539                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7540}
7541
// Combines the rounding-control form (avx512_vcvtsi_round, matched with
// OpNodeRnd) and the non-rounding forms (avx512_vcvtsi, matched with OpNode)
// under one NAME, and marks the resulting instructions VEX_LIG.
// avx512_vcvtsi is instantiated with its default _Uses and
// _mayRaiseFPException arguments.
7542multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7543                                X86FoldableSchedWrite sched,
7544                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
7545                                X86MemOperand x86memop, PatFrag ld_frag,
7546                                string asm, string mem> {
7547  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7548              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7549                            ld_frag, asm, mem>, VEX_LIG;
7550}
7551
// Signed and unsigned scalar integer-to-floating-point conversions, all
// guarded by HasAVX512: instruction definitions, suffix-less memory aliases,
// and selection patterns for the scalar any_sint_to_fp / any_uint_to_fp nodes.
7552let Predicates = [HasAVX512] in {
// Signed conversions (opcode 0x2A).  The 64-bit-source variants add REX_W.
7553defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7554                                 WriteCvtI2SS, GR32,
7555                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7556                                 XS, EVEX_CD8<32, CD8VT1>;
7557defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7558                                 WriteCvtI2SS, GR64,
7559                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7560                                 XS, REX_W, EVEX_CD8<64, CD8VT1>;
// The 32-bit-integer to double form is built from avx512_vcvtsi directly, so
// it has no rounding-control (rrb_Int) variant.  It passes null_frag as the
// node, an empty Uses list and mayRaiseFPException = 0.
// NOTE(review): presumably because an i32 -> f64 conversion is always exact;
// confirm against the ISA reference.
7561defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7562                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7563                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7564defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7565                                 WriteCvtI2SD, GR64,
7566                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7567                                 XD, REX_W, EVEX_CD8<64, CD8VT1>;
7568
// AT&T-variant aliases: a suffix-less mnemonic with a memory operand maps to
// the 32-bit (i32mem) memory form.
7569def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7570              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7571def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7572              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7573
// Scalar signed int -> fp from a load: select the FRC memory forms, passing
// IMPLICIT_DEF as the $src1 operand.
7574def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7575          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7576def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7577          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7578def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7579          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7580def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7581          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7582
// Scalar signed int -> fp from a register: select the FRC register forms.
7583def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7584          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7585def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7586          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7587def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7588          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7589def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7590          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7591
// Unsigned conversions (opcode 0x7B); same structure as the signed group
// above, including the directly-instantiated 32-bit-integer to double form.
7592defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7593                                  WriteCvtI2SS, GR32,
7594                                  v4f32x_info, i32mem, loadi32,
7595                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7596defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7597                                  WriteCvtI2SS, GR64,
7598                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7599                                  XS, REX_W, EVEX_CD8<64, CD8VT1>;
7600defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7601                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7602                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7603defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7604                                  WriteCvtI2SD, GR64,
7605                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7606                                  XD, REX_W, EVEX_CD8<64, CD8VT1>;
7607
// Suffix-less AT&T aliases for the unsigned memory forms (i32mem).
7608def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7609              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7610def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7611              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7612
// Scalar unsigned int -> fp from a load.
7613def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7614          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7615def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7616          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7617def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7618          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7619def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7620          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7621
// Scalar unsigned int -> fp from a register.
7622def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7623          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7624def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7625          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7626def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7627          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7628def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7629          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7630}
7631
7632//===----------------------------------------------------------------------===//
7633// AVX-512  Scalar convert from float/double to integer
7634//===----------------------------------------------------------------------===//
7635
// Vector-register ("_Int") forms of a scalar floating-point-to-integer
// conversion, guarded by predicate `prd` (default HasAVX512):
//   rr_Int  - register source, matches (OpNode $src); marked SIMD_EXC.
//   rrb_Int - register source plus AVX512RC:$rc, matches
//             (OpNodeRnd $src, (i32 timm:$rc)); encoded with EVEX_B and
//             EVEX_RC and declared to use MXCSR (no SIMD_EXC here).
//   rm_Int  - memory source, matches OpNode on SrcVT.ScalarIntMemFrags;
//             marked SIMD_EXC.
// After the predicate scope, three AT&T-variant aliases accept the mnemonic
// with `aliasStr` appended ("v" # asm # aliasStr) for each of those forms.
7636multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7637                                  X86VectorVTInfo DstVT, SDNode OpNode,
7638                                  SDNode OpNodeRnd,
7639                                  X86FoldableSchedWrite sched, string asm,
7640                                  string aliasStr, Predicate prd = HasAVX512> {
7641  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7642    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7643                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7644                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7645                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7646    let Uses = [MXCSR] in
7647    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7648                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7649                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7650                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7651                 Sched<[sched]>;
7652    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7653                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7654                [(set DstVT.RC:$dst, (OpNode
7655                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7656                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7657  } // Predicates = [prd]
7658
7659  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7660          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7661  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7662          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7663  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7664          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7665                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7666}
7667
7668// Convert float/double to signed/unsigned int 32/64
// Signed conversions use opcode 0x2D with X86cvts2si / X86cvts2siRnd; unsigned
// conversions use opcode 0x79 with X86cvts2usi / X86cvts2usiRnd.  Variants
// with a 64-bit integer destination (i64x_info) add REX_W and use the "{q}"
// alias suffix; 32-bit destinations use "{l}".  EVEX_CD8 follows the source
// element size (32 for f32 sources, 64 for f64 sources).
7669defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7670                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7671                                   XS, EVEX_CD8<32, CD8VT1>;
7672defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7673                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7674                                   XS, REX_W, EVEX_CD8<32, CD8VT1>;
7675defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7676                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7677                                   XS, EVEX_CD8<32, CD8VT1>;
7678defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7679                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7680                                   XS, REX_W, EVEX_CD8<32, CD8VT1>;
7681defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7682                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7683                                   XD, EVEX_CD8<64, CD8VT1>;
7684defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7685                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7686                                   XD, REX_W, EVEX_CD8<64, CD8VT1>;
7687defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7688                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7689                                   XD, EVEX_CD8<64, CD8VT1>;
7690defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7691                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7692                                   XD, REX_W, EVEX_CD8<64, CD8VT1>;
7693
// Scalar-register forms of a floating-point-to-integer conversion, guarded by
// HasAVX512.  Both definitions are isCodeGenOnly and marked SIMD_EXC:
//   rr - source in the scalar FP register class (SrcVT.FRC), matches
//        (OpNode $src).
//   rm - source loaded through SrcVT.ScalarLdFrag, matches OpNode on the load.
// Unlike the "_Int" multiclasses, `asm` is used as the complete mnemonic here
// (callers pass it with the leading "v").
7694multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7695                        X86VectorVTInfo DstVT, SDNode OpNode,
7696                        X86FoldableSchedWrite sched> {
7697  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7698    let isCodeGenOnly = 1 in {
7699    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7700                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7701                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7702                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7703    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7704                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7705                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7706                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7707    }
7708  } // Predicates = [HasAVX512]
7709}
7710
// Add the scalar-register rr/rm forms to the VCVTS{S,D}2SI(64)Z families
// (these defm names are shared with the "_Int" forms defined earlier; the
// record suffixes differ).  The 32-bit-destination variants match lrint and
// the 64-bit-destination variants match llrint and add REX_W.
7711defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7712                       lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
7713defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7714                       llrint, WriteCvtSS2I>, REX_W, XS, EVEX_CD8<32, CD8VT1>;
7715defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7716                       lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
7717defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7718                       llrint, WriteCvtSD2I>, REX_W, XD, EVEX_CD8<64, CD8VT1>;
7719
// lrint producing an i64 result: select the 64-bit-destination instructions,
// whose own definitions above match llrint rather than lrint.  Covers register
// and load sources for both f32 and f64.
7720let Predicates = [HasAVX512] in {
7721  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7722  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7723
7724  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7725  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7726}
7727
7728// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7729// which produce unnecessary vmovs{s,d} instructions
// Each pattern matches X86Movss/X86Movsd of a vector $dst with a
// scalar_to_vector of an integer-to-fp conversion and selects the matching
// "_Int" instruction, with $dst passed as the instruction's first source.
// The unsigned (vcvtusi2s{s,d}) conversions are covered the same way in the
// second half of the block.
7730let Predicates = [HasAVX512] in {
// Signed, f32 result.
7731def : Pat<(v4f32 (X86Movss
7732                   (v4f32 VR128X:$dst),
7733                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7734          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7735
7736def : Pat<(v4f32 (X86Movss
7737                   (v4f32 VR128X:$dst),
7738                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7739          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7740
7741def : Pat<(v4f32 (X86Movss
7742                   (v4f32 VR128X:$dst),
7743                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7744          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7745
7746def : Pat<(v4f32 (X86Movss
7747                   (v4f32 VR128X:$dst),
7748                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7749          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7750
// Signed, f64 result.
7751def : Pat<(v2f64 (X86Movsd
7752                   (v2f64 VR128X:$dst),
7753                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7754          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7755
7756def : Pat<(v2f64 (X86Movsd
7757                   (v2f64 VR128X:$dst),
7758                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7759          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7760
7761def : Pat<(v2f64 (X86Movsd
7762                   (v2f64 VR128X:$dst),
7763                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7764          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7765
7766def : Pat<(v2f64 (X86Movsd
7767                   (v2f64 VR128X:$dst),
7768                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7769          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7770
// Unsigned, f32 result.
7771def : Pat<(v4f32 (X86Movss
7772                   (v4f32 VR128X:$dst),
7773                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7774          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7775
7776def : Pat<(v4f32 (X86Movss
7777                   (v4f32 VR128X:$dst),
7778                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7779          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7780
7781def : Pat<(v4f32 (X86Movss
7782                   (v4f32 VR128X:$dst),
7783                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7784          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7785
7786def : Pat<(v4f32 (X86Movss
7787                   (v4f32 VR128X:$dst),
7788                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7789          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7790
// Unsigned, f64 result.
7791def : Pat<(v2f64 (X86Movsd
7792                   (v2f64 VR128X:$dst),
7793                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7794          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7795
7796def : Pat<(v2f64 (X86Movsd
7797                   (v2f64 VR128X:$dst),
7798                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7799          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7800
7801def : Pat<(v2f64 (X86Movsd
7802                   (v2f64 VR128X:$dst),
7803                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7804          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7805
7806def : Pat<(v2f64 (X86Movsd
7807                   (v2f64 VR128X:$dst),
7808                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7809          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7810} // Predicates = [HasAVX512]
7811
7812// Convert float/double to signed/unsigned int 32/64 with truncation
// Defines five instructions under predicate `prd` (default HasAVX512):
//   rr, rm  - isCodeGenOnly forms on the scalar FP register class / scalar
//             load, matching OpNode.
//   rr_Int  - vector-register source, matching OpNodeInt.
//   rrb_Int - vector-register source with "{sae}" in the asm string, matching
//             OpNodeSAE; encoded with EVEX_B and declared to use MXCSR.  It is
//             the only form here without SIMD_EXC.
//   rm_Int  - memory source through _SrcRC.ScalarIntMemFrags, matching
//             OpNodeInt.
// After the predicate scope, AT&T-variant aliases accept the mnemonic with
// `aliasStr` appended (asm # aliasStr) for rr_Int, rrb_Int and rm_Int.
7813multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7814                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7815                            SDNode OpNodeInt, SDNode OpNodeSAE,
7816                            X86FoldableSchedWrite sched, string aliasStr,
7817                            Predicate prd = HasAVX512> {
7818let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7819  let isCodeGenOnly = 1 in {
7820  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7821              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7822              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7823              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7824  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7825              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7826              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7827              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7828  }
7829
7830  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7831            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7832           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7833           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7834  let Uses = [MXCSR] in
7835  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7836            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7837            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7838                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7839  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7840              (ins _SrcRC.IntScalarMemOp:$src),
7841              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7842              [(set _DstRC.RC:$dst,
7843                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7844              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7845} // Predicates = [prd]
7846
7847  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7848          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7849  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7850          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7851  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7852          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7853                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7854}
7855
// Truncating conversions to signed integers: opcode 0x2C, matching
// any_fp_to_sint (scalar forms) and X86cvtts2Int / X86cvtts2IntSAE (vector
// forms).  64-bit destinations add REX_W and use the "{q}" alias suffix.
7856defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7857                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7858                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7859defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7860                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7861                        "{q}">, REX_W, XS, EVEX_CD8<32, CD8VT1>;
7862defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7863                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7864                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7865defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7866                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7867                        "{q}">, REX_W, XD, EVEX_CD8<64, CD8VT1>;
7868
// Truncating conversions to unsigned integers: opcode 0x78, matching
// any_fp_to_uint and X86cvtts2UInt / X86cvtts2UIntSAE.
7869defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7870                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7871                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7872defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7873                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7874                        "{q}">, XS,REX_W, EVEX_CD8<32, CD8VT1>;
7875defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7876                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7877                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7878defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7879                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7880                        "{q}">, XD, REX_W, EVEX_CD8<64, CD8VT1>;
7881
7882//===----------------------------------------------------------------------===//
7883// AVX-512  Convert from float to double and back
7884//===----------------------------------------------------------------------===//
7885
// Non-rounding forms of a scalar floating-point-to-floating-point conversion
// from type `_Src` to type `_`.  Everything defined here uses MXCSR and is
// marked mayRaiseFPException = 1.
//   rr_Int, rm_Int - maskable forms (via AVX512_maskable_scalar) on the vector
//                    register class, matching (OpNode $src1, $src2) with $src2
//                    a register or a load through _Src.ScalarIntMemFrags.
//   rr, rm         - isCodeGenOnly forms on the scalar FP register classes;
//                    they carry no selection pattern and are marked
//                    hasSideEffects = 0 (rm additionally mayLoad = 1).
7886let Uses = [MXCSR], mayRaiseFPException = 1 in
7887multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7888                                X86VectorVTInfo _Src, SDNode OpNode,
7889                                X86FoldableSchedWrite sched> {
7890  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7891                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7892                         "$src2, $src1", "$src1, $src2",
7893                         (_.VT (OpNode (_.VT _.RC:$src1),
7894                                       (_Src.VT _Src.RC:$src2)))>,
7895                         EVEX_4V, VEX_LIG, Sched<[sched]>;
7896  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7897                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7898                         "$src2, $src1", "$src1, $src2",
7899                         (_.VT (OpNode (_.VT _.RC:$src1),
7900                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
7901                         EVEX_4V, VEX_LIG,
7902                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7903
7904  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7905    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7906               (ins _.FRC:$src1, _Src.FRC:$src2),
7907               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7908               EVEX_4V, VEX_LIG, Sched<[sched]>;
7909    let mayLoad = 1 in
7910    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7911               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7912               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7913               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7914  }
7915}
7916
7917// Scalar Conversion with SAE - suppress all exceptions
// Defines rrb_Int: a maskable, register-only form whose asm string carries
// "{sae}", encoded with EVEX_B, declared to use MXCSR, and matching
// (OpNodeSAE $src1, $src2).
7918multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7919                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
7920                                    X86FoldableSchedWrite sched> {
7921  let Uses = [MXCSR] in
7922  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7923                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7924                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7925                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7926                                         (_Src.VT _Src.RC:$src2)))>,
7927                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7928}
7929
7930// Scalar Conversion with rounding control (RC)
// Defines a single register-register form, rrb_Int, that takes an additional
// AVX512RC:$rc operand; the pattern hands it to OpNodeRnd as (i32 timm:$rc).
// The record is encoded with EVEX_B and EVEX_RC. This multiclass sets only
// Uses = [MXCSR]; mayRaiseFPException is not set here.
7931multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7932                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
7933                                   X86FoldableSchedWrite sched> {
7934  let Uses = [MXCSR] in
7935  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7936                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7937                        "$rc, $src2, $src1", "$src1, $src2, $rc",
7938                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7939                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7940                        EVEX_4V, VEX_LIG, Sched<[sched]>,
7941                        EVEX_B, EVEX_RC;
7942}
// Scalar truncating conversion. Under the "Z" name suffix this instantiates
// the base forms (avx512_cvt_fp_scalar, matched by OpNode) together with the
// rounding-control form (avx512_cvt_fp_rc_scalar, matched by OpNodeRnd).
// Argument order: this multiclass takes (_src, _dst), whereas the helper
// multiclasses take (result info, source info) - hence "_dst, _src" below.
// All records are gated on prd (default HasAVX512), and EVEX_CD8 is derived
// from the source element size.
// NOTE(review): ExeDomain is SSEPackedSingle for every instantiation,
// whatever the element types are - confirm this is intended.
7943multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7944                                      SDNode OpNode, SDNode OpNodeRnd,
7945                                      X86FoldableSchedWrite sched,
7946                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
7947                                      Predicate prd = HasAVX512> {
7948  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7949    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7950             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7951                               OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7952  }
7953}
7954
// Scalar extending conversion. Under the "Z" name suffix this instantiates
// the base forms (avx512_cvt_fp_scalar, matched by OpNode) together with the
// SAE form (avx512_cvt_fp_sae_scalar, matched by OpNodeSAE).
// Argument order: this multiclass takes (_src, _dst), whereas the helper
// multiclasses take (result info, source info) - hence "_dst, _src" below.
// All records are gated on prd (default HasAVX512), and EVEX_CD8 is derived
// from the source element size.
// NOTE(review): ExeDomain is SSEPackedSingle for every instantiation,
// whatever the element types are - confirm this is intended.
7955multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7956                                       SDNode OpNode, SDNode OpNodeSAE,
7957                                       X86FoldableSchedWrite sched,
7958                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7959                                       Predicate prd = HasAVX512> {
7960  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7961    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7962             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7963             EVEX_CD8<_src.EltSize, CD8VT1>;
7964  }
7965}
// Scalar FP-to-FP conversion instructions.
//   VCVTSD2SS / VCVTSS2SD - f64 <-> f32, default predicate (HasAVX512).
//   VCVTSD2SH / VCVTSH2SD - f64 <-> f16, gated on HasFP16.
//   VCVTSS2SH / VCVTSH2SS - f32 <-> f16, gated on HasFP16.
// Truncations use X86frounds / X86froundsRnd; extensions use
// X86fpexts / X86fpextsSAE. The f16 variants reuse the WriteCvtSD2SS /
// WriteCvtSS2SD scheduling classes of the f32/f64 instructions.
7966defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7967                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7968                                         f32x_info>, XD, REX_W;
7969defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7970                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7971                                          f64x_info>, XS;
7972defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7973                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7974                                          f16x_info, HasFP16>, T_MAP5XD, REX_W;
7975defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7976                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7977                                          f64x_info, HasFP16>, T_MAP5XS;
7978defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7979                                          X86froundsRnd, WriteCvtSD2SS, f32x_info,
7980                                          f16x_info, HasFP16>, T_MAP5PS;
7981defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7982                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7983                                          f32x_info, HasFP16>, T_MAP6PS;
7984
// Selection of scalar any_fpextend / any_fpround onto the codegen-only rr / rm
// forms. The first instruction operand is always an IMPLICIT_DEF of the result
// type. Load-folding (rm) patterns exist only for the extends and additionally
// require OptForSize; the rounds have register-source patterns only.
// NOTE(review): presumably the load is folded only under OptForSize because
// of the undefined first operand - confirm the rationale.

// f32 -> f64
7985def : Pat<(f64 (any_fpextend FR32X:$src)),
7986          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7987          Requires<[HasAVX512]>;
7988def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7989          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7990          Requires<[HasAVX512, OptForSize]>;
7991
// f64 -> f32
7992def : Pat<(f32 (any_fpround FR64X:$src)),
7993          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7994           Requires<[HasAVX512]>;
7995
// f16 -> f32
7996def : Pat<(f32 (any_fpextend FR16X:$src)),
7997          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7998          Requires<[HasFP16]>;
7999def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
8000          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
8001          Requires<[HasFP16, OptForSize]>;
8002
// f16 -> f64
8003def : Pat<(f64 (any_fpextend FR16X:$src)),
8004          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
8005          Requires<[HasFP16]>;
8006def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
8007          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
8008          Requires<[HasFP16, OptForSize]>;
8009
// f32 -> f16 and f64 -> f16
8010def : Pat<(f16 (any_fpround FR32X:$src)),
8011          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
8012           Requires<[HasFP16]>;
8013def : Pat<(f16 (any_fpround FR64X:$src)),
8014          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
8015           Requires<[HasFP16]>;
8016
// Patterns of the shape
//   X86Movss/X86Movsd($dst, scalar_to_vector(convert(extractelt($src, 0))))
// are selected directly to the two-source rr_Int forms, with $dst as the
// first source operand and $src as the second.

// f64 element 0 of $src rounded to f32.
8017def : Pat<(v4f32 (X86Movss
8018                   (v4f32 VR128X:$dst),
8019                   (v4f32 (scalar_to_vector
8020                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
8021          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
8022          Requires<[HasAVX512]>;
8023
// f32 element 0 of $src extended to f64.
8024def : Pat<(v2f64 (X86Movsd
8025                   (v2f64 VR128X:$dst),
8026                   (v2f64 (scalar_to_vector
8027                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
8028          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
8029          Requires<[HasAVX512]>;
8030
8031//===----------------------------------------------------------------------===//
8032// AVX-512  Vector convert from signed/unsigned integer to float/double
8033//          and from float/double to signed/unsigned integer
8034//===----------------------------------------------------------------------===//
8035
// Packed conversion forms: register (rr), full-vector memory (rm) and
// broadcast memory (rmb). Each is built through AVX512_maskable_cvt with
// three explicit patterns: unmasked, masked selecting against $src0, and
// masked selecting against an all-zeros vector.
//   _ / _Src    - result / source type info.
//   OpNode      - node used by the unmasked patterns.
//   MaskOpNode  - node used inside vselect_mask by the masked patterns.
//   Broadcast   - text appended to "${src}" in the rmb assembly strings
//                 (default _.BroadcastStr).
//   Alias       - text appended to the mnemonic of the rm form only.
//   MemOp       - memory operand of the rm form (default _Src.MemOp).
//   MaskRC      - write-mask register class (default _.KRCWM).
//   LdDAG / MaskLdDAG - unmasked / masked source patterns of the rm form;
//                 by default OpNode / MaskOpNode applied to _Src.LdFrag.
8036multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8037                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
8038                          X86FoldableSchedWrite sched,
8039                          string Broadcast = _.BroadcastStr,
8040                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8041                          RegisterClass MaskRC = _.KRCWM,
8042                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
8043                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
// All three forms use MXCSR and may raise FP exceptions.
8044let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source.
8045  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
8046                         (ins _Src.RC:$src),
8047                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
8048                         (ins MaskRC:$mask, _Src.RC:$src),
8049                          OpcodeStr, "$src", "$src",
8050                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
8051                         (vselect_mask MaskRC:$mask,
8052                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
8053                                       _.RC:$src0),
8054                         (vselect_mask MaskRC:$mask,
8055                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
8056                                       _.ImmAllZerosV)>,
8057                         EVEX, Sched<[sched]>;
8058
  // Memory source through MemOp; the mnemonic is OpcodeStr # Alias and the
  // patterns come from the caller-overridable LdDAG / MaskLdDAG.
8059  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8060                         (ins MemOp:$src),
8061                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
8062                         (ins MaskRC:$mask, MemOp:$src),
8063                         OpcodeStr#Alias, "$src", "$src",
8064                         LdDAG,
8065                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
8066                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
8067                         EVEX, Sched<[sched.Folded]>;
8068
  // Broadcast memory source (_Src.ScalarMemOp matched by
  // _Src.BroadcastLdFrag), encoded with EVEX_B. Alias is not applied here.
8069  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8070                         (ins _Src.ScalarMemOp:$src),
8071                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
8072                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
8073                         OpcodeStr,
8074                         "${src}"#Broadcast, "${src}"#Broadcast,
8075                         (_.VT (OpNode (_Src.VT
8076                                  (_Src.BroadcastLdFrag addr:$src))
8077                            )),
8078                         (vselect_mask MaskRC:$mask,
8079                                       (_.VT
8080                                        (MaskOpNode
8081                                         (_Src.VT
8082                                          (_Src.BroadcastLdFrag addr:$src)))),
8083                                       _.RC:$src0),
8084                         (vselect_mask MaskRC:$mask,
8085                                       (_.VT
8086                                        (MaskOpNode
8087                                         (_Src.VT
8088                                          (_Src.BroadcastLdFrag addr:$src)))),
8089                                       _.ImmAllZerosV)>,
8090                         EVEX, EVEX_B, Sched<[sched.Folded]>;
8091  }
8092}
8093// Conversion with SAE - suppress all exceptions
// Defines a single register form, rrb, built with AVX512_maskable from one
// pattern on OpNodeSAE and encoded with EVEX_B. "{sae}" appears literally in
// both assembly operand strings. This multiclass sets only Uses = [MXCSR];
// mayRaiseFPException is not set here.
8094multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8095                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
8096                              X86FoldableSchedWrite sched> {
8097  let Uses = [MXCSR] in
8098  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8099                        (ins _Src.RC:$src), OpcodeStr,
8100                        "{sae}, $src", "$src, {sae}",
8101                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
8102                        EVEX, EVEX_B, Sched<[sched]>;
8103}
8104
8105// Conversion with rounding control (RC)
// Defines a single register form, rrb, built with AVX512_maskable. It takes
// an additional AVX512RC:$rc operand that the pattern passes to OpNodeRnd as
// (i32 timm:$rc), and is encoded with EVEX_B and EVEX_RC. This multiclass
// sets only Uses = [MXCSR]; mayRaiseFPException is not set here.
8106multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8107                         X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
8108                         X86FoldableSchedWrite sched> {
8109  let Uses = [MXCSR] in
8110  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8111                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
8112                        "$rc, $src", "$src, $rc",
8113                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
8114                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8115}
8116
8117// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// LdDAG and MaskLdDAG are both replaced by the same fragment, looked up by
// name as "extload" # _Src.VTName, so OpNode / MaskOpNode do not appear in
// the rm patterns (they are still used by rr and rmb). Every other argument
// is forwarded to avx512_vcvt_fp unchanged.
8118multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8119                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
8120                                SDNode MaskOpNode,
8121                                X86FoldableSchedWrite sched,
8122                                string Broadcast = _.BroadcastStr,
8123                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8124                                RegisterClass MaskRC = _.KRCWM>
8125  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
8126                   Alias, MemOp, MaskRC,
8127                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
8128                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
8129
8130// Extend [Float to Double, Half to Float]
// Source / destination shapes per vector length:
//   Z    [prd]          _src.info256 -> _dst.info512, plus the SAE register
//                       form (X86vfpextSAE).
//   Z128 [prd, HasVLX]  _src.info128 -> _dst.info128, using X86any_vfpext /
//                       X86vfpext, the destination's broadcast string and an
//                       explicit f64mem memory operand.
//   Z256 [prd, HasVLX]  _src.info128 -> _dst.info256.
// Z and Z256 use the generic any_fpextend / fpextend nodes.
8131multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
8132                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8133                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
8134  let Predicates = [prd] in {
8135    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
8136                            any_fpextend, fpextend, sched.ZMM>,
8137             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
8138                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
8139  }
8140  let Predicates = [prd, HasVLX] in {
8141    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
8142                               X86any_vfpext, X86vfpext, sched.XMM,
8143                               _dst.info128.BroadcastStr,
8144                               "", f64mem>, EVEX_V128;
8145    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
8146                               any_fpextend, fpextend, sched.YMM>, EVEX_V256;
8147  }
8148}
8149
8150// Truncate [Double to Float, Float to Half]
// Source / destination shapes per vector length:
//   Z    [prd]          _src.info512 -> _dst.info256, plus the
//                       rounding-control register form (X86vfproundRnd).
//   Z128 [prd, HasVLX]  _src.info128 -> _dst.info128; both pattern operators
//                       are null_frag and the selection patterns are given
//                       by the explicit Pat<> records below. The rm mnemonic
//                       gets an "{x}" suffix.
//   Z256 [prd, HasVLX]  _src.info256 -> _dst.info128; the rm mnemonic gets a
//                       "{y}" suffix.
// bcast128 / loadVT128 / maskRC128 parameterize the explicit 128-bit patterns
// and default to the corresponding members of _src.info128.
8151multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
8152                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8153                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
8154                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
8155                            PatFrag loadVT128 = _src.info128.LdFrag,
8156                            RegisterClass maskRC128 = _src.info128.KRCWM> {
8157  let Predicates = [prd] in {
8158    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
8159                            X86any_vfpround, X86vfpround, sched.ZMM>,
8160             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8161                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8162  }
8163  let Predicates = [prd, HasVLX] in {
8164    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
8165                               null_frag, null_frag, sched.XMM,
8166                               _src.info128.BroadcastStr, "{x}",
8167                               f128mem, maskRC128>, EVEX_V128;
8168    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
8169                               X86any_vfpround, X86vfpround,
8170                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
8171
8172    // Special patterns to allow use of X86vmfpround for masking. Instruction
8173    // patterns have been disabled with null_frag.
    // Register source: unmasked, masked with pass-through $src0, masked with
    // an all-zeros pass-through.
8174    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
8175              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8176    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8177                            maskRC128:$mask),
8178              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
8179    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8180                            maskRC128:$mask),
8181              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
8182
    // Full-vector load source (loadVT128), same three variants.
8183    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
8184              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8185    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
8186                            maskRC128:$mask),
8187              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8188    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
8189                            maskRC128:$mask),
8190              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
8191
    // Broadcast load source (bcast128), same three variants.
8192    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
8193              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8194    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8195                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
8196              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8197    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8198                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
8199              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
8200  }
8201
  // Aliases tagged "att" that accept an explicit "x" mnemonic suffix for the
  // Z128 records (VR128X source) and "y" for the Z256 records (VR256X
  // source), covering the register and broadcast forms with and without
  // masking.
  // NOTE(review): the aliases hard-code VK2WM / VK4WM, f64mem and
  // {1to2} / {1to4}, and sit outside the Predicates blocks above. This
  // multiclass is also instantiated with an f32 source (VCVTPS2PHX), whose
  // 128-bit mask class is passed in separately as maskRC128 - confirm the
  // hard-coded values are intended for that instantiation.
8202  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8203                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8204  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8205                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8206                  VK2WM:$mask, VR128X:$src), 0, "att">;
8207  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
8208                  "$dst {${mask}} {z}, $src}",
8209                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8210                  VK2WM:$mask, VR128X:$src), 0, "att">;
8211  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8212                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8213  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8214                  "$dst {${mask}}, ${src}{1to2}}",
8215                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8216                  VK2WM:$mask, f64mem:$src), 0, "att">;
8217  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8218                  "$dst {${mask}} {z}, ${src}{1to2}}",
8219                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8220                  VK2WM:$mask, f64mem:$src), 0, "att">;
8221
8222  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8223                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8224  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8225                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8226                  VK4WM:$mask, VR256X:$src), 0, "att">;
8227  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8228                  "$dst {${mask}} {z}, $src}",
8229                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8230                  VK4WM:$mask, VR256X:$src), 0, "att">;
8231  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8232                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8233  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8234                  "$dst {${mask}}, ${src}{1to4}}",
8235                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8236                  VK4WM:$mask, f64mem:$src), 0, "att">;
8237  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8238                  "$dst {${mask}} {z}, ${src}{1to4}}",
8239                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8240                  VK4WM:$mask, f64mem:$src), 0, "att">;
8241}
8242
// Packed f64 <-> f32 conversions under the default HasAVX512 predicate. Both
// share opcode 0x5A; VCVTPD2PS adds REX_W and the PD prefix with a 64-bit
// CD8VF tuple, VCVTPS2PD uses the PS prefix with a 32-bit CD8VH tuple.
8243defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
8244                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
8245                                  REX_W, PD, EVEX_CD8<64, CD8VF>;
8246defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
8247                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
8248                                   PS, EVEX_CD8<32, CD8VH>;
8249
8249// Extend Half to Double
// All three vector lengths use v8f16x_info as the source type info.
//   Z    [HasFP16]          -> v8f64_info, plus the SAE register form and an
//                           extra pattern mapping a plain
//                           (v8f64 (extloadv8f16 ...)) to the Zrm record.
//   Z128 [HasFP16, HasVLX]  -> v2f64x_info, broadcast string "{1to2}",
//                           f32mem memory operand.
//   Z256 [HasFP16, HasVLX]  -> v4f64x_info, broadcast string "{1to4}",
//                           f64mem memory operand.
8250multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
8251                            X86SchedWriteWidths sched> {
8252  let Predicates = [HasFP16] in {
8253    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
8254                                  any_fpextend, fpextend, sched.ZMM>,
8255             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
8256                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
8257    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
8258                (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
8259  }
8260  let Predicates = [HasFP16, HasVLX] in {
8261    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8262                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8263                                     f32mem>, EVEX_V128;
8264    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8265                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8266                                     f64mem>, EVEX_V256;
8267  }
8268}
8270
8270// Truncate Double to Half
// All three vector lengths use v8f16x_info as the result type info.
//   Z    [HasFP16]          v8f64_info source, "{1to8}" broadcast string,
//                           "{z}" rm mnemonic suffix, plus the
//                           rounding-control register form.
//   Z128 [HasFP16, HasVLX]  v2f64x_info source, "{1to2}", "{x}", f128mem,
//                           VK2WM mask class.
//   Z256 [HasFP16, HasVLX]  v4f64x_info source, "{1to4}", "{y}", f256mem,
//                           VK4WM mask class.
// Z128 and Z256 pass null_frag for both pattern operators, so these records
// carry no selection patterns of their own; the file provides separate Pat<>
// records for VCVTPD2PHZ128* / VCVTPD2PHZ256*.
8271multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8272  let Predicates = [HasFP16] in {
8273    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8274                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8275             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8276                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8277  }
8278  let Predicates = [HasFP16, HasVLX] in {
8279    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8280                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8281                               VK2WM>, EVEX_V128;
8282    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8283                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8284                               VK4WM>, EVEX_V256;
8285  }
  // Aliases tagged "att" that accept an explicit mnemonic suffix for the
  // register and broadcast forms (unmasked, masked, masked with {z}):
  // "x" -> Z128 records, "y" -> Z256 records, "z" -> Z records. The
  // destination is VR128X in every case and the broadcast forms use i64mem.
  // These aliases are defined outside the Predicates blocks above.
8286  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8287                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8288                  VR128X:$src), 0, "att">;
8289  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8290                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8291                  VK2WM:$mask, VR128X:$src), 0, "att">;
8292  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8293                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8294                  VK2WM:$mask, VR128X:$src), 0, "att">;
8295  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8296                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8297                  i64mem:$src), 0, "att">;
8298  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8299                  "$dst {${mask}}, ${src}{1to2}}",
8300                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8301                  VK2WM:$mask, i64mem:$src), 0, "att">;
8302  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8303                  "$dst {${mask}} {z}, ${src}{1to2}}",
8304                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8305                  VK2WM:$mask, i64mem:$src), 0, "att">;
8306
8307  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8308                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8309                  VR256X:$src), 0, "att">;
8310  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8311                  "$dst {${mask}}, $src}",
8312                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8313                  VK4WM:$mask, VR256X:$src), 0, "att">;
8314  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8315                  "$dst {${mask}} {z}, $src}",
8316                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8317                  VK4WM:$mask, VR256X:$src), 0, "att">;
8318  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8319                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8320                  i64mem:$src), 0, "att">;
8321  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8322                  "$dst {${mask}}, ${src}{1to4}}",
8323                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8324                  VK4WM:$mask, i64mem:$src), 0, "att">;
8325  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8326                  "$dst {${mask}} {z}, ${src}{1to4}}",
8327                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8328                  VK4WM:$mask, i64mem:$src), 0, "att">;
8329
8330  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8331                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8332                  VR512:$src), 0, "att">;
8333  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8334                  "$dst {${mask}}, $src}",
8335                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8336                  VK8WM:$mask, VR512:$src), 0, "att">;
8337  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8338                  "$dst {${mask}} {z}, $src}",
8339                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8340                  VK8WM:$mask, VR512:$src), 0, "att">;
8341  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8342                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8343                  i64mem:$src), 0, "att">;
8344  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8345                  "$dst {${mask}}, ${src}{1to8}}",
8346                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8347                  VK8WM:$mask, i64mem:$src), 0, "att">;
8348  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8349                  "$dst {${mask}} {z}, ${src}{1to8}}",
8350                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8351                  VK8WM:$mask, i64mem:$src), 0, "att">;
8352}
8354
// FP16 packed FP-to-FP conversions.
//   VCVTPS2PHX / VCVTPH2PSX - f32 <-> f16 through the generic trunc / extend
//                             multiclasses, with HasFP16 passed as predicate.
//   VCVTPD2PH  / VCVTPH2PD  - f64 <-> f16 through the dedicated multiclasses,
//                             which use HasFP16 internally.
// The scheduling classes are those of the f32/f64 conversions
// (SchedWriteCvtPD2PS / SchedWriteCvtPS2PD).
8355defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8356                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
8357                                   HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
8358defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8359                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
8360                                    HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
8361defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8362                                 REX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
8363defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8364                                 T_MAP5PS, EVEX_CD8<16, CD8VQ>;
8365
// Selection patterns for the VCVTPD2PHZ256* and VCVTPD2PHZ128* records. For
// each width there are three source kinds (register, full-vector load,
// X86VBroadcastld64 load) and for each source kind three variants: unmasked
// (X86any_vfpround), masked with pass-through $src0, and masked with an
// all-zeros pass-through (both via X86vmfpround).
8365let Predicates = [HasFP16, HasVLX] in {
8366  // Special patterns to allow use of X86vmfpround for masking. Instruction
8367  // patterns have been disabled with null_frag.
  // v4f64 -> v8f16, register source.
8368  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8369            (VCVTPD2PHZ256rr VR256X:$src)>;
8370  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8371                          VK4WM:$mask)),
8372            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8373  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8374                          VK4WM:$mask),
8375            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8376
  // v4f64 -> v8f16, full-vector load source.
8377  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8378            (VCVTPD2PHZ256rm addr:$src)>;
8379  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8380                          VK4WM:$mask),
8381            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8382  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8383                          VK4WM:$mask),
8384            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8385
  // v4f64 -> v8f16, broadcast load source.
8386  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8387            (VCVTPD2PHZ256rmb addr:$src)>;
8388  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8389                          (v8f16 VR128X:$src0), VK4WM:$mask),
8390            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8391  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8392                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8393            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8394
  // v2f64 -> v8f16, register source.
8395  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8396            (VCVTPD2PHZ128rr VR128X:$src)>;
8397  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8398                          VK2WM:$mask),
8399            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8400  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8401                          VK2WM:$mask),
8402            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8403
  // v2f64 -> v8f16, full-vector load source.
8404  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8405            (VCVTPD2PHZ128rm addr:$src)>;
8406  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8407                          VK2WM:$mask),
8408            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8409  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8410                          VK2WM:$mask),
8411            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8412
  // v2f64 -> v8f16, broadcast load source.
8413  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8414            (VCVTPD2PHZ128rmb addr:$src)>;
8415  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8416                          (v8f16 VR128X:$src0), VK2WM:$mask),
8417            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8418  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8419                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8420            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8421}
8423
8423// Convert Signed/Unsigned Doubleword to Double
//   OpNode / MaskOpNode       - nodes for the Z and Z256 forms.
//   OpNode128 / MaskOpNode128 - nodes for the Z128 form.
// NOTE(review): the outer `let` below presumably overrides the
// Uses = [MXCSR] / mayRaiseFPException = 1 that avx512_vcvt_fp applies to its
// records (see "No rounding in this op") - confirm against TableGen's `let`
// precedence rules.
8424let Uses = []<Register>, mayRaiseFPException = 0 in
8425multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8426                           SDNode MaskOpNode, SDPatternOperator OpNode128,
8427                           SDNode MaskOpNode128,
8428                           X86SchedWriteWidths sched> {
8429  // No rounding in this op
8430  let Predicates = [HasAVX512] in
8431    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8432                            MaskOpNode, sched.ZMM>, EVEX_V512;
8433
8434  let Predicates = [HasVLX] in {
    // Z128 converts to v2f64 from a v4i32 source. Its memory form uses an
    // i64mem operand, and the custom load patterns match an i64 load placed
    // into a v2i64 with scalar_to_vector and bitcast to v4i32.
8435    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8436                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8437                               "", i64mem, VK2WM,
8438                               (v2f64 (OpNode128 (bc_v4i32
8439                                (v2i64
8440                                 (scalar_to_vector (loadi64 addr:$src)))))),
8441                               (v2f64 (MaskOpNode128 (bc_v4i32
8442                                (v2i64
8443                                 (scalar_to_vector (loadi64 addr:$src))))))>,
8444                               EVEX_V128;
8445    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8446                               MaskOpNode, sched.YMM>, EVEX_V256;
8447  }
8448}
8450
8450// Convert Signed/Unsigned Doubleword to Float
// Source and result have the same element count at every vector length:
//   Z    [HasAVX512]  v16i32_info -> v16f32_info, plus the rounding-control
//                     register form matched by OpNodeRnd.
//   Z128 [HasVLX]     v4i32x_info -> v4f32x_info.
//   Z256 [HasVLX]     v8i32x_info -> v8f32x_info.
// All avx512_vcvt_fp defaults (broadcast string, memory operand, mask class,
// load patterns) are kept.
8451multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8452                           SDNode MaskOpNode, SDNode OpNodeRnd,
8453                           X86SchedWriteWidths sched> {
8454  let Predicates = [HasAVX512] in
8455    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8456                            MaskOpNode, sched.ZMM>,
8457             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8458                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8459
8460  let Predicates = [HasVLX] in {
8461    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8462                               MaskOpNode, sched.XMM>, EVEX_V128;
8463    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8464                               MaskOpNode, sched.YMM>, EVEX_V256;
8465  }
8466}
8468
8469// Convert Float to Signed/Unsigned Doubleword with truncation
// Only the 512-bit Z form additionally pulls in avx512_vcvt_fp_sae, which is
// driven by OpNodeSAE; the VLX forms use avx512_vcvt_fp alone.
8470multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8471                            SDNode MaskOpNode,
8472                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8473  let Predicates = [HasAVX512] in {
8474    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8475                            MaskOpNode, sched.ZMM>,
8476             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8477                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8478  }
8479  let Predicates = [HasVLX] in {
8480    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8481                               MaskOpNode, sched.XMM>, EVEX_V128;
8482    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8483                               MaskOpNode, sched.YMM>, EVEX_V256;
8484  }
8485}
8486
8487// Convert Float to Signed/Unsigned Doubleword
// Non-truncating counterpart of the truncating float-to-doubleword multiclass:
// the 512-bit Z form pulls in avx512_vcvt_fp_rc (driven by OpNodeRnd) rather
// than an _sae variant. The VLX forms use avx512_vcvt_fp alone.
8488multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8489                           SDNode MaskOpNode, SDNode OpNodeRnd,
8490                           X86SchedWriteWidths sched> {
8491  let Predicates = [HasAVX512] in {
8492    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8493                            MaskOpNode, sched.ZMM>,
8494             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8495                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8496  }
8497  let Predicates = [HasVLX] in {
8498    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8499                               MaskOpNode, sched.XMM>, EVEX_V128;
8500    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8501                               MaskOpNode, sched.YMM>, EVEX_V256;
8502  }
8503}
8504
8505// Convert Double to Signed/Unsigned Doubleword with truncation
// The 512-bit Z form also pulls in avx512_vcvt_fp_sae (driven by OpNodeSAE).
// Z128 is instantiated with null_frag for both nodes, so no selection patterns
// are attached to it by this multiclass. The InstAlias records that follow the
// VLX definitions are not wrapped in any Predicates block.
8506multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8507                            SDNode MaskOpNode, SDNode OpNodeSAE,
8508                            X86SchedWriteWidths sched> {
8509  let Predicates = [HasAVX512] in {
8510    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8511                            MaskOpNode, sched.ZMM>,
8512             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8513                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8514  }
8515  let Predicates = [HasVLX] in {
8516    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8517    // memory forms of these instructions in Asm Parser. They have the same
8518    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8519    // due to the same reason.
8520    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8521                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8522                               VK2WM>, EVEX_V128;
8523    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8524                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8525  }
8526
  // "x"-suffixed spellings of the Z128 register (rr/rrk/rrkz) and broadcast
  // (rmb/rmbk/rmbkz) forms, all tagged "att".
  // NOTE(review): the literal 0 is presumably the alias emit priority
  // (parse-only) - confirm against the InstAlias class definition.
8527  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8528                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8529                  VR128X:$src), 0, "att">;
8530  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8531                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8532                  VK2WM:$mask, VR128X:$src), 0, "att">;
8533  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8534                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8535                  VK2WM:$mask, VR128X:$src), 0, "att">;
8536  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8537                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8538                  f64mem:$src), 0, "att">;
8539  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8540                  "$dst {${mask}}, ${src}{1to2}}",
8541                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8542                  VK2WM:$mask, f64mem:$src), 0, "att">;
8543  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8544                  "$dst {${mask}} {z}, ${src}{1to2}}",
8545                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8546                  VK2WM:$mask, f64mem:$src), 0, "att">;
8547
  // "y"-suffixed spellings of the Z256 forms: the destination stays VR128X
  // while the source register is VR256X and the mask class is VK4WM.
8548  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8549                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8550                  VR256X:$src), 0, "att">;
8551  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8552                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8553                  VK4WM:$mask, VR256X:$src), 0, "att">;
8554  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8555                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8556                  VK4WM:$mask, VR256X:$src), 0, "att">;
8557  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8558                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8559                  f64mem:$src), 0, "att">;
8560  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8561                  "$dst {${mask}}, ${src}{1to4}}",
8562                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8563                  VK4WM:$mask, f64mem:$src), 0, "att">;
8564  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8565                  "$dst {${mask}} {z}, ${src}{1to4}}",
8566                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8567                  VK4WM:$mask, f64mem:$src), 0, "att">;
8568}
8569
8570// Convert Double to Signed/Unsigned Doubleword
// Non-truncating counterpart of the truncating double-to-doubleword
// multiclass: the 512-bit Z form pulls in avx512_vcvt_fp_rc (driven by
// OpNodeRnd) rather than an _sae variant. Z128 is instantiated with null_frag
// for both nodes, so no selection patterns are attached to it here.
8571multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8572                           SDNode MaskOpNode, SDNode OpNodeRnd,
8573                           X86SchedWriteWidths sched> {
8574  let Predicates = [HasAVX512] in {
8575    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8576                            MaskOpNode, sched.ZMM>,
8577             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8578                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8579  }
8580  let Predicates = [HasVLX] in {
8581    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8582    // memory forms of these instructions in Asm Parser. They have the same
8583    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8584    // due to the same reason.
8585    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8586                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8587                               VK2WM>, EVEX_V128;
8588    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8589                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8590  }
8591
  // "x"-suffixed spellings of the Z128 register (rr/rrk/rrkz) and broadcast
  // (rmb/rmbk/rmbkz) forms, all tagged "att".
  // NOTE(review): the literal 0 is presumably the alias emit priority
  // (parse-only) - confirm against the InstAlias class definition.
8592  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8593                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8594  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8595                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8596                  VK2WM:$mask, VR128X:$src), 0, "att">;
8597  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8598                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8599                  VK2WM:$mask, VR128X:$src), 0, "att">;
8600  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8601                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8602                  f64mem:$src), 0, "att">;
8603  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8604                  "$dst {${mask}}, ${src}{1to2}}",
8605                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8606                  VK2WM:$mask, f64mem:$src), 0, "att">;
8607  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8608                  "$dst {${mask}} {z}, ${src}{1to2}}",
8609                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8610                  VK2WM:$mask, f64mem:$src), 0, "att">;
8611
  // "y"-suffixed spellings of the Z256 forms: the destination stays VR128X
  // while the source register is VR256X and the mask class is VK4WM.
8612  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8613                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8614  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8615                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8616                  VK4WM:$mask, VR256X:$src), 0, "att">;
8617  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8618                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8619                  VK4WM:$mask, VR256X:$src), 0, "att">;
8620  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8621                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8622                  f64mem:$src), 0, "att">;
8623  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8624                  "$dst {${mask}}, ${src}{1to4}}",
8625                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8626                  VK4WM:$mask, f64mem:$src), 0, "att">;
8627  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8628                  "$dst {${mask}} {z}, ${src}{1to4}}",
8629                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8630                  VK4WM:$mask, f64mem:$src), 0, "att">;
8631}
8632
8633// Convert Double to Signed/Unsigned Quadword
// Gated on HasDQI (plus HasVLX for the 128/256-bit forms). Source and
// destination infos have the same element count at every width. Only the
// 512-bit Z form additionally pulls in avx512_vcvt_fp_rc, driven by OpNodeRnd.
8634multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8635                           SDNode MaskOpNode, SDNode OpNodeRnd,
8636                           X86SchedWriteWidths sched> {
8637  let Predicates = [HasDQI] in {
8638    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8639                            MaskOpNode, sched.ZMM>,
8640             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8641                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8642  }
8643  let Predicates = [HasDQI, HasVLX] in {
8644    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8645                               MaskOpNode, sched.XMM>, EVEX_V128;
8646    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8647                               MaskOpNode, sched.YMM>, EVEX_V256;
8648  }
8649}
8650
8651// Convert Double to Signed/Unsigned Quadword with truncation
// Gated on HasDQI (plus HasVLX for the 128/256-bit forms).
// Note: despite its name, the OpNodeRnd parameter is forwarded to
// avx512_vcvt_fp_sae (not an _rc variant) for the 512-bit Z form.
8652multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8653                            SDNode MaskOpNode, SDNode OpNodeRnd,
8654                            X86SchedWriteWidths sched> {
8655  let Predicates = [HasDQI] in {
8656    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8657                            MaskOpNode, sched.ZMM>,
8658             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8659                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8660  }
8661  let Predicates = [HasDQI, HasVLX] in {
8662    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8663                               MaskOpNode, sched.XMM>, EVEX_V128;
8664    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8665                               MaskOpNode, sched.YMM>, EVEX_V256;
8666  }
8667}
8668
8669// Convert Signed/Unsigned Quadword to Double
// Gated on HasDQI (plus HasVLX for the 128/256-bit forms). The 512-bit Z form
// additionally pulls in avx512_vcvt_fp_rc, driven by OpNodeRnd. Both VLX forms
// are tagged NotEVEX2VEXConvertible.
// NOTE(review): presumably because there is no VEX-encoded equivalent of
// these instructions - confirm.
8670multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8671                           SDNode MaskOpNode, SDNode OpNodeRnd,
8672                           X86SchedWriteWidths sched> {
8673  let Predicates = [HasDQI] in {
8674    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8675                            MaskOpNode, sched.ZMM>,
8676             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8677                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8678  }
8679  let Predicates = [HasDQI, HasVLX] in {
8680    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8681                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8682    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8683                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8684  }
8685}
8686
8687// Convert Float to Signed/Unsigned Quadword
// Gated on HasDQI (plus HasVLX for the 128/256-bit forms). The source info is
// half the width of the destination: v8f32x -> v8i64 for Z, v4f32x -> v4i64x
// for Z256. The 512-bit Z form additionally pulls in avx512_vcvt_fp_rc, driven
// by OpNodeRnd.
8688multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8689                           SDNode MaskOpNode, SDNode OpNodeRnd,
8690                           X86SchedWriteWidths sched> {
8691  let Predicates = [HasDQI] in {
8692    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8693                            MaskOpNode, sched.ZMM>,
8694             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8695                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8696  }
8697  let Predicates = [HasDQI, HasVLX] in {
8698    // Explicitly specified broadcast string, since we take only 2 elements
8699    // from v4f32x_info source
    // For the same reason the memory operand is f64mem, the mask class is
    // VK2WM, and the two load patterns match a 64-bit scalar load placed in a
    // v2f64 and bitcast to v4f32.
8700    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8701                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8702                               (v2i64 (OpNode (bc_v4f32
8703                                (v2f64
8704                                 (scalar_to_vector (loadf64 addr:$src)))))),
8705                               (v2i64 (MaskOpNode (bc_v4f32
8706                                (v2f64
8707                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8708                               EVEX_V128;
8709    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8710                               MaskOpNode, sched.YMM>, EVEX_V256;
8711  }
8712}
8713
8714// Convert Float to Signed/Unsigned Quadword with truncation
// Gated on HasDQI (plus HasVLX for the 128/256-bit forms).
// Note: despite its name, the OpNodeRnd parameter is forwarded to
// avx512_vcvt_fp_sae (not an _rc variant) for the 512-bit Z form.
8715multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8716                            SDNode MaskOpNode, SDNode OpNodeRnd,
8717                            X86SchedWriteWidths sched> {
8718  let Predicates = [HasDQI] in {
8719    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8720                            MaskOpNode, sched.ZMM>,
8721             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8722                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8723  }
8724  let Predicates = [HasDQI, HasVLX] in {
8725    // Explicitly specified broadcast string, since we take only 2 elements
8726    // from v4f32x_info source
    // For the same reason the memory operand is f64mem, the mask class is
    // VK2WM, and the two load patterns match a 64-bit scalar load placed in a
    // v2f64 and bitcast to v4f32.
8727    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8728                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8729                               (v2i64 (OpNode (bc_v4f32
8730                                (v2f64
8731                                 (scalar_to_vector (loadf64 addr:$src)))))),
8732                               (v2i64 (MaskOpNode (bc_v4f32
8733                                (v2f64
8734                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8735                               EVEX_V128;
8736    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8737                               MaskOpNode, sched.YMM>, EVEX_V256;
8738  }
8739}
8740
8741// Convert Signed/Unsigned Quadword to Float
8742// Also Convert Signed/Unsigned Doubleword to Half
// Generic over the destination/source AVX512VLVectorVTInfo pair (_dst/_src)
// and over the gating predicate prd (defaults to HasDQI). The destination is
// one size step below the source: Z is _src.info512 -> _dst.info256, while
// Z256 and Z128 both produce _dst.info128.
//   OpNode / MaskOpNode : nodes for the Z and Z256 forms.
//   OpNode128           : unmasked node used by the explicit Z128 patterns.
//   OpNode128M          : masked node used by the explicit Z128 patterns; it
//                         takes (source, passthru-or-zero, mask).
//   OpNodeRnd           : node for the avx512_vcvt_fp_rc variant of Z.
8743multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8744                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8745                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8746                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8747                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8748  let Predicates = [prd] in {
8749    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8750                            MaskOpNode, sched.ZMM>,
8751             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8752                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8753  }
8754  let Predicates = [prd, HasVLX] in {
8755    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8756    // memory forms of these instructions in Asm Parser. They have the same
8757    // dest type - '_dst.info128'. We also specify the broadcast string explicitly
8758    // due to the same reason.
8759    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8760                               null_frag, sched.XMM, _src.info128.BroadcastStr,
8761                               "{x}", i128mem, _src.info128.KRCWM>,
8762                               EVEX_V128, NotEVEX2VEXConvertible;
8763    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8764                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8765                               "{y}">, EVEX_V256,
8766                               NotEVEX2VEXConvertible;
8767
8768    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8769    // patterns have been disabled with null_frag.
    // Register-source forms: unmasked, merge-masked ($src0 passthru) and
    // zero-masked.
8770    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8771              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8772    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8773                             _src.info128.KRCWM:$mask),
8774              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8775    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8776                             _src.info128.KRCWM:$mask),
8777              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8778
    // Full-vector load forms, using the source info's LdFrag.
8779    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8780              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8781    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8782                             _src.info128.KRCWM:$mask),
8783              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8784    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8785                             _src.info128.KRCWM:$mask),
8786              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8787
    // Broadcast-load forms.
    // NOTE(review): these hard-code X86VBroadcastld64 regardless of the _src
    // element type; confirm this is intended when _src has 32-bit elements.
8788    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8789              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8790    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8791                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8792              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8793    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8794                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8795              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8796  }
8797
  // "x"-suffixed spellings of the Z128 register and broadcast forms, all
  // tagged "att". These records sit outside the Predicates blocks above.
  // NOTE(review): the mask classes (VK2WM/VK4WM), the i64mem operand and the
  // {1to2}/{1to4} broadcast strings in the aliases below are hard-coded rather
  // than derived from _src; confirm they are correct for every instantiation.
8798  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8799                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8800                  VR128X:$src), 0, "att">;
8801  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8802                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8803                  VK2WM:$mask, VR128X:$src), 0, "att">;
8804  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8805                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8806                  VK2WM:$mask, VR128X:$src), 0, "att">;
8807  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8808                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8809                  i64mem:$src), 0, "att">;
8810  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8811                  "$dst {${mask}}, ${src}{1to2}}",
8812                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8813                  VK2WM:$mask, i64mem:$src), 0, "att">;
8814  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8815                  "$dst {${mask}} {z}, ${src}{1to2}}",
8816                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8817                  VK2WM:$mask, i64mem:$src), 0, "att">;
8818
  // "y"-suffixed spellings of the Z256 forms: the destination stays VR128X
  // while the source register is VR256X.
8819  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8820                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8821                  VR256X:$src), 0, "att">;
8822  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8823                  "$dst {${mask}}, $src}",
8824                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8825                  VK4WM:$mask, VR256X:$src), 0, "att">;
8826  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8827                  "$dst {${mask}} {z}, $src}",
8828                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8829                  VK4WM:$mask, VR256X:$src), 0, "att">;
8830  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8831                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8832                  i64mem:$src), 0, "att">;
8833  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8834                  "$dst {${mask}}, ${src}{1to4}}",
8835                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8836                  VK4WM:$mask, i64mem:$src), 0, "att">;
8837  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8838                  "$dst {${mask}} {z}, ${src}{1to4}}",
8839                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8840                  VK4WM:$mask, i64mem:$src), 0, "att">;
8841}
8842
// Instantiations of the packed integer <-> floating-point conversion
// multiclasses. Each defm passes the opcode byte, the mnemonic, the selection
// nodes and a scheduling-class bundle, then appends mix-in classes (XS, XD,
// PS, PD, T_MAP5*, REX_W, EVEX_CD8<...>).
// NOTE(review): the mix-ins presumably select the opcode prefix/map, the W
// bit and the compressed-displacement tuple - confirm against their
// definitions, which are outside this section.

// Doubleword -> double / float, and truncating float/double -> doubleword.
8843defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8844                                 X86any_VSintToFP, X86VSintToFP,
8845                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8846
8847defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8848                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8849                                PS, EVEX_CD8<32, CD8VF>;
8850
8851defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8852                                 X86cvttp2si, X86cvttp2siSAE,
8853                                 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8854
8855defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8856                                 X86cvttp2si, X86cvttp2siSAE,
8857                                 SchedWriteCvtPD2DQ>,
8858                                 PD, REX_W, EVEX_CD8<64, CD8VF>;
8859
8860defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8861                                 X86cvttp2ui, X86cvttp2uiSAE,
8862                                 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8863
8864defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8865                                 X86cvttp2ui, X86cvttp2uiSAE,
8866                                 SchedWriteCvtPD2DQ>,
8867                                 PS, REX_W, EVEX_CD8<64, CD8VF>;
8868
8869defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8870                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8871                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8872
8873defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8874                                 uint_to_fp, X86VUintToFpRnd,
8875                                 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8876
// Non-truncating float/double -> doubleword/quadword. These pass the same node
// for both the OpNode and MaskOpNode parameters.
8877defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8878                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8879                                 EVEX_CD8<32, CD8VF>;
8880
8881defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8882                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8883                                 REX_W, EVEX_CD8<64, CD8VF>;
8884
8885defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8886                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8887                                 PS, EVEX_CD8<32, CD8VF>;
8888
8889defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8890                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8891                                 PS, EVEX_CD8<64, CD8VF>;
8892
8893defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8894                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
8895                                 PD, EVEX_CD8<64, CD8VF>;
8896
8897defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8898                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8899                                 EVEX_CD8<32, CD8VH>;
8900
8901defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8902                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
8903                                 PD, EVEX_CD8<64, CD8VF>;
8904
8905defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8906                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8907                                 EVEX_CD8<32, CD8VH>;
8908
// Truncating float/double -> quadword. The SAE node is bound to the parameter
// that the multiclass names OpNodeRnd.
8909defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8910                                 X86cvttp2si, X86cvttp2siSAE,
8911                                 SchedWriteCvtPD2DQ>, REX_W,
8912                                 PD, EVEX_CD8<64, CD8VF>;
8913
8914defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8915                                 X86cvttp2si, X86cvttp2siSAE,
8916                                 SchedWriteCvtPS2DQ>, PD,
8917                                 EVEX_CD8<32, CD8VH>;
8918
8919defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8920                                 X86cvttp2ui, X86cvttp2uiSAE,
8921                                 SchedWriteCvtPD2DQ>, REX_W,
8922                                 PD, EVEX_CD8<64, CD8VF>;
8923
8924defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8925                                 X86cvttp2ui, X86cvttp2uiSAE,
8926                                 SchedWriteCvtPS2DQ>, PD,
8927                                 EVEX_CD8<32, CD8VH>;
8928
// Quadword -> double.
8929defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8930                            sint_to_fp, X86VSintToFpRnd,
8931                            SchedWriteCvtDQ2PD>, REX_W, XS, EVEX_CD8<64, CD8VF>;
8932
8933defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8934                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8935                            REX_W, XS, EVEX_CD8<64, CD8VF>;
8936
// Doubleword -> half (predicate overridden to HasFP16) and quadword -> float
// (default HasDQI predicate), sharing one multiclass.
8937defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8938                            X86any_VSintToFP, X86VMSintToFP,
8939                            X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8940                            SchedWriteCvtDQ2PS, HasFP16>,
8941                            T_MAP5PS, EVEX_CD8<32, CD8VF>;
8942
8943defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8944                            X86any_VUintToFP, X86VMUintToFP,
8945                            X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8946                            SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
8947                            EVEX_CD8<32, CD8VF>;
8948
8949defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8950                            X86any_VSintToFP, X86VMSintToFP,
8951                            X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8952                            SchedWriteCvtDQ2PS>, REX_W, PS,
8953                            EVEX_CD8<64, CD8VF>;
8954
8955defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8956                            X86any_VUintToFP, X86VMUintToFP,
8957                            X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8958                            SchedWriteCvtDQ2PS>, REX_W, XD,
8959                            EVEX_CD8<64, CD8VF>;
8960
8961let Predicates = [HasVLX] in {
8962  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8963  // patterns have been disabled with null_frag.
8964  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8965            (VCVTPD2DQZ128rr VR128X:$src)>;
8966  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8967                          VK2WM:$mask),
8968            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8969  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8970                          VK2WM:$mask),
8971            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8972
8973  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8974            (VCVTPD2DQZ128rm addr:$src)>;
8975  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8976                          VK2WM:$mask),
8977            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8978  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8979                          VK2WM:$mask),
8980            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8981
8982  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8983            (VCVTPD2DQZ128rmb addr:$src)>;
8984  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8985                          (v4i32 VR128X:$src0), VK2WM:$mask),
8986            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8987  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8988                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8989            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8990
8991  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8992  // patterns have been disabled with null_frag.
  // Same layout as the other v2f64 -> v4i32 groups in this block: the
  // write-mask class is VK2WM and every source form has an unmasked, a
  // merge-masked ($src0 pass-through) and a zero-masked (ImmAllZerosV) pattern.
  // The unmasked patterns match X86any_cvttp2si; the masked ones match
  // X86mcvttp2si.
  // Register source (rr / rrk / rrkz).
8993  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8994            (VCVTTPD2DQZ128rr VR128X:$src)>;
8995  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8996                          VK2WM:$mask),
8997            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8998  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8999                          VK2WM:$mask),
9000            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9001
  // v2f64 load source (rm / rmk / rmkz).
9002  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
9003            (VCVTTPD2DQZ128rm addr:$src)>;
9004  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9005                          VK2WM:$mask),
9006            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9007  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9008                          VK2WM:$mask),
9009            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
9010
  // X86VBroadcastld64 source (rmb / rmbk / rmbkz).
9011  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
9012            (VCVTTPD2DQZ128rmb addr:$src)>;
9013  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
9014                          (v4i32 VR128X:$src0), VK2WM:$mask),
9015            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9016  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
9017                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9018            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
9019
9020  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
9021  // patterns have been disabled with null_frag.
  // v2f64 source, v4i32-typed result, VK2WM write-mask. Every source form has
  // an unmasked, a merge-masked ($src0 pass-through) and a zero-masked
  // (ImmAllZerosV pass-through) pattern.
  // Register source (rr / rrk / rrkz).
9022  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
9023            (VCVTPD2UDQZ128rr VR128X:$src)>;
9024  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
9025                           VK2WM:$mask),
9026            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
9027  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9028                           VK2WM:$mask),
9029            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9030
  // v2f64 load source (rm / rmk / rmkz).
9031  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
9032            (VCVTPD2UDQZ128rm addr:$src)>;
9033  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9034                           VK2WM:$mask),
9035            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9036  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9037                           VK2WM:$mask),
9038            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9039
  // X86VBroadcastld64 source (rmb / rmbk / rmbkz).
9040  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
9041            (VCVTPD2UDQZ128rmb addr:$src)>;
9042  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
9043                           (v4i32 VR128X:$src0), VK2WM:$mask),
9044            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9045  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
9046                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9047            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9048
9049  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
9050  // patterns have been disabled with null_frag.
  // v2f64 source, v4i32-typed result, VK2WM write-mask. The unmasked patterns
  // match X86any_cvttp2ui; the masked ones match X86mcvttp2ui with either a
  // $src0 pass-through (merge-masking) or ImmAllZerosV (zero-masking).
  // Register source (rr / rrk / rrkz).
9051  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
9052            (VCVTTPD2UDQZ128rr VR128X:$src)>;
9053  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
9054                          VK2WM:$mask),
9055            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
9056  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
9057                          VK2WM:$mask),
9058            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9059
  // v2f64 load source (rm / rmk / rmkz).
9060  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
9061            (VCVTTPD2UDQZ128rm addr:$src)>;
9062  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9063                          VK2WM:$mask),
9064            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9065  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9066                          VK2WM:$mask),
9067            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9068
  // X86VBroadcastld64 source (rmb / rmbk / rmbkz).
9069  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
9070            (VCVTTPD2UDQZ128rmb addr:$src)>;
9071  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9072                          (v4i32 VR128X:$src0), VK2WM:$mask),
9073            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9074  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9075                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9076            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9077}
9078
// Fold an X86vzload64 source, reinterpreted as v4f32 through bc_v4f32, into
// the memory forms of the 128-bit PS -> QQ / UQQ conversions (rounding and
// truncating). Each conversion gets an unmasked pattern plus two vselect_mask
// patterns: merge-masking against $src0 (rmk) and zero-masking against
// ImmAllZerosV (rmkz). Requires both HasDQI and HasVLX.
9079let Predicates = [HasDQI, HasVLX] in {
  // vcvtps2qq
9080  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9081            (VCVTPS2QQZ128rm addr:$src)>;
9082  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9083                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9084                                 VR128X:$src0)),
9085            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9086  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9087                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9088                                 v2i64x_info.ImmAllZerosV)),
9089            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9090
  // vcvtps2uqq
9091  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9092            (VCVTPS2UQQZ128rm addr:$src)>;
9093  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9094                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9095                                 VR128X:$src0)),
9096            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9097  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9098                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9099                                 v2i64x_info.ImmAllZerosV)),
9100            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9101
  // vcvttps2qq. The unmasked pattern matches X86any_cvttp2si, the masked
  // patterns match X86cvttp2si.
9102  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9103            (VCVTTPS2QQZ128rm addr:$src)>;
9104  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9105                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9106                                 VR128X:$src0)),
9107            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9108  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9109                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9110                                 v2i64x_info.ImmAllZerosV)),
9111            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9112
  // vcvttps2uqq. The unmasked pattern matches X86any_cvttp2ui, the masked
  // patterns match X86cvttp2ui.
9113  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9114            (VCVTTPS2UQQZ128rm addr:$src)>;
9115  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9116                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9117                                 VR128X:$src0)),
9118            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9119  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9120                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9121                                 v2i64x_info.ImmAllZerosV)),
9122            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9123}
9124
// Fold an X86vzload64 source, reinterpreted as v4i32 through bc_v4i32, into
// the memory forms of the 128-bit signed / unsigned DQ -> PD conversions.
// The unmasked patterns match X86any_VSintToFP / X86any_VUintToFP; the masked
// patterns wrap X86VSintToFP / X86VUintToFP in vselect_mask, with $src0 for
// merge-masking (rmk) or ImmAllZerosV for zero-masking (rmkz).
9125let Predicates = [HasVLX] in {
  // vcvtdq2pd
9126  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9127            (VCVTDQ2PDZ128rm addr:$src)>;
9128  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9129                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9130                                 VR128X:$src0)),
9131            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9132  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9133                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9134                                 v2f64x_info.ImmAllZerosV)),
9135            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9136
  // vcvtudq2pd
9137  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9138            (VCVTUDQ2PDZ128rm addr:$src)>;
9139  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9140                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9141                                 VR128X:$src0)),
9142            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9143  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9144                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9145                                 v2f64x_info.ImmAllZerosV)),
9146            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9147}
9148
9149//===----------------------------------------------------------------------===//
9150// Half precision conversion instructions
9151//===----------------------------------------------------------------------===//
9152
// vcvtph2ps (opcode 0x13): register (rr) and memory (rm) forms.
//   _dest    - vector info of the result; supplies the output register class.
//   _src     - vector info of the source; supplies the source VT and RC.
//   x86memop - memory operand type used by the rm form.
//   ld_dag   - DAG for the loaded source value, chosen by each instantiation.
//   sched    - scheduling class; the rm form uses sched.Folded.
// Both forms read MXCSR and are marked mayRaiseFPException.
// Each form hands two DAGs to AVX512_maskable_split, one rooted at
// X86any_cvtph2ps and one at X86cvtph2ps.
// NOTE(review): presumably the first is the unmasked pattern and the second
// the masked one; AVX512_maskable_split is defined elsewhere - confirm.
9153let Uses = [MXCSR], mayRaiseFPException = 1 in
9154multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9155                           X86MemOperand x86memop, dag ld_dag,
9156                           X86FoldableSchedWrite sched> {
9157  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
9158                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
9159                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
9160                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
9161                            T8PD, Sched<[sched]>;
9162  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
9163                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
9164                            (X86any_cvtph2ps (_src.VT ld_dag)),
9165                            (X86cvtph2ps (_src.VT ld_dag))>,
9166                            T8PD, Sched<[sched.Folded]>;
9167}
9168
// vcvtph2ps register form with the {sae} modifier (rrb, EVEX_B set), selected
// from X86cvtph2psSAE. Reads MXCSR; unlike avx512_cvtph2ps, this multiclass
// does not set mayRaiseFPException.
//   _dest / _src - vector info of the result and of the source.
//   sched        - scheduling class of the instruction.
9169multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9170                               X86FoldableSchedWrite sched> {
9171  let Uses = [MXCSR] in
9172  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9173                             (ins _src.RC:$src), "vcvtph2ps",
9174                             "{sae}, $src", "$src, {sae}",
9175                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
9176                             T8PD, EVEX_B, Sched<[sched]>;
9177}
9178
// 512-bit vcvtph2ps: v16i16 source -> v16f32 result, with a plain load from an
// f256mem operand for the memory form. Combines the regular forms with the
// {sae} register form. Requires HasAVX512.
9179let Predicates = [HasAVX512] in
9180  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
9181                                    (load addr:$src), WriteCvtPH2PSZ>,
9182                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
9183                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9184
// 256-bit and 128-bit vcvtph2ps (HasVLX). No {sae} form is instantiated here.
9185let Predicates = [HasVLX] in {
  // 256-bit: v8i16 source loaded from an f128mem operand -> v8f32.
9186  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
9187                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
9188                       EVEX_CD8<32, CD8VH>;
  // 128-bit: the memory operand is f64mem, and the source is matched as an
  // X86vzload64 bitconverted to v8i16 -> v4f32.
9189  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
9190                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
9191                       WriteCvtPH2PS>, EVEX, EVEX_V128,
9192                       EVEX_CD8<32, CD8VH>;
9193
9194  // Pattern match vcvtph2ps of a scalar i64 load.
  // Covers the case where the same 64-bit source shows up as
  // scalar_to_vector(loadi64) instead of X86vzload64.
9195  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
9196              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
9197            (VCVTPH2PSZ128rm addr:$src)>;
9198}
9199
// vcvtps2ph (opcode 0x1D) with an 8-bit immediate operand ($src2).
//   _dest    - vector info of the half-precision result.
//   _src     - vector info of the single-precision source.
//   x86memop - memory operand type for the store forms.
//   RR / MR  - scheduling classes for the register and store forms.
// Defines rr, rrk (merge-masked, $src0 tied to $dst), rrkz (zero-masked),
// and the store forms mr / mrk. All forms read MXCSR and are marked
// mayRaiseFPException.
9200multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9201                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
9202let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Unmasked register form.
9203  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9204             (ins _src.RC:$src1, i32u8imm:$src2),
9205             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9206             [(set _dest.RC:$dst,
9207                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9208             Sched<[RR]>;
  // Merge-masked register form; the mask class comes from _src.
9209  let Constraints = "$src0 = $dst" in
9210  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9211             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9212             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
9213             [(set _dest.RC:$dst,
9214                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9215                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
9216             Sched<[RR]>, EVEX_K;
  // Zero-masked register form.
9217  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9218             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9219             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9220             [(set _dest.RC:$dst,
9221                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9222                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9223             Sched<[RR]>, EVEX_KZ;
  // Store forms carry no selection pattern ([]), so mayStore is set by hand.
9224  let hasSideEffects = 0, mayStore = 1 in {
9225    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9226               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9227               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9228               Sched<[MR]>;
    // NOTE(review): the mask class here is _dest.KRCWM, whereas rrk / rrkz use
    // _src.KRCWM. The two differ when _dest and _src have different element
    // counts - confirm this is intended.
9229    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9230               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9231               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9232                EVEX_K, Sched<[MR]>;
9233  }
9234}
9235}
9236
// vcvtps2ph register forms with the {sae} modifier (EVEX_B set): rrb, rrbk
// (merge-masked, $src0 tied to $dst) and rrbkz (zero-masked). The unmasked
// form selects from X86cvtps2phSAE, the masked forms from X86mcvtps2phSAE.
// Reads MXCSR; mayRaiseFPException is not set in this multiclass.
//   _dest / _src - vector info of the result and of the source.
//   Sched        - scheduling class (the parameter shares its name with the
//                  Sched<...> class it is passed to).
9237multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9238                               SchedWrite Sched> {
9239  let hasSideEffects = 0, Uses = [MXCSR] in {
9240    def rrb : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9241              (ins _src.RC:$src1, i32u8imm:$src2),
9242              "vcvtps2ph\t{$src2, {sae}, $src1, $dst|$dst, $src1, {sae}, $src2}",
9243              [(set _dest.RC:$dst,
9244                    (X86cvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9245              EVEX_B, Sched<[Sched]>;
9246    let Constraints = "$src0 = $dst" in
9247    def rrbk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9248              (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9249              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}}|$dst {${mask}}, $src1, {sae}, $src2}",
9250              [(set _dest.RC:$dst,
9251                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9252                                  _dest.RC:$src0, _src.KRCWM:$mask))]>,
9253              EVEX_B, Sched<[Sched]>, EVEX_K;
9254    def rrbkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9255              (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9256              "vcvtps2ph\t{$src2, {sae}, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, {sae}, $src2}",
9257              [(set _dest.RC:$dst,
9258                    (X86mcvtps2phSAE (_src.VT _src.RC:$src1), (i32 timm:$src2),
9259                                  _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9260              EVEX_B, Sched<[Sched]>, EVEX_KZ;
9261}
9262}
9263
// 512-bit vcvtps2ph: v16f32 source -> v16i16 result, store forms using an
// f256mem operand. Combines the regular forms with the {sae} register forms.
9264let Predicates = [HasAVX512] in {
9265  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9266                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9267                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9268                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9269
  // The mr form is defined without a pattern, so select a store of the
  // conversion result to it explicitly.
9270  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9271            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9272}
9273
// 256-bit and 128-bit vcvtps2ph (HasVLX), plus store-folding patterns for the
// pattern-less mr forms. No {sae} forms are instantiated here.
9274let Predicates = [HasVLX] in {
  // 256-bit: v8f32 source -> v8i16 result, f128mem store operand.
9275  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9276                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9277                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
  // 128-bit: v4f32 source -> v8i16-typed result, f64mem store operand.
9278  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9279                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
9280                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9281
  // 128-bit result: match a store of element 0 of the result viewed as
  // v2f64 or as v2i64.
9282  def : Pat<(store (f64 (extractelt
9283                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9284                         (iPTR 0))), addr:$dst),
9285            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9286  def : Pat<(store (i64 (extractelt
9287                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9288                         (iPTR 0))), addr:$dst),
9289            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  // 256-bit source: the whole v8i16 result is stored.
9290  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9291            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9292}
9293
9294//  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
//   opc       - opcode byte.
//   _         - vector info supplying the register class of both operands.
//   OpcodeStr - mnemonic; the {sae} operand is appended to the asm string.
//   d         - execution domain.
//   sched     - scheduling class (defaults to WriteFComX).
// Defines only the register-register {sae} form (rrb). It has no outputs and
// no selection pattern ([]). This multiclass does not set Defs; the
// instantiations wrap it in a let that does.
9295multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9296                              string OpcodeStr, Domain d,
9297                              X86FoldableSchedWrite sched = WriteFComX> {
9298  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9299  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9300                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9301                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9302}
9303
// {sae} register forms of vucomiss/vucomisd (opcode 0x2E) and vcomiss/vcomisd
// (opcode 0x2F). All of them define EFLAGS and require HasAVX512; the double
// variants additionally set REX_W and use a 64-bit EVEX_CD8 element size.
9304let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9305  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9306                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9307  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9308                                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9309  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9310                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9311  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9312                                   AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
9313}
9314
// EVEX-encoded ucomis*/comis* built from the sse12_ord_cmp multiclasses
// (defined elsewhere). All of them define EFLAGS and require HasAVX512.
// The FR32X/FR64X versions select from X86any_fcmp (ucomis*) and
// X86strict_fcmps (comis*).
9315let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9316  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9317                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9318                                 EVEX_CD8<32, CD8VT1>;
9319  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9320                                  "ucomisd", SSEPackedDouble>, PD, EVEX,
9321                                  VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9322  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9323                                 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9324                                 EVEX_CD8<32, CD8VT1>;
9325  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9326                                 "comisd", SSEPackedDouble>, PD, EVEX,
9327                                  VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
  // VR128X versions selected from X86ucomi / X86comi; marked isCodeGenOnly.
9328  let isCodeGenOnly = 1 in {
9329    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9330                          sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9331                          EVEX_CD8<32, CD8VT1>;
9332    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9333                          sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
9334                          VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9335
9336    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9337                          sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9338                          EVEX_CD8<32, CD8VT1>;
9339    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9340                          sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
9341                          VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
9342  }
9343}
9344
// Half-precision compares vucomish (0x2E) and vcomish (0x2F). They define
// EFLAGS and require HasFP16. Three flavours are instantiated per mnemonic:
// the {sae} register form, the FR16X form selected from X86any_fcmp /
// X86strict_fcmps, and an isCodeGenOnly VR128X form selected from
// X86ucomi / X86comi. All use the T_MAP5PS encoding class.
9345let Defs = [EFLAGS], Predicates = [HasFP16] in {
9346  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9347                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9348                                EVEX_CD8<16, CD8VT1>;
9349  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9350                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9351                                EVEX_CD8<16, CD8VT1>;
9352  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9353                                "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
9354                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9355  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9356                                "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
9357                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9358  let isCodeGenOnly = 1 in {
9359    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9360                                sse_load_f16, "ucomish", SSEPackedSingle>,
9361                                T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9362
9363    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9364                                sse_load_f16, "comish", SSEPackedSingle>,
9365                                T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9366  }
9367}
9368
9369/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
///   opc       - opcode byte.
///   OpcodeStr - full mnemonic.
///   OpNode    - two-operand SDNode applied to ($src1, $src2).
///   sched     - scheduling class; the rm form uses sched.Folded and
///               sched.ReadAfterFold.
///   _         - vector info supplying RC, VT, ExeDomain and scalar memory
///               operand/fragments.
///   prd       - predicate guarding both forms (defaults to HasAVX512).
/// Defines a register form (rr) and a memory form (rm) whose second source is
/// matched through _.ScalarIntMemFrags; both go through
/// AVX512_maskable_scalar.
9370multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9371                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
9372                         Predicate prd = HasAVX512> {
9373  let Predicates = [prd], ExeDomain = _.ExeDomain in {
9374  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9375                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9376                           "$src2, $src1", "$src1, $src2",
9377                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9378                           EVEX_4V, VEX_LIG, Sched<[sched]>;
9379  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9380                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9381                         "$src2, $src1", "$src1, $src2",
9382                         (OpNode (_.VT _.RC:$src1),
9383                          (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
9384                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9385}
9386}
9387
// Scalar reciprocal (opcode 0x4D) and reciprocal square root (opcode 0x4F)
// approximations, all selected from X86rcp14s / X86rsqrt14s.
// The half-precision variants use f16x_info, the HasFP16 predicate and the
// T_MAP6PD encoding class; they sit outside the Uses = [MXCSR] let that
// wraps the single/double variants below.
9388defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9389                               f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9390                               T_MAP6PD;
9391defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9392                                 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9393                                 EVEX_CD8<16, CD8VT1>, T_MAP6PD;
// Single/double variants: default predicate (HasAVX512), T8PD encoding class,
// and the double forms additionally set REX_W.
9394let Uses = [MXCSR] in {
9395defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9396                               f32x_info>, EVEX_CD8<32, CD8VT1>,
9397                               T8PD;
9398defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9399                               f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
9400                               T8PD;
9401defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9402                                 SchedWriteFRsqrt.Scl, f32x_info>,
9403                                 EVEX_CD8<32, CD8VT1>, T8PD;
9404defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9405                                 SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
9406                                 EVEX_CD8<64, CD8VT1>, T8PD;
9407}
9408
9409/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
///   opc       - opcode byte.
///   OpcodeStr - full mnemonic.
///   OpNode    - unary SDNode applied to the source vector.
///   sched     - scheduling class; memory forms use sched.Folded and
///               sched.ReadAfterFold.
///   _         - vector info supplying RC, VT, memory operands, load
///               fragments and the broadcast asm suffix.
/// Defines three maskable forms: register (r), full-vector load (m) and
/// broadcast load (mb, with EVEX_B set).
9410multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9411                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9412  let ExeDomain = _.ExeDomain in {
9413  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9414                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9415                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
9416                         Sched<[sched]>;
9417  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9418                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9419                         (OpNode (_.VT
9420                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
9421                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9422  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9423                          (ins _.ScalarMemOp:$src), OpcodeStr,
9424                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9425                          (OpNode (_.VT
9426                            (_.BroadcastLdFrag addr:$src)))>,
9427                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9428  }
9429}
9430
// Instantiates avx512_fp14_p for every vector width and element type.
//   opc       - opcode byte shared by all variants.
//   OpcodeStr - mnemonic stem; "14ps", "14pd" or "ph" is appended.
//   OpNode    - unary SDNode forwarded to avx512_fp14_p.
//   sched     - per-width scheduling classes (.XMM / .YMM / .ZMM).
// The ps/pd variants are wrapped in Uses = [MXCSR]; the ph variants are not,
// and they use the T_MAP6PD encoding class together with the HasFP16 predicate.
9431multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9432                                X86SchedWriteWidths sched> {
  // 512-bit ps/pd. No Predicates are set for these inside this multiclass.
9433  let Uses = [MXCSR] in {
9434  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9435                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9436  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9437                             v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9438  }
  // 512-bit ph.
9439  let Predicates = [HasFP16] in
9440  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9441                           v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9442
9443  // Define only if AVX512VL feature is present.
9444  let Predicates = [HasVLX], Uses = [MXCSR] in {
9445    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9446                                  OpNode, sched.XMM, v4f32x_info>,
9447                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
9448    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9449                                  OpNode, sched.YMM, v8f32x_info>,
9450                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
9451    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9452                                  OpNode, sched.XMM, v2f64x_info>,
9453                                  EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
9454    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9455                                  OpNode, sched.YMM, v4f64x_info>,
9456                                  EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
9457  }
  // 128/256-bit ph need both HasFP16 and HasVLX.
9458  let Predicates = [HasFP16, HasVLX] in {
9459    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9460                                OpNode, sched.XMM, v8f16x_info>,
9461                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9462    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9463                                OpNode, sched.YMM, v16f16x_info>,
9464                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9465  }
9466}
9467
// Packed reciprocal square root (opcode 0x4E, X86rsqrt14) and reciprocal
// (opcode 0x4C, X86rcp14) across all widths and the ps/pd/ph element types.
9468defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9469defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9470
9471/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
///   opc       - opcode byte.
///   OpcodeStr - full mnemonic.
///   _         - vector info supplying RC, VT, ExeDomain and scalar memory
///               operand/fragments.
///   OpNode    - two-operand SDNode for the r and m forms.
///   OpNodeSAE - two-operand SDNode for the {sae} form.
///   sched     - scheduling class; the m form uses sched.Folded and
///               sched.ReadAfterFold.
/// Defines register (r), register-with-{sae} (rb, EVEX_B set) and memory (m)
/// forms. All read MXCSR; only r and m carry SIMD_EXC, the {sae} form does
/// not.
9472multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9473                         SDNode OpNode, SDNode OpNodeSAE,
9474                         X86FoldableSchedWrite sched> {
9475  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9476  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9477                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9478                           "$src2, $src1", "$src1, $src2",
9479                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9480                           Sched<[sched]>, SIMD_EXC;
9481
9482  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9483                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9484                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9485                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9486                            EVEX_B, Sched<[sched]>;
9487
9488  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9489                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9490                         "$src2, $src1", "$src1, $src2",
9491                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9492                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9493  }
9494}
9495
// Single (SSZ, "ss" suffix, f32x_info) and double (SDZ, "sd" suffix,
// f64x_info, REX_W) scalar instantiations of avx512_fp28_s. Both are
// VEX_LIG, T8PD and EVEX_4V. No predicate is set here; callers supply one.
//   opc / OpcodeStr    - opcode byte and mnemonic stem.
//   OpNode / OpNodeSAE - SDNodes for the regular and {sae} forms.
//   sched              - scheduling class forwarded to avx512_fp28_s.
9496multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9497                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9498  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9499                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
9500  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9501                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD, EVEX_4V;
9502}
9503
// Half-precision scalar instantiation (SHZ, "sh" suffix, f16x_info) of
// avx512_fp28_s, guarded by HasFP16 and using the T_MAP6PD encoding class.
// Parameters mirror avx512_eri_s.
// NOTE(review): unlike the SSZ/SDZ definitions in avx512_eri_s, VEX_LIG is not
// applied here - confirm whether that is intentional.
9504multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9505                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9506  let Predicates = [HasFP16] in
9507  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9508               EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
9509}
9510
// Scalar vrcp28ss/sd (opcode 0xCB) and vrsqrt28ss/sd (opcode 0xCD), guarded
// by the HasERI predicate.
9511let Predicates = [HasERI] in {
9512  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9513                               SchedWriteFRcp.Scl>;
9514  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9515                               SchedWriteFRsqrt.Scl>;
9516}
9517
// Scalar vgetexpss/sd/sh (opcode 0x43). Reuses avx512_eri_s for the ss/sd
// forms and avx512_vgetexpsh for the sh form. Defined outside the HasERI
// let above, so only the sh form gets an explicit predicate (HasFP16, set
// inside avx512_vgetexpsh).
9518defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9519                              SchedWriteFRnd.Scl>,
9520                 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9521                                  SchedWriteFRnd.Scl>;
9522/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
///   opc       - opcode byte.
///   OpcodeStr - full mnemonic.
///   _         - vector info supplying RC, VT, memory operands, load
///               fragments and the broadcast asm suffix.
///   OpNode    - unary SDNode applied to the source vector.
///   sched     - scheduling class; memory forms use sched.Folded and
///               sched.ReadAfterFold.
/// Defines three maskable forms: register (r), full-vector load (m) and
/// broadcast load (mb, EVEX_B set). All read MXCSR and are marked
/// mayRaiseFPException. Encoding prefix classes are left to the
/// instantiation.
9523
9524multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9525                         SDNode OpNode, X86FoldableSchedWrite sched> {
9526  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9527  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9528                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9529                         (OpNode (_.VT _.RC:$src))>,
9530                         Sched<[sched]>;
9531
9532  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9533                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9534                         (OpNode (_.VT
9535                             (bitconvert (_.LdFrag addr:$src))))>,
9536                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9537
9538  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9539                         (ins _.ScalarMemOp:$src), OpcodeStr,
9540                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9541                         (OpNode (_.VT
9542                                  (_.BroadcastLdFrag addr:$src)))>,
9543                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9544  }
9545}
// {sae} companion to avx512_fp28_p: defines a single register-source form
// "rb" whose asm string carries "{sae}" and which sets EVEX_B. OpNode here is
// expected to be the SAE flavour of the operation (callers in this file pass
// their OpNodeSAE argument). The form uses MXCSR but, unlike avx512_fp28_p,
// does not set mayRaiseFPException.
9546multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9547                         SDNode OpNode, X86FoldableSchedWrite sched> {
9548  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9549  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9550                        (ins _.RC:$src), OpcodeStr,
9551                        "{sae}, $src", "$src, {sae}",
9552                        (OpNode (_.VT _.RC:$src))>,
9553                        EVEX_B, Sched<[sched]>;
9554}
9555
// 512-bit packed "ps" and "pd" forms of a unary FP operation. Each of PSZ and
// PDZ combines avx512_fp28_p (r/m/mb forms, using OpNode) with
// avx512_fp28_p_sae (rb form, using OpNodeSAE), both scheduled with sched.ZMM.
// The pd variant additionally sets REX_W and uses a 64-bit EVEX_CD8 element
// size instead of 32.
9556multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9557                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9558   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9559              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9560              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9561   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9562              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9563              T8PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
9564}
9565
// 128-bit and 256-bit packed "ps"/"pd" forms of a unary FP operation, built
// from avx512_fp28_p only (no {sae} form is defined at these widths here).
// All four definitions require HasVLX. XMM-width forms use sched.XMM and
// YMM-width forms use sched.YMM; the pd forms additionally set REX_W.
9566multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9567                                  SDNode OpNode, X86SchedWriteWidths sched> {
9568  // Define only if AVX512VL feature is present.
9569  let Predicates = [HasVLX] in {
9570    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9571                                sched.XMM>,
9572                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
9573    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9574                                sched.YMM>,
9575                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
9576    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9577                                sched.XMM>,
9578                                EVEX_V128, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
9579    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9580                                sched.YMM>,
9581                                EVEX_V256, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
9582  }
9583}
9584
// Packed FP16 ("ph") forms of a unary FP operation.
//   PHZ    - 512-bit form; requires HasFP16; combines avx512_fp28_p with
//            avx512_fp28_p_sae (the only width that gets the {sae} rb form).
//   PHZ128 - 128-bit form; requires HasFP16 and HasVLX; avx512_fp28_p only.
//   PHZ256 - 256-bit form; requires HasFP16 and HasVLX; avx512_fp28_p only.
// All three use T_MAP6PD and a 16-bit EVEX_CD8 element size.
9585multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9586                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9587  let Predicates = [HasFP16] in
9588  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9589              avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9590              T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9591  let Predicates = [HasFP16, HasVLX] in {
9592    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9593                                     EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9594    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9595                                     EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9596  }
9597}
// Packed (512-bit ps/pd) vrsqrt28, vrcp28 and vexp2 instantiations of
// avx512_eri. All three are only defined when HasERI holds.
9598let Predicates = [HasERI] in {
9599 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9600                            SchedWriteFRsqrt>, EVEX;
9601 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9602                            SchedWriteFRcp>, EVEX;
9603 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9604                            SchedWriteFAdd>, EVEX;
9605}
// Packed vgetexp (opcode 0x42), assembled from three multiclasses:
//   avx512_eri               - 512-bit ps/pd forms, including {sae}.
//   avx512_vgetexp_fp16      - ph forms at 512/128/256 bits.
//   avx512_fp_unaryop_packed - 128/256-bit ps/pd forms (HasVLX).
// This defm is not wrapped in a HasERI predicate.
9606defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9607                            SchedWriteFRnd>,
9608                 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9609                                     SchedWriteFRnd>,
9610                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9611                                          SchedWriteFRnd>, EVEX;
9612
// Packed square root with an explicit rounding-control operand. Defines one
// register-source form "rb" that takes an extra AVX512RC:$rc input and matches
// X86fsqrtRnd with $rc as an i32 target immediate. Sets EVEX_B and EVEX_RC.
9613multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9614                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9615  let ExeDomain = _.ExeDomain in
9616  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9617                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9618                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9619                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9620}
9621
// Packed square root for one vector type, built on AVX512_maskable_split.
// Defines r (register source), m (full-vector load via _.LdFrag) and mb
// (broadcast load via _.BroadcastLdFrag, EVEX_B) forms. Each form passes two
// patterns: the first uses any_fsqrt and the second uses fsqrt.
// NOTE(review): presumably the first pattern is for the unmasked instruction
// and the second for the masked variants -- confirm against the definition of
// AVX512_maskable_split.
// All forms use MXCSR and are marked mayRaiseFPException.
9622multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9623                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9624  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9625  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9626                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9627                         (_.VT (any_fsqrt _.RC:$src)),
9628                         (_.VT (fsqrt _.RC:$src))>, EVEX,
9629                         Sched<[sched]>;
9630  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9631                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9632                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9633                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9634                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9635  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9636                          (ins _.ScalarMemOp:$src), OpcodeStr,
9637                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9638                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9639                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9640                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9641  }
9642}
9643
// Instantiates avx512_sqrt_packed for every packed element type and width:
//   ph: PHZ (HasFP16), PHZ128/PHZ256 (HasFP16 + HasVLX), using sched.PH.*
//   ps: PSZ, PSZ128/PSZ256 (HasVLX), using sched.PS.*
//   pd: PDZ, PDZ128/PDZ256 (HasVLX), using sched.PD.*; these also set REX_W.
// PSZ and PDZ carry no explicit Predicates inside this multiclass.
// The whole multiclass is wrapped in Uses = [MXCSR], mayRaiseFPException = 1.
9644let Uses = [MXCSR], mayRaiseFPException = 1 in
9645multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9646                                  X86SchedWriteSizes sched> {
9647  let Predicates = [HasFP16] in
9648  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9649                                sched.PH.ZMM, v32f16_info>,
9650                                EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9651  let Predicates = [HasFP16, HasVLX] in {
9652    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9653                                     sched.PH.XMM, v8f16x_info>,
9654                                     EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9655    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9656                                     sched.PH.YMM, v16f16x_info>,
9657                                     EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9658  }
9659  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9660                                sched.PS.ZMM, v16f32_info>,
9661                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9662  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9663                                sched.PD.ZMM, v8f64_info>,
9664                                EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
9665  // Define only if AVX512VL feature is present.
9666  let Predicates = [HasVLX] in {
9667    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9668                                     sched.PS.XMM, v4f32x_info>,
9669                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
9670    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9671                                     sched.PS.YMM, v8f32x_info>,
9672                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
9673    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9674                                     sched.PD.XMM, v2f64x_info>,
9675                                     EVEX_V128, REX_W, PD, EVEX_CD8<64, CD8VF>;
9676    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9677                                     sched.PD.YMM, v4f64x_info>,
9678                                     EVEX_V256, REX_W, PD, EVEX_CD8<64, CD8VF>;
9679  }
9680}
9681
// Instantiates avx512_sqrt_packed_round (the explicit rounding-control "rb"
// form) for the 512-bit ph, ps and pd types only. The ph variant requires
// HasFP16; the pd variant sets REX_W. The multiclass is wrapped in
// Uses = [MXCSR].
9682let Uses = [MXCSR] in
9683multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9684                                        X86SchedWriteSizes sched> {
9685  let Predicates = [HasFP16] in
9686  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9687                                      sched.PH.ZMM, v32f16_info>,
9688                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9689  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9690                                      sched.PS.ZMM, v16f32_info>,
9691                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9692  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9693                                      sched.PD.ZMM, v8f64_info>,
9694                                      EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
9695}
9696
// Scalar square root for one element type.
//   opc, OpcodeStr - opcode byte and full mnemonic.
//   sched          - scheduling class (Folded/ReadAfterFold used for loads).
//   _              - type info for the scalar type.
//   Name           - record-name prefix used to look up the codegen-only
//                    instructions (Name#Zr / Name#Zm) from the patterns below.
//   prd            - predicate guarding everything; defaults to HasAVX512.
// Defines:
//   r_Int / m_Int - two-source vector-register forms matching X86fsqrts, with
//                   the second source in a register or loaded via
//                   _.ScalarIntMemFrags.
//   rb_Int        - register form with an AVX512RC:$rc operand matching
//                   X86fsqrtRnds (EVEX_B, EVEX_RC, Uses = [MXCSR]).
//   r / m         - isCodeGenOnly forms on _.FRC with empty pattern lists;
//                   they are selected only through the Pat records below.
9697multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9698                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9699  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9700    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9701                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9702                         "$src2, $src1", "$src1, $src2",
9703                         (X86fsqrts (_.VT _.RC:$src1),
9704                                    (_.VT _.RC:$src2))>,
9705                         Sched<[sched]>, SIMD_EXC;
9706    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9707                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9708                         "$src2, $src1", "$src1, $src2",
9709                         (X86fsqrts (_.VT _.RC:$src1),
9710                                    (_.ScalarIntMemFrags addr:$src2))>,
9711                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9712    let Uses = [MXCSR] in
9713    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9714                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9715                         "$rc, $src2, $src1", "$src1, $src2, $rc",
9716                         (X86fsqrtRnds (_.VT _.RC:$src1),
9717                                     (_.VT _.RC:$src2),
9718                                     (i32 timm:$rc))>,
9719                         EVEX_B, EVEX_RC, Sched<[sched]>;
9720
9721    let isCodeGenOnly = 1, hasSideEffects = 0 in {
9722      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9723                (ins _.FRC:$src1, _.FRC:$src2),
9724                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9725                Sched<[sched]>, SIMD_EXC;
9726      let mayLoad = 1 in
9727        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9728                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9729                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9730                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9731    }
9732  }
9733
  // Select plain scalar any_fsqrt to the codegen-only register form; the
  // first source operand is supplied as IMPLICIT_DEF.
9734  let Predicates = [prd] in {
9735    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9736              (!cast<Instruction>(Name#Zr)
9737                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9738  }
9739
  // The load-folding pattern is only enabled together with OptForSize.
9740  let Predicates = [prd, OptForSize] in {
9741    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9742              (!cast<Instruction>(Name#Zm)
9743                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9744  }
9745}
9746
// Instantiates avx512_sqrt_scalar for the sh, ss and sd scalar types.
// The Name argument is built from NAME plus "SH"/"SS"/"SD" so that the
// patterns inside avx512_sqrt_scalar can find the Z-suffixed records defined
// here. Only the sh variant overrides the default predicate (with HasFP16);
// the sd variant additionally sets REX_W.
9747multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9748                                  X86SchedWriteSizes sched> {
9749  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9750                        EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
9751  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9752                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9753  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9754                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, REX_W;
9755}
9756
// vsqrt, opcode 0x51. The first defm produces the packed forms (regular and
// explicit-rounding); the second produces the scalar sh/ss/sd forms, which
// additionally carry VEX_LIG.
9757defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9758             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9759
9760defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9761
// Scalar round-to-scale for one element type. Every form takes two sources
// plus an i32u8imm immediate ($src3).
//   r_Int  - register sources, matches X86RndScales.
//   rb_Int - register sources with "{sae}" in the asm string, matches
//            X86RndScalesSAE; sets EVEX_B and Uses = [MXCSR].
//   m_Int  - second source loaded via _.ScalarIntMemFrags, matches
//            X86RndScales.
//   r / m  - isCodeGenOnly forms on _.FRC with empty pattern lists; selected
//            through the X86any_VRndScale Pat records at the end.
// Note that the codegen-only defs and both Pat groups hard-code
// Predicates = [HasAVX512] inside this multiclass.
9762multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9763                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9764  let ExeDomain = _.ExeDomain in {
9765  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9766                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9767                           "$src3, $src2, $src1", "$src1, $src2, $src3",
9768                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9769                           (i32 timm:$src3)))>,
9770                           Sched<[sched]>, SIMD_EXC;
9771
9772  let Uses = [MXCSR] in
9773  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9774                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9775                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9776                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9777                         (i32 timm:$src3)))>, EVEX_B,
9778                         Sched<[sched]>;
9779
9780  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9781                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9782                         OpcodeStr,
9783                         "$src3, $src2, $src1", "$src1, $src2, $src3",
9784                         (_.VT (X86RndScales _.RC:$src1,
9785                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9786                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9787
9788  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9789    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9790               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9791               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9792               []>, Sched<[sched]>, SIMD_EXC;
9793
9794    let mayLoad = 1 in
9795      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9796                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9797                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9798                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9799  }
9800  }
9801
  // Register-source selection: the first instruction operand is IMPLICIT_DEF.
9802  let Predicates = [HasAVX512] in {
9803    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9804              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9805               _.FRC:$src1, timm:$src2))>;
9806  }
9807
  // The load-folding pattern is only enabled together with OptForSize.
9808  let Predicates = [HasAVX512, OptForSize] in {
9809    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9810              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9811               addr:$src1, timm:$src2))>;
9812  }
9813}
9814
// Scalar vrndscale instantiations for sh (opcode 0x0A), ss (0x0A) and
// sd (0x0B). The sh variant uses AVX512PSIi8Base + TA and is wrapped in
// HasFP16; the ss/sd variants use AVX512AIi8Base and VEX_LIG, and sd also
// sets REX_W.
// NOTE(review): avx512_rndscale_scalar sets Predicates = [HasAVX512] on its
// codegen-only defs and Pat records internally -- confirm how that interacts
// with the outer HasFP16 let on the sh variant.
9815let Predicates = [HasFP16] in
9816defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9817                                           SchedWriteFRnd.Scl, f16x_info>,
9818                                           AVX512PSIi8Base, TA, EVEX_4V,
9819                                           EVEX_CD8<16, CD8VT1>;
9820
9821defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9822                                           SchedWriteFRnd.Scl, f32x_info>,
9823                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
9824                                           EVEX_CD8<32, CD8VT1>;
9825
9826defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9827                                           SchedWriteFRnd.Scl, f64x_info>,
9828                                           REX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9829                                           EVEX_CD8<64, CD8VT1>;
9830
// Selection patterns for a masked scalar unary operation expressed as
//   Move(src1, scalar_to_vector(X86selects_mask(Mask, OpNode(elt0(src2)), X)))
// where X is the fall-through value.
//   OpNode        - scalar operation applied to element 0 of $src2.
//   OpcPrefix     - instruction name stem; "V" is prepended and
//                   "r_Intk" / "r_Intkz" appended to find the instruction.
//   Move          - node that merges the scalar result into $src1.
//   Mask          - dag matched as the select condition.
//   _             - vector type info for the operands.
//   ZeroFP        - PatLeaf matched as the fall-through value in the
//                   zero-masking pattern.
//   OutMask       - dag emitted as the instruction's mask operand.
//   BasePredicate - predicate guarding both patterns.
9831multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9832                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9833                                dag OutMask, Predicate BasePredicate> {
9834  let Predicates = [BasePredicate] in {
    // Merge-masking: fall-through is element 0 of $dst, which is passed as
    // the first operand of the r_Intk instruction.
9835    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9836               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9837               (extractelt _.VT:$dst, (iPTR 0))))),
9838              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9839               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9840
    // Zero-masking: fall-through is ZeroFP, selecting the r_Intkz instruction.
9841    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9842               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9843               ZeroFP))),
9844              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9845               OutMask, _.VT:$src2, _.VT:$src1)>;
9846  }
9847}
9848
// Masked scalar fsqrt patterns for sh, ss and sd. In each case the matched
// mask is a GR32 truncated to i8 and turned into a v1i1, and the emitted mask
// operand is $mask copied to register class VK1WM. The sh variant requires
// HasFP16; ss and sd require HasAVX512.
9849defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9850                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9851                            fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9852defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9853                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9854                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9855defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9856                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9857                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9858
9859
9860//-------------------------------------------------
9861// Integer truncate and extend operations
9862//-------------------------------------------------
9863
9864// PatFrags that contain a select and a truncate op. They take operands in the
9865// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9866// either to the multiclasses.
// Each fragment takes (src, src0, mask) and expands to
// vselect_mask(mask, <truncate>(src), src0), i.e. src0 is the value chosen
// where the mask does not select the truncated result. The three variants
// differ only in the truncate node: trunc, X86vtruncs, X86vtruncus.
9867def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9868                           (vselect_mask node:$mask,
9869                                         (trunc node:$src), node:$src0)>;
9870def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9871                            (vselect_mask node:$mask,
9872                                          (X86vtruncs node:$src), node:$src0)>;
9873def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9874                             (vselect_mask node:$mask,
9875                                           (X86vtruncus node:$src), node:$src0)>;
9876
// Instruction definitions for one truncate at one vector width.
//   OpNode   - node matched by the unmasked register form.
//   MaskNode - operator matched by the masked register forms; it is applied
//              as (src, passthru, mask).
//   SrcInfo  - type info of the wide source; its KRCWM supplies the mask
//              register class.
//   DestInfo - type info of the narrow destination.
//   x86memop - memory operand type for the store forms.
// Defines:
//   rr   - unmasked register-to-register.
//   rrk  - merge-masked; $src0 is tied to $dst and used as the passthru.
//   rrkz - zero-masked; passthru is DestInfo.ImmAllZerosV.
//   mr   - unmasked store (empty pattern list).
//   mrk  - masked store (empty pattern list).
9877multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9878                              SDPatternOperator MaskNode,
9879                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9880                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9881  let ExeDomain = DestInfo.ExeDomain in {
9882  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9883             (ins SrcInfo.RC:$src),
9884             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9885             [(set DestInfo.RC:$dst,
9886                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9887             EVEX, Sched<[sched]>;
9888  let Constraints = "$src0 = $dst" in
9889  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9890             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9891             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9892             [(set DestInfo.RC:$dst,
9893                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9894                             (DestInfo.VT DestInfo.RC:$src0),
9895                             SrcInfo.KRCWM:$mask))]>,
9896             EVEX, EVEX_K, Sched<[sched]>;
9897  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9898             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9899             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9900             [(set DestInfo.RC:$dst,
9901                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9902                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9903             EVEX, EVEX_KZ, Sched<[sched]>;
9904  }
9905
  // Store forms carry no patterns here, so mayStore is set explicitly.
9906  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9907    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9908               (ins x86memop:$dst, SrcInfo.RC:$src),
9909               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9910               EVEX, Sched<[sched.Folded]>;
9911
9912    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9913               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9914               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9915               EVEX, EVEX_K, Sched<[sched.Folded]>;
9916  }//mayStore = 1, hasSideEffects = 0
9917}
9918
// Selection patterns for the truncating-store instructions.
//   truncFrag  - fragment matched as (value, address); selects the "mr" form.
//   mtruncFrag - fragment matched as (value, address, mask); selects "mrk".
//   Name       - instruction name stem; the instruction is looked up as
//                Name # SrcInfo.ZSuffix # "mr" / "mrk".
9919multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9920                                    PatFrag truncFrag, PatFrag mtruncFrag,
9921                                    string Name> {
9922
9923  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9924            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9925                                    addr:$dst, SrcInfo.RC:$src)>;
9926
9927  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9928                        SrcInfo.KRCWM:$mask),
9929            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9930                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9931}
9932
// Instantiates one truncate at all three vector widths. For each width it
// combines avx512_trunc_common (the instructions) with
// avx512_trunc_mr_lowering (the store patterns).
//   OpNode128/256/512, MaskNode128/256/512 - per-width match nodes.
//   VTSrcInfo                              - source type infos (info128/256/512).
//   DestInfoZ128/Z256/Z                    - per-width destination type infos.
//   x86memopZ128/Z256/Z                    - per-width store operand types.
//   truncFrag, mtruncFrag                  - store fragments shared by all widths.
//   prd                                    - base predicate, default HasAVX512.
// Z128 and Z256 require [HasVLX, prd]; Z requires only [prd].
9933multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9934                        SDNode OpNode256, SDNode OpNode512,
9935                        SDPatternOperator MaskNode128,
9936                        SDPatternOperator MaskNode256,
9937                        SDPatternOperator MaskNode512,
9938                        X86SchedWriteWidths sched,
9939                        AVX512VLVectorVTInfo VTSrcInfo,
9940                        X86VectorVTInfo DestInfoZ128,
9941                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9942                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9943                        X86MemOperand x86memopZ, PatFrag truncFrag,
9944                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9945
9946  let Predicates = [HasVLX, prd] in {
9947    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched.XMM,
9948                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9949                avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9950                                         mtruncFrag, NAME>, EVEX_V128;
9951
9952    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched.YMM,
9953                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9954                avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9955                                         mtruncFrag, NAME>, EVEX_V256;
9956  }
9957  let Predicates = [prd] in
9958    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched.ZMM,
9959                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9960                avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9961                                         mtruncFrag, NAME>, EVEX_V512;
9962}
9963
// i64 -> i8 truncate. Source types come from avx512vl_i64_info; the
// destination is v16i8x_info at every width, with store operands
// i16mem / i32mem / i64mem for 128 / 256 / 512-bit sources. InVecNode and
// InVecMaskNode are used for all three widths.
9964multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9965                           X86SchedWriteWidths sched, PatFrag StoreNode,
9966                           PatFrag MaskedStoreNode, SDNode InVecNode,
9967                           SDPatternOperator InVecMaskNode> {
9968  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9969                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9970                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9971                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9972                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9973}
9974
// i64 -> i16 truncate. Source types come from avx512vl_i64_info; the
// destination is v8i16x_info at every width, with store operands
// i32mem / i64mem / i128mem. The 128- and 256-bit forms match
// InVecNode / InVecMaskNode; only the 512-bit form matches
// OpNode / MaskNode.
9975multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9976                           SDPatternOperator MaskNode,
9977                           X86SchedWriteWidths sched, PatFrag StoreNode,
9978                           PatFrag MaskedStoreNode, SDNode InVecNode,
9979                           SDPatternOperator InVecMaskNode> {
9980  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9981                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9982                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9983                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9984                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9985}
9986
// i64 -> i32 truncate. Source types come from avx512vl_i64_info;
// destinations are v4i32x_info / v4i32x_info / v8i32x_info with store
// operands i64mem / i128mem / i256mem. Only the 128-bit form matches
// InVecNode / InVecMaskNode; the 256- and 512-bit forms match
// OpNode / MaskNode.
9987multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9988                           SDPatternOperator MaskNode,
9989                           X86SchedWriteWidths sched, PatFrag StoreNode,
9990                           PatFrag MaskedStoreNode, SDNode InVecNode,
9991                           SDPatternOperator InVecMaskNode> {
9992  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9993                          InVecMaskNode, MaskNode, MaskNode, sched,
9994                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9995                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9996                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9997}
9998
// i32 -> i8 truncate. Source types come from avx512vl_i32_info; the
// destination is v16i8x_info at every width, with store operands
// i32mem / i64mem / i128mem. The 128- and 256-bit forms match
// InVecNode / InVecMaskNode; only the 512-bit form matches
// OpNode / MaskNode.
9999multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
10000                           SDPatternOperator MaskNode,
10001                           X86SchedWriteWidths sched, PatFrag StoreNode,
10002                           PatFrag MaskedStoreNode, SDNode InVecNode,
10003                           SDPatternOperator InVecMaskNode> {
10004  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
10005                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
10006                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
10007                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
10008                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
10009}
10010
// i32 -> i16 truncate. Source types come from avx512vl_i32_info;
// destinations are v8i16x_info / v8i16x_info / v16i16x_info with store
// operands i64mem / i128mem / i256mem. Only the 128-bit form matches
// InVecNode / InVecMaskNode; the 256- and 512-bit forms match
// OpNode / MaskNode.
10011multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10012                           SDPatternOperator MaskNode,
10013                           X86SchedWriteWidths sched, PatFrag StoreNode,
10014                           PatFrag MaskedStoreNode, SDNode InVecNode,
10015                           SDPatternOperator InVecMaskNode> {
10016  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
10017                          InVecMaskNode, MaskNode, MaskNode, sched,
10018                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
10019                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
10020                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
10021}
10022
// i16 -> i8 truncate. Source types come from avx512vl_i16_info;
// destinations are v16i8x_info / v16i8x_info / v32i8x_info with store
// operands i64mem / i128mem / i256mem. Only the 128-bit form matches
// InVecNode / InVecMaskNode. This is the only trunc wrapper in this group
// that overrides the base predicate, passing HasBWI.
// NOTE(review): EVEX_CD8 uses element size 16 here, whereas the other
// byte-destination wrappers (avx512_trunc_qb, avx512_trunc_db) use 8 --
// confirm this is intentional.
10023multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10024                           SDPatternOperator MaskNode,
10025                           X86SchedWriteWidths sched, PatFrag StoreNode,
10026                           PatFrag MaskedStoreNode, SDNode InVecNode,
10027                           SDPatternOperator InVecMaskNode> {
10028  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
10029                          InVecMaskNode, MaskNode, MaskNode, sched,
10030                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
10031                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
10032                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
10033}
10034
10035defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
10036                                  SchedWriteVecTruncate, truncstorevi8,
10037                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
10038defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
10039                                  SchedWriteVecTruncate, truncstore_s_vi8,
10040                                  masked_truncstore_s_vi8, X86vtruncs,
10041                                  X86vmtruncs>;
10042defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
10043                                  SchedWriteVecTruncate, truncstore_us_vi8,
10044                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
10045
10046defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
10047                                  SchedWriteVecTruncate, truncstorevi16,
10048                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10049defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
10050                                  SchedWriteVecTruncate, truncstore_s_vi16,
10051                                  masked_truncstore_s_vi16, X86vtruncs,
10052                                  X86vmtruncs>;
10053defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
10054                                  select_truncus, SchedWriteVecTruncate,
10055                                  truncstore_us_vi16, masked_truncstore_us_vi16,
10056                                  X86vtruncus, X86vmtruncus>;
10057
10058defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
10059                                  SchedWriteVecTruncate, truncstorevi32,
10060                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
10061defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
10062                                  SchedWriteVecTruncate, truncstore_s_vi32,
10063                                  masked_truncstore_s_vi32, X86vtruncs,
10064                                  X86vmtruncs>;
10065defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
10066                                  select_truncus, SchedWriteVecTruncate,
10067                                  truncstore_us_vi32, masked_truncstore_us_vi32,
10068                                  X86vtruncus, X86vmtruncus>;
10069
// Dword -> byte truncating moves (avx512_trunc_db):
//   VPMOVDB   0x31  trunc        / truncstorevi8
//   VPMOVSDB  0x21  X86vtruncs   / truncstore_s_vi8
//   VPMOVUSDB 0x11  X86vtruncus  / truncstore_us_vi8
10070defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
10071                                  SchedWriteVecTruncate, truncstorevi8,
10072                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
10073defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
10074                                  SchedWriteVecTruncate, truncstore_s_vi8,
10075                                  masked_truncstore_s_vi8, X86vtruncs,
10076                                  X86vmtruncs>;
10077defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
10078                                  select_truncus, SchedWriteVecTruncate,
10079                                  truncstore_us_vi8, masked_truncstore_us_vi8,
10080                                  X86vtruncus, X86vmtruncus>;
10081
// Dword -> word truncating moves (avx512_trunc_dw):
//   VPMOVDW   0x33  trunc        / truncstorevi16
//   VPMOVSDW  0x23  X86vtruncs   / truncstore_s_vi16
//   VPMOVUSDW 0x13  X86vtruncus  / truncstore_us_vi16
// The register forms of these are also referenced by name from the
// mtrunc_lowering instantiations and the NoVLX / NoBWI patterns below.
10082defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
10083                                  SchedWriteVecTruncate, truncstorevi16,
10084                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10085defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
10086                                  SchedWriteVecTruncate, truncstore_s_vi16,
10087                                  masked_truncstore_s_vi16, X86vtruncs,
10088                                  X86vmtruncs>;
10089defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
10090                                  select_truncus, SchedWriteVecTruncate,
10091                                  truncstore_us_vi16, masked_truncstore_us_vi16,
10092                                  X86vtruncus, X86vmtruncus>;
10093
// Word -> byte truncating moves (avx512_trunc_wb):
//   VPMOVWB   0x30  trunc        / truncstorevi8
//   VPMOVSWB  0x20  X86vtruncs   / truncstore_s_vi8
//   VPMOVUSWB 0x10  X86vtruncus  / truncstore_us_vi8
10094defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
10095                                  SchedWriteVecTruncate, truncstorevi8,
10096                                  masked_truncstorevi8, X86vtrunc,
10097                                  X86vmtrunc>;
10098defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
10099                                  SchedWriteVecTruncate, truncstore_s_vi8,
10100                                  masked_truncstore_s_vi8, X86vtruncs,
10101                                  X86vmtruncs>;
10102defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
10103                                  select_truncus, SchedWriteVecTruncate,
10104                                  truncstore_us_vi8, masked_truncstore_us_vi8,
10105                                  X86vtruncus, X86vmtruncus>;
10106
// 256-bit -> 128-bit truncates when VLX is unavailable. The YMM source is
// placed in the low half of an otherwise undefined ZMM (INSERT_SUBREG into
// IMPLICIT_DEF at sub_ymm), the 512-bit truncate is run, and the low XMM of
// its result is extracted. Only the low half of the result is used, so the
// undefined upper source elements do not affect the selected value.
10107let Predicates = [HasAVX512, NoVLX] in {
10108def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
10109         (v8i16 (EXTRACT_SUBREG
10110                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
10111                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
10112def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
10113         (v4i32 (EXTRACT_SUBREG
10114                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
10115                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
10116}
10117
// Same widening trick for v16i16 -> v16i8 when BWI is present but VLX is not:
// widen the YMM source to an undefined ZMM, use the 512-bit VPMOVWB, and keep
// the low XMM of the result.
10118let Predicates = [HasBWI, NoVLX] in {
10119def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10120         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
10121                                            VR256X:$src, sub_ymm))), sub_xmm))>;
10122}
10123
10124// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Selection patterns for a masked-truncate node OpNode(src, passthru, mask).
//   InstrName - prefix of the instruction records; "rrk" / "rrkz" is appended.
//   OpNode    - the three-operand masked truncate node to match.
//   DestInfo  - type info of the (narrower-element) result vector.
//   SrcInfo   - type info of the source vector; its KRCWM supplies the mask
//               register class.
10125multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
10126                           X86VectorVTInfo DestInfo,
10127                           X86VectorVTInfo SrcInfo> {
  // Register passthru ($src0) -> merge-masked "rrk" form.
10128  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10129                                 DestInfo.RC:$src0,
10130                                 SrcInfo.KRCWM:$mask)),
10131            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
10132                                                 SrcInfo.KRCWM:$mask,
10133                                                 SrcInfo.RC:$src)>;
10134
  // All-zeros passthru -> zero-masked "rrkz" form (no $src0 operand).
10135  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10136                                 DestInfo.ImmAllZerosV,
10137                                 SrcInfo.KRCWM:$mask)),
10138            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
10139                                                  SrcInfo.RC:$src)>;
10140}
10141
// Masked v8i32 -> v8i16 truncates using the 256-bit (Z256) dword->word
// instructions; requires VLX.
10142let Predicates = [HasVLX] in {
10143defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
10144defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
10145defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
10146}
10147
// Masked truncates from 512-bit sources using the Z instruction forms:
// v16i32 -> v16i16, v16i32 -> v16i8 and v8i64 -> v8i16, each for the
// X86vmtrunc / X86vmtruncs / X86vmtruncus nodes.
10148let Predicates = [HasAVX512] in {
10149defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
10150defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
10151defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
10152
10153defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
10154defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
10155defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
10156
10157defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
10158defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
10159defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
10160}
10161
// Shared body for one vector width of a VPMOVZX*/VPMOVSX* extend.
//   opc/OpcodeStr - opcode byte and mnemonic.
//   sched         - scheduling class; the memory form uses sched.Folded.
//   DestInfo      - result vector type info (also supplies ExeDomain).
//   SrcInfo       - source vector type info for the register form.
//   x86memop      - memory operand class for the load form.
//   LdFrag        - extending-load fragment matched by the memory form.
//   OpNode        - extend node matched by the register form.
// Both forms go through AVX512_maskable (defined elsewhere in this file).
10162multiclass avx512_pmovx_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
10163              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
10164              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
10165  let ExeDomain = DestInfo.ExeDomain in {
  // Register source.
10166  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10167                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
10168                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
10169                  EVEX, Sched<[sched]>;
10170
  // Memory source, matched via the extending load fragment.
10171  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10172                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
10173                  (DestInfo.VT (LdFrag addr:$src))>,
10174                EVEX, Sched<[sched.Folded]>;
10175  }
10176}
10177
// Byte -> word extends. ExtTy ("z" or "s" at the use sites) selects the default
// load fragment <ExtTy>extloadvi8.
// InVecNode is used for Z128, where the v16i8 source has more elements than
// the v8i16 result; OpNode is used for Z256/Z, where element counts match.
// Z128/Z256 require VLX+BWI, Z requires BWI.
10178multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
10179          SDNode OpNode, SDNode InVecNode, string ExtTy,
10180          X86SchedWriteWidths sched,
10181          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10182  let Predicates = [HasVLX, HasBWI] in {
10183    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
10184                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10185                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, WIG;
10186
10187    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
10188                    v16i8x_info, i128mem, LdFrag, OpNode>,
10189                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, WIG;
10190  }
10191  let Predicates = [HasBWI] in {
10192    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
10193                    v32i8x_info, i256mem, LdFrag, OpNode>,
10194                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, WIG;
10195  }
10196}
10197
// Byte -> dword extends. The source is always described as v16i8; InVecNode
// is used for Z128/Z256 (4 and 8 result elements), OpNode for Z (16 result
// elements, matching the source). Memory operands are i32mem / i64mem /
// i128mem respectively. Z128/Z256 require VLX, Z requires AVX512.
10198multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
10199          SDNode OpNode, SDNode InVecNode, string ExtTy,
10200          X86SchedWriteWidths sched,
10201          PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10202  let Predicates = [HasVLX, HasAVX512] in {
10203    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10204                   v16i8x_info, i32mem, LdFrag, InVecNode>,
10205                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, WIG;
10206
10207    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10208                   v16i8x_info, i64mem, LdFrag, InVecNode>,
10209                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, WIG;
10210  }
10211  let Predicates = [HasAVX512] in {
10212    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10213                   v16i8x_info, i128mem, LdFrag, OpNode>,
10214                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, WIG;
10215  }
10216}
10217
// Byte -> qword extends. Unlike the sibling multiclasses this one takes no
// OpNode: every width has fewer result elements (2/4/8) than the v16i8
// source, so InVecNode is used throughout. Memory operands are i16mem /
// i32mem / i64mem.
10218multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
10219                              SDNode InVecNode, string ExtTy,
10220                              X86SchedWriteWidths sched,
10221                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10222  let Predicates = [HasVLX, HasAVX512] in {
10223    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10224                   v16i8x_info, i16mem, LdFrag, InVecNode>,
10225                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, WIG;
10226
10227    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10228                   v16i8x_info, i32mem, LdFrag, InVecNode>,
10229                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, WIG;
10230  }
10231  let Predicates = [HasAVX512] in {
10232    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10233                   v16i8x_info, i64mem, LdFrag, InVecNode>,
10234                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, WIG;
10235  }
10236}
10237
// Word -> dword extends; default load fragment is <ExtTy>extloadvi16.
// Z128 extends the low elements of a v8i16 source (InVecNode); Z256 and Z
// extend a full v8i16 / v16i16 source (OpNode).
10238multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
10239         SDNode OpNode, SDNode InVecNode, string ExtTy,
10240         X86SchedWriteWidths sched,
10241         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10242  let Predicates = [HasVLX, HasAVX512] in {
10243    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
10244                   v8i16x_info, i64mem, LdFrag, InVecNode>,
10245                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, WIG;
10246
10247    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
10248                   v8i16x_info, i128mem, LdFrag, OpNode>,
10249                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, WIG;
10250  }
10251  let Predicates = [HasAVX512] in {
10252    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
10253                   v16i16x_info, i256mem, LdFrag, OpNode>,
10254                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, WIG;
10255  }
10256}
10257
// Word -> qword extends. The source is always v8i16; Z128/Z256 produce 2/4
// elements (InVecNode), Z produces 8 (OpNode). Memory operands are i32mem /
// i64mem / i128mem.
10258multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
10259         SDNode OpNode, SDNode InVecNode, string ExtTy,
10260         X86SchedWriteWidths sched,
10261         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10262  let Predicates = [HasVLX, HasAVX512] in {
10263    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10264                   v8i16x_info, i32mem, LdFrag, InVecNode>,
10265                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, WIG;
10266
10267    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10268                   v8i16x_info, i64mem, LdFrag, InVecNode>,
10269                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, WIG;
10270  }
10271  let Predicates = [HasAVX512] in {
10272    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10273                   v8i16x_info, i128mem, LdFrag, OpNode>,
10274                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, WIG;
10275  }
10276}
10277
// Dword -> qword extends; default load fragment is <ExtTy>extloadvi32.
// Z128 uses InVecNode (v4i32 -> v2i64); Z256 and Z use OpNode.
// Note: unlike the byte/word-source multiclasses above, these defms do not
// carry the WIG modifier.
10278multiclass avx512_pmovx_dq<bits<8> opc, string OpcodeStr,
10279         SDNode OpNode, SDNode InVecNode, string ExtTy,
10280         X86SchedWriteWidths sched,
10281         PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10282
10283  let Predicates = [HasVLX, HasAVX512] in {
10284    defm Z128:  avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
10285                   v4i32x_info, i64mem, LdFrag, InVecNode>,
10286                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
10287
10288    defm Z256:  avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
10289                   v4i32x_info, i128mem, LdFrag, OpNode>,
10290                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
10291  }
10292  let Predicates = [HasAVX512] in {
10293    defm Z   :  avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
10294                   v8i32x_info, i256mem, LdFrag, OpNode>,
10295                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
10296  }
10297}
10298
// Zero-extending (opcodes 0x30-0x35, ExtTy "z") and sign-extending (opcodes
// 0x20-0x25, ExtTy "s") instruction families. The BQ variants pass only the
// *_invec node because avx512_pmovx_bq takes no full-vector OpNode.
10299defm VPMOVZXBW : avx512_pmovx_bw<0x30, "vpmovzxbw", zext, zext_invec, "z", SchedWriteVecExtend>;
10300defm VPMOVZXBD : avx512_pmovx_bd<0x31, "vpmovzxbd", zext, zext_invec, "z", SchedWriteVecExtend>;
10301defm VPMOVZXBQ : avx512_pmovx_bq<0x32, "vpmovzxbq",       zext_invec, "z", SchedWriteVecExtend>;
10302defm VPMOVZXWD : avx512_pmovx_wd<0x33, "vpmovzxwd", zext, zext_invec, "z", SchedWriteVecExtend>;
10303defm VPMOVZXWQ : avx512_pmovx_wq<0x34, "vpmovzxwq", zext, zext_invec, "z", SchedWriteVecExtend>;
10304defm VPMOVZXDQ : avx512_pmovx_dq<0x35, "vpmovzxdq", zext, zext_invec, "z", SchedWriteVecExtend>;
10305
10306defm VPMOVSXBW: avx512_pmovx_bw<0x20, "vpmovsxbw", sext, sext_invec, "s", SchedWriteVecExtend>;
10307defm VPMOVSXBD: avx512_pmovx_bd<0x21, "vpmovsxbd", sext, sext_invec, "s", SchedWriteVecExtend>;
10308defm VPMOVSXBQ: avx512_pmovx_bq<0x22, "vpmovsxbq",       sext_invec, "s", SchedWriteVecExtend>;
10309defm VPMOVSXWD: avx512_pmovx_wd<0x23, "vpmovsxwd", sext, sext_invec, "s", SchedWriteVecExtend>;
10310defm VPMOVSXWQ: avx512_pmovx_wq<0x24, "vpmovsxwq", sext, sext_invec, "s", SchedWriteVecExtend>;
10311defm VPMOVSXDQ: avx512_pmovx_dq<0x25, "vpmovsxdq", sext, sext_invec, "s", SchedWriteVecExtend>;
10312
10313
10314// Patterns that we also need any extend versions of. aext_vector_inreg
10315// is currently legalized to zext_vector_inreg.
// Folds a full-width vector load feeding ExtOp into the "rm" form of the
// matching extend instruction.
//   OpcPrefix - instruction name prefix ("VPMOVSX" / "VPMOVZX" at the visible
//               use sites); the BW/WD/DQ/... suffix and Z[256]rm are appended.
//   ExtOp     - the extend node applied to the loaded vector.
10316multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10317  // 256-bit patterns
10318  let Predicates = [HasVLX, HasBWI] in {
10319    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10320              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10321  }
10322
10323  let Predicates = [HasVLX] in {
10324    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10325              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10326
10327    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10328              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10329  }
10330
10331  // 512-bit patterns
10332  let Predicates = [HasBWI] in {
10333    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10334              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10335  }
10336  let Predicates = [HasAVX512] in {
10337    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10338              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10339    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10340              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10341
10342    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10343              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10344
10345    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10346              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10347  }
10348}
10349
// Extends AVX512_pmovx_patterns_base with load-folding patterns for the
// in-register extend node InVecOp, where only the low part of a 128-bit
// vector is consumed. The narrow load reaches the extend through a bitcast
// of one of:
//   (v2i64 (scalar_to_vector (loadi64 ...)))   - 64-bit integer scalar load
//   (v2f64 (scalar_to_vector (loadf64 ...)))   - 64-bit FP scalar load
//   (v4i32 (scalar_to_vector (loadi32 ...)))   - 32-bit integer scalar load
//   (X86vzload64 / X86vzload32 ...)            - X86vzload node
// Each is folded into the corresponding "rm" instruction form.
//   OpcPrefix - instruction name prefix ("VPMOVSX" / "VPMOVZX").
//   ExtOp     - full-vector extend node, forwarded to the base multiclass.
//   InVecOp   - in-register extend node matched here.
10350multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10351                                 SDNode InVecOp> :
10352    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10353  // 128-bit patterns
10354  let Predicates = [HasVLX, HasBWI] in {
10355  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10356            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10357  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10358            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10359  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10360            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10361  }
10362  let Predicates = [HasVLX] in {
10363  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10364            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10365  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10366            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10367
10368  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10369            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10370
10371  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10372            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10373  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10374            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10375  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10376            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10377
10378  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10379            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10380  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10381            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10382
10383  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10384            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10385  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10386            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10387  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10388            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10389  }
  // 256-bit results produced from a 128-bit source whose low bytes/words are
  // loaded from memory.
10390  let Predicates = [HasVLX] in {
10391  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10392            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // An f64 scalar load forms a v2f64 vector (not v2i64), as in the 128-bit
  // and 512-bit patterns of this multiclass.
10393  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10394            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10395  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10396            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10397
10398  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10399            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10400  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10401            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10402
10403  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10404            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // Same v2f64 typing for the f64 scalar load as above.
10405  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10406            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10407  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10408            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10409  }
10410  // 512-bit patterns
10411  let Predicates = [HasAVX512] in {
10412  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10413            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10414  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10415            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10416  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10417            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10418  }
10419}
10420
// Instantiate the load-folding patterns for the sign- and zero-extend
// instruction families.
10421defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10422defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10423
10424// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10425// ext+trunc aggressively making it impossible to legalize the DAG to this
10426// pattern directly.
// Selected as a two-instruction sequence: zero-extend the words to dwords
// (VPMOVZXWD, 512-bit) and then truncate dwords to bytes (VPMOVDB). The
// second pattern folds a v16i16 load into the extend.
10427let Predicates = [HasAVX512, NoBWI] in {
10428def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10429         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10430def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10431         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10432}
10433
10434//===----------------------------------------------------------------------===//
10435// GATHER - SCATTER Operations
10436
10437// FIXME: Improve scheduling of gather/scatter instructions.
// One gather instruction ("rm" form) for vector type info `_`.
//   opc/OpcodeStr - opcode byte and mnemonic stem; _.Suffix is appended.
//   memop         - vector-index memory operand class.
//   MaskRC        - mask register class; defaults to _.KRCWM.
// Operand constraints: $dst is early-clobber and tied to the passthru $src1;
// the mask input $mask is tied to the second output $mask_wb.
// There is no selection pattern ([]), so mayLoad / hasSideEffects are given
// explicitly.
10438multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10439                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10440  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10441      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10442  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10443            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10444            !strconcat(OpcodeStr#_.Suffix,
10445            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10446            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10447            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
10448}
10449
// Gathers for the element type described by `_` as instantiated with the
// f64/i64 type infos below; every form carries REX_W.
//   dopc / qopc - opcodes of the "d"-suffixed and "q"-suffixed variants.
//   SUFF        - element suffix used in the record name (e.g. "PD", "Q").
// The 512-bit forms are defined unconditionally; Z256/Z128 require VLX.
// NOTE(review): the vx*/vy*/vz* memory operand classes are defined elsewhere;
// presumably they encode the index-vector register width -- confirm.
10450multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10451                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10452  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10453                                      vy512xmem>, EVEX_V512, REX_W;
10454  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10455                                      vz512mem>, EVEX_V512, REX_W;
10456let Predicates = [HasVLX] in {
10457  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10458                              vx256xmem>, EVEX_V256, REX_W;
10459  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10460                              vy256xmem>, EVEX_V256, REX_W;
10461  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10462                              vx128xmem>, EVEX_V128, REX_W;
10463  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10464                              vx128xmem>, EVEX_V128, REX_W;
10465}
10466}
10467
// Gathers for the element type described by `_` as instantiated with the
// f32/i32 type infos below (no REX_W).
// In the "q" variants the data type info is one step narrower than the
// EVEX vector-length modifier (info256 with EVEX_V512, info128 with
// EVEX_V256), and the Z128 "q" form overrides the mask class with VK2WM.
// NOTE(review): presumably because a qword index vector addresses half as
// many dword elements -- confirm against the ISA reference.
10468multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10469                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10470  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10471                                       EVEX_V512;
10472  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10473                                       EVEX_V512;
10474let Predicates = [HasVLX] in {
10475  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10476                                          vy256xmem>, EVEX_V256;
10477  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10478                                          vy128xmem>, EVEX_V256;
10479  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10480                                          vx128xmem>, EVEX_V128;
10481  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10482                                          vx64xmem, VK2WM>, EVEX_V128;
10483}
10484}
10485
10486
// Floating-point gathers (opcodes 0x92 / 0x93) and integer gathers (opcodes
// 0x90 / 0x91). Each defm combines the 64-bit-element and 32-bit-element
// multiclasses, so record names take the form VGATHER{D,Q}{PD,PS}Z[128|256]rm
// and VPGATHER{D,Q}{Q,D}Z[128|256]rm.
10487defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10488               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10489
10490defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10491                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
10492
// One scatter instruction ("mr" form) for vector type info `_`.
//   opc/OpcodeStr - opcode byte and mnemonic stem; _.Suffix is appended.
//   memop         - vector-index memory operand class (the destination).
//   MaskRC        - mask register class; defaults to _.KRCWM.
// The only output is $mask_wb, tied to the $mask input. There is no selection
// pattern ([]), so mayStore / hasSideEffects are given explicitly.
10493multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10494                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10495
10496let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10497    hasSideEffects = 0 in
10498
10499  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10500            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10501            !strconcat(OpcodeStr#_.Suffix,
10502            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10503            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10504            Sched<[WriteStore]>;
10505}
10506
// Scatter counterpart of avx512_gather_q_pd: same type infos, memory operand
// classes and REX_W usage, instantiating avx512_scatter instead.
// The 512-bit forms are defined unconditionally; Z256/Z128 require VLX.
10507multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10508                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10509  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10510                                      vy512xmem>, EVEX_V512, REX_W;
10511  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10512                                      vz512mem>, EVEX_V512, REX_W;
10513let Predicates = [HasVLX] in {
10514  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10515                              vx256xmem>, EVEX_V256, REX_W;
10516  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10517                              vy256xmem>, EVEX_V256, REX_W;
10518  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10519                              vx128xmem>, EVEX_V128, REX_W;
10520  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10521                              vx128xmem>, EVEX_V128, REX_W;
10522}
10523}
10524
// Scatter counterpart of avx512_gather_d_ps: the "q" variants use a data type
// info one step narrower than the EVEX vector-length modifier, and the Z128
// "q" form overrides the mask class with VK2WM.
10525multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10526                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10527  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10528                                       EVEX_V512;
10529  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10530                                       EVEX_V512;
10531let Predicates = [HasVLX] in {
10532  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10533                                          vy256xmem>, EVEX_V256;
10534  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10535                                          vy128xmem>, EVEX_V256;
10536  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10537                                          vx128xmem>, EVEX_V128;
10538  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10539                                          vx64xmem, VK2WM>, EVEX_V128;
10540}
10541}
10542
// Floating-point scatters (opcodes 0xA2 / 0xA3) and integer scatters (opcodes
// 0xA0 / 0xA1), each combining the 64-bit-element and 32-bit-element
// multiclasses.
10543defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10544               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10545
10546defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10547                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10548
10549// prefetch
// One gather/scatter prefetch instruction ("m" form), gated on HasPFI.
//   opc    - opcode byte.
//   F      - instruction format; the use sites pass MRM1m/MRM2m/MRM5m/MRM6m to
//            distinguish the gather/scatter and pf0/pf1 variants.
//   KRC    - mask register class.
//   memop  - vector-index memory operand class.
// The instruction has no outputs and no selection pattern; both mayLoad and
// mayStore are set.
10550multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10551                       RegisterClass KRC, X86MemOperand memop> {
10552  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10553  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10554            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10555            EVEX, EVEX_K, Sched<[WriteLoad]>;
10556}
10557
// Gather prefetches. pf0 variants use format MRM1m, pf1 variants MRM2m.
// Opcode 0xC6 is used for the "d" forms and 0xC7 for the "q" forms; the PD
// forms add REX_W. Only the DPS forms use a 16-bit mask class (VK16WM).
10558defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10559                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10560
10561defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10562                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10563
10564defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10565                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10566
10567defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10568                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10569
10570defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10571                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10572
10573defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10574                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10575
10576defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10577                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10578
10579defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10580                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10581
// Scatter prefetches. Same opcodes, mask classes and memory operands as the
// gather prefetches; pf0 variants use format MRM5m, pf1 variants MRM6m.
10582defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10583                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10584
10585defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10586                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10587
10588defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10589                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10590
10591defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10592                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10593
10594defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10595                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10596
10597defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10598                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10599
10600defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10601                     VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10602
10603defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10604                     VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
10605
// Mask-register -> vector conversion for one vector width: selects
// (sext Vec.KRC) producing Vec.VT. The mnemonic is OpcodeStr # Vec.Suffix.
// Note the template argument `Sched` shares its name with the Sched<...>
// class it is passed to on the last line.
10606multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
10607def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10608                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10609                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10610                  EVEX, Sched<[Sched]>;
10611}
10612
// Instantiates cvt_by_vec_width for all three vector widths of VTInfo.
//   prd - feature predicate required for this element width.
// The brace-less `let Predicates = [prd] in` applies only to the Z defm that
// follows it; Z256/Z128 additionally require VLX.
10613multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10614                                 string OpcodeStr, Predicate prd> {
10615let Predicates = [prd] in
10616  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr, WriteVecMoveZ>, EVEX_V512;
10617
10618  let Predicates = [prd, HasVLX] in {
10619    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr, WriteVecMoveY>, EVEX_V256;
10620    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr, WriteVecMoveX>, EVEX_V128;
10621  }
10622}
10623
// Mask -> vector moves. Byte/word forms use opcode 0x28 and require BWI;
// dword/qword forms use opcode 0x38 and require DQI. The W and Q forms add
// REX_W. All share the mnemonic stem "vpmovm2"; the type info's Suffix
// completes it.
10624defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10625defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
10626defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10627defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
10628
// Vector -> mask-register conversion for one vector width. The selection
// pattern is (X86pcmpgtm all-zeros, src), with the zero vector as the first
// operand and the source as the second.
// NOTE(review): presumably X86pcmpgtm is a signed greater-than compare into a
// mask, so a result bit is set when the element is negative -- confirm.
10629multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10630    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10631                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10632                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10633                        EVEX, Sched<[WriteMove]>;
10634}
10635
10636// Use 512bit version to implement 128/256 bit in case NoVLX.
//   ExtendInfo - type info of the wide (512-bit at the use sites) vector.
//   _          - type info of the narrow source vector.
//   Name       - instruction name stem; "Zrr" is appended.
// The narrow source is inserted at _.SubRegIdx into an undefined ExtendInfo
// vector, the wide instruction is applied, and its mask result is copied to
// the narrow type's mask register class.
10637multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10638                                           X86VectorVTInfo _,
10639                                           string Name> {
10640
10641  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10642            (_.KVT (COPY_TO_REGCLASS
10643                     (!cast<Instruction>(Name#"Zrr")
10644                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10645                                      _.RC:$src, _.SubRegIdx)),
10646                   _.KRC))>;
10647}
10648
// Vector -> mask conversion for all widths of VTInfo.
//   prd - feature predicate required for this element width.
// Z is defined under [prd]; Z256/Z128 instructions under [prd, HasVLX].
// Under [prd, NoVLX] the 256/128-bit cases are instead selected through the
// 512-bit instruction via convert_vector_to_mask_lowering, using NAME as the
// instruction name stem.
10649multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10650                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10651  let Predicates = [prd] in
10652    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10653                                            EVEX_V512;
10654
10655  let Predicates = [prd, HasVLX] in {
10656    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10657                                              EVEX_V256;
10658    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10659                                               EVEX_V128;
10660  }
10661  let Predicates = [prd, NoVLX] in {
10662    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10663    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10664  }
10665}
10666
// Vector-to-mask conversions. Byte and word element forms use opcode 0x29
// and require HasBWI; dword and qword forms use opcode 0x39 and require
// HasDQI. The word and qword variants additionally set REX_W.
10667defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10668                                              avx512vl_i8_info, HasBWI>;
10669defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10670                                              avx512vl_i16_info, HasBWI>, REX_W;
10671defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10672                                              avx512vl_i32_info, HasDQI>;
10673defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10674                                              avx512vl_i64_info, HasDQI>, REX_W;
10675
10676// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10677// is available, but BWI is not. We can't handle this in lowering because
10678// a target independent DAG combine likes to combine sext and trunc.
// Each pattern first materializes the mask as a v16i32 vector with VPMOVM2D
// and then feeds that vector to VPMOVDB (v16i8 result) or VPMOVDW (v16i16
// result).
10679let Predicates = [HasDQI, NoBWI] in {
10680  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10681            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10682  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10683            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10684}
10685
// Same approach for v8i1 -> v8i16, going through v8i32 with the 256-bit
// instruction forms; these additionally require VLX.
10686let Predicates = [HasDQI, NoBWI, HasVLX] in {
10687  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10688            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10689}
10690
10691//===----------------------------------------------------------------------===//
10692// AVX-512 - COMPRESS and EXPAND
10693//
10694
// Instruction definitions for a compress operation at one vector width.
//   opc       - opcode byte.
//   _         - vector type info (register class, memory operand, mask
//               class, element size).
//   OpcodeStr - assembly mnemonic.
//   sched     - scheduling class; the store forms use sched.Folded.
// None of the definitions carries a selection pattern here (null_frag / []);
// the Pat records live in compress_by_vec_width_lowering.
10695multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10696                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  // Register-to-register form, encoded as MRMDestReg.
10697  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10698              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10699              (null_frag)>, AVX5128IBase,
10700              Sched<[sched]>;
10701
  // Unmasked store form. The `let` below has no braces, so it applies to
  // `mr` only; with an empty pattern the store property is stated explicitly.
10702  let mayStore = 1, hasSideEffects = 0 in
10703  def mr : AVX5128I<opc, MRMDestMem, (outs),
10704              (ins _.MemOp:$dst, _.RC:$src),
10705              OpcodeStr # "\t{$src, $dst|$dst, $src}",
10706              []>, EVEX_CD8<_.EltSize, CD8VT1>,
10707              Sched<[sched.Folded]>;
10708
  // Write-masked store form (EVEX_K).
  // NOTE(review): presumably its mayStore is inferred from the
  // X86mCompressingStore pattern that targets it - confirm.
10709  def mrk : AVX5128I<opc, MRMDestMem, (outs),
10710              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10711              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10712              []>,
10713              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10714              Sched<[sched.Folded]>;
10715}
10716
// Selection patterns for the compress instructions of one vector width.
//   _    - vector type info; _.ZSuffix selects the width-specific instruction.
//   Name - instruction name prefix.
10717multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  // Masked compressing store -> masked store form (mrk).
10718  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10719            (!cast<Instruction>(Name#_.ZSuffix#mrk)
10720                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10721
  // Register compress with pass-through value $src0 -> merge-masked form.
10722  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10723            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10724                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
  // Register compress with an all-zeros pass-through -> zero-masked form.
10725  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10726            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10727                            _.KRCWM:$mask, _.RC:$src)>;
10728}
10729
// Instantiates compress instructions and their patterns for the 512-, 256-
// and 128-bit widths of one element type.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   sched     - scheduling class used for every width.
//   VTInfo    - bundle providing the three per-width type infos.
//   Pred      - base feature predicate (defaults to HasAVX512); the 256/128
//               bit forms additionally require HasVLX.
10730multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10731                                 X86FoldableSchedWrite sched,
10732                                 AVX512VLVectorVTInfo VTInfo,
10733                                 Predicate Pred = HasAVX512> {
10734  let Predicates = [Pred] in
10735  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10736           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10737
10738  let Predicates = [Pred, HasVLX] in {
10739    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10740                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10741    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10742                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10743  }
10744}
10745
10746// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer forms use opcode 0x8B and floating-point forms use 0x8A; the
// 64-bit element variants additionally set REX_W. All use the default
// predicate of compress_by_elt_width.
10747defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10748                                          avx512vl_i32_info>, EVEX;
10749defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10750                                          avx512vl_i64_info>, EVEX, REX_W;
10751defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10752                                          avx512vl_f32_info>, EVEX;
10753defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10754                                          avx512vl_f64_info>, EVEX, REX_W;
10755
10756// Expand: instruction definitions for one vector width.
//   opc       - opcode byte.
//   _         - vector type info.
//   OpcodeStr - assembly mnemonic.
//   sched     - scheduling class; the load form uses sched.Folded and
//               sched.ReadAfterFold.
// Both forms pass null_frag as their pattern; the Pat records live in
// expand_by_vec_width_lowering.
10757multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10758                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  // Register source form.
10759  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10760              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10761              (null_frag)>, AVX5128IBase,
10762              Sched<[sched]>;
10763
  // Memory source form.
10764  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10765              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10766              (null_frag)>,
10767            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10768            Sched<[sched.Folded, sched.ReadAfterFold]>;
10769}
10770
// Selection patterns for the expand instructions of one vector width.
//   _    - vector type info; _.ZSuffix selects the width-specific instruction.
//   Name - instruction name prefix.
10771multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10772
  // Expanding load with an undef pass-through -> zero-masked load form.
10773  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10774            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10775                                        _.KRCWM:$mask, addr:$src)>;
10776
  // Expanding load with an all-zeros pass-through -> zero-masked load form.
10777  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10778            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10779                                        _.KRCWM:$mask, addr:$src)>;
10780
  // Expanding load with a register pass-through -> merge-masked load form.
10781  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10782                                               (_.VT _.RC:$src0))),
10783            (!cast<Instruction>(Name#_.ZSuffix#rmk)
10784                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10785
  // Register expand, with a register or an all-zeros pass-through.
10786  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10787            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10788                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10789  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10790            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10791                            _.KRCWM:$mask, _.RC:$src)>;
10792}
10793
// Instantiates expand instructions and their patterns for the 512-, 256- and
// 128-bit widths of one element type.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   sched     - scheduling class used for every width.
//   VTInfo    - bundle providing the three per-width type infos.
//   Pred      - base feature predicate (defaults to HasAVX512); the 256/128
//               bit forms additionally require HasVLX.
10794multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10795                               X86FoldableSchedWrite sched,
10796                               AVX512VLVectorVTInfo VTInfo,
10797                               Predicate Pred = HasAVX512> {
10798  let Predicates = [Pred] in
10799  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10800           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10801
10802  let Predicates = [Pred, HasVLX] in {
10803    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10804                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10805    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10806                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10807  }
10808}
10809
10810// FIXME: Is there a better scheduler class for VPEXPAND?
// Integer forms use opcode 0x89 and floating-point forms use 0x88; the
// 64-bit element variants additionally set REX_W. All use the default
// predicate of expand_by_elt_width.
10811defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10812                                      avx512vl_i32_info>, EVEX;
10813defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10814                                      avx512vl_i64_info>, EVEX, REX_W;
10815defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10816                                      avx512vl_f32_info>, EVEX;
10817defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10818                                      avx512vl_f64_info>, EVEX, REX_W;
10819
10820// Handle instruction  reg_vec1 = op(reg_vec,imm)
10821//                                op(mem_vec,imm)
10822//                                op(broadcast(eltVt),imm)
10823// All instructions are created with FROUND_CURRENT.
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic stem; _.Suffix is appended to it.
//   OpNode     - node matched by the unmasked pattern.
//   MaskOpNode - node matched by the masked patterns.
//   sched      - scheduling class (Folded/ReadAfterFold for memory forms).
//   _          - vector type info.
// Every form reads MXCSR and is marked mayRaiseFPException.
10824multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10825                                      SDPatternOperator OpNode,
10826                                      SDPatternOperator MaskOpNode,
10827                                      X86FoldableSchedWrite sched,
10828                                      X86VectorVTInfo _> {
10829  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source.
10830  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10831                      (ins _.RC:$src1, i32u8imm:$src2),
10832                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10833                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10834                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10835                      Sched<[sched]>;
  // Full-vector memory source.
10836  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10837                    (ins _.MemOp:$src1, i32u8imm:$src2),
10838                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10839                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10840                            (i32 timm:$src2)),
10841                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10842                                (i32 timm:$src2))>,
10843                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast scalar memory source (EVEX_B).
10844  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10845                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10846                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10847                    "${src1}"#_.BroadcastStr#", $src2",
10848                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10849                            (i32 timm:$src2)),
10850                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10851                                (i32 timm:$src2))>, EVEX_B,
10852                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10853  }
10854}
10855
10856// Handle instruction  reg_vec1 = op(reg_vec2,imm),{sae}
// Unary register-only form with one vector source and an immediate; the
// assembly strings carry the "{sae}" marker and the encoding sets EVEX_B.
// It reads MXCSR but, unlike avx512_unary_fp_packed_imm, does not set
// mayRaiseFPException.
//   OpNode - node matched by the pattern.
10857multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10858                                          SDNode OpNode, X86FoldableSchedWrite sched,
10859                                          X86VectorVTInfo _> {
10860  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10861  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10862                      (ins _.RC:$src1, i32u8imm:$src2),
10863                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10864                      "$src1, {sae}, $src2",
10865                      (OpNode (_.VT _.RC:$src1),
10866                              (i32 timm:$src2))>,
10867                      EVEX_B, Sched<[sched]>;
10868}
10869
// Instantiates a unary packed FP immediate operation for all vector widths.
//   OpcodeStr  - mnemonic stem.
//   _          - bundle providing the per-width type infos.
//   opc        - opcode byte.
//   OpNode     - node for the unmasked patterns.
//   MaskOpNode - node for the masked patterns.
//   OpNodeSAE  - node for the {sae} form.
//   sched      - per-width scheduling classes (XMM/YMM/ZMM).
//   prd        - base feature predicate.
// Only the 512-bit variant also gets the {sae} form; the 128/256-bit
// variants require HasVLX in addition to prd.
10870multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10871            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10872            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10873            Predicate prd>{
10874  let Predicates = [prd] in {
10875    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10876                                           sched.ZMM, _.info512>,
10877                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10878                                               sched.ZMM, _.info512>, EVEX_V512;
10879  }
10880  let Predicates = [prd, HasVLX] in {
10881    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10882                                           sched.XMM, _.info128>, EVEX_V128;
10883    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10884                                           sched.YMM, _.info256>, EVEX_V256;
10885  }
10886}
10887
10888// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10889//                                op(reg_vec2,mem_vec,imm)
10890//                                op(reg_vec2,broadcast(eltVt),imm)
10891// All instructions are created with FROUND_CURRENT.
// Two-source packed FP operation with an immediate. Every form reads MXCSR
// and is marked mayRaiseFPException. The same OpNode is used for all forms;
// only the second source differs (register, full-vector load, broadcast
// load).
10892multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10893                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10894  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register second source.
10895  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10896                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10897                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10898                      (OpNode (_.VT _.RC:$src1),
10899                              (_.VT _.RC:$src2),
10900                              (i32 timm:$src3))>,
10901                      Sched<[sched]>;
  // Full-vector memory second source.
10902  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10903                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10904                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10905                    (OpNode (_.VT _.RC:$src1),
10906                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
10907                            (i32 timm:$src3))>,
10908                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast scalar memory second source (EVEX_B).
10909  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10910                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10911                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10912                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10913                    (OpNode (_.VT _.RC:$src1),
10914                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10915                            (i32 timm:$src3))>, EVEX_B,
10916                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10917  }
10918}
10919
10920// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10921//                                op(reg_vec2,mem_vec,imm)
// Two-source operation with an 8-bit immediate where the destination and
// source vector types may differ.
//   DestInfo - type info of the result (also supplies ExeDomain and the
//              masking behaviour passed to AVX512_maskable).
//   SrcInfo  - type info of both sources.
// No MXCSR use or FP-exception flag is attached here.
10922multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10923                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10924                              X86VectorVTInfo SrcInfo>{
10925  let ExeDomain = DestInfo.ExeDomain in {
  // Register second source.
10926  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10927                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10928                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10929                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10930                               (SrcInfo.VT SrcInfo.RC:$src2),
10931                               (i8 timm:$src3)))>,
10932                  Sched<[sched]>;
  // Full-vector memory second source.
10933  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10934                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10935                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10936                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10937                             (SrcInfo.VT (bitconvert
10938                                                (SrcInfo.LdFrag addr:$src2))),
10939                             (i8 timm:$src3)))>,
10940                Sched<[sched.Folded, sched.ReadAfterFold]>;
10941  }
10942}
10943
10944// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10945//                                op(reg_vec2,mem_vec,imm)
10946//                                op(reg_vec2,broadcast(eltVt),imm)
// Inherits the rri/rmi forms from avx512_3Op_rm_imm8 (with the same type
// info used for destination and sources) and adds the broadcast form.
10947multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10948                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10949  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10950
  // Broadcast scalar memory second source (EVEX_B).
10951  let ExeDomain = _.ExeDomain in
10952  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10953                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10954                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10955                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10956                    (OpNode (_.VT _.RC:$src1),
10957                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10958                            (i8 timm:$src3))>, EVEX_B,
10959                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10960}
10961
10962// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10963//                                       op(reg_vec2,mem_scalar,imm)
// Scalar two-source FP operation with an immediate, built on
// AVX512_maskable_scalar. Both forms read MXCSR and are marked
// mayRaiseFPException.
10964multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10965                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10966  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register second source.
10967  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10968                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10969                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10970                      (OpNode (_.VT _.RC:$src1),
10971                              (_.VT _.RC:$src2),
10972                              (i32 timm:$src3))>,
10973                      Sched<[sched]>;
  // Scalar memory second source, matched through _.ScalarIntMemFrags.
10974  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10975                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10976                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10977                    (OpNode (_.VT _.RC:$src1),
10978                            (_.ScalarIntMemFrags addr:$src2),
10979                            (i32 timm:$src3))>,
10980                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10981  }
10982}
10983
10984// Handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only two-source packed form whose assembly strings carry the
// "{sae}" marker and whose encoding sets EVEX_B. It reads MXCSR but does not
// set mayRaiseFPException.
10985multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10986                                    SDNode OpNode, X86FoldableSchedWrite sched,
10987                                    X86VectorVTInfo _> {
10988  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10989  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10990                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10991                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10992                      "$src1, $src2, {sae}, $src3",
10993                      (OpNode (_.VT _.RC:$src1),
10994                              (_.VT _.RC:$src2),
10995                              (i32 timm:$src3))>,
10996                      EVEX_B, Sched<[sched]>;
10997}
10998
10999// Handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar counterpart of the {sae} register form, built on
// AVX512_maskable_scalar. It reads MXCSR but does not set
// mayRaiseFPException. Note the record name is spelled NAME#rrib explicitly.
11000multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11001                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11002  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
11003  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11004                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
11005                      OpcodeStr, "$src3, {sae}, $src2, $src1",
11006                      "$src1, $src2, {sae}, $src3",
11007                      (OpNode (_.VT _.RC:$src1),
11008                              (_.VT _.RC:$src2),
11009                              (i32 timm:$src3))>,
11010                      EVEX_B, Sched<[sched]>;
11011}
11012
// Instantiates a two-source packed FP immediate operation for all vector
// widths.
//   OpcodeStr - assembly mnemonic.
//   _         - bundle providing the per-width type infos.
//   opc       - opcode byte.
//   OpNode    - node for the regular forms.
//   OpNodeSAE - node for the {sae} form.
//   sched     - per-width scheduling classes (XMM/YMM/ZMM).
//   prd       - base feature predicate.
// Only the 512-bit variant also gets the {sae} form; the 128/256-bit
// variants require HasVLX in addition to prd.
11013multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
11014            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
11015            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
11016  let Predicates = [prd] in {
11017    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
11018                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
11019                                  EVEX_V512;
11020
11021  }
11022  let Predicates = [prd, HasVLX] in {
11023    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
11024                                  EVEX_V128;
11025    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
11026                                  EVEX_V256;
11027  }
11028}
11029
// Instantiates avx512_3Op_rm_imm8 (register and memory forms, no broadcast)
// for all vector widths, with possibly different destination and source
// type bundles.
//   opc      - opcode byte.
//   OpNode   - node matched by the patterns.
//   OpStr    - assembly mnemonic.
//   sched    - per-width scheduling classes.
//   DestInfo - per-width result type infos.
//   SrcInfo  - per-width source type infos.
//   Pred     - base feature predicate (defaults to HasBWI); the 128/256-bit
//              variants additionally require HasVLX.
// Every variant is tagged AVX512AIi8Base and EVEX_4V.
11030multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
11031                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
11032                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
11033  let Predicates = [Pred] in {
11034    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
11035                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
11036  }
11037  let Predicates = [Pred, HasVLX] in {
11038    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
11039                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
11040    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
11041                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
11042  }
11043}
11044
// Instantiates avx512_3Op_imm8 (register, memory and broadcast forms) for
// all vector widths.
//   OpcodeStr - assembly mnemonic.
//   _         - bundle providing the per-width type infos.
//   opc       - opcode byte.
//   OpNode    - node matched by the patterns.
//   sched     - per-width scheduling classes.
//   Pred      - base feature predicate (defaults to HasAVX512); the
//               128/256-bit variants additionally require HasVLX.
// Unlike avx512_common_3Op_rm_imm8, no opcode-map or EVEX_4V classes are
// attached here.
11045multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
11046                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
11047                                  Predicate Pred = HasAVX512> {
11048  let Predicates = [Pred] in {
11049    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
11050                                EVEX_V512;
11051  }
11052  let Predicates = [Pred, HasVLX] in {
11053    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
11054                                EVEX_V128;
11055    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
11056                                EVEX_V256;
11057  }
11058}
11059
// Combines the regular and {sae} scalar immediate forms under a single `Z`
// prefix, gated on `prd`. Both parts use the XMM scheduling class of `sched`.
//   _         - scalar/vector type info passed to both parts.
//   OpNode    - node for the regular forms.
//   OpNodeSAE - node for the {sae} form.
11060multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
11061                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
11062                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
11063  let Predicates = [prd] in {
11064     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
11065              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
11066  }
11067}
11068
// Instantiates a unary packed FP immediate operation for half, single and
// double precision.
//   OpcodeStr - mnemonic stem (the per-type suffix is appended further down).
//   opcPs     - opcode used for the PH and PS variants.
//   opcPd     - opcode used for the PD variant.
//   OpNode / MaskOpNode / OpNodeSAE - nodes forwarded to each variant.
//   sched     - per-width scheduling classes.
//   prd       - predicate for PS and PD; PH is always gated on HasFP16.
// PH is tagged AVX512PSIi8Base + TA, whereas PS and PD use AVX512AIi8Base;
// PD additionally sets REX_W.
11069multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
11070                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
11071                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
11072                    X86SchedWriteWidths sched, Predicate prd>{
11073  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
11074                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
11075                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
11076  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
11077                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11078                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
11079  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
11080                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11081                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
11082}
11083
// Unary packed FP immediate instructions in PH/PS/PD flavours. The two opcode
// arguments are (PH and PS opcode, PD opcode). VREDUCE requires HasDQI for
// PS/PD; VRNDSCALE and VGETMANT require HasAVX512. VRNDSCALE is the only one
// that uses a different node for the unmasked pattern (X86any_VRndScale)
// than for the masked patterns (X86VRndScale).
11084defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
11085                              X86VReduce, X86VReduce, X86VReduceSAE,
11086                              SchedWriteFRnd, HasDQI>;
11087defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
11088                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
11089                              SchedWriteFRnd, HasAVX512>;
11090defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
11091                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
11092                              SchedWriteFRnd, HasAVX512>;
11093
// Packed VRANGE in double and single precision: opcode 0x50, gated on
// HasDQI, scheduled as SchedWriteFAdd. The PD variant additionally sets
// REX_W and uses a 64-bit CD8 element size.
11094defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
11095                                                0x50, X86VRange, X86VRangeSAE,
11096                                                SchedWriteFAdd, HasDQI>,
11097      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
11098defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
11099                                                0x50, X86VRange, X86VRangeSAE,
11100                                                SchedWriteFAdd, HasDQI>,
11101      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11102
// Scalar VRANGE: opcode 0x51, gated on HasDQI. The SD variant sets REX_W.
11103defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
11104      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11105      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
11106defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
11107      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11108      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11109
// Scalar VREDUCE: opcode 0x57. SD/SS are gated on HasDQI; the SH variant is
// gated on HasFP16 and uses AVX512PSIi8Base + TA instead of AVX512AIi8Base.
11110defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
11111      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11112      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
11113defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
11114      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11115      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11116defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
11117      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
11118      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11119
// Scalar VGETMANT: opcode 0x27. SD/SS are gated on HasAVX512; the SH variant
// is gated on HasFP16 and uses AVX512PSIi8Base + TA.
11120defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
11121      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11122      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
11123defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
11124      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11125      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11126defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
11127      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
11128      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11129
// 128-bit-lane shuffle instructions (X86Shuf128) for one vector width.
//   opc          - opcode byte.
//   OpcodeStr    - assembly mnemonic.
//   sched        - scheduling class.
//   _            - type info of the instruction's operands and result.
//   CastInfo     - type info in which the X86Shuf128 node is matched; the
//                  result is bitconverted back to _.VT.
//   EVEX2VEXOvrd - name stem for the EVEX2VEXOverride annotation; "rr"/"rm"
//                  is appended for the register/memory forms.
11130multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
11131                                          X86FoldableSchedWrite sched,
11132                                          X86VectorVTInfo _,
11133                                          X86VectorVTInfo CastInfo,
11134                                          string EVEX2VEXOvrd> {
11135  let ExeDomain = _.ExeDomain in {
  // Register second source.
11136  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11137                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11138                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11139                  (_.VT (bitconvert
11140                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
11141                                                  (i8 timm:$src3)))))>,
11142                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Full-vector memory second source; the load uses CastInfo's fragment.
11143  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11144                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11145                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11146                (_.VT
11147                 (bitconvert
11148                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
11149                                           (CastInfo.LdFrag addr:$src2),
11150                                           (i8 timm:$src3)))))>,
11151                Sched<[sched.Folded, sched.ReadAfterFold]>,
11152                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  // Broadcast second source (EVEX_B); the broadcast uses _'s own fragment
  // and this form carries no EVEX2VEXOverride.
11153  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11154                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11155                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11156                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
11157                    (_.VT
11158                     (bitconvert
11159                      (CastInfo.VT
11160                       (X86Shuf128 _.RC:$src1,
11161                                   (_.BroadcastLdFrag addr:$src2),
11162                                   (i8 timm:$src3)))))>, EVEX_B,
11163                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11164  }
11165}
11166
// Instantiates the 128-bit-lane shuffle for the 512- and 256-bit widths; no
// 128-bit variant is defined. The 512-bit variant passes an empty override
// name, while the 256-bit variant (which also requires HasVLX) forwards
// EVEX2VEXOvrd.
11167multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
11168                                   AVX512VLVectorVTInfo _,
11169                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
11170                                   string EVEX2VEXOvrd>{
11171  let Predicates = [HasAVX512] in
11172  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11173                                          _.info512, CastInfo.info512, "">, EVEX_V512;
11174
11175  let Predicates = [HasAVX512, HasVLX] in
11176  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11177                                             _.info256, CastInfo.info256,
11178                                             EVEX2VEXOvrd>, EVEX_V256;
11179}
11180
// 128-bit-lane shuffles. FP variants use opcode 0x23 with override stem
// "VPERM2F128"; integer variants use opcode 0x43 with "VPERM2I128". The
// 32-bit element variants match the shuffle in the corresponding 64-bit
// element type (second type-info argument). The 64X2 variants set REX_W.
11181defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
11182      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11183defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
11184      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
11185defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11186      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11187defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11188      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
11189
// VALIGN instruction forms (register, memory, broadcast) for one vector
// width, all matching X86VAlign with an 8-bit immediate.
//   opc       - opcode byte.
//   OpcodeStr - assembly mnemonic.
//   sched     - scheduling class.
//   _         - vector type info.
11190multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11191                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11192  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
11193  // instantiation of this class.
11194  let ExeDomain = _.ExeDomain in {
  // Register second source; annotated with the VPALIGNRrri override.
11195  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11196                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11197                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11198                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11199                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  // Full-vector memory second source; annotated with the VPALIGNRrmi
  // override.
11200  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11201                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11202                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11203                (_.VT (X86VAlign _.RC:$src1,
11204                                 (bitconvert (_.LdFrag addr:$src2)),
11205                                 (i8 timm:$src3)))>,
11206                Sched<[sched.Folded, sched.ReadAfterFold]>,
11207                EVEX2VEXOverride<"VPALIGNRrmi">;
11208
  // Broadcast second source (EVEX_B); no override is attached.
11209  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11210                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11211                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11212                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
11213                   (X86VAlign _.RC:$src1,
11214                              (_.VT (_.BroadcastLdFrag addr:$src2)),
11215                              (i8 timm:$src3))>, EVEX_B,
11216                   Sched<[sched.Folded, sched.ReadAfterFold]>;
11217  }
11218}
11219
// Instantiates avx512_valign (opcode 0x03) at all three vector widths of `_`:
//   Z    - 512-bit, requires HasAVX512.
//   Z128 - 128-bit, requires HasAVX512 and HasVLX.
//   Z256 - 256-bit, same predicates, with EVEX2VEXOverride reset to unset.
// `sched` supplies the per-width scheduling class (ZMM/XMM/YMM).
11220multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11221                                AVX512VLVectorVTInfo _> {
11222  let Predicates = [HasAVX512] in {
11223    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11224                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
11225  }
11226  let Predicates = [HasAVX512, HasVLX] in {
11227    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11228                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
11229    // We can't really override the 256-bit version so change it back to unset.
11230    let EVEX2VEXOverride = ? in
11231    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11232                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
11233  }
11234}
11235
// VALIGND / VALIGNQ: 32-bit and 64-bit element instantiations of
// avx512_valign_common; the Q variant additionally sets REX_W.
11236defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11237                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11238defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11239                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11240                                   REX_W;
11241
// VPALIGNR: byte-element instruction (opcode 0x0F) selecting X86PAlignr, with
// avx512vl_i8_info passed for both VT-info arguments.
11242defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11243                                         SchedWriteShuffle, avx512vl_i8_info,
11244                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11245
11246// Fragments to help convert valignq into masked valignd. Or valignq/valignd
11247// into vpalignr.
// Each transform rescales the zero-extended target immediate and returns it
// through getI8Imm.
// ValignqImm32XForm: multiplies the immediate by 2 (used below for
// valignq -> valignd).
11248def ValignqImm32XForm : SDNodeXForm<timm, [{
11249  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11250}]>;
// ValignqImm8XForm: multiplies the immediate by 8 (used below for
// valignq -> vpalignr).
11251def ValignqImm8XForm : SDNodeXForm<timm, [{
11252  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11253}]>;
// ValigndImm8XForm: multiplies the immediate by 4 (used below for
// valignd -> vpalignr).
11254def ValigndImm8XForm : SDNodeXForm<timm, [{
11255  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
11256}]>;
11257
// Selection patterns for a masked select (vselect_mask) of type To.VT whose
// selected value is a bitconvert of OpNode computed on type From.VT. The
// match is rewritten to the masked instruction named OpcodeStr # suffix, with
// operands in To's register class and the immediate rewritten by ImmXForm.
// Four patterns are emitted:
//   rrik  - register second source, pass-through operand $src0.
//   rrikz - register second source, To.ImmAllZerosV as the pass-through.
//   rmik  - From.LdFrag memory second source, pass-through operand $src0.
//   rmikz - From.LdFrag memory second source, To.ImmAllZerosV pass-through.
11258multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11259                                        X86VectorVTInfo From, X86VectorVTInfo To,
11260                                        SDNodeXForm ImmXForm> {
11261  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11262                                 (bitconvert
11263                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11264                                                   timm:$src3))),
11265                                 To.RC:$src0)),
11266            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11267                                                  To.RC:$src1, To.RC:$src2,
11268                                                  (ImmXForm timm:$src3))>;
11269
11270  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11271                                 (bitconvert
11272                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11273                                                   timm:$src3))),
11274                                 To.ImmAllZerosV)),
11275            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11276                                                   To.RC:$src1, To.RC:$src2,
11277                                                   (ImmXForm timm:$src3))>;
11278
11279  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11280                                 (bitconvert
11281                                  (From.VT (OpNode From.RC:$src1,
11282                                                   (From.LdFrag addr:$src2),
11283                                           timm:$src3))),
11284                                 To.RC:$src0)),
11285            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11286                                                  To.RC:$src1, addr:$src2,
11287                                                  (ImmXForm timm:$src3))>;
11288
11289  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11290                                 (bitconvert
11291                                  (From.VT (OpNode From.RC:$src1,
11292                                                   (From.LdFrag addr:$src2),
11293                                           timm:$src3))),
11294                                 To.ImmAllZerosV)),
11295            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11296                                                   To.RC:$src1, addr:$src2,
11297                                                   (ImmXForm timm:$src3))>;
11298}
11299
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// broadcast load of To's type (To.BroadcastLdFrag) bitconverted to From.VT:
//   rmbi   - unmasked OpNode on From.VT.
//   rmbik  - vselect_mask with pass-through operand $src0.
//   rmbikz - vselect_mask with To.ImmAllZerosV as the pass-through.
// As in the base multiclass, the immediate is rewritten by ImmXForm.
11300multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11301                                           X86VectorVTInfo From,
11302                                           X86VectorVTInfo To,
11303                                           SDNodeXForm ImmXForm> :
11304      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11305  def : Pat<(From.VT (OpNode From.RC:$src1,
11306                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11307                             timm:$src3)),
11308            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11309                                                  (ImmXForm timm:$src3))>;
11310
11311  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11312                                 (bitconvert
11313                                  (From.VT (OpNode From.RC:$src1,
11314                                           (bitconvert
11315                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11316                                           timm:$src3))),
11317                                 To.RC:$src0)),
11318            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11319                                                   To.RC:$src1, addr:$src2,
11320                                                   (ImmXForm timm:$src3))>;
11321
11322  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11323                                 (bitconvert
11324                                  (From.VT (OpNode From.RC:$src1,
11325                                           (bitconvert
11326                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11327                                           timm:$src3))),
11328                                 To.ImmAllZerosV)),
11329            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11330                                                    To.RC:$src1, addr:$src2,
11331                                                    (ImmXForm timm:$src3))>;
11332}
11333
// 512-bit valignq (v8i64) -> VALIGNDZ (v16i32) lowering, including the
// broadcast patterns; the immediate is scaled by ValignqImm32XForm.
11334let Predicates = [HasAVX512] in {
11335  // For 512-bit we lower to the widest element type we can. So we only need
11336  // to handle converting valignq to valignd.
11337  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11338                                         v16i32_info, ValignqImm32XForm>;
11339}
11340
// 128-bit (v2i64 -> v4i32) and 256-bit (v4i64 -> v8i32) valignq -> valignd
// lowerings, guarded by HasVLX; both scale the immediate with
// ValignqImm32XForm.
11341let Predicates = [HasVLX] in {
11342  // For 128-bit we lower to the widest element type we can. So we only need
11343  // to handle converting valignq to valignd.
11344  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11345                                         v4i32x_info, ValignqImm32XForm>;
11346  // For 256-bit we lower to the widest element type we can. So we only need
11347  // to handle converting valignq to valignd.
11348  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11349                                         v8i32x_info, ValignqImm32XForm>;
11350}
11351
// Masked 128-bit VALIGNQ/VALIGND -> VPALIGNRZ128 (v16i8) lowerings. These use
// the non-broadcast multiclass, so no broadcast patterns are emitted here.
11352let Predicates = [HasVLX, HasBWI] in {
11353  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  // NOTE(review): only the 128-bit (VPALIGNRZ128) lowerings are instantiated
  // below; the mention of 256 bit above does not match the code -- confirm.
11354  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11355                                      v16i8x_info, ValignqImm8XForm>;
11356  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11357                                      v16i8x_info, ValigndImm8XForm>;
11358}
11359
// VDBPSADBW: opcode 0x42, selects X86dbpsadbw; the two VT-info arguments are
// avx512vl_i16_info and avx512vl_i8_info. Marked NotEVEX2VEXConvertible.
11360defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11361                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11362                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
11363
// Single-source vector operation for one vector type `_`, built through
// AVX512_maskable:
//   rr - source in a register.
//   rm - source loaded as a full vector (_.LdFrag); uses sched.Folded and
//        EVEX_CD8<_.EltSize, CD8VF>.
// Both select (OpNode src) at type _.VT.
11364multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11365                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11366  let ExeDomain = _.ExeDomain in {
11367  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11368                    (ins _.RC:$src1), OpcodeStr,
11369                    "$src1", "$src1",
11370                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11371                    Sched<[sched]>;
11372
11373  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11374                  (ins _.MemOp:$src1), OpcodeStr,
11375                  "$src1", "$src1",
11376                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11377            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11378            Sched<[sched.Folded]>;
11379  }
11380}
11381
// avx512_unary_rm plus an rmb form whose source is a broadcast scalar load
// (_.BroadcastLdFrag), marked EVEX_B; the assembly operand is ${src1}
// followed by _.BroadcastStr.
// NOTE(review): rmb is declared outside any `let ExeDomain` here, unlike the
// rr/rm forms in the base multiclass -- confirm whether that matters.
11382multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11383                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11384           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11385  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11386                  (ins _.ScalarMemOp:$src1), OpcodeStr,
11387                  "${src1}"#_.BroadcastStr,
11388                  "${src1}"#_.BroadcastStr,
11389                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11390             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11391             Sched<[sched.Folded]>;
11392}
11393
// Instantiates avx512_unary_rm (no broadcast form) at 512, 256 and 128 bits.
// Z requires `prd`; Z256/Z128 require `prd` and HasVLX. `sched` supplies the
// per-width scheduling class.
11394multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11395                              X86SchedWriteWidths sched,
11396                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11397  let Predicates = [prd] in
11398    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11399                             EVEX_V512;
11400
11401  let Predicates = [prd, HasVLX] in {
11402    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11403                              EVEX_V256;
11404    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11405                              EVEX_V128;
11406  }
11407}
11408
// Same width expansion as avx512_unary_rm_vl, but based on avx512_unary_rmb,
// so each width also gets the broadcast (rmb) form.
// Z requires `prd`; Z256/Z128 require `prd` and HasVLX.
11409multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11410                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11411                               Predicate prd> {
11412  let Predicates = [prd] in
11413    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11414                              EVEX_V512;
11415
11416  let Predicates = [prd, HasVLX] in {
11417    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11418                                 EVEX_V256;
11419    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11420                                 EVEX_V128;
11421  }
11422}
11423
// Dword/qword pair of a unary operation, both with broadcast forms:
//   Q - opc_q, mnemonic OpcodeStr#"q", avx512vl_i64_info, REX_W set.
//   D - opc_d, mnemonic OpcodeStr#"d", avx512vl_i32_info.
11424multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11425                                 SDNode OpNode, X86SchedWriteWidths sched,
11426                                 Predicate prd> {
11427  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11428                               avx512vl_i64_info, prd>, REX_W;
11429  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11430                               avx512vl_i32_info, prd>;
11431}
11432
// Byte/word pair of a unary operation. These use avx512_unary_rm_vl, so no
// broadcast form is defined; both are marked WIG.
//   W - opc_w, mnemonic OpcodeStr#"w", avx512vl_i16_info.
//   B - opc_b, mnemonic OpcodeStr#"b", avx512vl_i8_info.
11433multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11434                                 SDNode OpNode, X86SchedWriteWidths sched,
11435                                 Predicate prd> {
11436  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11437                              avx512vl_i16_info, prd>, WIG;
11438  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11439                              avx512vl_i8_info, prd>, WIG;
11440}
11441
// All four element sizes of a unary operation under one NAME: the D/Q forms
// are predicated on HasAVX512 and the B/W forms on HasBWI.
11442multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11443                                  bits<8> opc_d, bits<8> opc_q,
11444                                  string OpcodeStr, SDNode OpNode,
11445                                  X86SchedWriteWidths sched> {
11446  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11447                                    HasAVX512>,
11448              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11449                                    HasBWI>;
11450}
11451
// VPABS{B,W,D,Q}: opcodes 0x1C/0x1D/0x1E/0x1F respectively, selecting `abs`.
11452defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11453                                    SchedWriteVecALU>;
11454
11455// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern inserts the narrow source into an IMPLICIT_DEF v8i64 at
// sub_ymm/sub_xmm, applies VPABSQZrr, and extracts the same subregister.
11456let Predicates = [HasAVX512, NoVLX] in {
11457  def : Pat<(v4i64 (abs VR256X:$src)),
11458            (EXTRACT_SUBREG
11459                (VPABSQZrr
11460                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11461             sub_ymm)>;
11462  def : Pat<(v2i64 (abs VR128X:$src)),
11463            (EXTRACT_SUBREG
11464                (VPABSQZrr
11465                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11466             sub_xmm)>;
11467}
11468
11469// Use 512bit version to implement 128/256 bit.
// Under [prd, NoVLX], emits one pattern each for the 256-bit and 128-bit
// types of `_`: the source is inserted into an IMPLICIT_DEF 512-bit value at
// the type's SubRegIdx, the instruction InstrStr # "Zrr" is applied, and the
// result is extracted at the same SubRegIdx.
11470multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11471                                 AVX512VLVectorVTInfo _, Predicate prd> {
11472  let Predicates = [prd, NoVLX] in {
11473    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11474              (EXTRACT_SUBREG
11475                (!cast<Instruction>(InstrStr # "Zrr")
11476                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11477                                 _.info256.RC:$src1,
11478                                 _.info256.SubRegIdx)),
11479              _.info256.SubRegIdx)>;
11480
11481    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11482              (EXTRACT_SUBREG
11483                (!cast<Instruction>(InstrStr # "Zrr")
11484                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11485                                 _.info128.RC:$src1,
11486                                 _.info128.SubRegIdx)),
11487              _.info128.SubRegIdx)>;
11488  }
11489}
11490
// VPLZCNT{D,Q}: opcode 0x44 for both element sizes, selecting `ctlz`,
// predicated on HasCDI.
11491defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11492                                        SchedWriteVecIMul, HasCDI>;
11493
// VPCONFLICT{D,Q}: opcode 0xC4 for both element sizes, selecting
// X86Conflict, predicated on HasCDI.
11494// FIXME: Is there a better scheduler class for VPCONFLICT?
11495defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11496                                        SchedWriteVecALU, HasCDI>;
11497
11498// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11499defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11500defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11501
11502//===---------------------------------------------------------------------===//
11503// Counts number of ones - VPOPCNTD and VPOPCNTQ
11504//===---------------------------------------------------------------------===//
11505
11506// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
// VPOPCNT{D,Q}: opcode 0x55 for both element sizes, selecting `ctpop`,
// predicated on HasVPOPCNTDQ.
11507defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11508                                     SchedWriteVecALU, HasVPOPCNTDQ>;
11509
// 128/256-bit ctpop via the 512-bit instruction when VLX is unavailable
// (avx512_unary_lowering adds the NoVLX predicate).
11510defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11511defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11512
11513//===---------------------------------------------------------------------===//
11514// Replicate Single FP - MOVSHDUP and MOVSLDUP
11515//===---------------------------------------------------------------------===//
11516
// Thin wrapper: instantiates avx512_unary_rm_vl on avx512vl_f32_info under
// HasAVX512 and applies the XS prefix class.
11517multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11518                            X86SchedWriteWidths sched> {
11519  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11520                                      avx512vl_f32_info, HasAVX512>, XS;
11521}
11522
// VMOVSHDUP (opcode 0x16, X86Movshdup) and VMOVSLDUP (opcode 0x12,
// X86Movsldup), both scheduled as SchedWriteFShuffle.
11523defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11524                                  SchedWriteFShuffle>;
11525defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11526                                  SchedWriteFShuffle>;
11527
11528//===----------------------------------------------------------------------===//
11529// AVX-512 - MOVDDUP
11530//===----------------------------------------------------------------------===//
11531
// 128-bit MOVDDUP forms, expressed as broadcasts rather than X86Movddup:
//   rr - X86VBroadcast of the register source.
//   rm - broadcast load (_.BroadcastLdFrag) from a scalar memory operand,
//        with EVEX_CD8<_.EltSize, CD8VH>.
11532multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11533                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11534  let ExeDomain = _.ExeDomain in {
11535  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11536                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
11537                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11538                   Sched<[sched]>;
11539  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11540                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11541                 (_.VT (_.BroadcastLdFrag addr:$src))>,
11542                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11543                 Sched<[sched.Folded]>;
11544  }
11545}
11546
// MOVDDUP at all three widths. Z and Z256 use avx512_unary_rm with
// X86Movddup; Z128 uses the broadcast-based avx512_movddup_128.
// Z256/Z128 are predicated on HasAVX512 and HasVLX; Z is declared outside any
// `let Predicates` in this multiclass.
11547multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11548                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11549  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11550                           VTInfo.info512>, EVEX_V512;
11551
11552  let Predicates = [HasAVX512, HasVLX] in {
11553    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11554                                VTInfo.info256>, EVEX_V256;
11555    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11556                                   VTInfo.info128>, EVEX_V128;
11557  }
11558}
11559
// Thin wrapper: instantiates avx512_movddup_common on avx512vl_f64_info with
// the XD prefix class and REX_W.
11560multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11561                          X86SchedWriteWidths sched> {
11562  defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
11563                                        avx512vl_f64_info>, XD, REX_W;
11564}
11565
// VMOVDDUP: opcode 0x12, scheduled as SchedWriteFShuffle.
11566defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11567
// Select VMOVDDUPZ128 for a v2f64 broadcast of a scalar f64 register value.
// The FR64X source is first copied into VR128X. Three patterns: unmasked
// (rr), vselect_mask with pass-through $src0 (rrk), and vselect_mask with
// immAllZerosV (rrkz).
11568let Predicates = [HasVLX] in {
11569def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11570          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11571
11572def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11573                        (v2f64 VR128X:$src0)),
11574          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11575                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11576def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11577                        immAllZerosV),
11578          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11579}
11580
11581//===----------------------------------------------------------------------===//
11582// AVX-512 - Unpack Instructions
11583//===----------------------------------------------------------------------===//
11584
// FP unpack high/low (opcodes 0x15/0x14), instantiated through
// avx512_fp_binop_p with X86Unpckh/X86Unpckl passed for both node arguments.
// The enclosing `let` clears Uses to an empty register list and sets
// mayRaiseFPException to 0 for these definitions.
// NOTE(review): VUNPCKH passes two extra trailing arguments (0, 1) that
// VUNPCKL leaves at their defaults -- their meaning is defined by
// avx512_fp_binop_p, which is not visible here.
11585let Uses = []<Register>, mayRaiseFPException = 0 in {
11586defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11587                                 SchedWriteFShuffleSizes, 0, 1>;
11588defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11589                                 SchedWriteFShuffleSizes>;
11590}
11591
// Integer unpack low/high for byte and word elements; these are predicated
// on HasBWI. All select X86Unpckl/X86Unpckh with SchedWriteShuffle.
11592defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11593                                       SchedWriteShuffle, HasBWI>;
11594defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11595                                       SchedWriteShuffle, HasBWI>;
11596defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11597                                       SchedWriteShuffle, HasBWI>;
11598defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11599                                       SchedWriteShuffle, HasBWI>;
11600
// Dword and qword element variants; these are predicated on HasAVX512.
11601defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11602                                       SchedWriteShuffle, HasAVX512>;
11603defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11604                                       SchedWriteShuffle, HasAVX512>;
11605defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11606                                        SchedWriteShuffle, HasAVX512>;
11607defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11608                                        SchedWriteShuffle, HasAVX512>;
11609
11610//===----------------------------------------------------------------------===//
11611// AVX-512 - Extract & Insert Integer Instructions
11612//===----------------------------------------------------------------------===//
11613
// Memory-destination (mr) form shared by the byte and word element extracts:
// the pattern stores the result of OpNode(vector, imm) truncated to _.EltVT
// at $dst. It has no register outputs and is scheduled as WriteVecExtractSt.
11614multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11615                                                            X86VectorVTInfo _> {
11616  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11617              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11618              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11619              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11620                       addr:$dst)]>,
11621              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11622}
11623
// Byte element extract (opcode 0x14, TAPD), predicated on HasBWI:
//   rr - X86pextrb result written to a GR32orGR64 destination.
//   mr - store form supplied by avx512_extract_elt_bw_m.
11624multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11625  let Predicates = [HasBWI] in {
11626    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11627                  (ins _.RC:$src1, u8imm:$src2),
11628                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11629                  [(set GR32orGR64:$dst,
11630                        (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11631                  EVEX, TAPD, Sched<[WriteVecExtract]>;
11632
11633    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
11634  }
11635}
11636
// Word element extract, predicated on HasBWI:
//   rr     - opcode 0xC5, MRMSrcReg, PD; carries the X86pextrw pattern.
//   rr_REV - opcode 0x15, MRMDestReg, TAPD; same operands and assembly
//            string but no selection pattern, and marked isCodeGenOnly with
//            ForceDisassemble.
//   mr     - opcode 0x15 store form from avx512_extract_elt_bw_m.
11637multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11638  let Predicates = [HasBWI] in {
11639    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11640                  (ins _.RC:$src1, u8imm:$src2),
11641                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11642                  [(set GR32orGR64:$dst,
11643                        (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11644                  EVEX, PD, Sched<[WriteVecExtract]>;
11645
11646    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11647    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11648                   (ins _.RC:$src1, u8imm:$src2),
11649                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11650                   EVEX, TAPD, Sched<[WriteVecExtract]>;
11651
11652    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
11653  }
11654}
11655
// Dword/qword element extract (opcode 0x16, TAPD), predicated on HasDQI.
// GRC is the general-purpose register class of the rr destination.
//   rr - extractelt result written to GRC.
//   mr - extractelt result stored to $dst.
// Unlike the byte/word extracts, these patterns match the generic
// `extractelt` node with an `imm` index operand.
11656multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11657                                                            RegisterClass GRC> {
11658  let Predicates = [HasDQI] in {
11659    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11660                  (ins _.RC:$src1, u8imm:$src2),
11661                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11662                  [(set GRC:$dst,
11663                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11664                  EVEX, TAPD, Sched<[WriteVecExtract]>;
11665
11666    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11667                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11668                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11669                [(store (extractelt (_.VT _.RC:$src1),
11670                                    imm:$src2),addr:$dst)]>,
11671                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
11672                Sched<[WriteVecExtractSt]>;
11673  }
11674}
11675
// EVEX element-extract instructions on 128-bit vectors. The byte/word forms
// are WIG; the qword form uses GR64 and sets REX_W, the dword form uses GR32.
11675defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
11676defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
11677defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11678defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
11680
// Memory-source (rm) form shared by all element inserts: the inserted value
// is loaded with LdFrag from $src2 and placed into $src1 at the index given
// by $src3. `immoperator` selects the operand kind used for the index in the
// pattern (callers below pass timm or imm).
11680multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11681                                            X86VectorVTInfo _, PatFrag LdFrag,
11682                                            SDPatternOperator immoperator> {
11683  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11684      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11685      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11686      [(set _.RC:$dst,
11687          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11688      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11689}
11691
// Byte/word element insert, predicated on HasBWI:
//   rr - inserted value comes from a GR32orGR64 register; index is a timm.
//   rm - memory form from avx512_insert_elt_m, using the given LdFrag and
//        timm for the index.
11691multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11692                                            X86VectorVTInfo _, PatFrag LdFrag> {
11693  let Predicates = [HasBWI] in {
11694    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11695        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11696        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11697        [(set _.RC:$dst,
11698            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
11699        Sched<[WriteVecInsert]>;
11700
11701    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11702  }
11703}
11705
// Dword/qword element insert (TAPD), predicated on HasDQI. GRC is the
// general-purpose register class of the inserted value.
//   rr - matches the generic `insertelt` node with an `imm` index.
//   rm - memory form from avx512_insert_elt_m using _.ScalarLdFrag and imm.
11705multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11706                                         X86VectorVTInfo _, RegisterClass GRC> {
11707  let Predicates = [HasDQI] in {
11708    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11709        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11710        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11711        [(set _.RC:$dst,
11712            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11713        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11714
11715    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11716                                    _.ScalarLdFrag, imm>, TAPD;
11717  }
11718}
11720
// EVEX element-insert instructions on 128-bit vectors:
//   VPINSRBZ - opcode 0x20, X86pinsrb, extloadi8 for the memory form, TAPD.
//   VPINSRWZ - opcode 0xC4, X86pinsrw, extloadi16 for the memory form, PD.
//   VPINSRDZ / VPINSRQZ - opcode 0x22 with GR32 / GR64; Q sets REX_W.
11720defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11721                                     extloadi8>, TAPD, WIG;
11722defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11723                                     extloadi16>, PD, WIG;
11724defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11725defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
11727
// Without BWI: insert a v8i1 mask (bitconverted to i8 and any-extended to
// i32) as a byte using VPINSRBrr. The mask register is moved to GR32 with
// COPY_TO_REGCLASS.
11727let Predicates = [HasAVX512, NoBWI] in {
11728  def : Pat<(X86pinsrb VR128:$src1,
11729                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11730                       timm:$src3),
11731            (VPINSRBrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11732                       timm:$src3)>;
11733}
11735
// With BWI, select VPINSRBZrr for byte inserts whose value is an any-extended
// i8:
//   - from GR8: the byte is placed in sub_8bit of an IMPLICIT_DEF i32.
//   - from a v8i1 mask: the mask register is copied to GR32.
11735let Predicates = [HasBWI] in {
11736  def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
11737            (VPINSRBZrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
11738                        GR8:$src2, sub_8bit), timm:$src3)>;
11739  def : Pat<(X86pinsrb VR128:$src1,
11740                       (i32 (anyext (i8 (bitconvert v8i1:$src2)))),
11741                       timm:$src3),
11742            (VPINSRBZrr VR128:$src1, (i32 (COPY_TO_REGCLASS VK8:$src2, GR32)),
11743                        timm:$src3)>;
11744}
11746
11746// Always select FP16 instructions if available.
// f16 load/store and f16<->i16 bitconvert implemented with element 0 of
// VPINSRWZ/VPEXTRWZ under HasBWI. The patterns carry AddedComplexity = -10.
// NOTE(review): the store pattern uses FR16/VR128 while the other three use
// FR16X/VR128X -- confirm this difference is intended.
11747let Predicates = [HasBWI], AddedComplexity = -10 in {
11748  def : Pat<(f16 (load addr:$src)), (COPY_TO_REGCLASS (VPINSRWZrm (v8i16 (IMPLICIT_DEF)), addr:$src, 0), FR16X)>;
11749  def : Pat<(store f16:$src, addr:$dst), (VPEXTRWZmr addr:$dst, (v8i16 (COPY_TO_REGCLASS FR16:$src, VR128)), 0)>;
11750  def : Pat<(i16 (bitconvert f16:$src)), (EXTRACT_SUBREG (VPEXTRWZrr (v8i16 (COPY_TO_REGCLASS FR16X:$src, VR128X)), 0), sub_16bit)>;
11751  def : Pat<(f16 (bitconvert i16:$src)), (COPY_TO_REGCLASS (VPINSRWZrr (v8i16 (IMPLICIT_DEF)), (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit), 0), FR16X)>;
11752}
11754
11755//===----------------------------------------------------------------------===//
11756// VSHUFPS - VSHUFPD Operations
11757//===----------------------------------------------------------------------===//
11758
// Thin wrapper: instantiates avx512_common_3Op_imm8 with opcode 0xC6 and
// X86Shufp for the given FP VT info. The CD8 element size is taken from the
// 512-bit info of VTInfo_FP.
11758multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11759  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11760                                    SchedWriteFShuffle>,
11761                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11762                                    AVX512AIi8Base, EVEX_4V;
11763}
11765
// VSHUFPS (f32, PS prefix class) and VSHUFPD (f64, PD prefix class, REX_W).
11765defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
11766defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, REX_W;
11768
11769//===----------------------------------------------------------------------===//
11770// AVX-512 - Byte shift Left/Right
11771//===----------------------------------------------------------------------===//
11772
// Immediate-count shift for one vector type `_`, defined as plain AVX512
// records rather than through AVX512_maskable:
//   ri - register source, ModRM format MRMr.
//   mi - full-vector memory source (_.LdFrag), ModRM format MRMm.
// Both select (OpNode src, i8 timm).
11772multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11773                               Format MRMm, string OpcodeStr,
11774                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11775  def ri : AVX512<opc, MRMr,
11776             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11777             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11778             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11779             Sched<[sched]>;
11780  def mi : AVX512<opc, MRMm,
11781           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11782           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11783           [(set _.RC:$dst,(_.VT (OpNode
11784                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11785                                 (i8 timm:$src2))))]>,
11786           Sched<[sched.Folded, sched.ReadAfterFold]>;
11787}
11789
// Instantiates avx512_shift_packed on byte-element vectors at 512 (v64i8),
// 256 (v32i8x) and 128 (v16i8x) bits. Z requires `prd`; Z256/Z128 require
// `prd` and HasVLX.
11789multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11790                                   Format MRMm, string OpcodeStr,
11791                                   X86SchedWriteWidths sched, Predicate prd>{
11792  let Predicates = [prd] in
11793    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11794                                 sched.ZMM, v64i8_info>, EVEX_V512;
11795  let Predicates = [prd, HasVLX] in {
11796    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11797                                    sched.YMM, v32i8x_info>, EVEX_V256;
11798    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11799                                    sched.XMM, v16i8x_info>, EVEX_V128;
11800  }
11801}
// Both byte shifts share opcode 0x73 and differ only in the ModRM form:
// MRM7 for the left shift, MRM3 for the right shift.
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m,
                                       "vpslldq", SchedWriteShuffle, HasBWI>,
               AVX512PDIi8Base, EVEX_4V, WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m,
                                       "vpsrldq", SchedWriteShuffle, HasBWI>,
               AVX512PDIi8Base, EVEX_4V, WIG;
11809
// Two-source instruction whose sources use one vector type (_src) and whose
// result uses another (_dst).
//   rr - both sources in registers; marked commutable.
//   rm - second source loaded from memory (load is bitcast to _src.VT).
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
                                string OpcodeStr, X86FoldableSchedWrite sched,
                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
  let isCommutable = 1 in {
    def rr : AVX512BI<opc, MRMSrcReg,
                      (outs _dst.RC:$dst),
                      (ins _src.RC:$src1, _src.RC:$src2),
                      OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _dst.RC:$dst,
                            (_dst.VT (OpNode (_src.VT _src.RC:$src1),
                                             (_src.VT _src.RC:$src2))))]>,
             Sched<[sched]>;
  }

  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _dst.RC:$dst),
                    (ins _src.RC:$src1, _src.MemOp:$src2),
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _dst.RC:$dst,
                          (_dst.VT
                           (OpNode (_src.VT _src.RC:$src1),
                                   (_src.VT (bitconvert
                                             (_src.LdFrag addr:$src2))))))]>,
           Sched<[sched.Folded, sched.ReadAfterFold]>;
}
11830
// Expands avx512_psadbw_packed for 512/256/128-bit vectors, pairing each i64
// destination type with the i8 source type of the same total width. The
// narrower widths additionally require HasVLX.
multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
                                    string OpcodeStr,
                                    X86SchedWriteWidths sched,
                                    Predicate prd> {
  let Predicates = [prd] in {
    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
                                  v8i64_info, v64i8_info>,
             EVEX_V512;
  }

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
                                     v4i64x_info, v32i8x_info>,
                EVEX_V256;
    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
                                     v2i64x_info, v16i8x_info>,
                EVEX_V128;
  }
}
11844
// Opcode 0xF6, selected from X86psadbw; gated on HasBWI.
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
                                        SchedWritePSADBW, HasBWI>,
               EVEX_4V, WIG;
11847
11848// Transforms to swizzle an immediate to enable better matching when
11849// memory operand isn't in the right place.
def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
  // Rewrites a VPTERNLOG truth table for swapped operands 0 and 2: the entry
  // at 3-bit index (a, b, c) moves to index (c, b, a). That exchanges table
  // bits 1 <-> 4 and 3 <-> 6; bits 0, 2, 5 and 7 are their own images.
  const uint8_t OldTable = N->getZExtValue();
  const uint8_t Fixed = OldTable & 0xa5;          // bits 0, 2, 5, 7
  const uint8_t Raised = (OldTable & 0x0a) << 3;  // 1 -> 4, 3 -> 6
  const uint8_t Lowered = (OldTable & 0x50) >> 3; // 4 -> 1, 6 -> 3
  return getI8Imm(Fixed | Raised | Lowered, SDLoc(N));
}]>;
def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
  // Rewrites a VPTERNLOG truth table for swapped operands 0 and 1: the entry
  // at 3-bit index (a, b, c) moves to index (b, a, c). That exchanges table
  // bits 2 <-> 4 and 3 <-> 5; bits 0, 1, 6 and 7 are their own images.
  const uint8_t OldTable = N->getZExtValue();
  const uint8_t Fixed = OldTable & 0xc3;          // bits 0, 1, 6, 7
  const uint8_t Raised = (OldTable & 0x0c) << 2;  // 2 -> 4, 3 -> 5
  const uint8_t Lowered = (OldTable & 0x30) >> 2; // 4 -> 2, 5 -> 3
  return getI8Imm(Fixed | Raised | Lowered, SDLoc(N));
}]>;
def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
  // Rewrites a VPTERNLOG truth table for swapped operands 1 and 2: the entry
  // at 3-bit index (a, b, c) moves to index (a, c, b). That exchanges table
  // bits 1 <-> 2 and 5 <-> 6; bits 0, 3, 4 and 7 are their own images.
  const uint8_t OldTable = N->getZExtValue();
  const uint8_t Fixed = OldTable & 0x99;          // bits 0, 3, 4, 7
  const uint8_t Raised = (OldTable & 0x22) << 1;  // 1 -> 2, 5 -> 6
  const uint8_t Lowered = (OldTable & 0x44) >> 1; // 2 -> 1, 6 -> 5
  return getI8Imm(Fixed | Raised | Lowered, SDLoc(N));
}]>;
def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
  // Rewrites a VPTERNLOG truth table for a rotated operand order: the entry
  // at 3-bit index (a, b, c) moves to index (b, c, a). Bits 0 and 7 are
  // unaffected by any operand permutation and are copied through.
  const uint8_t OldTable = N->getZExtValue();
  uint8_t Rotated = OldTable & 0x81;
  Rotated |= (OldTable & 0x02) << 1; // 1 -> 2
  Rotated |= (OldTable & 0x04) << 2; // 2 -> 4
  Rotated |= (OldTable & 0x08) << 3; // 3 -> 6
  Rotated |= (OldTable & 0x10) >> 3; // 4 -> 1
  Rotated |= (OldTable & 0x20) >> 2; // 5 -> 3
  Rotated |= (OldTable & 0x40) >> 1; // 6 -> 5
  return getI8Imm(Rotated, SDLoc(N));
}]>;
def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
  // Rewrites a VPTERNLOG truth table for a rotated operand order: the entry
  // at 3-bit index (a, b, c) moves to index (c, a, b). Bits 0 and 7 are
  // unaffected by any operand permutation and are copied through.
  const uint8_t OldTable = N->getZExtValue();
  uint8_t Rotated = OldTable & 0x81;
  Rotated |= (OldTable & 0x02) << 3; // 1 -> 4
  Rotated |= (OldTable & 0x04) >> 1; // 2 -> 1
  Rotated |= (OldTable & 0x08) << 2; // 3 -> 5
  Rotated |= (OldTable & 0x10) >> 2; // 4 -> 2
  Rotated |= (OldTable & 0x20) << 1; // 5 -> 6
  Rotated |= (OldTable & 0x40) >> 3; // 6 -> 3
  return getI8Imm(Rotated, SDLoc(N));
}]>;
11909
// VPTERNLOG instruction forms for one vector width, followed by the extra
// selection patterns that cover operand orders the instruction definitions
// themselves do not match.
//
// Instruction forms ($src1 is tied to $dst):
//   rri  - all three sources in registers.
//   rmi  - $src3 is a full-vector load.
//   rmbi - $src3 is a broadcast load.
//
// `Name` is the base instruction name; the patterns below use it together
// with _.ZSuffix to look up the masked (k) and zero-masked (kz) records.
// Each pattern pairs a DAG operand order with the VPTERNLOG*_imm8 transform
// that rewrites the immediate for the order the instruction expects.
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
                          string Name> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
                 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                         (_.VT _.RC:$src3),
                         (i8 timm:$src4)), 1, 1>,
               AVX512AIi8Base, EVEX_4V, Sched<[sched]>;

    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
                 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
                 (OpNode (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                         (_.VT (bitconvert (_.LdFrag addr:$src3))),
                         (i8 timm:$src4)), 1, 0>,
               AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
                  OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
                  "$src2, ${src3}"#_.BroadcastStr#", $src4",
                  (OpNode (_.VT _.RC:$src1),
                          (_.VT _.RC:$src2),
                          (_.VT (_.BroadcastLdFrag addr:$src3)),
                          (i8 timm:$src4)), 1, 0>,
                EVEX_B, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst", ExeDomain

  //===--- Register forms: passthru value is not the first operand -------===//

  // Passthru ($src1) is the third operand.
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1,
                           (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;

  // Passthru ($src1) is the second operand.
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3,
                           (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rrik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3,
             (VPTERNLOG213_imm8 timm:$src4))>;

  //===--- Full-vector load, zero masking: load not in last position -----===//

  // Load is the first operand.
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;

  // Load is the second operand.
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmikz)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  //===--- Full-vector load, merge masking: remaining operand orders -----===//

  // (src1, load, src2)
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // (load, src2, src1)
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;

  // (src2, src1, load)
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                           (bitconvert (_.LdFrag addr:$src3)),
                           (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG213_imm8 timm:$src4))>;

  // (src2, load, src1)
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG231_imm8 timm:$src4))>;

  // (load, src1, src2)
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
                           _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG312_imm8 timm:$src4))>;

  //===--- Broadcast load, zero masking: broadcast not in last position --===//

  // Broadcast is the first operand.
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;

  // Broadcast is the second operand.
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src2, (i8 timm:$src4)),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbikz)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  //===--- Broadcast load, merge masking: remaining operand orders -------===//

  // (src1, bcst, src2)
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG132_imm8 timm:$src4))>;

  // (bcst, src2, src1)
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG321_imm8 timm:$src4))>;

  // (src2, src1, bcst)
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode _.RC:$src2, _.RC:$src1,
                           (_.BroadcastLdFrag addr:$src3),
                           (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG213_imm8 timm:$src4))>;

  // (src2, bcst, src1)
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode _.RC:$src2, (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src1, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG231_imm8 timm:$src4))>;

  // (bcst, src1, src2)
  def : Pat<(_.VT (vselect_mask
                   _.KRCWM:$mask,
                   (OpNode (_.BroadcastLdFrag addr:$src3),
                           _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
                   _.RC:$src1)),
            (!cast<Instruction>(Name#_.ZSuffix#rmbik)
             _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3,
             (VPTERNLOG312_imm8 timm:$src4))>;
}
12055
// Expands avx512_ternlog (opcode 0x25, X86vpternlog) for all three vector
// widths. NAME is forwarded so the per-width patterns can look up the
// instruction records by name. 128/256-bit forms additionally need HasVLX.
multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
                            _.info512, NAME>,
             EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
                               _.info128, NAME>,
                EVEX_V128;
    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
                               _.info256, NAME>,
                EVEX_V256;
  }
}
12068
// Dword and qword element variants; the qword variant adds REX_W.
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
                                        avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
                                        avx512vl_i64_info>,
                  REX_W;
12073
// Select vnot as a single vpternlog rather than materializing an all-ones
// vector (via pcmpeq or vpternlog) and xoring with it. Immediate 15 makes the
// result depend only on src0; the same register is nevertheless passed for
// all three operands to prevent a false dependency.
// TODO: We should maybe have a more generalized algorithm for folding to
// vpternlog.
let Predicates = [HasAVX512] in {
  // The qword instruction is used for every 512-bit integer element type.
  foreach VT = [v64i8, v32i16, v16i32, v8i64] in
    def : Pat<(VT (vnot VR512:$src)),
              (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
}
12090
// vnot for 128/256-bit vectors when only the 512-bit vpternlog is available
// (NoVLX): insert the source into an otherwise undefined 512-bit register,
// run the 512-bit instruction with immediate 15, and extract the low
// subregister again.
let Predicates = [HasAVX512, NoVLX] in {
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in
    def : Pat<(VT (vnot VR128X:$src)),
              (EXTRACT_SUBREG
               (VPTERNLOGQZrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                (i8 15)), sub_xmm)>;

  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(VT (vnot VR256X:$src)),
              (EXTRACT_SUBREG
               (VPTERNLOGQZrri
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                (i8 15)), sub_ymm)>;
}
12150
// vnot for 128/256-bit vectors when the VLX forms of vpternlog exist: use the
// native-width qword instruction with immediate 15, passing the source for
// all three operands.
let Predicates = [HasVLX] in {
  foreach VT = [v16i8, v8i16, v4i32, v2i64] in
    def : Pat<(VT (vnot VR128X:$src)),
              (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src,
                                 (i8 15))>;

  foreach VT = [v32i8, v16i16, v8i32, v4i64] in
    def : Pat<(VT (vnot VR256X:$src)),
              (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src,
                                 (i8 15))>;
}
12170
12171//===----------------------------------------------------------------------===//
12172// AVX-512 - FixupImm
12173//===----------------------------------------------------------------------===//
12174
// Packed VFIXUPIMM forms for one vector width. `_` describes the FP data
// vector; `TblVT` describes the type used for the third source operand.
// $src1 is tied to $dst. All three forms read MXCSR and may raise FP
// exceptions.
//   rri  - third source in a register.
//   rmi  - third source is a full-vector load.
//   rmbi - third source is a broadcast load.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT> {
  let Constraints = "$src1 = $dst",
      ExeDomain = _.ExeDomain,
      Uses = [MXCSR],
      mayRaiseFPException = 1 in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                 OpcodeStr#_.Suffix,
                 "$src4, $src3, $src2", "$src2, $src3, $src4",
                 (X86VFixupimm (_.VT _.RC:$src1),
                               (_.VT _.RC:$src2),
                               (TblVT.VT _.RC:$src3),
                               (i32 timm:$src4))>,
               Sched<[sched]>;

    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                 OpcodeStr#_.Suffix,
                 "$src4, $src3, $src2", "$src2, $src3, $src4",
                 (X86VFixupimm (_.VT _.RC:$src1),
                               (_.VT _.RC:$src2),
                               (TblVT.VT (bitconvert
                                          (TblVT.LdFrag addr:$src3))),
                               (i32 timm:$src4))>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                  OpcodeStr#_.Suffix,
                  "$src4, ${src3}"#_.BroadcastStr#", $src2",
                  "$src2, ${src3}"#_.BroadcastStr#", $src4",
                  (X86VFixupimm (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
                                (i32 timm:$src4))>,
                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}
12206
// Everything from avx512_fixupimm_packed plus a register-only {sae} form
// (rrib) selected from X86VFixupimmSAE. The {sae} form reads MXCSR but,
// unlike the inherited forms, is not marked mayRaiseFPException.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _,
                                      X86VectorVTInfo TblVT>
    : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
  let Constraints = "$src1 = $dst",
      ExeDomain = _.ExeDomain,
      Uses = [MXCSR] in {
    defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                  OpcodeStr#_.Suffix,
                  "$src4, {sae}, $src3, $src2",
                  "$src2, $src3, {sae}, $src4",
                  (X86VFixupimmSAE (_.VT _.RC:$src1),
                                   (_.VT _.RC:$src2),
                                   (TblVT.VT _.RC:$src3),
                                   (i32 timm:$src4))>,
                EVEX_B, Sched<[sched]>;
  }
}
12223
// Scalar VFIXUPIMM forms. `_` describes the scalar FP data type; `_src3VT`
// describes the vector type used for the third source operand.
// $src1 is tied to $dst; all forms require HasAVX512.
//   rri  - third source in a register (SIMD_EXC).
//   rrib - register-only {sae} form; reads MXCSR only.
//   rmi  - third source is a scalar load placed into a vector (SIMD_EXC).
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (X86VFixupimms (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (_src3VT.VT _src3VT.RC:$src3),
                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // This form has no memory operand, so it uses the plain register
    // scheduling class (as rri above does), not the load-folding classes.
    let Uses = [MXCSR] in
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
                                        (_.VT _.RC:$src2),
                                        (_src3VT.VT _src3VT.RC:$src3),
                                        (i32 timm:$src4))>,
                      EVEX_B, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (X86VFixupimms (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (_src3VT.VT (scalar_to_vector
                                              (_src3VT.ScalarLdFrag addr:$src3))),
                                    (i32 timm:$src4))>,
                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}
12257
// Expands packed VFIXUPIMM (opcode 0x54) for all three vector widths. Only
// the 512-bit instantiation uses the _sae multiclass and therefore gets the
// extra {sae} form; the 128/256-bit ones additionally require HasVLX.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
                                        _Vec.info512, _Tbl.info512>,
             AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
                                       _Vec.info128, _Tbl.info128>,
                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
                                       _Vec.info256, _Tbl.info256>,
                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
12274
// Scalar forms (opcode 0x55). The third source uses the integer vector type
// with the same element width as the FP scalar.
defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f32x_info,
                                           v4i32x_info>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
                                           SchedWriteFAdd.Scl, f64x_info,
                                           v2i64x_info>,
                    AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>,
                    REX_W;

// Packed forms; the third-source type is the integer vector info with the
// same element width as the FP data.
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd,
                                              avx512vl_f32_info,
                                              avx512vl_i32_info>,
                   EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd,
                                              avx512vl_f64_info,
                                              avx512vl_i64_info>,
                   EVEX_CD8<64, CD8VF>, REX_W;
12285
12286// Patterns used to select SSE scalar fp arithmetic instructions from
12287// either:
12288//
12289// (1) a scalar fp operation followed by a blend
12290//
12291// The effect is that the backend no longer emits unnecessary vector
12292// insert instructions immediately after SSE scalar fp instructions
12293// like addss or mulss.
12294//
12295// For example, given the following code:
12296//   __m128 foo(__m128 A, __m128 B) {
12297//     A[0] += B[0];
12298//     return A;
12299//   }
12300//
12301// Previously we generated:
12302//   addss %xmm0, %xmm1
12303//   movss %xmm1, %xmm0
12304//
12305// We now generate:
12306//   addss %xmm1, %xmm0
12307//
12308// (2) a vector packed single/double fp operation followed by a vector insert
12309//
12310// The effect is that the backend converts the packed fp instruction
12311// followed by a vector insert into a single SSE scalar fp instruction.
12312//
12313// For example, given the following code:
12314//   __m128 foo(__m128 A, __m128 B) {
12315//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
12317//   }
12318//
12319// Previously we generated:
12320//   addps %xmm0, %xmm1
12321//   movss %xmm1, %xmm0
12322//
12323// We now generate:
12324//   addss %xmm1, %xmm0
12325
12326// TODO: Some canonicalization in lowering would simplify the number of
12327// patterns we have to try to match.
// Selects the "_Int" scalar math instructions for a scalar FP operation on
// element 0 whose result is inserted back into the original vector through
// MoveNode.
//   Op        - operator matched in the unmasked patterns.
//   MaskedOp  - operator matched underneath X86selects_mask.
//   OpcPrefix - instruction stem; "V" # OpcPrefix # "Z..." names the record.
//   ZeroFP    - FP zero leaf used to recognize zero masking.
// Each shape comes in a register form and a scalar-load form.
multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op,
                                          SDNode MaskedOp,
                                          string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _,
                                          PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // Unmasked: extract element 0, apply Op, insert the result via MoveNode.
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst),
                                               (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int")
               _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst),
                                               (iPTR 0))),
                          (_.ScalarLdFrag addr:$src))))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int")
               _.VT:$dst, addr:$src)>;

    // Merge masking: $src0 supplies the value used when the mask bit is
    // clear.
    def : Pat<(MoveNode
               (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask
                 VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1),
                                                (iPTR 0))),
                           _.FRC:$src2),
                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask
                 VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1),
                                                (iPTR 0))),
                           (_.ScalarLdFrag addr:$src2)),
                 _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;

    // Zero masking: the select's false value is the FP zero leaf.
    def : Pat<(MoveNode
               (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask
                 VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1),
                                                (iPTR 0))),
                           _.FRC:$src2),
                 (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intkz")
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
    def : Pat<(MoveNode
               (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects_mask
                 VK1WM:$mask,
                 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1),
                                                (iPTR 0))),
                           (_.ScalarLdFrag addr:$src2)),
                 (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intkz")
               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
  }
}
12389
// Instantiate AVX512_scalar_math_fp_patterns for add, sub, mul and div in
// three groups: the *SS group uses X86Movss with v4f32x_info, the *SD group
// uses X86Movsd with v2f64x_info, and the *SH group uses X86Movsh with
// v8f16x_info. Each instantiation passes both the any_* form and the plain
// form of the operator, followed by the opcode prefix string used to build
// the selected instruction names ("V" # OpcPrefix # "Zrr_Int", ...).
// NOTE(review): the trailing fp32imm0/fp64imm0/fp16imm0 argument is presumably
// the ZeroFP operand used by the zero-masked patterns — confirm against the
// multiclass header.
12390defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12391defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12392defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12393defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12394
12395defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12396defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12397defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12398defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12399
12400defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
12401defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
12402defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
12403defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
12404
// Selection pattern for a scalar unary math op whose result is merged back
// into a vector with a scalar move node.
//   OpNode    - the unary operator applied to element 0 of $src.
//   OpcPrefix - opcode name fragment; the selected instruction is
//               "V" # OpcPrefix # "Zr_Int".
//   Move      - the node that inserts the scalar result into $dst.
//   _         - vector type info supplying the vector type (_.VT).
// Matches Move($dst, scalar_to_vector(OpNode(extractelt $src, 0))) and emits
// the _Int instruction with operands ($dst, $src). Requires HasAVX512.
12405multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12406                                             SDNode Move, X86VectorVTInfo _> {
12407  let Predicates = [HasAVX512] in {
12408    def : Pat<(_.VT (Move _.VT:$dst,
12409                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12410              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12411  }
12412}
12413
// Instantiate the unary pattern for any_fsqrt on each scalar type:
// SQRTSS (X86Movss, v4f32x_info), SQRTSD (X86Movsd, v2f64x_info) and
// SQRTSH (X86Movsh, v8f16x_info).
12414defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12415defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12416defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
12417
12418//===----------------------------------------------------------------------===//
12419// AES instructions
12420//===----------------------------------------------------------------------===//
12421
// Defines the EVEX-encoded 128-, 256- and 512-bit forms of one AES
// instruction via AESI_binop_rm_int.
//   Op        - opcode byte.
//   OpStr     - assembly mnemonic.
//   IntPrefix - name of the 128-bit intrinsic; the 256- and 512-bit forms use
//               IntPrefix # "_256" and IntPrefix # "_512".
// Z128 and Z256 are predicated on [HasVLX, HasVAES]; Z is predicated on
// [HasAVX512, HasVAES].
12422multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12423  let Predicates = [HasVLX, HasVAES] in {
12424    defm Z128 : AESI_binop_rm_int<Op, OpStr,
12425                                  !cast<Intrinsic>(IntPrefix),
12426                                  loadv2i64, 0, VR128X, i128mem>,
12427                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
12428    defm Z256 : AESI_binop_rm_int<Op, OpStr,
12429                                  !cast<Intrinsic>(IntPrefix#"_256"),
12430                                  loadv4i64, 0, VR256X, i256mem>,
12431                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
12432    }
12433    let Predicates = [HasAVX512, HasVAES] in
12434    defm Z    : AESI_binop_rm_int<Op, OpStr,
12435                                  !cast<Intrinsic>(IntPrefix#"_512"),
12436                                  loadv8i64, 0, VR512, i512mem>,
12437                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
12438}
12439
// The four AES round instructions (opcodes 0xDC-0xDF), each bound to the
// corresponding int_x86_aesni_* intrinsic name prefix.
12440defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12441defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12442defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12443defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12444
12445//===----------------------------------------------------------------------===//
12446// PCLMUL instructions - Carry less multiplication
12447//===----------------------------------------------------------------------===//
12448
// EVEX-encoded VPCLMULQDQ. The 512-bit form requires
// [HasAVX512, HasVPCLMULQDQ]; the 128- and 256-bit forms require
// [HasVLX, HasVPCLMULQDQ]. Each width passes its own register class, memory
// operand, load fragment and intrinsic to the vpclmulqdq multiclass.
12449let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12450defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12451                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
12452
12453let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12454defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12455                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
12456
12457defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12458                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12459                                EVEX_CD8<64, CD8VF>, WIG;
12460}
12461
12462// Aliases
// One vpclmulqdq_aliases instantiation per EVEX width, keyed by the
// instruction name prefix defined above and the matching register class and
// memory operand.
12463defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12464defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12465defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12466
12467//===----------------------------------------------------------------------===//
12468// VBMI2
12469//===----------------------------------------------------------------------===//
12470
// Register (r) and full-vector memory (m) forms of a three-source VBMI2
// variable shift, built with AVX512_maskable_3src.
//   Op     - opcode byte.
//   OpStr  - assembly mnemonic.
//   OpNode - SDNode taking ($src1, $src2, $src3).
//   sched  - scheduling class; the memory form uses sched.Folded and
//            sched.ReadAfterFold.
//   VTI    - vector type info supplying RC, VT, MemOp, LdFrag and ExeDomain.
// $src1 is tied to $dst, so it is not listed in the explicit (ins ...) list.
12471multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12472                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12473  let Constraints = "$src1 = $dst",
12474      ExeDomain   = VTI.ExeDomain in {
12475    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12476                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12477                "$src3, $src2", "$src2, $src3",
12478                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12479                T8PD, EVEX_4V, Sched<[sched]>;
    // Memory form: $src3 is loaded with VTI.LdFrag.
12480    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12481                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12482                "$src3, $src2", "$src2, $src3",
12483                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12484                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
12485                T8PD, EVEX_4V,
12486                Sched<[sched.Folded, sched.ReadAfterFold]>;
12487  }
12488}
12489
// Extends VBMI2_shift_var_rm (r and m forms) with a broadcast-memory form
// (mb). In the mb form $src3 is a scalar memory operand loaded through
// VTI.BroadcastLdFrag, the assembly string appends VTI.BroadcastStr to the
// memory operand, and the encoding adds EVEX_B. $src1 is tied to $dst as in
// the inherited forms.
12490multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12491                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12492         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12493  let Constraints = "$src1 = $dst",
12494      ExeDomain   = VTI.ExeDomain in
12495  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12496              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12497              "${src3}"#VTI.BroadcastStr#", $src2",
12498              "$src2, ${src3}"#VTI.BroadcastStr,
12499              (OpNode VTI.RC:$src1, VTI.RC:$src2,
12500               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12501              T8PD, EVEX_4V, EVEX_B,
12502              Sched<[sched.Folded, sched.ReadAfterFold]>;
12503}
12504
// Instantiates VBMI2_shift_var_rm (no broadcast form) at all three vector
// widths. Z uses VTI.info512 / sched.ZMM and requires [HasVBMI2]; Z256 and
// Z128 use info256 / sched.YMM and info128 / sched.XMM and additionally
// require HasVLX.
12505multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12506                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12507  let Predicates = [HasVBMI2] in
12508  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12509                                   EVEX_V512;
12510  let Predicates = [HasVBMI2, HasVLX] in {
12511    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12512                                   EVEX_V256;
12513    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12514                                   EVEX_V128;
12515  }
12516}
12517
// Same width fan-out as VBMI2_shift_var_rm_common, but instantiates
// VBMI2_shift_var_rmb so every width also gets the broadcast-memory form.
// Z requires [HasVBMI2]; Z256 and Z128 require [HasVBMI2, HasVLX].
12518multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12519                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12520  let Predicates = [HasVBMI2] in
12521  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12522                                    EVEX_V512;
12523  let Predicates = [HasVBMI2, HasVLX] in {
12524    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12525                                    EVEX_V256;
12526    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12527                                    EVEX_V128;
12528  }
12529}
// Word/dword/qword variants of a VBMI2 variable shift.
//   wOp    - opcode used by the word (W) variant.
//   dqOp   - opcode shared by the dword (D) and qword (Q) variants.
//   Prefix - mnemonic stem; "w", "d" or "q" is appended.
// W uses the rm-only multiclass (no broadcast form) and sets REX_W. D and Q
// use the rmb multiclass; Q is distinguished from D by REX_W and by
// EVEX_CD8<64, CD8VF> instead of EVEX_CD8<32, CD8VF>.
12530multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12531                           SDNode OpNode, X86SchedWriteWidths sched> {
12532  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12533             avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
12534  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12535             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12536  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12537             avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
12538}
12539
// Word/dword/qword variants of a VBMI2 immediate shift.
//   wOp    - opcode used by the word (W) variant.
//   dqOp   - opcode shared by the dword (D) and qword (Q) variants.
//   Prefix - mnemonic stem; "w", "d" or "q" is appended.
// W is built from avx512_common_3Op_rm_imm8 (passing avx512vl_i16_info for
// both of its type-info arguments); D and Q are built from
// avx512_common_3Op_imm8 and add AVX512AIi8Base and EVEX_4V here. W and Q set
// REX_W. All three are gated on HasVBMI2 via the predicate argument.
// NOTE(review): W presumably lacks a broadcast form because it uses the
// *_rm_imm8 helper — confirm against that multiclass.
12540multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12541                           SDNode OpNode, X86SchedWriteWidths sched> {
12542  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12543             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12544             REX_W, EVEX_CD8<16, CD8VF>;
12545  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12546             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12547  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12548             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
12549}
12550
12551// Concat & Shift
// The variable (VPSHLDV/VPSHRDV) and immediate (VPSHLD/VPSHRD) forms use the
// same opcode pairs here (0x70/0x71 for left, 0x72/0x73 for right); they go
// through different multiclasses (VBMI2_shift_var vs VBMI2_shift_imm). All
// four are scheduled as SchedWriteVecIMul.
12552defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12553defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12554defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12555defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12556
12557// Compress
// Byte and word compress share opcode 0x63; the word form adds REX_W.
12558defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12559                                         avx512vl_i8_info, HasVBMI2>, EVEX;
12560defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12561                                          avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12562// Expand
// Byte and word expand share opcode 0x62; the word form adds REX_W.
12563defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12564                                      avx512vl_i8_info, HasVBMI2>, EVEX;
12565defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12566                                      avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
12567
12568//===----------------------------------------------------------------------===//
12569// VNNI
12570//===----------------------------------------------------------------------===//
12571
// Register (r), full-vector memory (m) and broadcast-memory (mb) forms of a
// three-source VNNI instruction, built with AVX512_maskable_3src. $src1 is
// tied to $dst for every form defined here.
//   Op           - opcode byte.
//   OpStr        - assembly mnemonic.
//   OpNode       - SDNode taking ($src1, $src2, $src3).
//   sched        - scheduling class; memory forms use sched.Folded plus two
//                  sched.ReadAfterFold entries.
//   VTI          - vector type info.
//   IsCommutable - forwarded to the register form only.
12572let Constraints = "$src1 = $dst" in
12573multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12574                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12575                    bit IsCommutable> {
12576  let ExeDomain = VTI.ExeDomain in {
  // NOTE(review): IsCommutable is passed for both trailing arguments of
  // AVX512_maskable_3src (presumably its IsCommutable and IsKCommutable
  // parameters) — confirm against that multiclass.
12577  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12578                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12579                                   "$src3, $src2", "$src2, $src3",
12580                                   (VTI.VT (OpNode VTI.RC:$src1,
12581                                            VTI.RC:$src2, VTI.RC:$src3)),
12582                                   IsCommutable, IsCommutable>,
12583                                   EVEX_4V, T8PD, Sched<[sched]>;
  // Memory form: $src3 is loaded with VTI.LdFrag.
12584  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12585                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12586                                   "$src3, $src2", "$src2, $src3",
12587                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12588                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
12589                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12590                                   Sched<[sched.Folded, sched.ReadAfterFold,
12591                                          sched.ReadAfterFold]>;
  // Broadcast form: $src3 is a scalar memory operand loaded through
  // VTI.BroadcastLdFrag; encoding adds EVEX_B.
12592  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12593                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12594                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12595                                   "$src2, ${src3}"#VTI.BroadcastStr,
12596                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
12597                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12598                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12599                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold,
12600                                                sched.ReadAfterFold]>;
12601  }
12602}
12603
// Instantiates VNNI_rmb at 512, 256 and 128 bits on i32 vectors
// (v16i32_info, v8i32x_info, v4i32x_info). Z requires [HasVNNI]; Z256 and
// Z128 require [HasVNNI, HasVLX]. IsCommutable is forwarded unchanged.
12604multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12605                       X86SchedWriteWidths sched, bit IsCommutable> {
12606  let Predicates = [HasVNNI] in
12607  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12608                           IsCommutable>, EVEX_V512;
12609  let Predicates = [HasVNNI, HasVLX] in {
12610    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12611                           IsCommutable>, EVEX_V256;
12612    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12613                           IsCommutable>, EVEX_V128;
12614  }
12615}
12616
12617// FIXME: Is there a better scheduler class for VPDP?
// Opcodes 0x50-0x53. The two VPDPBUSD* forms are instantiated as
// non-commutable (0) and the two VPDPWSSD* forms as commutable (1).
12618defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12619defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12620defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12621defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12622
12623// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Each pattern folds add($src1, X86vpmaddwd_su($src2, $src3)) into a single
// VPDPWSSD, with $src1 as the accumulator operand. Register and load-folded
// variants are provided per width; the 512-bit patterns need [HasVNNI], the
// 256- and 128-bit patterns need [HasVNNI, HasVLX].
// NOTE(review): X86vpmaddwd_su is defined elsewhere; presumably a single-use
// fragment of X86vpmaddwd — confirm.
12624let Predicates = [HasVNNI] in {
12625  def : Pat<(v16i32 (add VR512:$src1,
12626                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12627            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12628  def : Pat<(v16i32 (add VR512:$src1,
12629                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12630            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12631}
12632let Predicates = [HasVNNI,HasVLX] in {
12633  def : Pat<(v8i32 (add VR256X:$src1,
12634                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12635            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12636  def : Pat<(v8i32 (add VR256X:$src1,
12637                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12638            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12639  def : Pat<(v4i32 (add VR128X:$src1,
12640                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12641            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12642  def : Pat<(v4i32 (add VR128X:$src1,
12643                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12644            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12645}
12646
12647//===----------------------------------------------------------------------===//
12648// Bit Algorithms
12649//===----------------------------------------------------------------------===//
12650
12651// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte and word population count, both selected from ctpop and gated on
// HasBITALG. They share opcode 0x54; the word form adds REX_W.
12652defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12653                                   avx512vl_i8_info, HasBITALG>;
12654defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12655                                   avx512vl_i16_info, HasBITALG>, REX_W;
12656
// Additional ctpop lowering patterns for the two instructions above, supplied
// by avx512_unary_lowering (defined elsewhere in this file).
12657defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12658defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12659
// Single-use variant of X86Vpshufbitqmb: matches the same two-operand node
// but only when the matched node has exactly one use (N->hasOneUse()).
12660def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12661                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
12662  return N->hasOneUse();
12663}]>;
12664
// Register (rr) and memory (rm) forms of vpshufbitqmb (opcode 0x8F), built
// with AVX512_maskable_cmp. The result is written to the mask register class
// VTI.KRC. Each form passes two patterns to AVX512_maskable_cmp: one using
// X86Vpshufbitqmb and one using the single-use fragment X86Vpshufbitqmb_su.
// NOTE(review): the _su pattern is presumably the one used for the masked
// variant — confirm against AVX512_maskable_cmp.
12665multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12666  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12667                                (ins VTI.RC:$src1, VTI.RC:$src2),
12668                                "vpshufbitqmb",
12669                                "$src2, $src1", "$src1, $src2",
12670                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12671                                (VTI.VT VTI.RC:$src2)),
12672                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12673                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12674                                Sched<[sched]>;
  // Memory form: $src2 is loaded with VTI.LdFrag.
12675  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12676                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
12677                                "vpshufbitqmb",
12678                                "$src2, $src1", "$src1, $src2",
12679                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12680                                (VTI.VT (VTI.LdFrag addr:$src2))),
12681                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12682                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
12683                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12684                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12685}
12686
// Instantiates VPSHUFBITQMB_rm at 512, 256 and 128 bits. Z requires
// [HasBITALG]; Z256 and Z128 require [HasBITALG, HasVLX].
12687multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12688  let Predicates = [HasBITALG] in
12689  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12690  let Predicates = [HasBITALG, HasVLX] in {
12691    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12692    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12693  }
12694}
12695
12696// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// Instantiated on byte vectors (avx512vl_i8_info) with SchedWriteVecIMul.
12697defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12698
12699//===----------------------------------------------------------------------===//
12700// GFNI
12701//===----------------------------------------------------------------------===//
12702
// Instantiates avx512_binop_rm for a GFNI byte multiply at 512, 256 and 128
// bits on i8 vectors. Z requires [HasGFNI, HasAVX512]; Z256 and Z128 require
// [HasGFNI, HasVLX].
// NOTE(review): the trailing `1` is presumably avx512_binop_rm's
// IsCommutable flag — confirm against that multiclass.
12703multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12704                                   X86SchedWriteWidths sched> {
12705  let Predicates = [HasGFNI, HasAVX512] in
12706  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12707                                EVEX_V512;
12708  let Predicates = [HasGFNI, HasVLX] in {
12709    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12710                                EVEX_V256;
12711    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12712                                EVEX_V128;
12713  }
12714}
12715
// vgf2p8mulb: opcode 0xCF, selected from X86GF2P8mulb, scheduled as
// SchedWriteVecALU.
12716defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12717                                          SchedWriteVecALU>,
12718                                          EVEX_CD8<8, CD8VF>, T8PD;
12719
// Extends avx512_3Op_rm_imm8 (instantiated with VTI as both of its type-info
// arguments) with a broadcast-memory form, rmbi.
//   VTI     - type info of the operands and result.
//   BcstVTI - type info of the broadcast: $src2 is loaded with
//             X86VBroadcastld64 as BcstVTI.VT and then bitconverted, and the
//             assembly string uses BcstVTI.BroadcastStr.
// The rmbi form takes a u8imm third operand matched as (i8 timm) and sets
// EVEX_B.
12720multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12721                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12722                                      X86VectorVTInfo BcstVTI>
12723           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12724  let ExeDomain = VTI.ExeDomain in
12725  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12726                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12727                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12728                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12729                (OpNode (VTI.VT VTI.RC:$src1),
12730                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12731                 (i8 timm:$src3))>, EVEX_B,
12732                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12733}
12734
// Instantiates GF2P8AFFINE_avx512_rmb_imm at 512, 256 and 128 bits, pairing
// each i8 vector type with the i64 vector type of the same width for the
// broadcast form. Z requires [HasGFNI, HasAVX512]; Z256 and Z128 require
// [HasGFNI, HasVLX].
12735multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12736                                     X86SchedWriteWidths sched> {
12737  let Predicates = [HasGFNI, HasAVX512] in
12738  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12739                                           v64i8_info, v8i64_info>, EVEX_V512;
12740  let Predicates = [HasGFNI, HasVLX] in {
12741    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12742                                           v32i8x_info, v4i64x_info>, EVEX_V256;
12743    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12744                                           v16i8x_info, v2i64x_info>, EVEX_V128;
12745  }
12746}
12747
// The two GFNI affine instructions: the inverse form uses opcode 0xCF and the
// plain form 0xCE. Both set REX_W and use AVX512AIi8Base, and both are
// scheduled as SchedWriteVecIMul.
12748defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12749                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12750                         EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12751defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12752                         X86GF2P8affineqb, SchedWriteVecIMul>,
12753                         EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
12754
12755
12756//===----------------------------------------------------------------------===//
12757// AVX5124FMAPS
12758//===----------------------------------------------------------------------===//
12759
// AVX5124FMAPS instructions. All four are memory-only forms with an empty
// selection pattern list ([]), defined through AVX512_maskable_3src_in_asm.
// They are marked mayLoad with no other side effects, read MXCSR, may raise
// FP exceptions, and tie $src1 to $dst. The packed forms operate on
// v16f32_info / VR512 with EVEX_CD8<32, CD8VQ>; the scalar forms operate on
// f32x_info / VR128X with VEX_LIG and EVEX_CD8<32, CD8VF>. Every form takes
// its memory operand as f128mem.
12760let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12761    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12762defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12763                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12764                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12765                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12766                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12767
12768defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12769                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12770                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12771                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12772                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12773
12774defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12775                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12776                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12777                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12778                    Sched<[SchedWriteFMA.Scl.Folded]>;
12779
12780defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12781                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12782                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12783                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12784                     Sched<[SchedWriteFMA.Scl.Folded]>;
12785}
12786
12787//===----------------------------------------------------------------------===//
12788// AVX5124VNNIW
12789//===----------------------------------------------------------------------===//
12790
// AVX5124VNNIW instructions. Both are memory-only forms with an empty
// selection pattern list ([]), defined through AVX512_maskable_3src_in_asm on
// v16i32_info / VR512. They are marked mayLoad with no other side effects and
// tie $src1 to $dst.
// NOTE(review): these integer instructions are scheduled with
// SchedWriteFMA.ZMM.Folded and take an f128mem operand, same as the 4FMAPS
// definitions — presumably intentional reuse; confirm.
12791let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12792    Constraints = "$src1 = $dst" in {
12793defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12794                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12795                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12796                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12797                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12798
12799defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12800                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12801                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12802                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12803                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12804}
12805
// Pseudo instructions that store and load a VK16PAIR mask-register-pair
// operand to/from memory. Neither has a selection pattern; the store is
// marked mayStore (WriteFStoreX) and the load mayLoad (WriteFLoadX).
// NOTE(review): presumably used for spilling/reloading mask pairs and
// expanded later — confirm where these pseudos are lowered.
12806let hasSideEffects = 0 in {
12807  let mayStore = 1, SchedRW = [WriteFStoreX] in
12808  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12809  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12810  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12811}
12812
12813//===----------------------------------------------------------------------===//
12814// VP2INTERSECT
12815//===----------------------------------------------------------------------===//
12816
// Register (rr), full-vector memory (rm) and broadcast-memory (rmb) forms of
// vp2intersect (opcode 0x68). The mnemonic is "vp2intersect" followed by
// _.Suffix. The destination is _.KRPC, the mask register pair class of the
// vector type, and every form is selected from X86vp2intersect.
12817multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12818  def rr : I<0x68, MRMSrcReg,
12819                  (outs _.KRPC:$dst),
12820                  (ins _.RC:$src1, _.RC:$src2),
12821                  !strconcat("vp2intersect", _.Suffix,
12822                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12823                  [(set _.KRPC:$dst, (X86vp2intersect
12824                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12825                  EVEX_4V, T8XD, Sched<[sched]>;
12826
  // Memory form: $src2 is loaded with _.LdFrag and bitconverted to _.VT.
12827  def rm : I<0x68, MRMSrcMem,
12828                  (outs _.KRPC:$dst),
12829                  (ins  _.RC:$src1, _.MemOp:$src2),
12830                  !strconcat("vp2intersect", _.Suffix,
12831                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12832                  [(set _.KRPC:$dst, (X86vp2intersect
12833                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12834                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12835                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12836
  // Broadcast form: $src2 is a scalar memory operand loaded through
  // _.BroadcastLdFrag; _.BroadcastStr is appended to the operand in the
  // assembly string and the encoding adds EVEX_B.
12837  def rmb : I<0x68, MRMSrcMem,
12838                  (outs _.KRPC:$dst),
12839                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12840                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12841                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12842                  [(set _.KRPC:$dst, (X86vp2intersect
12843                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12844                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12845                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12846}
12847
// Instantiates avx512_vp2intersect_modes at 512, 256 and 128 bits. Z requires
// [HasAVX512, HasVP2INTERSECT]; Z256 and Z128 additionally require HasVLX.
12848multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12849  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12850    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12851
12852  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12853    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12854    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12855  }
12856}
12857
// Dword and qword variants; the qword variant adds REX_W.
12858defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12859defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
12860
// Instantiates avx512_binop_rm2 at 512, 256 and 128 bits for a binary op
// whose source and destination vector types differ.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - per-width scheduling classes.
//   _SrcVTInfo     - type info of the sources; also passed as the third
//                    type-info argument of avx512_binop_rm2.
//   _DstVTInfo     - type info of the result.
//   OpNode         - SDNode to select from.
//   prd            - feature predicate; the 256/128-bit forms add HasVLX.
//   IsCommutable   - forwarded to avx512_binop_rm2 (default 0).
// Record names are built explicitly as NAME#Z, NAME#Z256 and NAME#Z128, and
// every width uses EVEX_CD8<32, CD8VF>.
12861multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12862                             X86SchedWriteWidths sched,
12863                             AVX512VLVectorVTInfo _SrcVTInfo,
12864                             AVX512VLVectorVTInfo _DstVTInfo,
12865                             SDNode OpNode, Predicate prd,
12866                             bit IsCommutable = 0> {
12867  let Predicates = [prd] in
12868    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12869                                   _SrcVTInfo.info512, _DstVTInfo.info512,
12870                                   _SrcVTInfo.info512, IsCommutable>,
12871                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
12872  let Predicates = [HasVLX, prd] in {
12873    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12874                                      _SrcVTInfo.info256, _DstVTInfo.info256,
12875                                      _SrcVTInfo.info256, IsCommutable>,
12876                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
12877    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12878                                      _SrcVTInfo.info128, _DstVTInfo.info128,
12879                                      _SrcVTInfo.info128, IsCommutable>,
12880                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
12881  }
12882}
12883
// vcvtne2ps2bf16 (opcode 0x72, T8XD): f32 vector sources, bf16 vector result,
// selected from X86cvtne2ps2bf16, gated on HasBF16 and not commutable.
12884let ExeDomain = SSEPackedSingle in
12885defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12886                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12887                                        avx512vl_f32_info, avx512vl_bf16_info,
12888                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12889
12890// Truncate Float to BFloat16
// Defines the 512-, 256- and 128-bit f32 -> bf16 conversions through
// avx512_vcvt_fp, plus assembler aliases for the suffixed mnemonics.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   sched          - per-width scheduling classes.
// All widths clear Uses (no implicit register reads) and set
// mayRaiseFPException = 0. Z requires [HasBF16]; Z128 and Z256 require
// [HasBF16, HasVLX].
12891multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12892                             X86SchedWriteWidths sched> {
12893  let ExeDomain = SSEPackedSingle in {
12894  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12895    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16bf16x_info, v16f32_info,
12896                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12897  }
12898  let Predicates = [HasBF16, HasVLX] in {
12899    let Uses = []<Register>, mayRaiseFPException = 0 in {
    // The 128-bit form passes null_frag for both operator arguments, so no
    // selection patterns are generated from this definition; it also
    // overrides the broadcast string ("{1to4}"), alias suffix ("{x}"),
    // memory operand (f128mem) and write-mask class (VK4WM).
12900    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v4f32x_info,
12901                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12902                               VK4WM>, EVEX_V128;
12903    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8bf16x_info, v8f32x_info,
12904                               X86cvtneps2bf16, X86cvtneps2bf16,
12905                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12906    }
12907  } // Predicates = [HasBF16, HasVLX]
12908  } // ExeDomain = SSEPackedSingle
12909
  // Aliases for the explicitly suffixed mnemonics (OpcodeStr # "x" and
  // OpcodeStr # "y"). Both the Z128 and Z256 forms write a VR128X
  // destination. The memory-operand aliases are restricted to the "intel"
  // assembler variant.
12910  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12911                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12912                  VR128X:$src), 0>;
12913  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12914                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12915                  f128mem:$src), 0, "intel">;
12916  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12917                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12918                  VR256X:$src), 0>;
12919  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12920                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12921                  f256mem:$src), 0, "intel">;
12922}
12923
// vcvtneps2bf16: opcode 0x72 with the T8XS prefix (VCVTNE2PS2BF16 above uses
// the same opcode with T8XD).
12924defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12925                                       SchedWriteCvtPD2PS>, T8XS,
12926                                       EVEX_CD8<32, CD8VF>;
12927
let Predicates = [HasBF16, HasVLX] in {
  // The 128-bit instruction definitions carry no patterns of their own (they
  // were disabled with null_frag), so selection is spelled out here. The
  // masked forms go through X86mcvtneps2bf16.

  // --- Register source ---
  def : Pat<(v8bf16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src),
                              (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src),
                              v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;

  // --- Full-vector load source ---
  def : Pat<(v8bf16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src),
                              (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src),
                              v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;

  // --- 32-bit broadcast-load source ---
  def : Pat<(v8bf16 (X86cvtneps2bf16
                        (v4f32 (X86VBroadcastld32 addr:$src)))),
            (VCVTNEPS2BF16Z128rmb addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              (v8bf16 VR128X:$src0),
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
                              v8bf16x_info.ImmAllZerosV,
                              VK4WM:$mask),
            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;

  // --- 128-bit intrinsic ---
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (v4f32 VR128X:$src))),
            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16128 (loadv4f32 addr:$src))),
            (VCVTNEPS2BF16Z128rm addr:$src)>;

  // --- 256-bit intrinsic ---
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (v8f32 VR256X:$src))),
            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
  def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
            (VCVTNEPS2BF16Z256rm addr:$src)>;

  // --- bf16 broadcasts are selected to the 16-bit integer broadcast ---
  def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ128rm addr:$src)>;
  def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZ256rm addr:$src)>;

  def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZ128rr VR128X:$src)>;
  def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZ256rr VR128X:$src)>;

  // --- X86vfpround from v8f32 to v8bf16 ---
  def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))),
            (VCVTNEPS2BF16Z256rr VR256X:$src)>;
  def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))),
            (VCVTNEPS2BF16Z256rm addr:$src)>;

  // TODO: No scalar broadcast patterns yet, because scalar bf16 is not
  // supported as a legal type so far.
}
12986
let Predicates = [HasBF16] in {
  // 512-bit bf16 broadcasts are selected to the 16-bit integer broadcast.
  def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
            (VPBROADCASTWZrm addr:$src)>;

  def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
            (VPBROADCASTWZrr VR128X:$src)>;

  // X86vfpround from v16f32 to v16bf16, register and load sources.
  def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))),
            (VCVTNEPS2BF16Zrr VR512:$src)>;
  def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))),
            (VCVTNEPS2BF16Zrm addr:$src)>;

  // TODO: No scalar broadcast patterns yet, because scalar bf16 is not
  // supported as a legal type so far.
}
13000
// $src1 is tied to $dst for every form defined below.
let Constraints = "$src1 = $dst" in {
// Register (r), full-vector memory (m) and 32-bit broadcast memory (mb) forms
// of a three-source maskable operation. The result and $src1 use `_`; $src2
// and $src3 use `src_v`.
multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
  // Both multiplicand operands in registers.
  defm r : AVX512_maskable_3src<
               opc, MRMSrcReg, _, (outs _.RC:$dst),
               (ins src_v.RC:$src2, src_v.RC:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
           EVEX_4V, Sched<[sched]>;

  // $src3 loaded as a full vector from memory.
  defm m : AVX512_maskable_3src<
               opc, MRMSrcMem, _, (outs _.RC:$dst),
               (ins src_v.RC:$src2, src_v.MemOp:$src3),
               OpcodeStr, "$src3, $src2", "$src2, $src3",
               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                             (src_v.LdFrag addr:$src3)))>,
           EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // $src3 is a broadcast load from an f32mem operand; the assembly strings
  // append the broadcast decoration of `_`.
  defm mb : AVX512_maskable_3src<
                opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins src_v.RC:$src2, f32mem:$src3),
                OpcodeStr,
                "${src3}" # _.BroadcastStr # ", $src2",
                "$src2, ${src3}" # _.BroadcastStr,
                (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
                              (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
            EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
} // Constraints = "$src1 = $dst"
13029
// Expands avx512_dpbf16ps_rm at 512-, 256- and 128-bit widths. The 512-bit
// form needs only `prd`; the narrower forms additionally require HasVLX.
multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                              _.info512, src_v.info512>,
           EVEX_V512;

  let Predicates = [HasVLX, prd] in {
    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                   _.info256, src_v.info256>,
                EVEX_V256;
    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                   _.info128, src_v.info128>,
                EVEX_V128;
  }
}
13044
// vdpbf16ps (opcode 0x52): f32 result/accumulator vectors with bf16 source
// vectors, at all three widths.
let ExeDomain = SSEPackedSingle in {
  defm VDPBF16PS
      : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
                              avx512vl_f32_info, avx512vl_bf16_info, HasBF16>,
        T8XS, EVEX_CD8<32, CD8VF>;
}
13049
13050//===----------------------------------------------------------------------===//
13051// AVX512FP16
13052//===----------------------------------------------------------------------===//
13053
let Predicates = [HasFP16] in {
// Move a word from a GPR or from 16-bit memory into a packed-word register.
def VMOVW2SHrr
    : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
             "vmovw\t{$src, $dst|$dst, $src}", []>,
      T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
def VMOVWrm
    : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
             "vmovw\t{$src, $dst|$dst, $src}",
             [(set VR128X:$dst,
                   (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
      T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;

// A GR16 source is first placed into the low 16 bits of an undefined 32-bit
// register, because the register form takes a GR32 operand.
def : Pat<(f16 (bitconvert GR16:$src)),
          (f16 (COPY_TO_REGCLASS
                   (VMOVW2SHrr
                       (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
                   FR16X))>;
def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
          (VMOVW2SHrr
              (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
def : Pat<(v4i32 (X86vzmovl
                     (scalar_to_vector (and GR32:$src, 0xffff)))),
          (VMOVW2SHrr GR32:$src)>;
// FIXME: We should really find a way to improve these patterns.
def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector
                         undef,
                         (v4i32 (scalar_to_vector (and GR32:$src, 0xffff))),
                         (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                      (insert_subvector
                          undef,
                          (v4i32 (scalar_to_vector (and GR32:$src, 0xffff))),
                          (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;

def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
          (VMOVW2SHrr
              (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;

// The 128-bit vmovw instruction writes zeros to the high part of the
// destination, so it also implements the wider zero-extending loads.
def : Pat<(v8i16 (X86vzload16 addr:$src)),
          (VMOVWrm addr:$src)>;
def : Pat<(v16i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

// Use the regular 128-bit instruction to match the 512-bit
// scalar_to_vector + zero-extend.
def : Pat<(v32i16 (X86vzload16 addr:$src)),
          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;

// 16-bit extending loads feeding i32 vectors.
def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
          (VMOVWrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl
                     (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
          (VMOVWrm addr:$src)>;
def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector
                         undef,
                         (v4i32 (scalar_to_vector
                                    (i32 (zextloadi16 addr:$src)))),
                         (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl
                      (insert_subvector
                          undef,
                          (v4i32 (scalar_to_vector
                                     (i32 (zextloadi16 addr:$src)))),
                          (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;

// Move a word from an xmm register to a GPR or to 16-bit memory.
def VMOVSH2Wrr
    : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
             "vmovw\t{$src, $dst|$dst, $src}", []>,
      T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVWmr
    : AVX512<0x7E, MRMDestMem, (outs), (ins i16mem:$dst, VR128X:$src),
             "vmovw\t{$src, $dst|$dst, $src}",
             [(store (i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
                     addr:$dst)]>,
      T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;

// The register form produces a GR32; the i16 result is its low 16-bit
// subregister.
def : Pat<(i16 (bitconvert FR16X:$src)),
          (i16 (EXTRACT_SUBREG
                   (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
                   sub_16bit))>;
def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;

// Allow "vmovw" to use GR64. These REX_W forms have no selection pattern.
let hasSideEffects = 0 in {
  def VMOVW64toSHrr
      : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
               "vmovw\t{$src, $dst|$dst, $src}", []>,
        T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
  def VMOVSHtoW64rr
      : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
               "vmovw\t{$src, $dst|$dst, $src}", []>,
        T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
}
}
13142
// Convert 16-bit float to i16/u16. The same multiclass is also instantiated
// with _Dst and _Src swapped for the i16/u16 to 16-bit float direction.
// The 512-bit form additionally gets the avx512_vcvt_fp_rc variant built
// from OpNodeRnd.
multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                          AVX512VLVectorVTInfo _Dst,
                          AVX512VLVectorVTInfo _Src,
                          X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }
}
13162
// Convert 16-bit float to i16/u16 with truncation. The 512-bit form
// additionally gets the avx512_vcvt_fp_sae variant built from OpNodeRnd.
multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
                              OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
                               OpNode, MaskOpNode, sched.XMM>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }
}
13181
// Packed half <-> 16-bit integer conversions. Several entries share an opcode
// byte and differ only in the map/prefix class that follows.

// half -> unsigned word
defm VCVTPH2UW
    : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
                     X86cvtp2UIntRnd, avx512vl_i16_info, avx512vl_f16_info,
                     SchedWriteCvtPD2DQ>,
      T_MAP5PS, EVEX_CD8<16, CD8VF>;
// unsigned word -> half
defm VCVTUW2PH
    : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
                     X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i16_info,
                     SchedWriteCvtPD2DQ>,
      T_MAP5XD, EVEX_CD8<16, CD8VF>;
// half -> signed word, truncating
defm VCVTTPH2W
    : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si, X86cvttp2si,
                      X86cvttp2siSAE, avx512vl_i16_info, avx512vl_f16_info,
                      SchedWriteCvtPD2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VF>;
// half -> unsigned word, truncating
defm VCVTTPH2UW
    : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui, X86cvttp2ui,
                      X86cvttp2uiSAE, avx512vl_i16_info, avx512vl_f16_info,
                      SchedWriteCvtPD2DQ>,
      T_MAP5PS, EVEX_CD8<16, CD8VF>;
// half -> signed word
defm VCVTPH2W
    : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
                     X86cvtp2IntRnd, avx512vl_i16_info, avx512vl_f16_info,
                     SchedWriteCvtPD2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VF>;
// signed word -> half
defm VCVTW2PH
    : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
                     X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i16_info,
                     SchedWriteCvtPD2DQ>,
      T_MAP5XS, EVEX_CD8<16, CD8VF>;
13206
// Convert Half to Signed/Unsigned Doubleword. The 512-bit form additionally
// gets the avx512_vcvt_fp_rc variant built from OpNodeRnd.
multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    // The 128-bit form has a v4i32 result but a v8f16x_info source, so the
    // broadcast string and memory operand (f64mem) are given explicitly.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM,
                               "{1to4}", "", f64mem>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }
}
13224
// Convert Half to Signed/Unsigned Doubleword with truncation. The 512-bit
// form additionally gets the avx512_vcvt_fp_sae variant built from OpNodeRnd.
multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
                              OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    // The 128-bit form has a v4i32 result but a v8f16x_info source, so the
    // broadcast string and memory operand (f64mem) are given explicitly.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM,
                               "{1to4}", "", f64mem>,
                EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM>,
                EVEX_V256;
  }
}
13242
13243
// Packed half -> 32-bit integer conversions (signed/unsigned, with and
// without truncation).
defm VCVTPH2DQ
    : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
                      X86cvtp2IntRnd, SchedWriteCvtPS2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VH>;
defm VCVTPH2UDQ
    : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
                      X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
      T_MAP5PS, EVEX_CD8<16, CD8VH>;

defm VCVTTPH2DQ
    : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si, X86cvttp2si,
                       X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
      T_MAP5XS, EVEX_CD8<16, CD8VH>;

defm VCVTTPH2UDQ
    : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui, X86cvttp2ui,
                       X86cvttp2uiSAE, SchedWriteCvtPS2DQ>,
      T_MAP5PS, EVEX_CD8<16, CD8VH>;
13260
// Convert Half to Signed/Unsigned Quadword. The 512-bit form additionally
// gets the avx512_vcvt_fp_rc variant built from OpNodeRnd.
multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    // The broadcast string is specified explicitly, since only 2 elements
    // are taken from the v8f16x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM,
                               "{1to2}", "", f32mem>,
                EVEX_V128;
    // The broadcast string is specified explicitly, since only 4 elements
    // are taken from the v8f16x_info source.
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM,
                               "{1to4}", "", f64mem>,
                EVEX_V256;
  }
}
13284
// Convert Half to Signed/Unsigned Quadword with truncation. The 512-bit form
// additionally gets the avx512_vcvt_fp_sae variant built from OpNodeRnd.
multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                            X86SchedWriteWidths sched> {
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info,
                          OpNode, MaskOpNode, sched.ZMM>,
           avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
                              OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    // The broadcast string is specified explicitly, since only 2 elements
    // are taken from the v8f16x_info source.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.XMM,
                               "{1to2}", "", f32mem>,
                EVEX_V128;
    // The broadcast string is specified explicitly, since only 4 elements
    // are taken from the v8f16x_info source.
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info,
                               OpNode, MaskOpNode, sched.YMM,
                               "{1to4}", "", f64mem>,
                EVEX_V256;
  }
}
13306
// Packed half -> 64-bit integer conversions (signed/unsigned, with and
// without truncation).
defm VCVTPH2QQ
    : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
                      X86cvtp2IntRnd, SchedWriteCvtPS2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VQ>;

defm VCVTPH2UQQ
    : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
                      X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2QQ
    : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si, X86cvttp2si,
                       X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VQ>;

defm VCVTTPH2UQQ
    : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui, X86cvttp2ui,
                       X86cvttp2uiSAE, SchedWriteCvtPS2DQ>,
      T_MAP5PD, EVEX_CD8<16, CD8VQ>;
13324
// Convert Signed/Unsigned Quadword to Half
multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  // All three widths have the same destination type (v8f16x_info), so the
  // asm parser needs "x"/"y"/"z" suffixes to tell the 128-, 256- and 512-bit
  // memory forms apart. The broadcast string is specified explicitly for the
  // same reason.
  let Predicates = [HasFP16] in
  defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info,
                          OpNode, MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
           avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
                             OpNodeRnd, sched.ZMM>,
           EVEX_V512;

  let Predicates = [HasFP16, HasVLX] in {
    // The narrower forms pass null_frag for both pattern operators and name
    // their write-mask register class explicitly.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
                               null_frag, null_frag, sched.XMM,
                               "{1to2}", "{x}", i128mem, VK2WM>,
                EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
                               null_frag, null_frag, sched.YMM,
                               "{1to4}", "{y}", i256mem, VK4WM>,
                EVEX_V256, NotEVEX2VEXConvertible;
  }

  // --- "x" aliases: 128-bit register and broadcast forms ---
  def : InstAlias<OpcodeStr # "x\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z128rr")
                       VR128X:$dst, VR128X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrk")
                       VR128X:$dst, VK2WM:$mask, VR128X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z128rrkz")
                       VR128X:$dst, VK2WM:$mask, VR128X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmb")
                       VR128X:$dst, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "x\t{${src}{1to2}, $dst {${mask}}|"
                              "$dst {${mask}}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbk")
                       VR128X:$dst, VK2WM:$mask, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "x\t{${src}{1to2}, $dst {${mask}} {z}|"
                              "$dst {${mask}} {z}, ${src}{1to2}}",
                  (!cast<Instruction>(NAME # "Z128rmbkz")
                       VR128X:$dst, VK2WM:$mask, i64mem:$src),
                  0, "att">;

  // --- "y" aliases: 256-bit register and broadcast forms ---
  def : InstAlias<OpcodeStr # "y\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Z256rr")
                       VR128X:$dst, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "y\t{$src, $dst {${mask}}|"
                              "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrk")
                       VR128X:$dst, VK4WM:$mask, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "y\t{$src, $dst {${mask}} {z}|"
                              "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Z256rrkz")
                       VR128X:$dst, VK4WM:$mask, VR256X:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmb")
                       VR128X:$dst, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "y\t{${src}{1to4}, $dst {${mask}}|"
                              "$dst {${mask}}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbk")
                       VR128X:$dst, VK4WM:$mask, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "y\t{${src}{1to4}, $dst {${mask}} {z}|"
                              "$dst {${mask}} {z}, ${src}{1to4}}",
                  (!cast<Instruction>(NAME # "Z256rmbkz")
                       VR128X:$dst, VK4WM:$mask, i64mem:$src),
                  0, "att">;

  // --- "z" aliases: 512-bit register and broadcast forms ---
  def : InstAlias<OpcodeStr # "z\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(NAME # "Zrr")
                       VR128X:$dst, VR512:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "z\t{$src, $dst {${mask}}|"
                              "$dst {${mask}}, $src}",
                  (!cast<Instruction>(NAME # "Zrrk")
                       VR128X:$dst, VK8WM:$mask, VR512:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "z\t{$src, $dst {${mask}} {z}|"
                              "$dst {${mask}} {z}, $src}",
                  (!cast<Instruction>(NAME # "Zrrkz")
                       VR128X:$dst, VK8WM:$mask, VR512:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmb")
                       VR128X:$dst, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "z\t{${src}{1to8}, $dst {${mask}}|"
                              "$dst {${mask}}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbk")
                       VR128X:$dst, VK8WM:$mask, i64mem:$src),
                  0, "att">;
  def : InstAlias<OpcodeStr # "z\t{${src}{1to8}, $dst {${mask}} {z}|"
                              "$dst {${mask}} {z}, ${src}{1to8}}",
                  (!cast<Instruction>(NAME # "Zrmbkz")
                       VR128X:$dst, VK8WM:$mask, i64mem:$src),
                  0, "att">;
}
13417
// Packed signed / unsigned 64-bit integer -> half conversions.
defm VCVTQQ2PH
    : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
                      X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
      REX_W, T_MAP5PS, EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PH
    : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
                      X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
      REX_W, T_MAP5XD, EVEX_CD8<64, CD8VF>;
13425
// Convert scalar half to signed/unsigned 32-/64-bit integer. The 64-bit
// variants add REX_W and use the "{q}" suffix instead of "{l}".
defm VCVTSH2SIZ
    : avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
                             X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}",
                             HasFP16>,
      T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z
    : avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
                             X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}",
                             HasFP16>,
      T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ
    : avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
                             X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}",
                             HasFP16>,
      T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z
    : avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
                             X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}",
                             HasFP16>,
      T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;

// Truncating variants.
defm VCVTTSH2SIZ
    : avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
                       any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE,
                       WriteCvtSS2I, "{l}", HasFP16>,
      T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z
    : avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
                       any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE,
                       WriteCvtSS2I, "{q}", HasFP16>,
      REX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ
    : avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
                       any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE,
                       WriteCvtSS2I, "{l}", HasFP16>,
      T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USI64Z
    : avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
                       any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE,
                       WriteCvtSS2I, "{q}", HasFP16>,
      T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
13452
// Scalar integer -> f16 conversions ("cvtsi2sh" / "cvtusi2sh"): instruction
// definitions, assembler aliases and selection patterns. Everything in this
// block is predicated on HasFP16.
13453let Predicates = [HasFP16] in {
  // Instruction definitions via avx512_vcvtsi_common. Signed forms use opcode
  // 0x2A with X86SintToFp/X86SintToFpRnd; unsigned forms use opcode 0x7B with
  // X86UintToFp/X86UintToFpRnd. Each has a GR32/i32mem variant ("l") and a
  // GR64/i64mem variant ("q"); the 64-bit variants add REX_W and use
  // EVEX_CD8<64, CD8VT1> instead of EVEX_CD8<32, CD8VT1>.
13454  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13455                                   v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13456                                   T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13457  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13458                                   v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13459                                   T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
13460  defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13461                                    v8f16x_info, i32mem, loadi32,
13462                                    "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13463  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13464                                    v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13465                                    T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
  // Aliases mapping the unsuffixed mnemonic with a memory operand onto the
  // i32mem (rm_Int) instruction. The trailing `0, "att"` arguments are
  // InstAlias's Emit flag and asm variant name, i.e. these are parse-only
  // aliases for the AT&T variant.
13466  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13467              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13468
13469  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13470              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13471
13472
  // Scalar f16 results of signed int -> fp, from a load or from a GPR. The
  // instruction's first (vector) source is fed an IMPLICIT_DEF, presumably
  // because only the converted low element is consumed -- confirm.
13473  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13474            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13475  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13476            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13477
13478  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13479            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13480  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13481            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13482
  // Same four shapes for unsigned int -> fp.
13483  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13484            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13485  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13486            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13487
13488  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13489            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13490  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13491            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13492
13493  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13494  // which produce unnecessary vmovsh instructions
  // Each pattern folds X86Movsh(dst, scalar_to_vector(int-to-fp(x))) into the
  // corresponding *_Int instruction, passing $dst as the instruction's first
  // vector source. Order below: signed GR64, signed loadi64, signed GR32,
  // signed loadi32, then the same four for unsigned.
13495  def : Pat<(v8f16 (X86Movsh
13496                     (v8f16 VR128X:$dst),
13497                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13498            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13499
13500  def : Pat<(v8f16 (X86Movsh
13501                     (v8f16 VR128X:$dst),
13502                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13503            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13504
13505  def : Pat<(v8f16 (X86Movsh
13506                     (v8f16 VR128X:$dst),
13507                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13508            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13509
13510  def : Pat<(v8f16 (X86Movsh
13511                     (v8f16 VR128X:$dst),
13512                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13513            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13514
13515  def : Pat<(v8f16 (X86Movsh
13516                     (v8f16 VR128X:$dst),
13517                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13518            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13519
13520  def : Pat<(v8f16 (X86Movsh
13521                     (v8f16 VR128X:$dst),
13522                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13523            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13524
13525  def : Pat<(v8f16 (X86Movsh
13526                     (v8f16 VR128X:$dst),
13527                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13528            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13529
13530  def : Pat<(v8f16 (X86Movsh
13531                     (v8f16 VR128X:$dst),
13532                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13533            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13534} // Predicates = [HasFP16]
13535
// Selection patterns for 64-bit integer vector -> v8f16 conversions
// (VCVTQQ2PH for signed, VCVTUQQ2PH for unsigned), predicated on HasFP16 and
// HasVLX. For each of the 256-bit (v4i64, VK4WM mask) and 128-bit (v2i64,
// VK2WM mask) sources there are three source kinds -- register (rr), full
// vector load (rm) and 64-bit broadcast load (rmb) -- and for each of those
// three forms:
//   * unmasked:         X86any_V{S,U}intToFP           -> <inst>
//   * masked with $src0 as pass-through: X86VM{S,U}intToFP -> <inst>k
//   * masked with ImmAllZerosV pass-through:              -> <inst>kz
13536let Predicates = [HasFP16, HasVLX] in {
13537  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13538  // patterns have been disabled with null_frag.
  // Signed, 256-bit source, register.
13539  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13540            (VCVTQQ2PHZ256rr VR256X:$src)>;
13541  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13542                           VK4WM:$mask),
13543            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13544  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13545                           VK4WM:$mask),
13546            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13547
  // Signed, 256-bit source, full vector load.
13548  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13549            (VCVTQQ2PHZ256rm addr:$src)>;
13550  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13551                           VK4WM:$mask),
13552            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13553  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13554                           VK4WM:$mask),
13555            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13556
  // Signed, 256-bit source, 64-bit broadcast load.
13557  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13558            (VCVTQQ2PHZ256rmb addr:$src)>;
13559  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13560                           (v8f16 VR128X:$src0), VK4WM:$mask),
13561            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13562  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13563                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13564            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13565
  // Signed, 128-bit source, register.
13566  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13567            (VCVTQQ2PHZ128rr VR128X:$src)>;
13568  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13569                           VK2WM:$mask),
13570            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13571  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13572                           VK2WM:$mask),
13573            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13574
  // Signed, 128-bit source, full vector load.
13575  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13576            (VCVTQQ2PHZ128rm addr:$src)>;
13577  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13578                           VK2WM:$mask),
13579            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13580  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13581                           VK2WM:$mask),
13582            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13583
  // Signed, 128-bit source, 64-bit broadcast load.
13584  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13585            (VCVTQQ2PHZ128rmb addr:$src)>;
13586  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13587                           (v8f16 VR128X:$src0), VK2WM:$mask),
13588            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13589  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13590                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13591            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13592
13593  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13594  // patterns have been disabled with null_frag.
  // Unsigned, 256-bit source, register.
13595  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13596            (VCVTUQQ2PHZ256rr VR256X:$src)>;
13597  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13598                           VK4WM:$mask),
13599            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13600  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13601                           VK4WM:$mask),
13602            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13603
  // Unsigned, 256-bit source, full vector load.
13604  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13605            (VCVTUQQ2PHZ256rm addr:$src)>;
13606  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13607                           VK4WM:$mask),
13608            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13609  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13610                           VK4WM:$mask),
13611            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13612
  // Unsigned, 256-bit source, 64-bit broadcast load.
13613  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13614            (VCVTUQQ2PHZ256rmb addr:$src)>;
13615  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13616                           (v8f16 VR128X:$src0), VK4WM:$mask),
13617            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13618  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13619                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13620            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13621
  // Unsigned, 128-bit source, register.
13622  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13623            (VCVTUQQ2PHZ128rr VR128X:$src)>;
13624  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13625                           VK2WM:$mask),
13626            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13627  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13628                           VK2WM:$mask),
13629            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13630
  // Unsigned, 128-bit source, full vector load.
13631  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13632            (VCVTUQQ2PHZ128rm addr:$src)>;
13633  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13634                           VK2WM:$mask),
13635            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13636  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13637                           VK2WM:$mask),
13638            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13639
  // Unsigned, 128-bit source, 64-bit broadcast load.
13640  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13641            (VCVTUQQ2PHZ128rmb addr:$src)>;
13642  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13643                           (v8f16 VR128X:$src0), VK2WM:$mask),
13644            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13645  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13646                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13647            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13648} // Predicates = [HasFP16, HasVLX]
13649
// avx512_cfmaop_rm: three-source op built on AVX512_maskable_3src, in three
// operand forms -- register (r), full-vector memory (m) and scalar broadcast
// memory (mb).
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - node used in the selection pattern.
//   _            - X86VectorVTInfo supplying RC, VT, MemOp, LdFrag, etc.
//   IsCommutable - forwarded to the register form only.
// The enclosing `let` ties $src1 to $dst and marks $dst earlyclobber for all
// three forms. In every pattern the tied $src1 is the LAST OpNode operand,
// after $src2 and $src3.
13650let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13651  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
    // Register form; the only one that receives IsCommutable.
13652    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13653            (ins _.RC:$src2, _.RC:$src3),
13654            OpcodeStr, "$src3, $src2", "$src2, $src3",
13655            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
13656
    // Memory form: $src3 is loaded with _.LdFrag.
13657    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13658            (ins _.RC:$src2, _.MemOp:$src3),
13659            OpcodeStr, "$src3, $src2", "$src2, $src3",
13660            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
13661
    // Broadcast form: $src3 is a scalar memory operand loaded with
    // _.BroadcastLdFrag; the asm string appends _.BroadcastStr and the
    // encoding adds EVEX_B.
13662    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13663            (ins _.RC:$src2, _.ScalarMemOp:$src3),
13664            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13665            (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
13666  }
13667} // Constraints = "@earlyclobber $dst, $src1 = $dst"
13668
// avx512_cfmaop_round: register form (rb) of the three-source op with an
// explicit rounding-control operand. Adds an AVX512RC:$rc input, which is
// passed to OpNode as a fourth operand (i32 timm:$rc) after the tied $src1.
// Uses the same "@earlyclobber $dst, $src1 = $dst" constraints as the
// non-rounding forms and adds EVEX_B and EVEX_RC to the encoding.
13669multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13670                                 X86VectorVTInfo _> {
13671  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13672  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13673          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13674          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13675          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13676          EVEX_4V, EVEX_B, EVEX_RC;
13677}
13678
13679
// avx512_cfmaop_common: instantiates the three-source op at all three vector
// widths.
//   Z    - v16f32_info, EVEX_V512, WriteFMAZ; requires HasFP16. Combines
//          avx512_cfmaop_rm (with OpNode) and avx512_cfmaop_round (with
//          OpNodeRnd), so only this width gets the rounding-control form.
//   Z256 - v8f32x_info, EVEX_V256, WriteFMAY; requires HasVLX and HasFP16.
//   Z128 - v4f32x_info, EVEX_V128, WriteFMAX; requires HasVLX and HasFP16.
// NOTE(review): the vector infos are f32-based although the predicate is
// HasFP16 -- presumably each 32-bit element holds one complex FP16 pair;
// confirm against the ISA specification.
13680multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13681  let Predicates = [HasFP16] in {
13682    defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13683                avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13684                      EVEX_V512, Sched<[WriteFMAZ]>;
13685  }
13686  let Predicates = [HasVLX, HasFP16] in {
13687    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13688    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13689  }
13690}
13691
// avx512_cfmulop_common: instantiates a two-source packed op at all three
// vector widths by reusing the generic avx512_fp_packed multiclass (and
// avx512_fp_round_packed for the 512-bit rounding form).
//   Z    - v16f32_info, WriteFMAZ, EVEX_V512; requires HasFP16.
//   Z256 - v8f32x_info, WriteFMAY, EVEX_V256; requires HasVLX and HasFP16.
//   Z128 - v4f32x_info, WriteFMAX, EVEX_V128; requires HasVLX and HasFP16.
// Every instantiation passes "@earlyclobber $dst" and passes IsCommutable
// twice. NOTE(review): the meaning of the two IsCommutable slots, the empty
// string argument and the trailing 0 is defined by avx512_fp_packed, which is
// not visible here -- confirm before changing them.
13692multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13693                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13694  let Predicates = [HasFP16] in {
13695    defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13696                                 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13697                avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13698                                       "", "@earlyclobber $dst">, EVEX_V512;
13699  }
13700  let Predicates = [HasVLX, HasFP16] in {
13701    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13702                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13703    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13704                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13705  }
13706}
13707
13708
// Packed "c" (vf[c]maddcph / vf[c]mulcph) instruction definitions. All four
// are marked as using MXCSR.
//   * vfmaddcph / vfcmaddcph share opcode 0x56 and differ in map prefix
//     (T_MAP6XS vs T_MAP6XD), pattern nodes and the commutable flag (1 vs 0).
//   * vfmulcph / vfcmulcph share opcode 0xD6 with the same XS/XD split and
//     commutable flags; the same node is passed as both OpNode and MaskOpNode.
13709let Uses = [MXCSR] in {
13710  defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13711                                    T_MAP6XS, EVEX_CD8<32, CD8VF>;
13712  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13713                                    T_MAP6XD, EVEX_CD8<32, CD8VF>;
13714
13715  defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13716                                         x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
13717  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13718                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
13719}
13720
13721
// avx512_cfmaop_sh_common: scalar counterpart of the three-source op, built
// on AVX512_maskable_3src with v4f32x_info and VR128X operands. All forms
// require HasFP16, tie $src1 to $dst and mark $dst earlyclobber.
//   r  - register form; the only one that receives IsCommutable.
//   m  - memory form; $src3 is an ssmem operand matched via sse_load_f32.
//   rb - register form with AVX512RC:$rc, using OpNodeRnd and adding
//        EVEX_B, EVEX_RC.
// As in the packed multiclass, the tied $src1 is the last vector operand of
// the pattern node.
13722multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13723                                   bit IsCommutable> {
13724  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13725    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13726                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13727                        "$src3, $src2", "$src2, $src3",
13728                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13729                        Sched<[WriteFMAX]>;
13730    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13731                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13732                        "$src3, $src2", "$src2, $src3",
13733                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13734                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13735    defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13736                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13737                        "$rc, $src3, $src2", "$src2, $src3, $rc",
13738                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13739                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13740  }
13741}
13742
// avx512_cfmbinop_sh_common: scalar two-source op built on AVX512_maskable
// with f32x_info, VR128X operands and v4f32 patterns. All forms require
// HasFP16, use X86selects as the select node and mark $dst earlyclobber.
//   rr  - register form; IsCommutable is passed in all three flag slots.
//   rm  - memory form; $src2 is an ssmem operand matched via sse_load_f32;
//         the three flag slots are 0.
//   rrb - register form with AVX512RC:$rc, using OpNodeRnd and adding
//         EVEX_B, EVEX_RC; the three flag slots are 0.
// NOTE(review): the exact meaning of the three flag slots is defined by
// AVX512_maskable, which is not visible here -- confirm before changing them.
13743multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13744                                     SDNode OpNodeRnd, bit IsCommutable> {
13745  let Predicates = [HasFP16] in {
13746    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13747                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13748                        "$src2, $src1", "$src1, $src2",
13749                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13750                        IsCommutable, IsCommutable, IsCommutable,
13751                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
13752    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13753                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13754                        "$src2, $src1", "$src1, $src2",
13755                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13756                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13757                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
13758    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13759                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13760                        "$rc, $src2, $src1", "$src1, $src2, $rc",
13761                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13762                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13763                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13764  }
13765}
13766
// Scalar "c" (vf[c]maddcsh / vf[c]mulcsh) instruction definitions. All four
// are marked as using MXCSR and carry EVEX_CD8<32, CD8VT1>, EVEX_V128 and
// EVEX_4V.
//   * vfmaddcsh / vfcmaddcsh share opcode 0x57 and differ in map prefix
//     (T_MAP6XS vs T_MAP6XD), pattern nodes and the commutable flag (1 vs 0).
//   * vfmulcsh / vfcmulcsh share opcode 0xD7 with the same XS/XD split and
//     commutable flags.
// NOTE(review): the two fmul definitions also carry VEX_LIG while the two
// fmadd definitions do not -- confirm this asymmetry is intended.
13767let Uses = [MXCSR] in {
13768  defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13769                                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13770  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13771                                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13772
13773  defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13774                                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13775  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13776                                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13777}
13778