xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrAVX512.td (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the X86 AVX512 instruction set, defining the
10// instructions, and properties of the instructions which are needed for code
11// generation, machine code emission, and analysis.
12//
13//===----------------------------------------------------------------------===//
14
// X86VectorVTInfo collects everything that can be derived from a vector type
// (NumElts x EltVT): the data and mask register classes, the mask value type,
// memory operands, load fragments and so on.  Multiclasses take one of these
// records as a template argument instead of a long list of individual
// arguments.
// Scalar types use the same class with numelts set to 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  // Template arguments, re-exported as fields.
  RegisterClass RC = rc;
  ValueType EltVT = eltvt;
  int NumElts = numelts;

  // Mask register class with one bit per element (VK<NumElts>).
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Mask register pair operand (VK<NumElts>Pair).  Left unset when there are
  // more than 16 elements.
  RegisterOperand KRPC =
      !if(!gt(NumElts, 16), ?,
          !cast<RegisterOperand>("VK" # NumElts # "Pair"));

  // Write-mask register class (VK<NumElts>WM).
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // Value type of the mask: v<NumElts>i1.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Mnemonic suffix, e.g. "ps" or "d".
  string Suffix = suffix;

  // Name of the vector VT.  For real vectors this is v # NumElts # EltVT, e.g.
  // v8i32 for eight i32 elements.  For scalars (NumElts = 1) the element count
  // is chosen from the element size instead: 16 bits -> 8, 32 bits -> 4 and
  // 64 bits -> 2, which yields v8f16, v4f32 or v2f64 for the FP scalars.  Any
  // other element size keeps NumElts.
  string VTName = "v" # !if(!eq(NumElts, 1),
                            !if(!eq(EltVT.Size, 16), 8,
                                !if(!eq(EltVT.Size, 32), 4,
                                    !if(!eq(EltVT.Size, 64), 2, NumElts))),
                            NumElts) # EltVT;

  // The vector VT record named by VTName.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type spelled as a string, e.g. "i32".
  string EltTypeName = !cast<string>(EltVT);
  // Element size in bits as a string, e.g. "32" for v16i32.  Obtained by
  // removing the "i" / "f" letter from EltTypeName.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  // Element size in bits as an integer.
  int EltSize = EltVT.Size;

  // "i" for integer element types, "f" for floating-point ones.
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of VT in bits, e.g. 512 for the VR512 types.
  int Size = VT.Size;

  // Full-width memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand covering a single element, e.g. f32mem.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics: shmem / ssmem / sdmem.  Unset
  // for non-FP element types.
  Operand IntScalarMemOp =
      !if(!eq(EltTypeName, "f16"), !cast<Operand>("shmem"),
          !if(!eq(EltTypeName, "f32"), !cast<Operand>("ssmem"),
              !if(!eq(EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)));

  // Load fragments: full vector, aligned full vector, single element and
  // broadcast of a single element.
  PatFrag LdFrag = !cast<PatFrag>("load" # VTName);

  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);

  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
  PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);

  // sse_load_f16 / sse_load_f32 / sse_load_f64 for the matching FP element
  // type; unset otherwise.
  PatFrags ScalarIntMemFrags =
      !if(!eq(EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
          !if(!eq(EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
              !if(!eq(EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"),
                  ?)));

  // Embedded-broadcast decoration used in assembly, e.g. "{1to16}".
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format (CD8VT<NumElts>).
  // !srl(NumElts, 4) is zero exactly when NumElts is below 16, so this is only
  // defined for NumElts <= 8 in practice and unset otherwise.
  CD8VForm CD8TupleForm = !if(!eq(!srl(NumElts, 4), 0),
                              !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Sub-register index for 128-bit (sub_xmm) and 256-bit (sub_ymm) types;
  // unset for every other size.
  SubRegIndex SubRegIdx = !if(!eq(Size, 128), sub_xmm,
                              !if(!eq(Size, 256), sub_ymm, ?));

  // Execution domain selected from the element type.
  Domain ExeDomain = !cond(!eq(EltTypeName, "f32"): SSEPackedSingle,
                           !eq(EltTypeName, "f64"): SSEPackedDouble,
                           // FIXME? f16 currently uses the packed-single
                           // domain.
                           !eq(EltTypeName, "f16"): SSEPackedSingle,
                           true: SSEPackedInt);

  // Scalar FP register class: FR32X for f32, FR16X for f16 and FR64X for
  // everything else.
  RegisterClass FRC = !cond(!eq(EltTypeName, "f32"): FR32X,
                            !eq(EltTypeName, "f16"): FR16X,
                            true: FR64X);

  // All-zeros vector of type VT, used by zero-masking patterns.
  dag ImmAllZerosV = (VT immAllZerosV);

  // Instruction-name suffix selected by vector width.
  string ZSuffix = !cond(!eq(Size, 128): "Z128",
                         !eq(Size, 256): "Z256",
                         true: "Z");
}
110
// 512-bit vector types (RC = VR512).
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;

// 256-bit vector types.  The "x" in names such as v32i8x_info means that
// RC = VR256X.
def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;

// 128-bit vector types (RC = VR128X).
def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">;
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;

// Scalar types are mapped to the smallest (128-bit) vector type with the
// appropriate element type, which allows the same masking logic to be used
// for them.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f16x_info : X86VectorVTInfo<1, f16, VR128X, "sh">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
143
// Bundles three X86VectorVTInfo records, one per vector width, so that a
// multiclass can receive the 512-, 256- and 128-bit descriptions in a single
// template argument.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512,
                           X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}
150
// One AVX512VLVectorVTInfo per element type, listing the 512-, 256- and
// 128-bit records in that order.
def avx512vl_i8_info
    : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info, v16i8x_info>;
def avx512vl_i16_info
    : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info, v8i16x_info>;
def avx512vl_i32_info
    : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info, v4i32x_info>;
def avx512vl_i64_info
    : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info, v2i64x_info>;
def avx512vl_f16_info
    : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info, v8f16x_info>;
def avx512vl_f32_info
    : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info, v4f32x_info>;
def avx512vl_f64_info
    : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info, v2f64x_info>;
165
// Description of a mask (vXi1) type: its register class, its write-mask
// register class and its value type.
class X86KVectorVTInfo<RegisterClass _krc,
                       RegisterClass _krcwm,
                       ValueType _vt> {
  RegisterClass KRC = _krc;
  RegisterClass KRCWM = _krcwm;
  ValueType KVT = _vt;
}
172
// Mask type descriptions for every mask width from 1 to 64 bits.
def v1i1_info  : X86KVectorVTInfo<VK1,  VK1WM,  v1i1>;
def v2i1_info  : X86KVectorVTInfo<VK2,  VK2WM,  v2i1>;
def v4i1_info  : X86KVectorVTInfo<VK4,  VK4WM,  v4i1>;
def v8i1_info  : X86KVectorVTInfo<VK8,  VK8WM,  v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
180
// vselect wrapper used for matching masked operations.  The predicate defers
// to isProfitableToFormMaskedOp(N), which ensures the operation part only has
// a single use.
def vselect_mask
    : PatFrag<(ops node:$mask, node:$src1, node:$src2),
              (vselect node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;
187
// X86selects wrapper used for matching masked scalar operations.  It applies
// the isProfitableToFormMaskedOp(N) predicate to the select node.
def X86selects_mask
    : PatFrag<(ops node:$mask, node:$src1, node:$src2),
              (X86selects node:$mask, node:$src1, node:$src2), [{
  return isProfitableToFormMaskedOp(N);
}]>;
192
// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.
//
// Three records are emitted:
//   NAME    - unmasked form, using Ins and Pattern.
//   NAME#k  - masked form (EVEX_K), using MaskingIns and MaskingPattern; the
//             asm string decorates $dst with {${mask}}.
//   NAME#kz - zero-masked form (EVEX_KZ), using ZeroMaskingIns and
//             ZeroMaskingPattern; the asm string adds {z}.
//
// MaskingConstraint is applied only to NAME#k.  ClobberConstraint is applied
// to all three forms.  IsCommutable, IsKCommutable and IsKZCommutable set
// isCommutable on NAME, NAME#k and NAME#kz respectively; IsKZCommutable
// defaults to IsCommutable.
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = ""> {
  // Unmasked form.
  let isCommutable = IsCommutable, Constraints = ClobberConstraint in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Masked form.
  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      // The constraint string is whichever of ClobberConstraint and
      // MaskingConstraint is non-empty, or both joined by ", " when both are
      // non-empty.
      string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
                               !if(!eq(MaskingConstraint, ""), ClobberConstraint,
                                   !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
    }

  // Zero-masked form.
  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is OK for IsKZCommutable to default to IsCommutable rather than to
  // IsKCommutable.
  let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
      Constraints = ClobberConstraint in
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}
239

// Shared base of AVX512_maskable and AVX512_maskable_3src.
// RHS and MaskingRHS are each wrapped into a one-element pattern list that
// sets _.RC:$dst.  The zero-masking pattern is derived here by applying
// Select to $mask, RHS and an all-zeros vector.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, dag MaskingIns,
                                  dag ZeroMaskingIns, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDPatternOperator Select = vselect_mask,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0, bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable,
                                  string ClobberConstraint = "">
    : AVX512_maskable_custom<
          O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr, AttSrcAsm,
          IntelSrcAsm,
          // Unmasked pattern.
          [(set _.RC:$dst, RHS)],
          // Masked pattern, supplied in full by the caller.
          [(set _.RC:$dst, MaskingRHS)],
          // Zero-masked pattern: inactive elements come from an all-zeros
          // vector.
          [(set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
          MaskingConstraint, IsCommutable, IsKCommutable, IsKZCommutable,
          ClobberConstraint>;
262
// Generates the unmasked, masked and zero-masked variants of a vector
// instruction.  In the masked variant the preserved elements come from an
// extra dummy input operand ($src0) that is tied to $dst.
// Unlike AVX512_maskable, the unmasked pattern (RHS) and the pattern placed
// under the mask select (MaskRHS) are given as separate dags.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                                 dag Outs, dag Ins, string OpcodeStr,
                                 string AttSrcAsm, string IntelSrcAsm,
                                 dag RHS, dag MaskRHS,
                                 string ClobberConstraint = "",
                                 bit IsCommutable = 0, bit IsKCommutable = 0,
                                 bit IsKZCommutable = IsCommutable>
    : AVX512_maskable_custom<
          O, F, Outs, Ins,
          // Masked inputs: pass-through source and mask ahead of Ins.
          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
          // Zero-masked inputs: only the mask ahead of Ins.
          !con((ins _.KRCWM:$mask), Ins),
          OpcodeStr, AttSrcAsm, IntelSrcAsm,
          [(set _.RC:$dst, RHS)],
          [(set _.RC:$dst,
                (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
          [(set _.RC:$dst,
                (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
          "$src0 = $dst", IsCommutable, IsKCommutable, IsKZCommutable,
          ClobberConstraint>;
285
// Generates the unmasked, masked and zero-masked variants of a vector
// instruction from a single RHS dag.  In the masked variant the preserved
// elements come from an extra dummy input operand ($src0) that is tied to
// $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm, dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDPatternOperator Select = vselect_mask,
                           string ClobberConstraint = "">
    : AVX512_maskable_common<
          O, F, _, Outs, Ins,
          // Masked inputs: pass-through source and mask ahead of Ins.
          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
          // Zero-masked inputs: only the mask ahead of Ins.
          !con((ins _.KRCWM:$mask), Ins),
          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
          // Masked RHS: inactive elements are taken from $src0.
          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
          Select, "$src0 = $dst", IsCommutable, IsKCommutable, IsKZCommutable,
          ClobberConstraint>;
304
// Generates the unmasked, masked and zero-masked variants of a scalar
// instruction.  Identical to AVX512_maskable except that all three
// commutable flags are 0 and the select operator is X86selects_mask.
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS>
    : AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                      RHS, 0, 0, 0, X86selects_mask>;
313
// Like AVX512_maskable, but for instructions where one source operand
// ($src1) is already tied to $dst, so that operand supplies the preserved
// vector elements and no extra $src0 is added.
// NOTE: NonTiedIns (the ins dag) must not contain $src1; it is prepended
// here.  When MaskOnly is set the unmasked pattern becomes null_frag.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm, dag RHS,
                                bit IsCommutable = 0, bit IsKCommutable = 0,
                                SDPatternOperator Select = vselect_mask,
                                bit MaskOnly = 0>
    : AVX512_maskable_common<
          O, F, _, Outs,
          !con((ins _.RC:$src1), NonTiedIns),
          // The masked and zero-masked forms take the same inputs.
          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
          OpcodeStr, AttSrcAsm, IntelSrcAsm,
          !if(MaskOnly, (null_frag), RHS),
          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
          Select, "", IsCommutable, IsKCommutable>;
334
// Like AVX512_maskable_3src, but the tied operand's VT (InVT) differs from
// the output VT (OutVT).  The preserved vector therefore goes through a
// bitconvert before it enters the vselect.
// NOTE: The unmasked pattern is disabled (null_frag).
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F,
                                     X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns,
                                     string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0>
    : AVX512_maskable_common<
          O, F, OutVT, Outs,
          !con((ins InVT.RC:$src1), NonTiedIns),
          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
          (vselect_mask InVT.KRCWM:$mask, RHS, (bitconvert InVT.RC:$src1)),
          vselect_mask, "", IsCommutable>;
352
// Scalar flavour of AVX512_maskable_3src: forwards every argument unchanged
// and selects with X86selects_mask.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F,
                                       X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       dag RHS,
                                       bit IsCommutable = 0,
                                       bit IsKCommutable = 0,
                                       bit MaskOnly = 0>
    : AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                           IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                           X86selects_mask, MaskOnly>;
363
// Generates the unmasked, masked and zero-masked forms with a selection
// pattern on the unmasked form only; the masked and zero-masked forms get
// empty pattern lists.  The masked form ties $src0 to $dst.
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins, string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern>
    : AVX512_maskable_custom<
          O, F, Outs, Ins,
          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
          !con((ins _.KRCWM:$mask), Ins),
          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
          "$src0 = $dst">;
374
// Counterpart of AVX512_maskable_in_asm for instructions whose $src1 is
// supplied separately from NonTiedIns: $src1 is prepended to every input
// list, the masked and zero-masked forms get empty pattern lists, and no
// masking constraint is passed.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F,
                                       X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern>
    : AVX512_maskable_custom<
          O, F, Outs,
          !con((ins _.RC:$src1), NonTiedIns),
          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
          "">;
386
// Masked instructions whose result is written to a mask register, such as
// "compare" and "vptest".  Only an unmasked (NAME) and a masked (NAME#k,
// EVEX_K) form are emitted; there is no zero-masked form.  IsCommutable
// applies to both forms.
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                      dag Outs, dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      list<dag> Pattern,
                                      list<dag> MaskingPattern,
                                      bit IsCommutable = 0> {
  let isCommutable = IsCommutable in {
    // Unmasked form.
    def NAME : AVX512<O, F, Outs, Ins,
                      OpcodeStr # "\t{" # AttSrcAsm # ", $dst|" #
                          "$dst, " # IntelSrcAsm # "}",
                      Pattern>;

    // Masked form: $dst is decorated with {${mask}}.
    def NAME#k : AVX512<O, F, Outs, MaskingIns,
                        OpcodeStr # "\t{" # AttSrcAsm # ", $dst {${mask}}|" #
                            "$dst {${mask}}, " # IntelSrcAsm # "}",
                        MaskingPattern>,
                 EVEX_K;
  }
}
409
// Wraps RHS and MaskingRHS into one-element pattern lists that set the mask
// register result (_.KRC:$dst) and forwards to AVX512_maskable_custom_cmp.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F,
                                      X86VectorVTInfo _,
                                      dag Outs, dag Ins, dag MaskingIns,
                                      string OpcodeStr,
                                      string AttSrcAsm, string IntelSrcAsm,
                                      dag RHS, dag MaskingRHS,
                                      bit IsCommutable = 0>
    : AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                                 AttSrcAsm, IntelSrcAsm,
                                 [(set _.KRC:$dst, RHS)],
                                 [(set _.KRC:$dst, MaskingRHS)],
                                 IsCommutable>;
421
// Mask-producing instruction with an automatically built masked form: the
// mask operand is prepended to Ins and the masked pattern is the `and` of
// $mask with RHS_su.  RHS is used for the unmasked pattern.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag RHS_su, bit IsCommutable = 0>
    : AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                                 !con((ins _.KRCWM:$mask), Ins),
                                 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                                 (and _.KRCWM:$mask, RHS_su),
                                 IsCommutable>;
430
// Used by conversion instructions.  The caller supplies separate input dags
// and separate RHS dags for the unmasked, masked and zero-masked forms; each
// RHS becomes a one-element pattern list that sets _.RC:$dst.  The masked
// form ties $src0 to $dst.
multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag Ins, dag MaskingIns,
                               dag ZeroMaskingIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS, dag ZeroMaskingRHS>
    : AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns,
                             OpcodeStr, AttSrcAsm, IntelSrcAsm,
                             [(set _.RC:$dst, RHS)],
                             [(set _.RC:$dst, MaskingRHS)],
                             [(set _.RC:$dst, ZeroMaskingRHS)],
                             "$src0 = $dst">;
444
// Three-source form with distinct unmasked (RHS) and masked (MaskingRHS)
// dags.  $src1 is prepended to NonTiedIns for every form.  The masked
// pattern keeps $src1 in inactive elements and the zero-masked pattern uses
// an all-zeros vector there.  No masking constraint is passed.
multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
                               dag Outs, dag NonTiedIns, string OpcodeStr,
                               string AttSrcAsm, string IntelSrcAsm,
                               dag RHS, dag MaskingRHS,
                               bit IsCommutable, bit IsKCommutable>
    : AVX512_maskable_custom<
          O, F, Outs,
          !con((ins _.RC:$src1), NonTiedIns),
          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
          OpcodeStr, AttSrcAsm, IntelSrcAsm,
          [(set _.RC:$dst, RHS)],
          [(set _.RC:$dst,
                (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
          [(set _.RC:$dst,
                (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
          "", IsCommutable, IsKCommutable>;
461
// Pseudo instructions that materialize an all-zeros or all-ones 512-bit
// vector.  The zero alias maps to pxor / xorp* for AVX-512: it is expanded by
// ExpandPostRAPseudos to an xorps / vxorps, and then swizzled by
// ExecutionDomainFix to pxor.
// canFoldAsLoad is set because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let Predicates = [HasAVX512], SchedRW = [WriteZero], isPseudo = 1,
    isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in {
  def AVX512_512_SET0
      : I<0, Pseudo, (outs VR512:$dst), (ins), "",
          [(set VR512:$dst, (v16i32 immAllZerosV))]>;
  def AVX512_512_SETALLONES
      : I<0, Pseudo, (outs VR512:$dst), (ins), "",
          [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
474
// Select AVX512_512_SET0 for all-zeros vectors of these 512-bit types.
let Predicates = [HasAVX512] in {
  foreach ZeroVT = [v64i8, v32i16, v8i64, v16f32, v8f64] in
    def : Pat<(ZeroVT immAllZerosV), (AVX512_512_SET0)>;
}
482
// Pseudo instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all-ones and all-zeros elements.  They are modelled as pseudos to
// force the same register to be used as input for all three sources.
let Predicates = [HasAVX512], SchedRW = [WriteVecALU], isPseudo = 1 in {
  // 32-bit elements, 16-bit mask.
  def AVX512_512_SEXT_MASK_32
      : I<0, Pseudo, (outs VR512:$dst), (ins VK16WM:$mask), "",
          [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                     (v16i32 immAllOnesV),
                                     (v16i32 immAllZerosV)))]>;
  // 64-bit elements, 8-bit mask.
  def AVX512_512_SEXT_MASK_64
      : I<0, Pseudo, (outs VR512:$dst), (ins VK8WM:$mask), "",
          [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                     (v8i64 immAllOnesV),
                                     (v8i64 immAllZerosV)))]>;
}
498
// Pseudo instructions that materialize an all-zeros 128-bit or 256-bit
// vector in the extended (VR128X / VR256X) register classes.
let Predicates = [HasAVX512], SchedRW = [WriteZero], isPseudo = 1,
    isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in {
  def AVX512_128_SET0
      : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
          [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
  def AVX512_256_SET0
      : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
          [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
506
// Select AVX512_128_SET0 / AVX512_256_SET0 for all-zeros vectors of these
// 128-bit and 256-bit types.
let Predicates = [HasAVX512] in {
  foreach ZeroVT128 = [v8i16, v16i8, v2i64, v4f32, v2f64] in
    def : Pat<(ZeroVT128 immAllZerosV), (AVX512_128_SET0)>;
  foreach ZeroVT256 = [v32i8, v16i16, v4i64, v8f32, v4f64] in
    def : Pat<(ZeroVT256 immAllZerosV), (AVX512_256_SET0)>;
}
519
// All-zeros f16 vectors reuse the SET0 pseudo of the matching width.  These
// patterns are guarded by HasFP16.
let Predicates = [HasFP16] in {
  def : Pat<(v8f16 immAllZerosV),  (AVX512_128_SET0)>;
  def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
  def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
}
525
// Pseudo instructions producing a floating-point zero in FR32X, FR64X or
// VR128X (fp128).  These alias instructions map fld0 to xorps for sse or
// vxorps for avx and are expanded by ExpandPostRAPseudos.
let Predicates = [HasAVX512], SchedRW = [WriteZero], isPseudo = 1,
    isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in {
  def AVX512_FsFLD0SS
      : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
          [(set FR32X:$dst, fp32imm0)]>;
  def AVX512_FsFLD0SD
      : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
          [(set FR64X:$dst, fp64imm0)]>;
  def AVX512_FsFLD0F128
      : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
          [(set VR128X:$dst, fp128imm0)]>;
}
537
// Half-precision floating-point zero in FR16X, guarded by HasFP16.
let Predicates = [HasFP16], SchedRW = [WriteZero], isPseudo = 1,
    isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in {
  def AVX512_FsFLD0SH
      : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
          [(set FR16X:$dst, fp16imm0)]>;
}
543
544//===----------------------------------------------------------------------===//
545// AVX-512 - VECTOR INSERT
546//
547
// Emits the register (rr) and memory (rm) forms of a subvector insert of a
// From-typed value into a To-typed vector.  Two pattern operators are
// supported, one for the unmasked op and one for the masked ops; null_frag
// may be passed for either.
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  // Mnemonic: "vinsert" # element type name # "x" # element count of From.
  defvar Mnemonic = "vinsert" # From.EltTypeName # "x" # From.NumElts;

  let ExeDomain = To.ExeDomain, hasSideEffects = 0 in {
    // Register source.
    defm rr : AVX512_maskable_split<
                  Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                  (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                  Mnemonic,
                  "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                        (From.VT From.RC:$src2),
                                        (iPTR imm)),
                  (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                          (From.VT From.RC:$src2),
                                          (iPTR imm))>,
              AVX512AIi8Base, EVEX_4V, Sched<[sched]>;

    // Memory source: the inserted subvector is loaded with From.LdFrag.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<
                  Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                  (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                  Mnemonic,
                  "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                        (From.VT (From.LdFrag addr:$src2)),
                                        (iPTR imm)),
                  (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                          (From.VT (From.LdFrag addr:$src2)),
                                          (iPTR imm))>,
              AVX512AIi8Base, EVEX_4V,
              EVEX_CD8<From.EltSize, From.CD8TupleForm>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
582
// Convenience wrapper around vinsert_for_size_split that uses one pattern
// operator for both the masked and the unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched>
    : vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert,
                             sched>;
589
// Selection-only patterns that map vinsert_insert on the (From, To) type pair
// onto the existing instructions InstrStr#"rr" (register source) and
// InstrStr#"rm" (source loaded through From.LdFrag). The immediate operand is
// produced by applying INSERT_get_vinsert_imm to the matched $ins node. Both
// patterns are guarded by the predicate list p.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    // Subvector comes from a register.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT From.RC:$src2),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rr")
                         To.RC:$src1, From.RC:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Subvector comes from memory.
    def : Pat<(vinsert_insert:$ins (To.VT To.RC:$src1),
                                   (From.VT (From.LdFrag addr:$src2)),
                                   (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rm")
                         To.RC:$src1, addr:$src2,
                         (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
609
// Instantiates every subvector-insert instruction family for one element
// kind. NAME is the prefix supplied by the defm (see the VINSERTF/VINSERTI
// instantiations); each family appends its own suffix to it.
//
//   EltVT32   - 32-bit element type used for the 32x4 and 32x8 forms.
//   Opcode128 - opcode used by every form that inserts a VR128X subvector.
//   EltVT64   - 64-bit element type used for the 64x2 and 64x4 forms.
//   Opcode256 - opcode used by every form that inserts a VR256X subvector.
//   sched     - scheduling class forwarded to every form.
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  // 4 x 32-bit into a 256-bit vector; requires VLX.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  // 4 x 32-bit into a 512-bit vector. No predicate is set here.
  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  // 4 x 64-bit into a 512-bit vector. No predicate is set here.
  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  // null_frag is passed as the unmasked operator, so only the masked forms
  // carry a selection pattern.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  // As above, the unmasked operator is null_frag.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}
653
// Floating-point (VINSERTF*) and integer (VINSERTI*) insert families.
// Template arguments are, in order: the 32-bit element type, the opcode of
// the 128-bit-subvector forms, the 64-bit element type, the opcode of the
// 256-bit-subvector forms, and the scheduling class.
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
657
// Unmasked selection patterns for vector types other than the one each
// instruction was defined with.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit elements, VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v2f64x_info, v4f64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v2i64x_info, v4i64x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;

// 64-bit elements, VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v2f64x_info, v8f64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v2i64x_info, v8i64_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;

// 32-bit elements, VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v8f32x_info, v16f32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v8i32x_info, v16i32_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;

// 16-bit, 8-bit and f16 elements, VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v8i16x_info, v16i16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256",
                                 v16i8x_info, v32i8x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256",
                                 v8f16x_info, v16f16x_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasFP16, HasVLX]>;

// 16-bit, 8-bit and f16 elements, VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v8i16x_info, v32i16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z",
                                 v16i8x_info, v64i8_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z",
                                 v8f16x_info, v32f16_info,
                                 vinsert128_insert, INSERT_get_vinsert128_imm,
                                 [HasFP16]>;

// 16-bit, 8-bit and f16 elements, VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v16i16x_info, v32i16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z",
                                 v32i8x_info, v64i8_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z",
                                 v16f16x_info, v32f16_info,
                                 vinsert256_insert, INSERT_get_vinsert256_imm,
                                 [HasFP16]>;
696
697
// Masked selection patterns for an insert whose result is bitcast before it
// reaches the vselect_mask.
//
//   InstrStr               - name prefix of the instruction; the patterns
//                            select its "rrk", "rmk", "rrkz" and "rmkz" forms.
//   From / To              - type info of the inserted subvector and of the
//                            vector being inserted into.
//   Cast                   - type info of the vselect_mask result; supplies
//                            the mask class, the passthrough register class
//                            and the all-zeros vector.
//   vinsert_insert         - the insert pattern fragment to match.
//   INSERT_get_vinsert_imm - transform applied to the matched $ins node to
//                            produce the immediate operand.
//   p                      - predicates guarding all four patterns.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masking ($src0 is the passthrough), register subvector.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking, loaded subvector. The load is matched directly, exactly as
  // in the zero-masking load pattern below; no bitconvert is wrapped around
  // From.LdFrag.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masking, register subvector.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT From.RC:$src2),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking, loaded subvector.
  def : Pat<(Cast.VT
             (vselect_mask Cast.KRCWM:$mask,
                           (bitconvert
                            (vinsert_insert:$ins (To.VT To.RC:$src1),
                                                 (From.VT (From.LdFrag addr:$src2)),
                                                 (iPTR imm))),
                           Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
749
// Masked inserts of VEC128 into VEC256 where the insert and the mask use
// different element widths.

// Floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

// Integer, masked per 32-bit element.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
// Integer, masked per 64-bit element. These select the integer
// VINSERTI64x2Z256 instruction, matching the 512-bit patterns, which use
// VINSERTI64x2Z for the same integer types.
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
775
// Masked inserts into VEC512 where the insert and the mask use different
// element widths.

// VEC128 into VEC512, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x4Z",
                             v2f64x_info, v8f64_info, v16f32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z",
                             v4f32x_info, v16f32_info, v8f64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// VEC128 into VEC512, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v2i64x_info, v8i64_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v8i16x_info, v32i16_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z",
                             v16i8x_info, v64i8_info, v16i32_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v4i32x_info, v16i32_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v8i16x_info, v32i16_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z",
                             v16i8x_info, v64i8_info, v8i64_info,
                             vinsert128_insert, INSERT_get_vinsert128_imm,
                             [HasDQI]>;

// VEC256 into VEC512, floating point.
defm : vinsert_for_mask_cast<"VINSERTF32x8Z",
                             v4f64x_info, v8f64_info, v16f32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z",
                             v8f32x_info, v16f32_info, v8f64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;

// VEC256 into VEC512, integer.
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v4i64x_info, v8i64_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v16i16x_info, v32i16_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z",
                             v32i8x_info, v64i8_info, v16i32_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v8i32x_info, v16i32_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v16i16x_info, v32i16_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z",
                             v32i8x_info, v64i8_info, v8i64_info,
                             vinsert256_insert, INSERT_get_vinsert256_imm,
                             [HasAVX512]>;
827
// vinsertps - insert f32 to XMM
// Both forms use opcode 0x21, operate on VR128X and select on X86insertps
// with a target-constant immediate (timm) as the third operand.
let ExeDomain = SSEPackedSingle in {
// Register form. The brace-less `let` applies isCommutable to this def only.
let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form. The second source is a scalar f32 load that the pattern wraps
// in scalar_to_vector to form the v4f32 operand.
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          timm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
845
846//===----------------------------------------------------------------------===//
847// AVX-512 VECTOR EXTRACT
848//---
849
// Defines the register ("rr" plus the masked variants produced by
// AVX512_maskable_split), store ("mr") and masked-store ("mrk") forms of one
// subvector-extract instruction that pulls a To subvector out of a From
// vector.
//
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
//
//   Opcode            - opcode shared by all forms.
//   From / To         - type info of the source vector and of the result.
//   vextract_extract  - operator for the unmasked "rr" and "mr" patterns.
//   vextract_for_mask - operator for the masked register patterns.
//   SchedRR / SchedMR - scheduling classes of the register and store forms.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Both store forms set mayStore explicitly. "mrk" has no pattern at all,
    // and "mr" must not depend on its pattern for the flag because
    // vextract_extract may be null_frag, which is expected to leave the def
    // with no usable pattern to derive it from.
    let mayStore = 1 in {
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
    }
  }
}
886
// Convenience wrapper around vextract_for_size_split for the case in which the
// masked and unmasked forms select on the same pattern operator:
// vextract_extract is forwarded for both operator arguments.
multiclass vextract_for_size<int Opcode,
                             X86VectorVTInfo From, X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR>
    : vextract_for_size_split<Opcode, From, To,
                              vextract_extract, vextract_extract,
                              SchedRR, SchedMR>;
893
// Selection-only patterns for the alternative types: vextract_extract on the
// (From, To) type pair is mapped onto the existing instructions InstrStr#"rr"
// (result in a register) and InstrStr#"mr" (result stored to addr:$dst). The
// immediate operand is produced by applying EXTRACT_get_vextract_imm to the
// matched $ext node. Both patterns are guarded by the predicate list p.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    // Extract to a register.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr # "rr")
                         From.RC:$src1,
                         (EXTRACT_get_vextract_imm To.RC:$ext)))>;

    // Extract folded into a store.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                                                   (iPTR imm))),
                     addr:$dst),
              (!cast<Instruction>(InstrStr # "mr")
                  addr:$dst, From.RC:$src1,
                  (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
909
// Instantiates every subvector-extract instruction family for one element
// kind. NAME is the prefix supplied by the defm (see the VEXTRACTF/VEXTRACTI
// instantiations); each family appends its own suffix to it.
//
//   EltVT32   - 32-bit element type used for the 32x4 and 32x8 forms.
//   Opcode128 - opcode used by every form that extracts a VR128X subvector.
//   EltVT64   - 64-bit element type used for the 64x2 and 64x4 forms.
//   Opcode256 - opcode used by every form that extracts a VR256X subvector.
//   SchedRR   - scheduling class of the register forms.
//   SchedMR   - scheduling class of the store forms.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  // Extracts from a 512-bit vector available with base AVX512.
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  // 4 x 32-bit out of a 256-bit vector; requires VLX.
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  // null_frag is passed as the unmasked operator, so only the masked register
  // forms carry a selection pattern.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  // As above, the unmasked operator is null_frag.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
954
// Floating-point (VEXTRACTF*) and integer (VEXTRACTI*) extract families.
// Template arguments are, in order: the 32-bit element type, the opcode of
// the 128-bit-subvector forms, the 64-bit element type, the opcode of the
// 256-bit-subvector forms, and the register-form and store-form scheduling
// classes.
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
958
// extract_subvector selection patterns for vector types other than the one
// each instruction was defined with.
// Even with AVX512DQ we'll still use these for unmasked operations.

// 64-bit elements, VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v8f64_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v8i64_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;

// 32-bit elements, VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v16f32_info, v8f32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v16i32_info, v8i32x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;

// 64-bit elements, VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v4f64x_info, v2f64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v4i64x_info, v2i64x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;

// 16-bit, 8-bit and f16 elements, VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v16i16x_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256",
                                  v32i8x_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256",
                                  v16f16x_info, v8f16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm,
                                  [HasFP16, HasVLX]>;

// 16-bit, 8-bit and f16 elements, VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v32i16_info, v8i16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z",
                                  v64i8_info, v16i8x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z",
                                  v32f16_info, v8f16x_info,
                                  vextract128_extract,
                                  EXTRACT_get_vextract128_imm, [HasFP16]>;

// 16-bit, 8-bit and f16 elements, VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v32i16_info, v16i16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z",
                                  v64i8_info, v32i8x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z",
                                  v32f16_info, v16f16x_info,
                                  vextract256_extract,
                                  EXTRACT_get_vextract256_imm, [HasFP16]>;
998
999
// Without VLX: a 128-bit extract from bits [255:128] of a 512-bit vector is
// done as a 128-bit extract (index 1) from the low 256 bits, taken via the
// sub_ymm subregister, using VEXTRACTI128rr / VEXTRACTF128rr.
let Predicates = [NoVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI128rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF128rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI128rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF128rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 16-bit and 8-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI128rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI128rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
1028
// With VLX: a 128-bit extract from bits [255:128] of a 512-bit vector should
// use the smaller 256-bit-source extract (index 1 of the sub_ymm subregister)
// to enable EVEX->VEX.
let Predicates = [HasVLX] in {
  // 64-bit elements.
  def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
            (v2i64 (VEXTRACTI32x4Z256rr
                       (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
            (v2f64 (VEXTRACTF32x4Z256rr
                       (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 32-bit elements.
  def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
            (v4i32 (VEXTRACTI32x4Z256rr
                       (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
            (v4f32 (VEXTRACTF32x4Z256rr
                       (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;

  // 16-bit and 8-bit elements.
  def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
            (v8i16 (VEXTRACTI32x4Z256rr
                       (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
  def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
            (v16i8 (VEXTRACTI32x4Z256rr
                       (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
1057
// f16 counterpart of the VLX patterns: elements [8..15] of a v32f16 are taken
// as the upper half (index 1) of the sub_ymm subregister.
let Predicates = [HasFP16, HasVLX] in {
  def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
            (v8f16 (VEXTRACTF32x4Z256rr
                       (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
                       (iPTR 1)))>;
}
1063
1064
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
//
//   InstrStr                 - name prefix of the instruction; the patterns
//                              select its "rrk" and "rrkz" forms.
//   From / To                - type info of the source vector and of the
//                              extracted subvector.
//   Cast                     - type info of the vselect_mask result; supplies
//                              the mask class, the passthrough register class
//                              and the all-zeros vector.
//   vextract_extract         - the extract pattern fragment to match.
//   EXTRACT_get_vextract_imm - transform applied to the matched $ext node to
//                              produce the immediate operand.
//   p                        - predicates guarding both patterns.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking. The passthrough $src0 has the vselect's (Cast) type, so it
  // is named through Cast.RC on both sides of the pattern.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking.
  def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
                                   (bitconvert
                                    (To.VT (vextract_extract:$ext
                                            (From.VT From.RC:$src), (iPTR imm)))),
                                   Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1092
// Masked extracts where the extract and the mask use different element
// widths.

// VEC128 from VEC256, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256",
                              v4f64x_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256",
                              v8f32x_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;

// VEC128 from VEC256, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v4i64x_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v16i16x_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256",
                              v32i8x_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v8i32x_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v16i16x_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256",
                              v32i8x_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI, HasVLX]>;

// VEC128 from VEC512, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z",
                              v8f64_info, v2f64x_info, v4f32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z",
                              v16f32_info, v4f32x_info, v2f64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;

// VEC128 from VEC512, integer.
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v8i64_info, v2i64x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v32i16_info, v8i16x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z",
                              v64i8_info, v16i8x_info, v4i32x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v16i32_info, v4i32x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v32i16_info, v8i16x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z",
                              v64i8_info, v16i8x_info, v2i64x_info,
                              vextract128_extract, EXTRACT_get_vextract128_imm,
                              [HasDQI]>;

// VEC256 from VEC512, floating point.
defm : vextract_for_mask_cast<"VEXTRACTF32x8Z",
                              v8f64_info, v4f64x_info, v8f32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z",
                              v16f32_info, v8f32x_info, v4f64x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasAVX512]>;

// VEC256 from VEC512, integer, masked per 32-bit element.
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v8i64_info, v4i64x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v32i16_info, v16i16x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z",
                              v64i8_info, v32i8x_info, v8i32x_info,
                              vextract256_extract, EXTRACT_get_vextract256_imm,
                              [HasDQI]>;
1160                              EXTRACT_get_vextract256_imm, [HasDQI]>;
1161defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1162                              v4i64x_info, vextract256_extract,
1163                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1164defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1165                              v4i64x_info, vextract256_extract,
1166                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1167defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1168                              v4i64x_info, vextract256_extract,
1169                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1170
1171// vextractps - extract 32 bits from XMM
// Register-destination form (MRMDestReg, opcode 0x17). The selection pattern
// views the v4f32 source $src1 as v4i32 (bc_v4i32) and extracts the element
// selected by the immediate $src2 into a GR32orGR64 register.
1172def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
1173      (ins VR128X:$src1, u8imm:$src2),
1174      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1175      [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1176      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1177
// Memory-destination form of vextractps (MRMDestMem, opcode 0x17). It has no
// register outputs; the pattern stores the element extracted from the v4i32
// view of $src1 (index given by the immediate $src2) to addr:$dst. The record
// carries EVEX_CD8<32, CD8VT1>.
1178def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1179      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1180      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1181      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1182                          addr:$dst)]>,
1183      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1184
1185//===---------------------------------------------------------------------===//
1186// AVX-512 BROADCAST
1187//---
1188// broadcast with a scalar argument.
// Emits three anonymous selection patterns for X86VBroadcast of a scalar held
// in SrcInfo.FRC. In every pattern the scalar is first moved into SrcInfo.RC
// with COPY_TO_REGCLASS and then fed to an instruction looked up by name:
//   Name # DestInfo.ZSuffix # "rr"   - unmasked broadcast
//   Name # DestInfo.ZSuffix # "rrk"  - vselect_mask with $src0 as the other arm
//   Name # DestInfo.ZSuffix # "rrkz" - vselect_mask with an all-zeros other arm
// Parameters:
//   Name     - instruction name prefix used for the !cast lookups.
//   DestInfo - info record of the broadcast result (VT, RC, KRCWM, ZSuffix).
//   SrcInfo  - info record of the scalar source (FRC, RC, VT).
1189multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
1190                                   X86VectorVTInfo SrcInfo> {
  // Unmasked form.
1191  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1192            (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1193             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Masked form that keeps $src0 where the mask is clear.
1194  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1195                                       (X86VBroadcast SrcInfo.FRC:$src),
1196                                       DestInfo.RC:$src0)),
1197            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1198             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1199             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Masked form that selects zeros where the mask is clear.
1200  def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1201                                       (X86VBroadcast SrcInfo.FRC:$src),
1202                                       DestInfo.ImmAllZerosV)),
1203            (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1204             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1205}
1206
1207// Split version to allow mask and broadcast node to be different types. This
1208// helps support the 32x2 broadcasts.
//
// Defines six instruction records: rr, rrkz, rrk (register source) and
// rm, rmkz, rmk (memory source).
// Parameters:
//   opc, OpcodeStr - opcode byte and mnemonic shared by all six records.
//   SchedRR        - scheduling class attached to the register-source records.
//   SchedRM        - scheduling class attached to the memory-source records.
//   MaskInfo       - supplies the result register class, the write-mask
//                    class (KRCWM) and the VT at which the select is done.
//   DestInfo       - supplies the VT of the broadcast node (bitconverted to
//                    MaskInfo.VT) and the ExeDomain of every record.
//   SrcInfo        - supplies the source register class / VT, the scalar
//                    memory operand, BroadcastLdFrag and EltSize for EVEX_CD8.
//   IsConvertibleToThreeAddress - applied to the rmk record only.
//   UnmaskedOp     - operator used in the unmasked rr pattern only.
//   UnmaskedBcastOp - operator used in the unmasked rm pattern only.
// The masked records always use X86VBroadcast / SrcInfo.BroadcastLdFrag,
// regardless of UnmaskedOp / UnmaskedBcastOp.
1209multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1210                                     SchedWrite SchedRR, SchedWrite SchedRM,
1211                                     X86VectorVTInfo MaskInfo,
1212                                     X86VectorVTInfo DestInfo,
1213                                     X86VectorVTInfo SrcInfo,
1214                                     bit IsConvertibleToThreeAddress,
1215                                     SDPatternOperator UnmaskedOp = X86VBroadcast,
1216                                     SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
  // Unmasked register form. hasSideEffects is set explicitly here.
  // NOTE(review): presumably because UnmaskedOp may be null_frag, leaving no
  // pattern to infer the flag from - confirm.
1217  let hasSideEffects = 0 in
1218  def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1219                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1220                    [(set MaskInfo.RC:$dst,
1221                      (MaskInfo.VT
1222                       (bitconvert
1223                        (DestInfo.VT
1224                         (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1225                    DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
  // Zero-masked register form: the other select arm is MaskInfo.ImmAllZerosV.
1226  def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1227                      (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1228                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1229                       "${dst} {${mask}} {z}, $src}"),
1230                       [(set MaskInfo.RC:$dst,
1231                         (vselect_mask MaskInfo.KRCWM:$mask,
1232                          (MaskInfo.VT
1233                           (bitconvert
1234                            (DestInfo.VT
1235                             (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1236                          MaskInfo.ImmAllZerosV))],
1237                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
  // Merge-masked register form: $src0 is the other select arm and is tied to
  // $dst by the constraint.
1238  let Constraints = "$src0 = $dst" in
1239  def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1240                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1241                          SrcInfo.RC:$src),
1242                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1243                     "${dst} {${mask}}, $src}"),
1244                     [(set MaskInfo.RC:$dst,
1245                       (vselect_mask MaskInfo.KRCWM:$mask,
1246                        (MaskInfo.VT
1247                         (bitconvert
1248                          (DestInfo.VT
1249                           (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1250                        MaskInfo.RC:$src0))],
1251                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1252
  // Unmasked memory form; hasSideEffects and mayLoad are set explicitly.
1253  let hasSideEffects = 0, mayLoad = 1 in
1254  def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1255                    (ins SrcInfo.ScalarMemOp:$src),
1256                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1257                    [(set MaskInfo.RC:$dst,
1258                      (MaskInfo.VT
1259                       (bitconvert
1260                        (DestInfo.VT
1261                         (UnmaskedBcastOp addr:$src)))))],
1262                    DestInfo.ExeDomain>, T8PD, EVEX,
1263                    EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1264
  // Zero-masked memory form.
1265  def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1266                      (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1267                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1268                       "${dst} {${mask}} {z}, $src}"),
1269                       [(set MaskInfo.RC:$dst,
1270                         (vselect_mask MaskInfo.KRCWM:$mask,
1271                          (MaskInfo.VT
1272                           (bitconvert
1273                            (DestInfo.VT
1274                             (SrcInfo.BroadcastLdFrag addr:$src)))),
1275                          MaskInfo.ImmAllZerosV))],
1276                       DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1277                       EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1278
  // Merge-masked memory form; the only record that receives the
  // IsConvertibleToThreeAddress template argument.
1279  let Constraints = "$src0 = $dst",
1280      isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1281  def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1282                     (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1283                          SrcInfo.ScalarMemOp:$src),
1284                     !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1285                     "${dst} {${mask}}, $src}"),
1286                     [(set MaskInfo.RC:$dst,
1287                       (vselect_mask MaskInfo.KRCWM:$mask,
1288                        (MaskInfo.VT
1289                         (bitconvert
1290                          (DestInfo.VT
1291                           (SrcInfo.BroadcastLdFrag addr:$src)))),
1292                        MaskInfo.RC:$src0))],
1293                      DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1294                      EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1295}
1296
1297// Helper class to force mask and broadcast result to same type.
// It forwards to avx512_broadcast_rm_split, passing DestInfo for both the
// MaskInfo and DestInfo parameters and leaving the two unmasked-operator
// parameters at their defaults.
1298multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1299                               SchedWrite SchedRR, SchedWrite SchedRM,
1300                               X86VectorVTInfo DestInfo,
1301                               X86VectorVTInfo SrcInfo,
1302                               bit IsConvertibleToThreeAddress> :
1303  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1304                            DestInfo, DestInfo, SrcInfo,
1305                            IsConvertibleToThreeAddress>;
1306
// Instantiates the Z (512-bit, under HasAVX512) and Z256 (256-bit, under
// HasVLX) variants of a floating-point scalar broadcast. No Z128 variant is
// defined by this multiclass. Each variant combines avx512_broadcast_rm
// (instruction records, with _.info128 as the source info and
// IsConvertibleToThreeAddress = 1) with avx512_broadcast_scalar (patterns
// for a scalar-register source).
1307multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1308                                  AVX512VLVectorVTInfo _> {
1309  let Predicates = [HasAVX512] in {
1310    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1311                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1312              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1313              EVEX_V512;
1314  }
1315
1316  let Predicates = [HasVLX] in {
1317    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1318                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1319                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1320                 EVEX_V256;
1321  }
1322}
1323
// Instantiates the Z (under HasAVX512), Z256 and Z128 (both under HasVLX)
// variants of a floating-point scalar broadcast. Each variant combines
// avx512_broadcast_rm (with _.info128 as the source info and
// IsConvertibleToThreeAddress = 1) with avx512_broadcast_scalar.
// NOTE(review): the Z128 variant is given the same WriteFShuffle256 /
// WriteFShuffle256Ld scheduling classes as the wider variants - confirm this
// is intentional.
1324multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1325                                  AVX512VLVectorVTInfo _> {
1326  let Predicates = [HasAVX512] in {
1327    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1328                                  WriteFShuffle256Ld, _.info512, _.info128, 1>,
1329              avx512_broadcast_scalar<NAME, _.info512, _.info128>,
1330              EVEX_V512;
1331  }
1332
1333  let Predicates = [HasVLX] in {
1334    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1335                                     WriteFShuffle256Ld, _.info256, _.info128, 1>,
1336                 avx512_broadcast_scalar<NAME, _.info256, _.info128>,
1337                 EVEX_V256;
1338    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1339                                     WriteFShuffle256Ld, _.info128, _.info128, 1>,
1340                 avx512_broadcast_scalar<NAME, _.info128, _.info128>,
1341                 EVEX_V128;
1342  }
1343}
// Floating-point scalar broadcasts: vbroadcastss (opcode 0x18, f32 info) and
// vbroadcastsd (opcode 0x19, f64 info, with VEX_W1X).
1344defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1345                                       avx512vl_f32_info>;
1346defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1347                                       avx512vl_f64_info>, VEX_W1X;
1348
// Defines the "rr" family of a vpbroadcast from a general-purpose register
// through AVX512_maskable. The mnemonic is "vpbroadcast" # _.Suffix, the
// source operand is SrcRC:$src and the pattern is (_.VT (OpNode SrcRC:$src)).
// The three commutable flags are 0 and vselect is passed as the trailing
// AVX512_maskable argument.
1349multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1350                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1351                                    RegisterClass SrcRC> {
1352  // Fold with a mask even if it has multiple uses since it is cheap.
1353  let ExeDomain = _.ExeDomain in
1354  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1355                          (ins SrcRC:$src),
1356                          "vpbroadcast"#_.Suffix, "$src", "$src",
1357                          (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1358                          /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1359                          T8PD, EVEX, Sched<[SchedRR]>;
1360}
1361
// Variant of the register broadcast in which the instruction operand is
// always GR32 while the DAG source lives in SrcRC. The instruction records
// are created through AVX512_maskable_custom with three empty pattern lists,
// so selection is done by the separate Pat records below. Each Pat widens
// the SrcRC value to i32 by inserting it at Subreg into an IMPLICIT_DEF.
// Parameters:
//   Name   - full instruction name prefix used for the !cast lookups
//            (Name#rr, Name#rrk, Name#rrkz).
//   SrcRC  - register class of the value being broadcast in the DAG.
//   Subreg - sub-register index at which SrcRC is inserted into the i32.
1362multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1363                                    X86VectorVTInfo _, SDPatternOperator OpNode,
1364                                    RegisterClass SrcRC, SubRegIndex Subreg> {
1365  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1366  defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1367                         (outs _.RC:$dst), (ins GR32:$src),
1368                         !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1369                         !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1370                         "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1371                         "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1372
  // Unmasked broadcast.
1373  def : Pat <(_.VT (OpNode SrcRC:$src)),
1374             (!cast<Instruction>(Name#rr)
1375              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1376
1377  // Fold with a mask even if it has multiple uses since it is cheap.
1378  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1379             (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1380              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1381
  // Same fold with an all-zeros other arm, selecting the rrkz record.
1382  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1383             (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1384              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1385}
1386
// Instantiates avx512_int_broadcastbw_reg for the three vector lengths:
// Z under [prd], Z256 and Z128 under [prd, HasVLX]. The Name argument is
// extended with the matching "Z" / "Z256" / "Z128" suffix. Z and Z256 use
// WriteShuffle256; Z128 uses WriteShuffle.
1387multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1388                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1389                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1390  let Predicates = [prd] in
1391    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1392              OpNode, SrcRC, Subreg>, EVEX_V512;
1393  let Predicates = [prd, HasVLX] in {
1394    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1395              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1396    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1397              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1398  }
1399}
1400
// Instantiates avx512_int_broadcast_reg for the three vector lengths:
// Z under [prd], Z256 and Z128 under [prd, HasVLX]. Z and Z256 use
// WriteShuffle256; Z128 uses WriteShuffle.
1401multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1402                                       SDPatternOperator OpNode,
1403                                       RegisterClass SrcRC, Predicate prd> {
1404  let Predicates = [prd] in
1405    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1406                                      SrcRC>, EVEX_V512;
1407  let Predicates = [prd, HasVLX] in {
1408    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1409                                         SrcRC>, EVEX_V256;
1410    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1411                                         SrcRC>, EVEX_V128;
1412  }
1413}
1414
// Broadcasts from a general-purpose register. The byte (0x7A, GR8/sub_8bit)
// and word (0x7B, GR16/sub_16bit) forms are predicated on HasBWI and go
// through the GR32-operand variant; the dword and qword forms share opcode
// 0x7C, are predicated on HasAVX512, and the qword form adds VEX_W.
1415defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1416                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1417defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1418                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1419                       HasBWI>;
1420defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1421                                                 X86VBroadcast, GR32, HasAVX512>;
1422defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1423                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1424
// Instantiates avx512_broadcast_rm for the three vector lengths of an integer
// broadcast whose source info is always _.info128: Z under [prd], Z256 and
// Z128 under [prd, HasVLX]. Z and Z256 use WriteShuffle256 /
// WriteShuffle256Ld; Z128 uses WriteShuffle / WriteShuffleXLd.
// IsConvertibleToThreeAddress is forwarded unchanged to every variant.
1425multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1426                                      AVX512VLVectorVTInfo _, Predicate prd,
1427                                      bit IsConvertibleToThreeAddress> {
1428  let Predicates = [prd] in {
1429    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1430                                   WriteShuffle256Ld, _.info512, _.info128,
1431                                   IsConvertibleToThreeAddress>,
1432                                  EVEX_V512;
1433  }
1434  let Predicates = [prd, HasVLX] in {
1435    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1436                                    WriteShuffle256Ld, _.info256, _.info128,
1437                                    IsConvertibleToThreeAddress>,
1438                                 EVEX_V256;
1439    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1440                                    WriteShuffleXLd, _.info128, _.info128,
1441                                    IsConvertibleToThreeAddress>,
1442                                 EVEX_V128;
1443  }
1444}
1445
// Integer broadcasts from a vector register or memory. The byte (0x78) and
// word (0x79) forms are predicated on HasBWI and pass 0 for
// IsConvertibleToThreeAddress; the dword (0x58) and qword (0x59) forms are
// predicated on HasAVX512 and pass 1. The qword form adds VEX_W1X.
1446defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1447                                           avx512vl_i8_info, HasBWI, 0>;
1448defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1449                                           avx512vl_i16_info, HasBWI, 0>;
1450defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1451                                           avx512vl_i32_info, HasAVX512, 1>;
1452defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1453                                           avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1454
// Defines the memory-source ("rm") family of a subvector broadcast through
// AVX512_maskable. The memory operand is _Src.MemOp, the result type is
// _Dst.VT and the pattern is (_Dst.VT (OpNode addr:$src)). All records use
// the SchedWriteShuffle.YMM.Folded scheduling class.
1455multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1456                                      SDPatternOperator OpNode,
1457                                      X86VectorVTInfo _Dst,
1458                                      X86VectorVTInfo _Src> {
1459  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1460                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1461                           (_Dst.VT (OpNode addr:$src))>,
1462                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1463                           AVX5128IBase, EVEX;
1464}
1465
1466// This should be used for the AVX512DQ broadcast instructions. It disables
1467// the unmasked patterns so that we only use the DQ instructions when masking
1468// is requested.
// Implementation: AVX512_maskable_split receives null_frag as its first
// pattern argument and (_Dst.VT (OpNode addr:$src)) as its second.
// hasSideEffects = 0 and mayLoad = 1 are set explicitly on the records.
1469multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1470                                         SDPatternOperator OpNode,
1471                                         X86VectorVTInfo _Dst,
1472                                         X86VectorVTInfo _Src> {
1473  let hasSideEffects = 0, mayLoad = 1 in
1474  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1475                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1476                           (null_frag),
1477                           (_Dst.VT (OpNode addr:$src))>,
1478                           Sched<[SchedWriteShuffle.YMM.Folded]>,
1479                           AVX5128IBase, EVEX;
1480}
// v32f16 broadcasts, available under HasFP16, are selected to the integer
// word-broadcast instructions VPBROADCASTWZrm / VPBROADCASTWZrr.
1481let Predicates = [HasFP16] in {
  // Broadcast of a 16-bit load.
1482  def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
1483            (VPBROADCASTWZrm addr:$src)>;
1484
  // Broadcast from a v8f16 vector register.
1485  def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
1486            (VPBROADCASTWZrr VR128X:$src)>;
  // Broadcast from a scalar f16 register, first copied into VR128X.
1487  def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
1488            (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1489}
// v8f16 and v16f16 broadcasts, available under HasVLX and HasFP16, are
// selected to the Z128 / Z256 forms of the integer word broadcast.
1490let Predicates = [HasVLX, HasFP16] in {
  // Broadcast of a 16-bit load.
1491  def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
1492            (VPBROADCASTWZ128rm addr:$src)>;
1493  def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
1494            (VPBROADCASTWZ256rm addr:$src)>;
1495
  // Broadcast from a v8f16 vector register.
1496  def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
1497            (VPBROADCASTWZ128rr VR128X:$src)>;
1498  def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
1499            (VPBROADCASTWZ256rr VR128X:$src)>;
1500
  // Broadcast from a scalar f16 register, first copied into VR128X.
1501  def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
1502            (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1503  def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
1504            (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
1505}
1506
1507//===----------------------------------------------------------------------===//
1508// AVX-512 BROADCAST SUBVECTORS
1509//
1510
// 512-bit (EVEX_V512) subvector broadcast loads defined with
// avx512_subvec_broadcast_rm: the 32x4 forms (0x5a integer, 0x1a
// floating-point) use X86SubVBroadcastld128, and the 64x4 forms (0x5b
// integer, 0x1b floating-point) use X86SubVBroadcastld256 and add VEX_W.
1511defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1512                       X86SubVBroadcastld128, v16i32_info, v4i32x_info>,
1513                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1514defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1515                       X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
1516                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
1517defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1518                       X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
1519                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1520defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1521                       X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
1522                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1523
// Selection patterns for 512-bit subvector broadcast loads under HasAVX512.
1524let Predicates = [HasAVX512] in {
// Unmasked X86SubVBroadcastld256: floating-point result types select
// VBROADCASTF64X4rm and integer result types select VBROADCASTI64X4rm.
1525def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
1526          (VBROADCASTF64X4rm addr:$src)>;
1527def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
1528          (VBROADCASTF64X4rm addr:$src)>;
1529def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
1530          (VBROADCASTF64X4rm addr:$src)>;
1531def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
1532          (VBROADCASTI64X4rm addr:$src)>;
1533def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
1534          (VBROADCASTI64X4rm addr:$src)>;
1535def : Pat<(v32i16 (X86SubVBroadcastld256 addr:$src)),
1536          (VBROADCASTI64X4rm addr:$src)>;
1537def : Pat<(v64i8 (X86SubVBroadcastld256 addr:$src)),
1538          (VBROADCASTI64X4rm addr:$src)>;
1539
// Unmasked X86SubVBroadcastld128: floating-point result types select
// VBROADCASTF32X4rm and integer result types select VBROADCASTI32X4rm.
1540def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
1541          (VBROADCASTF32X4rm addr:$src)>;
1542def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
1543          (VBROADCASTF32X4rm addr:$src)>;
1544def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
1545          (VBROADCASTF32X4rm addr:$src)>;
1546def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
1547          (VBROADCASTI32X4rm addr:$src)>;
1548def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
1549          (VBROADCASTI32X4rm addr:$src)>;
1550def : Pat<(v32i16 (X86SubVBroadcastld128 addr:$src)),
1551          (VBROADCASTI32X4rm addr:$src)>;
1552def : Pat<(v64i8 (X86SubVBroadcastld128 addr:$src)),
1553          (VBROADCASTI32X4rm addr:$src)>;
1554
1555// Patterns for selects of bitcasted operations.
// A 16-lane select over a 128-bit broadcast load typed with 64-bit elements
// is matched to the masked 32X4 instructions (rmkz for an all-zeros other
// arm, rmk for a register other arm).
1556def : Pat<(vselect_mask VK16WM:$mask,
1557                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1558                        (v16f32 immAllZerosV)),
1559          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1560def : Pat<(vselect_mask VK16WM:$mask,
1561                        (bc_v16f32 (v8f64 (X86SubVBroadcastld128 addr:$src))),
1562                        VR512:$src0),
1563          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1564def : Pat<(vselect_mask VK16WM:$mask,
1565                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1566                        (v16i32 immAllZerosV)),
1567          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1568def : Pat<(vselect_mask VK16WM:$mask,
1569                        (bc_v16i32 (v8i64 (X86SubVBroadcastld128 addr:$src))),
1570                        VR512:$src0),
1571          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1572
// An 8-lane select over a 256-bit broadcast load typed with 32-bit elements
// is matched to the masked 64X4 instructions.
1573def : Pat<(vselect_mask VK8WM:$mask,
1574                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1575                        (v8f64 immAllZerosV)),
1576          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1577def : Pat<(vselect_mask VK8WM:$mask,
1578                        (bc_v8f64 (v16f32 (X86SubVBroadcastld256 addr:$src))),
1579                        VR512:$src0),
1580          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1581def : Pat<(vselect_mask VK8WM:$mask,
1582                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1583                        (v8i64 immAllZerosV)),
1584          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1585def : Pat<(vselect_mask VK8WM:$mask,
1586                        (bc_v8i64 (v16i32 (X86SubVBroadcastld256 addr:$src))),
1587                        VR512:$src0),
1588          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1589}
1590
// 256-bit (EVEX_V256) 32x4 subvector broadcast loads and their selection
// patterns, all under HasVLX.
1591let Predicates = [HasVLX] in {
1592defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1593                           X86SubVBroadcastld128, v8i32x_info, v4i32x_info>,
1594                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1595defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1596                           X86SubVBroadcastld128, v8f32x_info, v4f32x_info>,
1597                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
1598
// Unmasked X86SubVBroadcastld128: floating-point result types select
// VBROADCASTF32X4Z256rm and integer result types select
// VBROADCASTI32X4Z256rm.
1599def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
1600          (VBROADCASTF32X4Z256rm addr:$src)>;
1601def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
1602          (VBROADCASTF32X4Z256rm addr:$src)>;
1603def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
1604          (VBROADCASTF32X4Z256rm addr:$src)>;
1605def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
1606          (VBROADCASTI32X4Z256rm addr:$src)>;
1607def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
1608          (VBROADCASTI32X4Z256rm addr:$src)>;
1609def : Pat<(v16i16 (X86SubVBroadcastld128 addr:$src)),
1610          (VBROADCASTI32X4Z256rm addr:$src)>;
1611def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
1612          (VBROADCASTI32X4Z256rm addr:$src)>;
1613
1614// Patterns for selects of bitcasted operations.
// An 8-lane select over a 128-bit broadcast load typed with 64-bit elements
// is matched to the masked 32X4 Z256 instructions.
1615def : Pat<(vselect_mask VK8WM:$mask,
1616                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1617                        (v8f32 immAllZerosV)),
1618          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1619def : Pat<(vselect_mask VK8WM:$mask,
1620                        (bc_v8f32 (v4f64 (X86SubVBroadcastld128 addr:$src))),
1621                        VR256X:$src0),
1622          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1623def : Pat<(vselect_mask VK8WM:$mask,
1624                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1625                        (v8i32 immAllZerosV)),
1626          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1627def : Pat<(vselect_mask VK8WM:$mask,
1628                        (bc_v8i32 (v4i64 (X86SubVBroadcastld128 addr:$src))),
1629                        VR256X:$src0),
1630          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1631}
1632
// 64x2 subvector broadcast loads under HasVLX and HasDQI, defined with the
// _dq multiclass (masked patterns only).
// Note that although the record names end in "Z128", both records use
// EVEX_V256 and a four-element 64-bit destination info (v4i64x_info /
// v4f64x_info); the "128" matches the two-element source info.
1633let Predicates = [HasVLX, HasDQI] in {
1634defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1635                           X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,
1636                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1637defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1638                           X86SubVBroadcastld128, v4f64x_info, v2f64x_info>, VEX_W1X,
1639                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
1640
1641// Patterns for selects of bitcasted operations.
// A 4-lane select over a 128-bit broadcast load typed with 32-bit elements
// is matched to the masked 64X2 instructions.
1642def : Pat<(vselect_mask VK4WM:$mask,
1643                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1644                        (v4f64 immAllZerosV)),
1645          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1646def : Pat<(vselect_mask VK4WM:$mask,
1647                        (bc_v4f64 (v8f32 (X86SubVBroadcastld128 addr:$src))),
1648                        VR256X:$src0),
1649          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1650def : Pat<(vselect_mask VK4WM:$mask,
1651                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1652                        (v4i64 immAllZerosV)),
1653          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1654def : Pat<(vselect_mask VK4WM:$mask,
1655                        (bc_v4i64 (v8i32 (X86SubVBroadcastld128 addr:$src))),
1656                        VR256X:$src0),
1657          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1658}
1659
// 512-bit (EVEX_V512) subvector broadcast loads that require HasDQI: the
// 64x2 forms (X86SubVBroadcastld128, with VEX_W) and the 32x8 forms
// (X86SubVBroadcastld256). They are defined with the _dq multiclass, so only
// masked patterns are attached to them.
1660let Predicates = [HasDQI] in {
1661defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1662                       X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
1663                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1664defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1665                       X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
1666                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1667defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1668                       X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
1669                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
1670defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1671                       X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
1672                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
1673
1674// Patterns for selects of bitcasted operations.
// A 16-lane select over a 256-bit broadcast load typed with 64-bit elements
// is matched to the masked 32X8 instructions.
1675def : Pat<(vselect_mask VK16WM:$mask,
1676                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1677                        (v16f32 immAllZerosV)),
1678          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1679def : Pat<(vselect_mask VK16WM:$mask,
1680                        (bc_v16f32 (v8f64 (X86SubVBroadcastld256 addr:$src))),
1681                        VR512:$src0),
1682          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1683def : Pat<(vselect_mask VK16WM:$mask,
1684                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1685                        (v16i32 immAllZerosV)),
1686          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1687def : Pat<(vselect_mask VK16WM:$mask,
1688                        (bc_v16i32 (v8i64 (X86SubVBroadcastld256 addr:$src))),
1689                        VR512:$src0),
1690          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1691
// An 8-lane select over a 128-bit broadcast load typed with 32-bit elements
// is matched to the masked 64X2 instructions.
1692def : Pat<(vselect_mask VK8WM:$mask,
1693                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1694                        (v8f64 immAllZerosV)),
1695          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1696def : Pat<(vselect_mask VK8WM:$mask,
1697                        (bc_v8f64 (v16f32 (X86SubVBroadcastld128 addr:$src))),
1698                        VR512:$src0),
1699          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1700def : Pat<(vselect_mask VK8WM:$mask,
1701                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1702                        (v8i64 immAllZerosV)),
1703          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1704def : Pat<(vselect_mask VK8WM:$mask,
1705                        (bc_v8i64 (v16i32 (X86SubVBroadcastld128 addr:$src))),
1706                        VR512:$src0),
1707          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1708}
1709
// 32x2 broadcast, 512-bit (Z) and 256-bit (Z256) widths only.
//   _Dst - per-width info for the destination vector type.
//   _Src - per-width info for the source type; the 128-bit entry is passed
//          for both widths alongside the matching-width entry.
// Both operator arguments are null_frag.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
  defm Z : avx512_broadcast_rm_split<opc, OpcodeStr,
                                     WriteShuffle256, WriteShuffle256Ld,
                                     _Dst.info512, _Src.info512, _Src.info128,
                                     0, null_frag, null_frag>,
           EVEX_V512;

  let Predicates = [HasDQI, HasVLX] in
  defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr,
                                        WriteShuffle256, WriteShuffle256Ld,
                                        _Dst.info256, _Src.info256, _Src.info128,
                                        0, null_frag, null_frag>,
              EVEX_V256;
}
1724
// Extends avx512_common_broadcast_32x2 (Z and Z256) with a 128-bit Z128
// variant, which uses the XMM-width shuffle scheduling classes.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src>
    : avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
  let Predicates = [HasDQI, HasVLX] in
  defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr,
                                        WriteShuffle, WriteShuffleXLd,
                                        _Dst.info128, _Src.info128, _Src.info128,
                                        0, null_frag, null_frag>,
              EVEX_V128;
}
1736
// The integer form is built from the i32x2 multiclass and so also gets a
// 128-bit variant; the FP form is built from the base multiclass (Z and Z256
// only).
defm VBROADCASTI32X2 :
    avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                  avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 :
    avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                 avx512vl_f32_info, avx512vl_f64_info>;
1741
1742//===----------------------------------------------------------------------===//
1743// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1744//---
// Single register-to-register instruction that broadcasts mask register KRC
// into vector register class _.RC, selected from X86VBroadcastm.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg,
                      (outs _.RC:$dst),
                      (ins KRC:$src),
                      OpcodeStr # "\t{$src, $dst|$dst, $src}",
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
           EVEX, Sched<[WriteShuffle]>;
}
1752
// Instantiates avx512_mask_broadcastm at all three vector widths. The 512-bit
// form requires HasCDI; the 256/128-bit forms additionally require HasVLX.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo,
                                 RegisterClass KRC> {
  let Predicates = [HasCDI] in
  defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>,
           EVEX_V512;

  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>,
                EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>,
                EVEX_V128;
  }
}
1762
// VK16 source with i32 element vectors; VK8 source with i64 element vectors
// (the latter also sets VEX_W).
defm VPBROADCASTMW2D :
    avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q :
    avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", avx512vl_i64_info, VK8>,
    VEX_W;
1767
1768//===----------------------------------------------------------------------===//
1769// -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector memory (rm) forms.
// The tied operand ($src1 = $dst) is the index vector, typed by IdxVT; the
// two data operands are $src2 and $src3 and use the data type described by _.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain,
      Constraints = "$src1 = $dst" in {
    defm rr : AVX512_maskable_3src_cast<
                  opc, MRMSrcReg, _, IdxVT,
                  (outs _.RC:$dst),
                  (ins _.RC:$src2, _.RC:$src3),
                  OpcodeStr, "$src3, $src2", "$src2, $src3",
                  (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)),
                  1>,
              EVEX_4V, AVX5128IBase, Sched<[sched]>;

    let mayLoad = 1 in
    defm rm : AVX512_maskable_3src_cast<
                  opc, MRMSrcMem, _, IdxVT,
                  (outs _.RC:$dst),
                  (ins _.RC:$src2, _.MemOp:$src3),
                  OpcodeStr, "$src3, $src2", "$src2, $src3",
                  (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                                    (_.VT (_.LdFrag addr:$src3)))),
                  1>,
              EVEX_4V, AVX5128IBase,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
1790
// Embedded-broadcast memory form (rmb): $src3 is a scalar memory operand
// loaded through _.BroadcastLdFrag, and the encoding sets EVEX_B. As in
// avx512_perm_i, the tied operand $src1 is the index vector.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let mayLoad = 1, hasSideEffects = 0, ExeDomain = _.ExeDomain,
      Constraints = "$src1 = $dst" in
  defm rmb : AVX512_maskable_3src_cast<
                 opc, MRMSrcMem, _, IdxVT,
                 (outs _.RC:$dst),
                 (ins _.RC:$src2, _.ScalarMemOp:$src3),
                 OpcodeStr,
                 "${src3}" # _.BroadcastStr # ", $src2",
                 "$src2, ${src3}" # _.BroadcastStr,
                 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                                   (_.VT (_.BroadcastLdFrag addr:$src3)))),
                 1>,
             AVX5128IBase, EVEX_4V, EVEX_B,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}
1805
// rr/rm plus rmb forms at 512, 128 and 256 bits. The 128- and 256-bit
// variants are predicated on HasVLX; no predicate is set here for the
// 512-bit one.
//   VTInfo      - data vector types per width.
//   ShuffleMask - index vector types per width.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME : avx512_perm_i<opc, OpcodeStr, sched,
                            VTInfo.info512, ShuffleMask.info512>,
              avx512_perm_i_mb<opc, OpcodeStr, sched,
                               VTInfo.info512, ShuffleMask.info512>,
              EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#128 : avx512_perm_i<opc, OpcodeStr, sched,
                                  VTInfo.info128, ShuffleMask.info128>,
                    avx512_perm_i_mb<opc, OpcodeStr, sched,
                                     VTInfo.info128, ShuffleMask.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_i<opc, OpcodeStr, sched,
                                  VTInfo.info256, ShuffleMask.info256>,
                    avx512_perm_i_mb<opc, OpcodeStr, sched,
                                     VTInfo.info256, ShuffleMask.info256>,
                    EVEX_V256;
  }
}
1825
// Variant of avx512_perm_i_sizes without the broadcast-memory (rmb) forms.
// The feature predicate is supplied by the caller through Prd; the 128/256-bit
// variants additionally require HasVLX.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME : avx512_perm_i<opc, OpcodeStr, sched,
                            VTInfo.info512, Idx.info512>,
              EVEX_V512;

  let Predicates = [Prd, HasVLX] in {
    defm NAME#128 : avx512_perm_i<opc, OpcodeStr, sched,
                                  VTInfo.info128, Idx.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_i<opc, OpcodeStr, sched,
                                  VTInfo.info256, Idx.info256>,
                    EVEX_V256;
  }
}
1841
// Dword/qword integer forms (with broadcast-memory variants).
defm VPERMI2D :
    avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                        avx512vl_i32_info, avx512vl_i32_info>,
    EVEX_CD8<32, CD8VF>;
defm VPERMI2Q :
    avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                        avx512vl_i64_info, avx512vl_i64_info>,
    VEX_W, EVEX_CD8<64, CD8VF>;

// Word/byte forms, gated on HasBWI and HasVBMI respectively.
defm VPERMI2W :
    avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                           avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
    VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B :
    avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                           avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
    EVEX_CD8<8, CD8VF>;

// Floating-point forms: FP data vectors indexed by same-width integer vectors.
defm VPERMI2PS :
    avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                        avx512vl_f32_info, avx512vl_i32_info>,
    EVEX_CD8<32, CD8VF>;
defm VPERMI2PD :
    avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                        avx512vl_f64_info, avx512vl_i64_info>,
    VEX_W, EVEX_CD8<64, CD8VF>;
1856
// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
//   InstrStr - instruction name prefix; "rrk", "rmk" or "rmbk" is appended.
//   _        - data vector type.
//   IdxVT    - index vector type.
//   CastVT   - type $src1 is seen as before being bitcast to both the index
//              type and the passthru (data) type.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  // Register source.
  def : Pat<(_.VT (vselect_mask
                      _.KRCWM:$mask,
                      (X86VPermt2
                          (_.VT _.RC:$src2),
                          (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                          _.RC:$src3),
                      (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk")
                _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3)>;

  // Full-vector load source.
  def : Pat<(_.VT (vselect_mask
                      _.KRCWM:$mask,
                      (X86VPermt2
                          _.RC:$src2,
                          (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                          (_.LdFrag addr:$src3)),
                      (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk")
                _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;

  // Broadcast load source.
  def : Pat<(_.VT (vselect_mask
                      _.KRCWM:$mask,
                      (X86VPermt2
                          _.RC:$src2,
                          (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                          (_.BroadcastLdFrag addr:$src3)),
                      (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk")
                _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
1886
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Only the PS instructions are covered, with vXi64 as the cast type.
defm : avx512_perm_i_lowering<"VPERMI2PS",
                              v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256",
                              v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128",
                              v4f32x_info, v4i32x_info, v2i64x_info>;
1891
1892// VPERMT2
// Register (rr) and full-vector memory (rm) forms.
// Here the tied operand ($src1 = $dst) is the first data operand, and the
// index vector (typed by IdxVT) is the separate operand $src2.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let ExeDomain = _.ExeDomain, Constraints = "$src1 = $dst" in {
    defm rr : AVX512_maskable_3src<
                  opc, MRMSrcReg, _,
                  (outs _.RC:$dst),
                  (ins IdxVT.RC:$src2, _.RC:$src3),
                  OpcodeStr, "$src3, $src2", "$src2, $src3",
                  (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)),
                  1>,
              EVEX_4V, AVX5128IBase, Sched<[sched]>;

    defm rm : AVX512_maskable_3src<
                  opc, MRMSrcMem, _,
                  (outs _.RC:$dst),
                  (ins IdxVT.RC:$src2, _.MemOp:$src3),
                  OpcodeStr, "$src3, $src2", "$src2, $src3",
                  (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                                    (_.LdFrag addr:$src3))),
                  1>,
              EVEX_4V, AVX5128IBase,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Embedded-broadcast memory form (rmb): $src3 is a scalar memory operand
// loaded through _.BroadcastLdFrag, and the encoding sets EVEX_B.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let ExeDomain = _.ExeDomain, Constraints = "$src1 = $dst" in
  defm rmb : AVX512_maskable_3src<
                 opc, MRMSrcMem, _,
                 (outs _.RC:$dst),
                 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
                 OpcodeStr,
                 "${src3}" # _.BroadcastStr # ", $src2",
                 "$src2, ${src3}" # _.BroadcastStr,
                 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                                   (_.VT (_.BroadcastLdFrag addr:$src3)))),
                 1>,
             AVX5128IBase, EVEX_4V, EVEX_B,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
}
1924
// rr/rm plus rmb forms at 512, 128 and 256 bits. The 128- and 256-bit
// variants are predicated on HasVLX; no predicate is set here for the
// 512-bit one.
//   VTInfo      - data vector types per width.
//   ShuffleMask - index vector types per width.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME : avx512_perm_t<opc, OpcodeStr, sched,
                            VTInfo.info512, ShuffleMask.info512>,
              avx512_perm_t_mb<opc, OpcodeStr, sched,
                               VTInfo.info512, ShuffleMask.info512>,
              EVEX_V512;

  let Predicates = [HasVLX] in {
    defm NAME#128 : avx512_perm_t<opc, OpcodeStr, sched,
                                  VTInfo.info128, ShuffleMask.info128>,
                    avx512_perm_t_mb<opc, OpcodeStr, sched,
                                     VTInfo.info128, ShuffleMask.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_t<opc, OpcodeStr, sched,
                                  VTInfo.info256, ShuffleMask.info256>,
                    avx512_perm_t_mb<opc, OpcodeStr, sched,
                                     VTInfo.info256, ShuffleMask.info256>,
                    EVEX_V256;
  }
}
1944
// Variant of avx512_perm_t_sizes without the broadcast-memory (rmb) forms.
// The feature predicate is supplied by the caller through Prd; the 128/256-bit
// variants additionally require HasVLX.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME : avx512_perm_t<opc, OpcodeStr, sched,
                            VTInfo.info512, Idx.info512>,
              EVEX_V512;

  let Predicates = [Prd, HasVLX] in {
    defm NAME#128 : avx512_perm_t<opc, OpcodeStr, sched,
                                  VTInfo.info128, Idx.info128>,
                    EVEX_V128;
    defm NAME#256 : avx512_perm_t<opc, OpcodeStr, sched,
                                  VTInfo.info256, Idx.info256>,
                    EVEX_V256;
  }
}
1959
// Dword/qword integer forms (with broadcast-memory variants).
defm VPERMT2D :
    avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                        avx512vl_i32_info, avx512vl_i32_info>,
    EVEX_CD8<32, CD8VF>;
defm VPERMT2Q :
    avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                        avx512vl_i64_info, avx512vl_i64_info>,
    VEX_W, EVEX_CD8<64, CD8VF>;

// Word/byte forms, gated on HasBWI and HasVBMI respectively.
defm VPERMT2W :
    avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                           avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
    VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B :
    avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                           avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
    EVEX_CD8<8, CD8VF>;

// Floating-point forms: FP data vectors indexed by same-width integer vectors.
defm VPERMT2PS :
    avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                        avx512vl_f32_info, avx512vl_i32_info>,
    EVEX_CD8<32, CD8VF>;
defm VPERMT2PD :
    avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                        avx512vl_f64_info, avx512vl_i64_info>,
    VEX_W, EVEX_CD8<64, CD8VF>;
1974
1975//===----------------------------------------------------------------------===//
1976// AVX-512 - BLEND using mask
1977//
1978
// Register and full-vector memory forms of a mask-controlled blend:
//   rr  / rm   - no mask operand.
//   rrk / rmk  - mask operand (EVEX_K).
//   rrkz/ rmkz - mask operand with {z} (EVEX_KZ); marked NotMemoryFoldable.
// Every record is declared with an empty pattern list.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in {
    def rr : AVX5128I<opc, MRMSrcReg,
                      (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2),
                      OpcodeStr #
                        "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}",
                      []>,
             EVEX_4V, Sched<[sched]>;
    def rrk : AVX5128I<opc, MRMSrcReg,
                       (outs _.RC:$dst),
                       (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                       OpcodeStr #
                         "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                       []>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
    def rrkz : AVX5128I<opc, MRMSrcReg,
                        (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                        OpcodeStr #
                          "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}",
                        []>,
               EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;

    let mayLoad = 1 in {
      def rm : AVX5128I<opc, MRMSrcMem,
                        (outs _.RC:$dst),
                        (ins _.RC:$src1, _.MemOp:$src2),
                        OpcodeStr #
                          "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}",
                        []>,
               EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
      def rmk : AVX5128I<opc, MRMSrcMem,
                         (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                         OpcodeStr #
                           "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}",
                         []>,
                EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
      def rmkz : AVX5128I<opc, MRMSrcMem,
                          (outs _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                          OpcodeStr #
                            "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}",
                          []>,
                 EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
                 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
    }
  }
}
// Embedded-broadcast memory forms of the mask-controlled blend (EVEX_B, with
// a scalar memory operand): rmbk (mask), rmbkz (mask with {z}, marked
// NotMemoryFoldable) and rmb (no mask). All have empty pattern lists.
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in {
    def rmbk : AVX5128I<opc, MRMSrcMem,
                        (outs _.RC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                        OpcodeStr #
                          "\t{${src2}" # _.BroadcastStr #
                          ", $src1, $dst {${mask}}|" #
                          "$dst {${mask}}, $src1, ${src2}" # _.BroadcastStr #
                          "}",
                        []>,
               EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
               Sched<[sched.Folded, sched.ReadAfterFold]>;

    def rmbkz : AVX5128I<opc, MRMSrcMem,
                         (outs _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr #
                           "\t{${src2}" # _.BroadcastStr #
                           ", $src1, $dst {${mask}} {z}|" #
                           "$dst {${mask}} {z}, $src1, ${src2}" # _.BroadcastStr #
                           "}",
                         []>,
                EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

    def rmb : AVX5128I<opc, MRMSrcMem,
                       (outs _.RC:$dst),
                       (ins _.RC:$src1, _.ScalarMemOp:$src2),
                       OpcodeStr #
                         "\t{${src2}" # _.BroadcastStr #
                         ", $src1, $dst|" #
                         "$dst, $src1, ${src2}" # _.BroadcastStr #
                         "}",
                       []>,
              EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2047
// Blend instructions at all three widths, including the broadcast-memory
// forms. Each width picks its scheduling class from the matching member of
// sched (ZMM/YMM/XMM). The 256/128-bit variants require HasVLX.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr,
                        X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM,
                                      VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM,
                                      VTInfo.info128>,
                EVEX_V128;
  }
}
2063
// Blend instructions at all three widths without broadcast-memory forms.
// The 512-bit variant requires HasBWI; the 256/128-bit variants require
// HasBWI and HasVLX.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr,
                        X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}
2077
// Floating-point blends (FP blend scheduling classes).
defm VBLENDMPS :
    blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend, avx512vl_f32_info>;
defm VBLENDMPD :
    blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend, avx512vl_f64_info>,
    VEX_W;

// Integer dword/qword blends.
defm VPBLENDMD :
    blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend, avx512vl_i32_info>;
defm VPBLENDMQ :
    blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend, avx512vl_i64_info>,
    VEX_W;

// Integer byte/word blends (HasBWI, no broadcast-memory forms).
defm VPBLENDMB :
    blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend, avx512vl_i8_info>;
defm VPBLENDMW :
    blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend, avx512vl_i16_info>,
    VEX_W;
2090
2091//===----------------------------------------------------------------------===//
2092// Compare Instructions
2093//===----------------------------------------------------------------------===//
2094
2095// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2096
// Scalar FP compare producing a mask register (_.KRC).
//   OpNode / OpNodeSAE       - compare node and its {sae} counterpart.
//   OpNode_su / OpNodeSAE_su - the fragments passed as the second pattern
//                              argument of AVX512_maskable_cmp.
// Defines:
//   rr_Int / rm_Int - vector-register operand forms (register / memory).
//   rrb_Int         - register form with {sae} (EVEX_B).
//   rr / rm         - isCodeGenOnly forms taking scalar FP registers (_.FRC).
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<
                    0xC2, MRMSrcReg, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                    "vcmp" # _.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                               timm:$cc)>,
                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;

  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<
                    0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                    "vcmp" # _.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1),
                            (_.ScalarIntMemFrags addr:$src2),
                            timm:$cc),
                    (OpNode_su (_.VT _.RC:$src1),
                               (_.ScalarIntMemFrags addr:$src2),
                               timm:$cc)>,
                EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  // The {sae} form lists MXCSR as a use and does not carry SIMD_EXC.
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_cmp<
                     0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp" # _.Suffix,
                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                timm:$cc),
                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   timm:$cc)>,
                 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    // Only the register form is marked commutable.
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                       (outs _.KRC:$dst),
                       (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                       "vcmp" # _.Suffix #
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                       [(set _.KRC:$dst,
                             (OpNode _.FRC:$src1, _.FRC:$src2, timm:$cc))]>,
             EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;

    def rm : AVX512Ii8<0xC2, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                       "vcmp" # _.Suffix #
                         "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                       [(set _.KRC:$dst,
                             (OpNode _.FRC:$src1,
                                     (_.ScalarLdFrag addr:$src2),
                                     timm:$cc))]>,
             EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}
2154
// Single-use variants of the scalar compare nodes: each matches only when the
// compare node has exactly one use.
def X86cmpms_su
    : PatFrag<(ops node:$src1, node:$src2, node:$cc),
              (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;

def X86cmpmsSAE_su
    : PatFrag<(ops node:$src1, node:$src2, node:$cc),
              (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
2163
// Scalar compare instantiations. All three use the same compare nodes and
// scheduling class; they differ in element type, prefix class, execution
// domain and required feature.
let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info,
                                 X86cmpms, X86cmpmsSAE,
                                 X86cmpms_su, X86cmpmsSAE_su,
                                 SchedWriteFCmp.Scl>,
               AVX512XSIi8Base;

let Predicates = [HasAVX512], ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info,
                                 X86cmpms, X86cmpmsSAE,
                                 X86cmpms_su, X86cmpmsSAE_su,
                                 SchedWriteFCmp.Scl>,
               AVX512XDIi8Base, VEX_W;

let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
defm VCMPSHZ : avx512_cmp_scalar<f16x_info,
                                 X86cmpms, X86cmpmsSAE,
                                 X86cmpms_su, X86cmpmsSAE_su,
                                 SchedWriteFCmp.Scl>,
               AVX512XSIi8Base, TA;
2178
// Packed integer compare into a mask register: register and full-vector
// memory forms, each without (rr/rm) and with (rrk/rmk) a mask operand.
// Pattern lists are empty. IsCommutable is applied to the two register forms
// only.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let hasSideEffects = 0, isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2),
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>,
           EVEX_4V, Sched<[sched]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rm : AVX512BI<opc, MRMSrcMem,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2),
                    OpcodeStr # "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>,
           EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;

  let hasSideEffects = 0, isCommutable = IsCommutable in
  def rrk : AVX512BI<opc, MRMSrcReg,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
                     OpcodeStr # "\t{$src2, $src1, $dst {${mask}}|" #
                       "$dst {${mask}}, $src1, $src2}",
                     []>,
            EVEX_4V, EVEX_K, Sched<[sched]>;

  let hasSideEffects = 0, mayLoad = 1 in
  def rmk : AVX512BI<opc, MRMSrcMem,
                     (outs _.KRC:$dst),
                     (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
                     OpcodeStr # "\t{$src2, $src1, $dst {${mask}}|" #
                       "$dst {${mask}}, $src1, $src2}",
                     []>,
            EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
2205
// Everything from avx512_icmp_packed plus the embedded-broadcast memory forms
// (EVEX_B, scalar memory operand): rmb without and rmbk with a mask operand.
// Pattern lists are empty.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  X86VectorVTInfo _, bit IsCommutable>
    : avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let hasSideEffects = 0, mayLoad = 1 in {
    def rmb : AVX512BI<opc, MRMSrcMem,
                       (outs _.KRC:$dst),
                       (ins _.RC:$src1, _.ScalarMemOp:$src2),
                       OpcodeStr #
                         "\t{${src2}" # _.BroadcastStr # ", $src1, $dst" #
                         "|$dst, $src1, ${src2}" # _.BroadcastStr # "}",
                       []>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

    def rmbk : AVX512BI<opc, MRMSrcMem,
                        (outs _.KRC:$dst),
                        (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
                        OpcodeStr #
                          "\t{${src2}" # _.BroadcastStr #
                          ", $src1, $dst {${mask}}|" #
                          "$dst {${mask}}, $src1, ${src2}" # _.BroadcastStr #
                          "}",
                        []>,
               EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
2226
// avx512_icmp_packed at all three widths. The 512-bit variant requires prd;
// the 256/128-bit variants require prd and HasVLX. IsCommutable defaults to 0.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                              VTInfo.info512, IsCommutable>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>,
                EVEX_V128;
  }
}
2242
// avx512_icmp_packed_rmb (i.e. including broadcast-memory forms) at all three
// widths. The 512-bit variant requires prd; the 256/128-bit variants require
// prd and HasVLX. IsCommutable defaults to 0.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd,
                                     bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                  VTInfo.info512, IsCommutable>,
           EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>,
                EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>,
                EVEX_V128;
  }
}
2258
// ISD::SETCC declared a second time with the commutative property, so that a
// load can be matched in either operand (intended for PCMPEQ).
def X86setcc_commute
    : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;

// setcc with the condition code fixed to SETGT.
def X86pcmpgtm
    : PatFrag<(ops node:$src1, node:$src2),
              (setcc node:$src1, node:$src2, SETGT)>;
2264
2265// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2266// increase the pattern complexity the way an immediate would.
// The equality compares pass IsCommutable = 1; the greater-than compares keep
// the default of 0. Byte/word forms are gated on HasBWI and use the
// non-"rmb" multiclass; dword/qword forms are gated on HasAVX512 and use the
// "rmb" multiclass.
2267let AddedComplexity = 2 in {
2268// FIXME: Is there a better scheduler class for VPCMP?
2269defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2270                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2271                EVEX_CD8<8, CD8VF>, VEX_WIG;
2272
2273defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2274                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2275                EVEX_CD8<16, CD8VF>, VEX_WIG;
2276
2277defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2278                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2279                EVEX_CD8<32, CD8VF>;
2280
2281defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2282                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2283                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2284
2285defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2286                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2287                EVEX_CD8<8, CD8VF>, VEX_WIG;
2288
2289defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2290                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2291                EVEX_CD8<16, CD8VF>, VEX_WIG;
2292
2293defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2294                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2295                EVEX_CD8<32, CD8VF>;
2296
2297defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2298                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2299                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2300}
2301
// Transform applied to a setcc node: reads the condition code from operand 2,
// converts it with X86::getVPCMPImmForCond, and returns the result as an i8
// immediate.
2302def X86pcmpm_imm : SDNodeXForm<setcc, [{
2303  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2304  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2305  return getI8Imm(SSECC, SDLoc(N));
2306}]>;
2307
2308// Swapped operand version of X86pcmpm_imm: the immediate obtained from
// X86::getVPCMPImmForCond is additionally passed through
// X86::getSwappedVPCMPImm before being returned as an i8 immediate.
2309def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2310  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2311  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2312  SSECC = X86::getSwappedVPCMPImm(SSECC);
2313  return getI8Imm(SSECC, SDLoc(N));
2314}]>;
2315
// Defines the immediate-condition integer compares "vpcmp" # Suffix for the
// vector type `_`, writing a mask register:
//   rri  - register/register
//   rmi  - register/memory (full vector load via _.LdFrag)
//   rrik - register/register, result ANDed with write-mask $mask
//   rmik - register/memory, result ANDed with write-mask $mask
// Frag is used for the unmasked patterns and Frag_su for the masked ones.
// Name is the record-name prefix used to look the memory forms up again via
// !cast (Name # _.ZSuffix # "rmi"/"rmik") in the commuted-load patterns at the
// end of the multiclass.
2316multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2317                          PatFrag Frag_su,
2318                          X86FoldableSchedWrite sched,
2319                          X86VectorVTInfo _, string Name> {
  // Unmasked register/register form.
2320  let isCommutable = 1 in
2321  def rri : AVX512AIi8<opc, MRMSrcReg,
2322             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2323             !strconcat("vpcmp", Suffix,
2324                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2325             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2326                                                (_.VT _.RC:$src2),
2327                                                cond)))]>,
2328             EVEX_4V, Sched<[sched]>;
  // Unmasked register/memory form; the second operand is loaded with _.LdFrag.
2329  def rmi : AVX512AIi8<opc, MRMSrcMem,
2330             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2331             !strconcat("vpcmp", Suffix,
2332                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2333             [(set _.KRC:$dst, (_.KVT
2334                                (Frag:$cc
2335                                 (_.VT _.RC:$src1),
2336                                 (_.VT (_.LdFrag addr:$src2)),
2337                                 cond)))]>,
2338             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Masked register/register form: matches (and $mask, compare).
2339  let isCommutable = 1 in
2340  def rrik : AVX512AIi8<opc, MRMSrcReg,
2341              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2342                                      u8imm:$cc),
2343              !strconcat("vpcmp", Suffix,
2344                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2345                         "$dst {${mask}}, $src1, $src2, $cc}"),
2346              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2347                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2348                                                         (_.VT _.RC:$src2),
2349                                                         cond))))]>,
2350              EVEX_4V, EVEX_K, Sched<[sched]>;
  // Masked register/memory form: matches (and $mask, compare-with-load).
2351  def rmik : AVX512AIi8<opc, MRMSrcMem,
2352              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2353                                    u8imm:$cc),
2354              !strconcat("vpcmp", Suffix,
2355                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
2356                         "$dst {${mask}}, $src1, $src2, $cc}"),
2357              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2358                                     (_.KVT
2359                                      (Frag_su:$cc
2360                                       (_.VT _.RC:$src1),
2361                                       (_.VT (_.LdFrag addr:$src2)),
2362                                       cond))))]>,
2363              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2364
  // Load in the first compare operand: select the memory form with the
  // register as $src1 and use X86pcmpm_imm_commute for the immediate.
2365  def : Pat<(_.KVT (Frag:$cc (_.LdFrag addr:$src2),
2366                             (_.VT _.RC:$src1), cond)),
2367            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2368             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2369
  // Same as the previous pattern, for the write-masked form.
2370  def : Pat<(and _.KRCWM:$mask,
2371                 (_.KVT (Frag_su:$cc (_.LdFrag addr:$src2),
2372                                     (_.VT _.RC:$src1), cond))),
2373            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2374             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2375             (X86pcmpm_imm_commute $cc))>;
2376}
2377
// Extends avx512_icmp_cc (inherited with the same arguments) with forms whose
// second operand is a broadcast load (_.BroadcastLdFrag, EVEX_B):
//   rmib  - unmasked broadcast-memory form
//   rmibk - broadcast-memory form ANDed with write-mask $mask
// plus patterns that select those forms when the broadcast load appears as
// the first compare operand, using X86pcmpm_imm_commute for the immediate.
2378multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2379                              PatFrag Frag_su, X86FoldableSchedWrite sched,
2380                              X86VectorVTInfo _, string Name> :
2381           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, sched, _, Name> {
2382  def rmib : AVX512AIi8<opc, MRMSrcMem,
2383             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2384                                     u8imm:$cc),
2385             !strconcat("vpcmp", Suffix,
2386                        "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2387                        "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2388             [(set _.KRC:$dst, (_.KVT (Frag:$cc
2389                                       (_.VT _.RC:$src1),
2390                                       (_.BroadcastLdFrag addr:$src2),
2391                                       cond)))]>,
2392             EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2393  def rmibk : AVX512AIi8<opc, MRMSrcMem,
2394              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2395                                       _.ScalarMemOp:$src2, u8imm:$cc),
2396              !strconcat("vpcmp", Suffix,
2397                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2398                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2399              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2400                                     (_.KVT (Frag_su:$cc
2401                                             (_.VT _.RC:$src1),
2402                                             (_.BroadcastLdFrag addr:$src2),
2403                                             cond))))]>,
2404              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2405
  // Broadcast load in the first compare operand, unmasked.
2406  def : Pat<(_.KVT (Frag:$cc (_.BroadcastLdFrag addr:$src2),
2407                    (_.VT _.RC:$src1), cond)),
2408            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2409             _.RC:$src1, addr:$src2, (X86pcmpm_imm_commute $cc))>;
2410
  // Broadcast load in the first compare operand, write-masked.
2411  def : Pat<(and _.KRCWM:$mask,
2412                 (_.KVT (Frag_su:$cc (_.BroadcastLdFrag addr:$src2),
2413                                     (_.VT _.RC:$src1), cond))),
2414            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2415             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2416             (X86pcmpm_imm_commute $cc))>;
2417}
2418
// Expands avx512_icmp_cc across the three vector widths described by VTInfo.
// The 512-bit form (Z) only requires `prd`; the 256-bit (Z256) and 128-bit
// (Z128) forms additionally require HasVLX. NAME is forwarded as the `Name`
// argument that avx512_icmp_cc uses for its !cast instruction lookups.
2419multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2420                             PatFrag Frag_su, X86SchedWriteWidths sched,
2421                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2422  let Predicates = [prd] in
2423  defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2424                          sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2425
2426  let Predicates = [prd, HasVLX] in {
2427    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2428                               sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2429    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su,
2430                               sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2431  }
2432}
2433
// Expands avx512_icmp_cc_rmb across the three vector widths described by
// VTInfo. The 512-bit form (Z) only requires `prd`; the 256-bit (Z256) and
// 128-bit (Z128) forms additionally require HasVLX. NAME is forwarded as the
// `Name` argument used for !cast instruction lookups.
2434multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2435                                 PatFrag Frag_su, X86SchedWriteWidths sched,
2436                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2437  let Predicates = [prd] in
2438  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2439                              sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2440
2441  let Predicates = [prd, HasVLX] in {
2442    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2443                                   sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2444    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su,
2445                                   sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2446  }
2447}
2448
// setcc fragments used by the VPCMP/VPCMPU definitions. Each one passes
// X86pcmpm_imm as the PatFrag's trailing SDNodeXForm argument.

// Matches a setcc whose condition code is not an unsigned integer compare.
2449def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2450                       (setcc node:$src1, node:$src2, node:$cc), [{
2451  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2452  return !ISD::isUnsignedIntSetCC(CC);
2453}], X86pcmpm_imm>;
2454
// As X86pcmpm, but the setcc must also have exactly one use.
2455def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2456                          (setcc node:$src1, node:$src2, node:$cc), [{
2457  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2458  return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2459}], X86pcmpm_imm>;
2460
// Matches a setcc whose condition code is an unsigned integer compare.
2461def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2462                        (setcc node:$src1, node:$src2, node:$cc), [{
2463  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2464  return ISD::isUnsignedIntSetCC(CC);
2465}], X86pcmpm_imm>;
2466
// As X86pcmpum, but the setcc must also have exactly one use.
2467def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2468                           (setcc node:$src1, node:$src2, node:$cc), [{
2469  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2470  return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2471}], X86pcmpm_imm>;
2472
2473// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// The "u"-suffixed definitions use the unsigned fragments
// (X86pcmpum/X86pcmpum_su); the others use X86pcmpm/X86pcmpm_su. Byte and
// word forms are gated on HasBWI and have no broadcast variants; dword and
// qword forms are gated on HasAVX512 and use the "rmb" multiclass.
2474defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2475                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2476                                EVEX_CD8<8, CD8VF>;
2477defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2478                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2479                                 EVEX_CD8<8, CD8VF>;
2480
2481defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2482                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2483                                VEX_W, EVEX_CD8<16, CD8VF>;
2484defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2485                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2486                                 VEX_W, EVEX_CD8<16, CD8VF>;
2487
2488defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2489                                    SchedWriteVecALU, avx512vl_i32_info,
2490                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
2491defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2492                                     SchedWriteVecALU, avx512vl_i32_info,
2493                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
2494
2495defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2496                                    SchedWriteVecALU, avx512vl_i64_info,
2497                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2498defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2499                                     SchedWriteVecALU, avx512vl_i64_info,
2500                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2501
// X86cmpm restricted to nodes that have exactly one use.
2502def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2503                         (X86cmpm node:$src1, node:$src2, node:$cc), [{
2504  return N->hasOneUse();
2505}]>;
2506
// Transform for a target-immediate compare code: keeps the low 5 bits of the
// immediate, passes them through X86::getSwappedVCMPImm, and returns the
// result as an i8 immediate. Used where the compare operands are swapped so a
// load can be placed in the memory operand.
2507def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2508  uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2509  return getI8Imm(Imm, SDLoc(N));
2510}]>;
2511
// Defines the packed floating-point compare "vcmp" # _.Suffix for vector type
// `_` through AVX512_maskable_cmp:
//   rri  - register/register (trailing `1` argument passed to
//          AVX512_maskable_cmp; presumably its commutable flag - confirm
//          against that multiclass)
//   rmi  - register/memory (full vector load via _.LdFrag)
//   rmbi - register/broadcast-memory (_.BroadcastLdFrag, EVEX_B)
// All three are defined with Uses = [MXCSR] and mayRaiseFPException = 1.
// The patterns that follow refer to the generated instructions by name
// (Name # _.ZSuffix # suffix); the "k"-suffixed masked instructions they use
// are presumably produced by AVX512_maskable_cmp, which is defined elsewhere.
2512multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2513                              string Name> {
2514let Uses = [MXCSR], mayRaiseFPException = 1 in {
2515  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2516                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2517                   "vcmp"#_.Suffix,
2518                   "$cc, $src2, $src1", "$src1, $src2, $cc",
2519                   (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2520                   (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2521                   1>, Sched<[sched]>;
2522
2523  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2524                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2525                "vcmp"#_.Suffix,
2526                "$cc, $src2, $src1", "$src1, $src2, $cc",
2527                (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2528                             timm:$cc),
2529                (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2530                            timm:$cc)>,
2531                Sched<[sched.Folded, sched.ReadAfterFold]>;
2532
2533  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2534                (outs _.KRC:$dst),
2535                (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2536                "vcmp"#_.Suffix,
2537                "$cc, ${src2}"#_.BroadcastStr#", $src1",
2538                "$src1, ${src2}"#_.BroadcastStr#", $cc",
2539                (X86any_cmpm (_.VT _.RC:$src1),
2540                             (_.VT (_.BroadcastLdFrag addr:$src2)),
2541                             timm:$cc),
2542                (X86cmpm_su (_.VT _.RC:$src1),
2543                            (_.VT (_.BroadcastLdFrag addr:$src2)),
2544                            timm:$cc)>,
2545                EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2546  }
2547
2548  // Patterns for selecting with loads in other operand.
  // The register becomes $src1, the load becomes the memory operand, and the
  // immediate is rewritten with X86cmpm_imm_commute.
2549  def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2550                         timm:$cc),
2551            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2552                                                      (X86cmpm_imm_commute timm:$cc))>;
2553
2554  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2555                                            (_.VT _.RC:$src1),
2556                                            timm:$cc)),
2557            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2558                                                       _.RC:$src1, addr:$src2,
2559                                                       (X86cmpm_imm_commute timm:$cc))>;
2560
2561  def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2562                         (_.VT _.RC:$src1), timm:$cc),
2563            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2564                                                       (X86cmpm_imm_commute timm:$cc))>;
2565
2566  def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2567                                            (_.VT _.RC:$src1),
2568                                            timm:$cc)),
2569            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2570                                                        _.RC:$src1, addr:$src2,
2571                                                        (X86cmpm_imm_commute timm:$cc))>;
2572
2573  // Patterns for mask intrinsics.
  // X86cmpmm carries the mask as its last operand: an all-ones mask selects
  // the unmasked instruction, any other mask selects the "k" form.
2574  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc,
2575                      (_.KVT immAllOnesV)),
2576            (!cast<Instruction>(Name#_.ZSuffix#"rri") _.RC:$src1, _.RC:$src2, timm:$cc)>;
2577
2578  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask),
2579            (!cast<Instruction>(Name#_.ZSuffix#"rrik") _.KRCWM:$mask, _.RC:$src1,
2580                                                       _.RC:$src2, timm:$cc)>;
2581
2582  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2583                      (_.KVT immAllOnesV)),
2584            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, timm:$cc)>;
2585
2586  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)), timm:$cc,
2587                      _.KRCWM:$mask),
2588            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask, _.RC:$src1,
2589                                                       addr:$src2, timm:$cc)>;
2590
2591  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2592                      (_.KVT immAllOnesV)),
2593            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, timm:$cc)>;
2594
2595  def : Pat<(X86cmpmm (_.VT _.RC:$src1), (_.VT (_.BroadcastLdFrag addr:$src2)), timm:$cc,
2596                      _.KRCWM:$mask),
2597            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask, _.RC:$src1,
2598                                                        addr:$src2, timm:$cc)>;
2599
2600  // Patterns for mask intrinsics with loads in other operand.
2601  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2602                      (_.KVT immAllOnesV)),
2603            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2604                                                      (X86cmpm_imm_commute timm:$cc))>;
2605
2606  def : Pat<(X86cmpmm (_.VT (_.LdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2607                      _.KRCWM:$mask),
2608            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2609                                                       _.RC:$src1, addr:$src2,
2610                                                       (X86cmpm_imm_commute timm:$cc))>;
2611
2612  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2613                      (_.KVT immAllOnesV)),
2614            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2615                                                       (X86cmpm_imm_commute timm:$cc))>;
2616
2617  def : Pat<(X86cmpmm (_.VT (_.BroadcastLdFrag addr:$src2)), (_.VT _.RC:$src1), timm:$cc,
2618                      _.KRCWM:$mask),
2619            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2620                                                        _.RC:$src1, addr:$src2,
2621                                                        (X86cmpm_imm_commute  timm:$cc))>;
2622}
2623
// Defines the register/register "{sae}" form (rrib, EVEX_B) of
// "vcmp" # _.Suffix. The unmasked pattern matches X86cmpmmSAE with an
// all-ones mask; the masked pattern matches it with $mask.
2624multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2625  // comparison code form (VCMP[EQ/LT/LE/...])
  // Only Uses = [MXCSR] is set here; mayRaiseFPException is not.
  // NOTE(review): presumably because {sae} suppresses exceptions - confirm.
2626  let Uses = [MXCSR] in
2627  defm  rrib  : AVX512_maskable_custom_cmp<0xC2, MRMSrcReg, (outs _.KRC:$dst),
2628                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2629                     (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc),
2630                     "vcmp"#_.Suffix,
2631                     "$cc, {sae}, $src2, $src1",
2632                     "$src1, $src2, {sae}, $cc",
2633                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2634                                        (_.VT _.RC:$src2), timm:$cc, (_.KVT immAllOnesV)))],
2635                     [(set _.KRC:$dst, (X86cmpmmSAE (_.VT _.RC:$src1),
2636                                        (_.VT _.RC:$src2), timm:$cc, _.KRCWM:$mask))]>,
2637                     EVEX_B, Sched<[sched]>;
2638}
2639
// Instantiates the packed FP compare for all three vector widths. The 512-bit
// form (Z) gets both avx512_vcmp_common and avx512_vcmp_sae and requires only
// `Pred` (HasAVX512 by default); the 128-bit and 256-bit forms get
// avx512_vcmp_common only and additionally require HasVLX.
2640multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
2641                       Predicate Pred = HasAVX512> {
2642  let Predicates = [Pred] in {
2643    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2644                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2645
2646  }
2647  let Predicates = [Pred,HasVLX] in {
2648   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2649   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2650  }
2651}
2652
// Packed FP compares for f64, f32 and f16 vectors. VCMPPD and VCMPPS use the
// default predicate of avx512_vcmp (HasAVX512); VCMPPH requires HasFP16.
2653defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2654                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2655defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2656                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2657defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
2658                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
2659
2660// Patterns to select fp compares with load as first operand.
// The register operand becomes $src1 of the "rm" instruction, the load becomes
// its memory operand, and the immediate is rewritten with X86cmpm_imm_commute.
2661let Predicates = [HasAVX512] in {
2662  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
2663            (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2664
2665  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
2666            (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2667}
2668
// The f16 scalar form is only available with HasFP16.
2669let Predicates = [HasFP16] in {
2670  def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
2671            (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2672}
2673
2674// ----------------------------------------------------------------
2675// FPClass
2676
// X86Vfpclasss restricted to nodes that have exactly one use.
2677def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2678                              (X86Vfpclasss node:$src1, node:$src2), [{
2679  return N->hasOneUse();
2680}]>;
2681
// X86Vfpclass restricted to nodes that have exactly one use.
2682def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2683                             (X86Vfpclass node:$src1, node:$src2), [{
2684  return N->hasOneUse();
2685}]>;
2686
2687// Handle scalar fpclass instruction: mask = op(reg_scalar, imm)
2688//                                           op(mem_scalar, imm)
// Defines four forms writing a mask register: rr/rm (unmasked, register or
// memory source) and rrk/rmk (result ANDed with write-mask $mask, using the
// single-use fragment X86Vfpclasss_su). All are defined under
// Predicates = [prd], ExeDomain = _.ExeDomain and Uses = [MXCSR].
2689multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2690                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2691                                 Predicate prd> {
2692  let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2693      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2694                      (ins _.RC:$src1, i32u8imm:$src2),
2695                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2696                      [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2697                              (i32 timm:$src2)))]>,
2698                      Sched<[sched]>;
2699      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2700                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2701                      OpcodeStr#_.Suffix#
2702                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2703                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2704                                      (X86Vfpclasss_su (_.VT _.RC:$src1),
2705                                      (i32 timm:$src2))))]>,
2706                      EVEX_K, Sched<[sched]>;
    // Memory forms match the source through _.ScalarIntMemFrags.
2707    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2708                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2709                    OpcodeStr#_.Suffix#
2710                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2711                    [(set _.KRC:$dst,
2712                          (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2713                                        (i32 timm:$src2)))]>,
2714                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2715    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2716                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2717                    OpcodeStr#_.Suffix#
2718                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2719                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2720                        (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2721                            (i32 timm:$src2))))]>,
2722                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2723  }
2724}
2725
2726// Handle vector fpclass instruction: mask = fpclass(reg_vec, imm)
2727//                                           fpclass(mem_vec, imm)
2728//                                           fpclass(broadcast(eltVt), imm)
// Each source kind has an unmasked form (rr, rm, rmb) and a write-masked form
// (rrk, rmk, rmbk) that ANDs the result with $mask and uses the single-use
// fragment X86Vfpclass_su. `mem` is the width suffix string; it is appended in
// braces to the mnemonic of the full-vector memory forms (rm, rmk) and used
// without braces by the InstAliases at the end of the multiclass.
2729multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2730                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2731                                 string mem>{
2732  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2733  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2734                      (ins _.RC:$src1, i32u8imm:$src2),
2735                      OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2736                      [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2737                                       (i32 timm:$src2)))]>,
2738                      Sched<[sched]>;
2739  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2740                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2741                      OpcodeStr#_.Suffix#
2742                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2743                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2744                                       (X86Vfpclass_su (_.VT _.RC:$src1),
2745                                       (i32 timm:$src2))))]>,
2746                      EVEX_K, Sched<[sched]>;
2747  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2748                    (ins _.MemOp:$src1, i32u8imm:$src2),
2749                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2750                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2751                    [(set _.KRC:$dst,(X86Vfpclass
2752                                     (_.VT (_.LdFrag addr:$src1)),
2753                                     (i32 timm:$src2)))]>,
2754                    Sched<[sched.Folded, sched.ReadAfterFold]>;
2755  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2756                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2757                    OpcodeStr#_.Suffix#"{"#mem#"}"#
2758                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2759                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2760                                  (_.VT (_.LdFrag addr:$src1)),
2761                                  (i32 timm:$src2))))]>,
2762                    EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2763  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2764                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2765                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2766                                      _.BroadcastStr#", $dst|$dst, ${src1}"
2767                                                  #_.BroadcastStr#", $src2}",
2768                    [(set _.KRC:$dst,(X86Vfpclass
2769                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2770                                     (i32 timm:$src2)))]>,
2771                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2772  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2773                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2774                    OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2775                          _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2776                                                   _.BroadcastStr#", $src2}",
2777                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2778                                     (_.VT (_.BroadcastLdFrag addr:$src1)),
2779                                     (i32 timm:$src2))))]>,
2780                    EVEX_B, EVEX_K,  Sched<[sched.Folded, sched.ReadAfterFold]>;
2781  }
2782
2783  // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2784  // the memory form.
  // These aliases cover the rr, rrk, rmb and rmbk instructions and are
  // restricted to the "att" assembler variant.
2785  def : InstAlias<OpcodeStr#_.Suffix#mem#
2786                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2787                  (!cast<Instruction>(NAME#"rr")
2788                   _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2789  def : InstAlias<OpcodeStr#_.Suffix#mem#
2790                  "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2791                  (!cast<Instruction>(NAME#"rrk")
2792                   _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2793  def : InstAlias<OpcodeStr#_.Suffix#mem#
2794                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2795                  _.BroadcastStr#", $src2}",
2796                  (!cast<Instruction>(NAME#"rmb")
2797                   _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2798  def : InstAlias<OpcodeStr#_.Suffix#mem#
2799                  "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2800                  "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2801                  (!cast<Instruction>(NAME#"rmbk")
2802                   _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2803}
2804
// Instantiates avx512_vector_fpclass for all three vector widths, passing the
// width suffix "z" (512-bit), "x" (128-bit) or "y" (256-bit) as `mem`. The
// 512-bit form requires only `prd`; the 128-bit and 256-bit forms additionally
// require HasVLX.
2805multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2806                                     bits<8> opc, X86SchedWriteWidths sched,
2807                                     Predicate prd>{
2808  let Predicates = [prd] in {
2809    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2810                                      _.info512, "z">, EVEX_V512;
2811  }
2812  let Predicates = [prd, HasVLX] in {
2813    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2814                                      _.info128, "x">, EVEX_V128;
2815    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2816                                      _.info256, "y">, EVEX_V256;
2817  }
2818}
2819
// Instantiates every fpclass variant for one mnemonic: packed forms
// (PH, PS, PD) with opcode opcVec and scalar forms (SHZ, SSZ, SDZ) with opcode
// opcScalar. The f16 variants require HasFP16; the f32/f64 variants require
// HasDQI. Scalar variants use sched.Scl.
2820multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2821                                 bits<8> opcScalar, X86SchedWriteWidths sched> {
2822  defm PH : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f16_info, opcVec,
2823                                      sched, HasFP16>,
2824                                      EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
2825  defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2826                                   sched.Scl, f16x_info, HasFP16>,
2827                                   EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
2828  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
2829                                      sched, HasDQI>,
2830                                      EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
2831  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
2832                                      sched, HasDQI>,
2833                                      EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
2834  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2835                                   sched.Scl, f32x_info, HasDQI>, VEX_LIG,
2836                                   EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
2837  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2838                                   sched.Scl, f64x_info, HasDQI>, VEX_LIG,
2839                                   EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
2840}
2841
// VFPCLASS family: packed forms use opcode 0x66, scalar forms 0x67, all
// scheduled via SchedWriteFCmp and EVEX-encoded.
2842defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
2843
2844//-----------------------------------------------------------------
2845// Mask register copy, including
2846// - copy between mask registers
2847// - load/store mask registers
2848// - copy from GPR to mask register and vice versa
2849//
// Defines the mask-register and memory forms of a KMOV instruction for the
// mask register class KRC:
//   kk - mask register to mask register; no selection pattern,
//   km - load from x86memop into a mask register; selected for a plain load
//        producing a value of type `vvt`,
//   mk - store of a mask register to x86memop.
// opc_kk / opc_km / opc_mk are the opcode bytes of the respective forms.
2850multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2851                         string OpcodeStr, RegisterClass KRC,
2852                         ValueType vvt, X86MemOperand x86memop> {
  // This `let` has no braces, so it applies to `kk` only.
  // NOTE(review): the let sets SchedRW = [WriteMove] and the def also mixes in
  // Sched<[WriteMove]>; these look redundant -- confirm.
2853  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2854  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2855             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2856             Sched<[WriteMove]>;
2857  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2858             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2859             [(set KRC:$dst, (vvt (load addr:$src)))]>,
2860             Sched<[WriteLoad]>;
2861  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2862             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2863             [(store KRC:$src, addr:$dst)]>,
2864             Sched<[WriteStore]>;
2865}
2866
// Defines the GPR forms of a KMOV instruction:
//   kr - general-purpose register (GRC) to mask register (KRC),
//   rk - mask register (KRC) to general-purpose register (GRC).
// Neither form carries a selection pattern; both are marked side-effect free
// and scheduled as WriteMove.
2867multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2868                             string OpcodeStr,
2869                             RegisterClass KRC, RegisterClass GRC> {
2870  let hasSideEffects = 0 in {
2871    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2872               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2873               Sched<[WriteMove]>;
2874    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2875               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2876               Sched<[WriteMove]>;
2877  }
2878}
2879
// KMOV instantiations.  KMOVB requires DQI, KMOVW base AVX512, and
// KMOVD/KMOVQ require BWI.  The byte and word GPR forms use GR32.
2880let Predicates = [HasDQI] in
2881  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2882               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2883               VEX, PD;
2884
2885let Predicates = [HasAVX512] in
2886  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2887               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2888               VEX, PS;
2889
// For D and Q the mask/memory forms and the GPR forms are instantiated by
// separate defms because they take different prefix mixins (PD/PS with VEX_W
// for mask/memory, XD for GPR).
2890let Predicates = [HasBWI] in {
2891  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2892               VEX, PD, VEX_W;
2893  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2894               VEX, XD;
2895  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2896               VEX, PS, VEX_W;
2897  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2898               VEX, XD, VEX_W;
2899}
2900
2901// GR from/to mask register
// i16 <-> v16i1: the 16-bit GPR is inserted into the low 16 bits of an
// undefined 32-bit register, which is then copied to VK16.  The reverse
// direction copies the mask to GR32 and extracts the 16-bit (or, for the
// truncating form, 8-bit) sub-register.
2902def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2903          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2904def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2905          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2906def : Pat<(i8 (trunc (i16 (bitconvert (v16i1 VK16:$src))))),
2907          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_8bit)>;
2908
// i8 <-> v8i1: same scheme using the 8-bit sub-register.
2909def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2910          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2911def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2912          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
2913
// Extensions of a v16i1 mask viewed as i16: zext is selected to KMOVWrk
// (wrapped in SUBREG_TO_REG for i64); anyext is a plain register-class copy
// (inserted into an undefined 64-bit register for i64).
2914def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2915          (KMOVWrk VK16:$src)>;
2916def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2917          (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2918def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2919          (COPY_TO_REGCLASS VK16:$src, GR32)>;
2920def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2921          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
2922
// Same for a v8i1 mask viewed as i8.  The zext forms use KMOVBrk and are
// therefore restricted to HasDQI; the anyext forms carry no predicate.
2923def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2924          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2925def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2926          (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2927def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2928          (COPY_TO_REGCLASS VK8:$src, GR32)>;
2929def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2930          (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
2931
// 32- and 64-bit masks have a GPR of matching width, so bitconvert is a
// direct register-class copy in both directions.
2932def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2933          (COPY_TO_REGCLASS GR32:$src, VK32)>;
2934def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2935          (COPY_TO_REGCLASS VK32:$src, GR32)>;
2936def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2937          (COPY_TO_REGCLASS GR64:$src, VK64)>;
2938def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2939          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2940
2941// Load/store kreg
// With DQI, loads of the sub-byte mask types v1i1/v2i1/v4i1 are done with
// KMOVBkm and the result is copied to the matching narrow mask class.
2942let Predicates = [HasDQI] in {
2943  def : Pat<(v1i1 (load addr:$src)),
2944            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2945  def : Pat<(v2i1 (load addr:$src)),
2946            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2947  def : Pat<(v4i1 (load addr:$src)),
2948            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2949}
2950
// With base AVX512, a v8i1 obtained by bitconverting an i8 load goes through
// a GPR (MOVZX32rm8) and is then copied to VK8; a v16i1 obtained from a
// 16-bit load uses KMOVWkm directly.
2951let Predicates = [HasAVX512] in {
2952  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2953            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2954  def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2955            (KMOVWkm addr:$src)>;
2956}
2957
// ISD::EXTRACT_VECTOR_ELT with a restricted type profile: one i8 result and
// two operands, where operand 1 is a vector of i1 elements and operand 2 (the
// element index) has pointer type.
2958def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2959                         SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2960                                              SDTCVecEltisVT<1, i1>,
2961                                              SDTCisPtrTy<2>]>>;
2962
// GPR <-> mask copies for scalar_to_vector and for extraction of element 0,
// available with base AVX512.
2963let Predicates = [HasAVX512] in {
  // For mask class `maskRC` / type `maskVT`:
  //  - scalar_to_vector from GR32 is a register-class copy,
  //  - scalar_to_vector from GR8 first inserts the byte into an undefined
  //    32-bit register,
  //  - extracting element 0 as i8 copies the mask to GR32 and takes the
  //    8-bit sub-register; the any-extended i32 form is the copy alone.
2964  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2965    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2966              (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2967
2968    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2969              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2970
2971    def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2972              (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2973
2974    def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2975              (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2976  }
2977
2978  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
2979  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
2980  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
2981  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
2982  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
2983  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
2984  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;
2985
  // Inserting a one-bit value into an all-zeros v16i1 at index 0: the source
  // byte is ANDed with 1 in a GPR and the result moved to a mask register
  // with KMOVWkr.
2986  def : Pat<(insert_subvector (v16i1 immAllZerosV),
2987                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2988            (KMOVWkr (AND32ri8
2989                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2990                      (i32 1)))>;
2991}
2992
2993// Mask unary operation
2994// - KNOT
// Defines the single register-to-register form `rr` of a unary mask
// instruction on register class KRC, selected for `OpNode` applied to a KRC
// value and available under predicate `prd`.
2995multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2996                            RegisterClass KRC, SDPatternOperator OpNode,
2997                            X86FoldableSchedWrite sched, Predicate prd> {
2998  let Predicates = [prd] in
2999    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
3000               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3001               [(set KRC:$dst, (OpNode KRC:$src))]>,
3002               Sched<[sched]>;
3003}
3004
// Instantiates a unary mask instruction for the four mask widths, appending
// "b"/"w"/"d"/"q" to the mnemonic:
//   B - VK8,  requires DQI;     W - VK16, requires AVX512;
//   D - VK32, requires BWI;     Q - VK64, requires BWI.
// No form is defined for VK1/VK2/VK4.
3005multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
3006                                SDPatternOperator OpNode,
3007                                X86FoldableSchedWrite sched> {
3008  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3009                            sched, HasDQI>, VEX, PD;
3010  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3011                            sched, HasAVX512>, VEX, PS;
3012  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3013                            sched, HasBWI>, VEX, PD, VEX_W;
3014  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3015                            sched, HasBWI>, VEX, PS, VEX_W;
3016}
3017
3018// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KNOTB/W/D/Q: opcode 0x44, selected for `vnot`, scheduled with the XMM
// vector-logic write class.
3019defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
3020
3021// Without DQI (e.g. KNL) there is no KNOTB, so an 8-bit mask is promoted to
// 16 bits: the operand is copied to VK16, inverted with KNOTW, and the result
// is copied back to VK8.
3022let Predicates = [HasAVX512, NoDQI] in
3023def : Pat<(vnot VK8:$src),
3024          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
3025
// Masks narrower than 8 bits have no KNOT form of their own on any subtarget
// (KNOT is only instantiated for VK8/VK16/VK32/VK64), so they always go
// through KNOTW.  The result is copied back to the register class of the
// source operand.
3026def : Pat<(vnot VK4:$src),
3027          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
3028def : Pat<(vnot VK2:$src),
3029          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// The v1i1 result belongs in VK1 (it was previously copied to VK2, which did
// not match the VK1 source/result type of this pattern).
3030def : Pat<(vnot VK1:$src),
3031          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
3032
3033// Mask binary operation
3034// - KAND, KANDN, KOR, KXNOR, KXOR, KADD
// Defines the register-register form `rr` of a two-operand mask instruction
// on register class KRC, selected for `OpNode` applied to two KRC values.
// `prd` gates availability; `IsCommutable` is forwarded to the isCommutable
// flag of the instruction.
3035multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
3036                           RegisterClass KRC, SDPatternOperator OpNode,
3037                           X86FoldableSchedWrite sched, Predicate prd,
3038                           bit IsCommutable> {
3039  let Predicates = [prd], isCommutable = IsCommutable in
3040    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
3041               !strconcat(OpcodeStr,
3042                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3043               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
3044               Sched<[sched]>;
3045}
3046
// Instantiates a binary mask instruction for the four mask widths, appending
// "b"/"w"/"d"/"q" to the mnemonic:
//   B - VK8,  requires DQI;     W - VK16, requires `prdW`;
//   D - VK32, requires BWI;     Q - VK64, requires BWI.
// `prdW` defaults to HasAVX512 and lets a caller tighten the predicate of the
// word form.
3047multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
3048                                 SDPatternOperator OpNode,
3049                                 X86FoldableSchedWrite sched, bit IsCommutable,
3050                                 Predicate prdW = HasAVX512> {
3051  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3052                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3053  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3054                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3055  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3056                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
3057  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3058                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
3059}
3060
3061// These nodes use 'vnot' instead of 'not' to support vectors.
// vandn(i0, i1) = (vnot i0) & i1  -- only the FIRST operand is inverted.
// vxnor(i0, i1) = vnot (i0 ^ i1).
3062def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3063def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3064
3065// TODO - do we need a X86SchedWriteWidths::KMASK type?
// The last positional `1`/`0` is IsCommutable: every operation except KANDN
// is marked commutable.  KADD overrides the word-form predicate with HasDQI.
3066defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
3067defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
3068defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
3069defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
3070defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
3071defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3072
// Selection patterns that implement `VOpNode` on narrow mask types by
// promoting both operands to VK16, applying the 16-bit instruction `Inst`,
// and copying the result back to the narrow class.  The VK8 pattern is
// restricted to NoDQI; the VK1/VK2/VK4 patterns carry no predicate.
3073multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3074                            Instruction Inst> {
3075  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3076  // for the DQI set, this type is legal and KxxxB instruction is used
3077  let Predicates = [NoDQI] in
3078  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3079            (COPY_TO_REGCLASS
3080              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3081                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3082
3083  // All types smaller than 8 bits require conversion anyway
3084  def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3085        (COPY_TO_REGCLASS (Inst
3086                           (COPY_TO_REGCLASS VK1:$src1, VK16),
3087                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3088  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3089        (COPY_TO_REGCLASS (Inst
3090                           (COPY_TO_REGCLASS VK2:$src1, VK16),
3091                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3092  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3093        (COPY_TO_REGCLASS (Inst
3094                           (COPY_TO_REGCLASS VK4:$src1, VK16),
3095                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
3096}
3097
// Narrow-mask promotion patterns for the logical operations, each mapped to
// its word-sized instruction.  No promotion pattern is emitted for KADD.
3098defm : avx512_binop_pat<and,   KANDWrr>;
3099defm : avx512_binop_pat<vandn, KANDNWrr>;
3100defm : avx512_binop_pat<or,    KORWrr>;
3101defm : avx512_binop_pat<vxnor, KXNORWrr>;
3102defm : avx512_binop_pat<xor,   KXORWrr>;
3103
3104// Mask unpacking
// Defines KUNPCK<Suffix> (opcode 0x4b), taking two `Src` masks and producing
// one `Dst` mask, plus a pattern that selects it for concat_vectors of two
// Src-typed values.  The instruction itself has an empty pattern list.
3105multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3106                             X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3107                             Predicate prd> {
3108  let Predicates = [prd] in {
3109    let hasSideEffects = 0 in
3110    def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3111               (ins Src.KRC:$src1, Src.KRC:$src2),
3112               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3113               VEX_4V, VEX_L, Sched<[sched]>;
3114
    // Note the operands are swapped relative to concat_vectors: the
    // instruction receives ($src2, $src1).  Presumably the instruction places
    // its first source in the upper half of the result -- confirm against the
    // ISA reference.
3115    def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3116              (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3117  }
3118}
3119
// KUNPCKBW (8+8 -> 16) needs only AVX512; KUNPCKWD (16+16 -> 32) and
// KUNPCKDQ (32+32 -> 64) need BWI.
3120defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info,  WriteShuffle, HasAVX512>, PD;
3121defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3122defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3123
3124// Mask bit testing
// Defines the register-register form `rr` of a mask test instruction.  It
// has no register outputs: it defines EFLAGS, and is selected for `OpNode`
// applied to two KRC values with EFLAGS as the result.
3125multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3126                              SDNode OpNode, X86FoldableSchedWrite sched,
3127                              Predicate prd> {
3128  let Predicates = [prd], Defs = [EFLAGS] in
3129    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3130               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3131               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3132               Sched<[sched]>;
3133}
3134
// Instantiates a mask test instruction for the four mask widths:
//   B - VK8,  requires DQI;     W - VK16, requires `prdW` (default HasAVX512);
//   Q - VK64, requires BWI;     D - VK32, requires BWI.
3135multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3136                                X86FoldableSchedWrite sched,
3137                                Predicate prdW = HasAVX512> {
3138  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3139                                                                VEX, PD;
3140  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3141                                                                VEX, PS;
3142  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3143                                                                VEX, PS, VEX_W;
3144  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3145                                                                VEX, PD, VEX_W;
3146}
3147
3148// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KORTEST keeps the default word-form predicate (HasAVX512); KTEST overrides
// it with HasDQI.
3149defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3150defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3151
3152// Mask shift
// Defines the register/immediate form `ri` of a mask shift on register class
// KRC.  The shift amount is an 8-bit immediate, matched as a target constant
// (timm) in the selection pattern.
// NOTE(review): Predicates is fixed to [HasAVX512] here while callers also
// wrap some instantiations in their own `let Predicates`; confirm which
// binding ends up on the final records.
3153multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3154                               SDNode OpNode, X86FoldableSchedWrite sched> {
3155  let Predicates = [HasAVX512] in
3156    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3157                 !strconcat(OpcodeStr,
3158                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3159                            [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3160                 Sched<[sched]>;
3161}
3162
// Instantiates a mask shift for the four mask widths.  The byte and word
// forms share `opc1` and the dword and qword forms share `opc2`; within each
// pair VEX_W distinguishes the two (set for W and Q, absent for B and D).
// B is wrapped in HasDQI and Q/D in HasBWI; W has no outer predicate.
3163multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3164                                 SDNode OpNode, X86FoldableSchedWrite sched> {
3165  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3166                               sched>, VEX, TAPD, VEX_W;
3167  let Predicates = [HasDQI] in
3168  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3169                               sched>, VEX, TAPD;
3170  let Predicates = [HasBWI] in {
3171  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3172                               sched>, VEX, TAPD, VEX_W;
3173  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3174                               sched>, VEX, TAPD;
3175  }
3176}
3177
// Left shifts use opcodes 0x32 (b/w) and 0x33 (d/q); right shifts use 0x30
// (b/w) and 0x31 (d/q).
3178defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3179defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3180
3181// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Both `Narrow` operands are inserted into undefined `Wide` registers at
// Narrow.SubRegIdx, compared with the 512-bit instruction InstStr#"Zrri", and
// the resulting mask is copied to Narrow.KRC.  The condition code captured as
// $cc is converted with X86pcmpm_imm.
// The second pattern folds an `and` with a mask into the masked form
// InstStr#"Zrrik"; it matches through Frag_su, and the narrow mask is copied
// to Wide.KRC first.
3182multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3183                                                 string InstStr,
3184                                                 X86VectorVTInfo Narrow,
3185                                                 X86VectorVTInfo Wide> {
3186def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3187                                (Narrow.VT Narrow.RC:$src2), cond)),
3188          (COPY_TO_REGCLASS
3189           (!cast<Instruction>(InstStr#"Zrri")
3190            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3191            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3192            (X86pcmpm_imm $cc)), Narrow.KRC)>;
3193
3194def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3195                           (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3196                                                    (Narrow.VT Narrow.RC:$src2),
3197                                                    cond)))),
3198          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3199           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3200           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3201           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3202           (X86pcmpm_imm $cc)), Narrow.KRC)>;
3203}
3204
// Broadcast-load counterpart of the no-VLX integer compare lowering: the
// register operand is widened with INSERT_SUBREG into an undefined `Wide`
// register and the broadcast operand is folded as the memory operand of the
// 512-bit InstStr#"Zrmib" (unmasked) or InstStr#"Zrmibk" (masked) form.
// When the broadcast load is the FIRST compare operand, the operands are
// swapped and the condition code is converted with X86pcmpm_imm_commute
// instead of X86pcmpm_imm.
3205multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3206                                                     string InstStr,
3207                                                     X86VectorVTInfo Narrow,
3208                                                     X86VectorVTInfo Wide> {
3209// Broadcast load.
3210def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3211                                (Narrow.BroadcastLdFrag addr:$src2), cond)),
3212          (COPY_TO_REGCLASS
3213           (!cast<Instruction>(InstStr#"Zrmib")
3214            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3215            addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3216
3217def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3218                           (Narrow.KVT
3219                            (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3220                                         (Narrow.BroadcastLdFrag addr:$src2),
3221                                         cond)))),
3222          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3223           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3224           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3225           addr:$src2, (X86pcmpm_imm $cc)), Narrow.KRC)>;
3226
3227// Commuted with broadcast load.
3228def : Pat<(Narrow.KVT (Frag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3229                                (Narrow.VT Narrow.RC:$src1),
3230                                cond)),
3231          (COPY_TO_REGCLASS
3232           (!cast<Instruction>(InstStr#"Zrmib")
3233            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3234            addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3235
3236def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3237                           (Narrow.KVT
3238                            (Frag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3239                                         (Narrow.VT Narrow.RC:$src1),
3240                                         cond)))),
3241          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3242           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3243           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3244           addr:$src2, (X86pcmpm_imm_commute $cc)), Narrow.KRC)>;
3245}
3246
3247// Same as above, but for fp types which don't use PatFrags.
// Matches X86cmpm (and X86cmpm_su under an `and` with a mask) directly, with
// the condition code as a target immediate.  Six patterns are produced:
// register/register, broadcast load as second operand, and broadcast load as
// first operand, each in unmasked and masked form.  Note the fp instruction
// suffixes are "Zrmbi"/"Zrmbik" (the integer multiclass above uses
// "Zrmib"/"Zrmibk").  The commuted forms rewrite the immediate with
// X86cmpm_imm_commute.
3248multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3249                                                X86VectorVTInfo Narrow,
3250                                                X86VectorVTInfo Wide> {
3251def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3252                               (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3253          (COPY_TO_REGCLASS
3254           (!cast<Instruction>(InstStr#"Zrri")
3255            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3256            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3257            timm:$cc), Narrow.KRC)>;
3258
3259def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3260                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3261                                       (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3262          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3263           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3264           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3265           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3266           timm:$cc), Narrow.KRC)>;
3267
3268// Broadcast load.
3269def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3270                               (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3271          (COPY_TO_REGCLASS
3272           (!cast<Instruction>(InstStr#"Zrmbi")
3273            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3274            addr:$src2, timm:$cc), Narrow.KRC)>;
3275
3276def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3277                           (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3278                                       (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3279          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3280           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3281           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3282           addr:$src2, timm:$cc), Narrow.KRC)>;
3283
3284// Commuted with broadcast load.
3285def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3286                               (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3287          (COPY_TO_REGCLASS
3288           (!cast<Instruction>(InstStr#"Zrmbi")
3289            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3290            addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3291
3292def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3293                           (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3294                                       (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3295          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3296           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3297           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3298           addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
3299}
3300
// Applies the widening compare patterns when AVX512 is available without VLX:
// signed (X86pcmpm) and unsigned (X86pcmpum) dword/qword integer compares for
// 256- and 128-bit vectors, their broadcast-load variants, and the PS/PD
// floating-point compares.
3301let Predicates = [HasAVX512, NoVLX] in {
3302  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3303  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3304
3305  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3306  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3307
3308  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3309  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3310
3311  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3312  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3313
3314  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3315  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3316
3317  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3318  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3319
3320  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3321  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3322
3323  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3324  defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3325
3326  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3327  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3328  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3329  defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
3330}
3331
// Byte and word integer compares for 256- and 128-bit vectors when BWI is
// available without VLX.  Only the register/register patterns are
// instantiated here; there are no broadcast-load variants for these types.
3332let Predicates = [HasBWI, NoVLX] in {
3333  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3334  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3335
3336  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3337  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3338
3339  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3340  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3341
3342  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3343  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3344}
3345
3346// Mask setting all 0s or 1s
// Defines a pseudo instruction (opcode 0, empty asm string, no inputs) that
// produces the constant `Val` of type VT in register class KRC.  It is
// marked rematerializable and as cheap as a move, and requires AVX512.
3347multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, SDPatternOperator Val> {
3348  let Predicates = [HasAVX512] in
3349    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3350        SchedRW = [WriteZero] in
3351      def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3352                     [(set KRC:$dst, (VT Val))]>;
3353}
3354
// Instantiates the constant-mask pseudo for the word, dword and qword mask
// widths.  There is no byte-sized variant.
3355multiclass avx512_mask_setop_w<SDPatternOperator Val> {
3356  defm W : avx512_mask_setop<VK16, v16i1, Val>;
3357  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
3358  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3359}
3360
// KSET0{W,D,Q} materialize an all-zeros mask, KSET1{W,D,Q} an all-ones mask.
3361defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3362defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3363
3364// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros / all-ones constants of the v8i1, v4i1, v2i1 and v1i1 types are
// produced by the 16-bit KSET0W / KSET1W pseudos and then copied to the
// narrow mask register class.
3365let Predicates = [HasAVX512] in {
3366  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3367  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3368  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3369  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3370  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3371  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3372  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3373  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3374}
3375
3376// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// For a narrower mask (subRC/subVT) and a wider mask (RC/VT):
//  - extracting the sub-vector at index 0 is a copy from RC to subRC,
//  - inserting the sub-vector at index 0 of an undef vector is a copy from
//    subRC to RC.
3377multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3378                                             RegisterClass RC, ValueType VT> {
3379  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3380            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3381
3382  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3383            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3384}
// Instantiate the index-0 insert/extract patterns for every (narrow, wide)
// pair of mask types with narrow < wide, grouped by the narrow type.
3385defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3386defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3387defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3388defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3389defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3390defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3391
3392defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3393defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3394defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3395defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3396defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3397
3398defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3399defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3400defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3401defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3402
3403defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3404defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3405defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3406
3407defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3408defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3409
3410defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3411
3412//===----------------------------------------------------------------------===//
3413// AVX-512 - Aligned and unaligned load and store
3414//
3415
// Defines the register-source and memory-source forms of a packed move for
// one vector type `_`:
//   rr    - plain register move (empty pattern list).
//   rrkz  - masked register move selecting between $src and all-zeros.
//   rm    - load from memory; the pattern is dropped when NoRMPattern is set.
//   rrk   - masked register move selecting between $src1 and $src0, with
//           $src0 tied to $dst.
//   rmk   - masked load selecting between the loaded value and $src0, with
//           $src0 tied to $dst.
//   rmkz  - masked load selecting between the loaded value and all-zeros.
// SelectOprr is the select operator used by the rrk/rrkz patterns only; the
// memory forms always use vselect_mask. The trailing Pats map the masked-load
// fragment `mload` onto rmkz (undef or all-zeros passthru) and onto rmk
// (register passthru). `Name` is the base name used to look up the generated
// instruction records via !cast.
3416multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3417                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3418                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3419                       bit NoRMPattern = 0,
3420                       SDPatternOperator SelectOprr = vselect> {
3421  let hasSideEffects = 0 in {
3422  let isMoveReg = 1 in
3423  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3424                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3425                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3426                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3427  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3428                      (ins _.KRCWM:$mask,  _.RC:$src),
3429                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3430                       "${dst} {${mask}} {z}, $src}"),
3431                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3432                                           (_.VT _.RC:$src),
3433                                           _.ImmAllZerosV)))], _.ExeDomain>,
3434                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3435
3436  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3437  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3438                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3439                    !if(NoRMPattern, [],
3440                        [(set _.RC:$dst,
3441                          (_.VT (ld_frag addr:$src)))]),
3442                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3443                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
3444
  // The forms below keep the unselected elements of $src0, so $src0 is tied
  // to the destination register.
3445  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3446    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3447                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3448                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3449                      "${dst} {${mask}}, $src1}"),
3450                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3451                                          (_.VT _.RC:$src1),
3452                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
3453                       EVEX, EVEX_K, Sched<[Sched.RR]>;
3454    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3455                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3456                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3457                      "${dst} {${mask}}, $src1}"),
3458                     [(set _.RC:$dst, (_.VT
3459                         (vselect_mask _.KRCWM:$mask,
3460                          (_.VT (ld_frag addr:$src1)),
3461                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3462                     EVEX, EVEX_K, Sched<[Sched.RM]>;
3463  }
3464  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3465                  (ins _.KRCWM:$mask, _.MemOp:$src),
3466                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3467                                "${dst} {${mask}} {z}, $src}",
3468                  [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3469                    (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3470                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
3471  }
  // Masked load with an undef passthru: select the rmkz form.
3472  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3473            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3474
  // Masked load with an all-zeros passthru: select the rmkz form.
3475  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3476            (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3477
  // Masked load with a register passthru: select the rmk form.
3478  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3479            (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3480             _.KRCWM:$mask, addr:$ptr)>;
3481}
3482
// Instantiates avx512_load for the info512, info256 and info128 members of
// `_`, using each info's AlignedLdFrag as the load fragment and
// masked_load_aligned as the masked-load fragment. The Z variant is guarded
// by `prd` alone; Z256 and Z128 additionally require HasVLX. The Z variant
// passes an empty EVEX2VEXOvrd string, Z256 passes EVEX2VEXOvrd with "Y"
// appended, and Z128 passes EVEX2VEXOvrd unchanged.
3483multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3484                                 AVX512VLVectorVTInfo _, Predicate prd,
3485                                 X86SchedWriteMoveLSWidths Sched,
3486                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3487  let Predicates = [prd] in
3488  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3489                       _.info512.AlignedLdFrag, masked_load_aligned,
3490                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3491
3492  let Predicates = [prd, HasVLX] in {
3493  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3494                          _.info256.AlignedLdFrag, masked_load_aligned,
3495                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3496  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3497                          _.info128.AlignedLdFrag, masked_load_aligned,
3498                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
3499  }
3500}
3501
// Counterpart of avx512_alignedload_vl that uses each info's LdFrag and the
// masked_load fragment instead of the aligned ones. It also forwards
// SelectOprr to avx512_load, so callers can replace the select operator used
// by the rrk/rrkz register-move patterns (the instantiations in this file
// pass null_frag for some instructions). Predicate guarding and
// EVEX2VEXOvrd handling are the same: Z under `prd` with an empty override
// string, Z256/Z128 under `prd` plus HasVLX.
3502multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3503                          AVX512VLVectorVTInfo _, Predicate prd,
3504                          X86SchedWriteMoveLSWidths Sched,
3505                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
3506                          SDPatternOperator SelectOprr = vselect> {
3507  let Predicates = [prd] in
3508  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3509                       masked_load, Sched.ZMM, "",
3510                       NoRMPattern, SelectOprr>, EVEX_V512;
3511
3512  let Predicates = [prd, HasVLX] in {
3513  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3514                         masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3515                         NoRMPattern, SelectOprr>, EVEX_V256;
3516  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3517                         masked_load, Sched.XMM, EVEX2VEXOvrd,
3518                         NoRMPattern, SelectOprr>, EVEX_V128;
3519  }
3520}
3521
// Defines the store-opcode forms of a packed move for one vector type `_`:
//   rr_REV / rrk_REV / rrkz_REV
//          - register-to-register forms encoded as MRMDestReg. They are
//            isCodeGenOnly with ForceDisassemble set, carry no patterns, and
//            each names the matching rr/rrk/rrkz record through FoldGenData.
//   mr     - store to memory; the pattern is dropped when NoMRPattern is set.
//   mrk    - masked store; it has no inline pattern and is marked
//            NotMemoryFoldable. The Pat below maps `mstore` onto it.
// The InstAliases at the end give the three _REV forms a ".s"-suffixed
// mnemonic. `BaseName` is the base name used to look up generated records.
3522multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3523                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3524                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3525                        bit NoMRPattern = 0> {
3526  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3527  let isMoveReg = 1 in
3528  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3529                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
3530                         [], _.ExeDomain>, EVEX,
3531                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3532                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3533  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3534                         (ins _.KRCWM:$mask, _.RC:$src),
3535                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3536                         "${dst} {${mask}}, $src}",
3537                         [], _.ExeDomain>,  EVEX, EVEX_K,
3538                         FoldGenData<BaseName#_.ZSuffix#rrk>,
3539                         Sched<[Sched.RR]>;
3540  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
3541                          (ins _.KRCWM:$mask, _.RC:$src),
3542                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3543                          "${dst} {${mask}} {z}, $src}",
3544                          [], _.ExeDomain>, EVEX, EVEX_KZ,
3545                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
3546                          Sched<[Sched.RR]>;
3547  }
3548
  // This `let` has no braces, so it applies to the `mr` def only; `mrk`
  // below is outside its scope.
3549  let hasSideEffects = 0, mayStore = 1 in
3550  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3551                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3552                    !if(NoMRPattern, [],
3553                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3554                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3555                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3556  def mrk : AVX512PI<opc, MRMDestMem, (outs),
3557                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3558              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3559               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
3560               NotMemoryFoldable;
3561
  // Masked store: select the mrk form.
3562  def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3563           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3564                                                        _.KRCWM:$mask, _.RC:$src)>;
3565
  // ".s" mnemonic aliases for the three _REV register forms.
3566  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3567                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3568                   _.RC:$dst, _.RC:$src), 0>;
3569  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3570                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3571                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3572  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3573                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3574                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3575}
3576
// Instantiates avx512_store for the info512, info256 and info128 members of
// `_`, using the `store` fragment for the plain store and `masked_store` for
// the masked store. The Z variant is guarded by `prd` alone and passes an
// empty EVEX2VEXOvrd string; Z256 (override string with "Y" appended) and
// Z128 (override string unchanged) additionally require HasVLX.
3577multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3578                            AVX512VLVectorVTInfo _, Predicate prd,
3579                            X86SchedWriteMoveLSWidths Sched,
3580                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3581  let Predicates = [prd] in
3582  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3583                        masked_store, Sched.ZMM, "",
3584                        NoMRPattern>, EVEX_V512;
3585  let Predicates = [prd, HasVLX] in {
3586    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3587                             masked_store, Sched.YMM,
3588                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3589    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3590                             masked_store, Sched.XMM, EVEX2VEXOvrd,
3591                             NoMRPattern>, EVEX_V128;
3592  }
3593}
3594
// Counterpart of avx512_store_vl that uses the `alignedstore` fragment for
// the plain store and `masked_store_aligned` for the masked store. Predicate
// guarding and EVEX2VEXOvrd handling are the same: Z under `prd` with an
// empty override string, Z256/Z128 under `prd` plus HasVLX.
3595multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3596                                  AVX512VLVectorVTInfo _, Predicate prd,
3597                                  X86SchedWriteMoveLSWidths Sched,
3598                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3599  let Predicates = [prd] in
3600  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3601                        masked_store_aligned, Sched.ZMM, "",
3602                        NoMRPattern>, EVEX_V512;
3603
3604  let Predicates = [prd, HasVLX] in {
3605    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3606                             masked_store_aligned, Sched.YMM,
3607                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3608    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3609                             masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3610                             NoMRPattern>, EVEX_V128;
3611  }
3612}
3613
// Packed move instantiations. Each defm combines a load multiclass (first
// opcode) with a store multiclass (second opcode).
//
// Reading the trailing positional arguments:
//   * loads:  ..., EVEX2VEXOvrd, NoRMPattern [, SelectOprr]
//   * stores: ..., EVEX2VEXOvrd, NoMRPattern
// A NoRMPattern/NoMRPattern of 1 suppresses the unmasked rm/mr pattern for
// that instruction. A SelectOprr of null_frag replaces the default `vselect`
// in the rrk/rrkz register-move patterns.

// Aligned FP moves.
3614defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3615                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3616               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3617                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3618               PS, EVEX_CD8<32, CD8VF>;
3619
3620defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3621                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3622               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3623                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3624               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3625
// Unaligned FP moves; the register-move select operator is null_frag.
3626defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3627                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3628               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3629                               SchedWriteFMoveLS, "VMOVUPS">,
3630                               PS, EVEX_CD8<32, CD8VF>;
3631
3632defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3633                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3634               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3635                               SchedWriteFMoveLS, "VMOVUPD">,
3636               PD, VEX_W, EVEX_CD8<64, CD8VF>;
3637
// Aligned integer moves. VMOVDQA32 suppresses its unmasked load/store
// patterns; VMOVDQA64 keeps them.
3638defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3639                                       HasAVX512, SchedWriteVecMoveLS,
3640                                       "VMOVDQA", 1>,
3641                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3642                                        HasAVX512, SchedWriteVecMoveLS,
3643                                        "VMOVDQA", 1>,
3644                 PD, EVEX_CD8<32, CD8VF>;
3645
3646defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3647                                       HasAVX512, SchedWriteVecMoveLS,
3648                                       "VMOVDQA">,
3649                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3650                                        HasAVX512, SchedWriteVecMoveLS,
3651                                        "VMOVDQA">,
3652                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3653
// Unaligned integer moves. The 8- and 16-bit element forms are predicated on
// HasBWI. All but VMOVDQU64 suppress their unmasked load/store patterns.
3654defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3655                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
3656                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3657                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3658                XD, EVEX_CD8<8, CD8VF>;
3659
3660defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3661                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3662                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3663                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3664                 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3665
3666defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3667                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3668                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3669                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3670                 XS, EVEX_CD8<32, CD8VF>;
3671
3672defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3673                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3674                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3675                                 SchedWriteVecMoveLS, "VMOVDQU">,
3676                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3677
3678// Special instructions to help with spilling when we don't have VLX. We need
3679// to load or store from a ZMM register instead. These are converted in
3680// expandPostRAPseudos.
//
// Load pseudos: aligned (VMOVAPS) and unaligned (VMOVUPS) variants for the
// VR128X and VR256X register classes. They have no assembly string and no
// selection pattern.
3681let isReMaterializable = 1, canFoldAsLoad = 1,
3682    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3683def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3684                            "", []>, Sched<[WriteFLoadX]>;
3685def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3686                            "", []>, Sched<[WriteFLoadY]>;
3687def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3688                            "", []>, Sched<[WriteFLoadX]>;
3689def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3690                            "", []>, Sched<[WriteFLoadY]>;
3691}
3692
// Store pseudos matching the *rm_NOVLX load pseudos: aligned (VMOVAPS) and
// unaligned (VMOVUPS) variants for the VR128X and VR256X register classes.
// They have no assembly string and no selection pattern.
3693let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3694def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3695                            "", []>, Sched<[WriteFStoreX]>;
3696def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3697                            "", []>, Sched<[WriteFStoreY]>;
3698def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3699                            "", []>, Sched<[WriteFStoreX]>;
3700def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3701                            "", []>, Sched<[WriteFStoreY]>;
3702}
3703
// Selects whose *true* operand is all-zeros. The rrkz instruction forms put
// zeros in the false position, so the mask is inverted with KNOTWrr first.
// For v8i64 the VK8 mask is copied to VK16 before KNOTWrr and the result is
// copied back to VK8 (presumably because KNOTWrr operates on VK16 - confirm
// against its definition).
3704def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3705                          (v8i64 VR512:$src))),
3706   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3707                                              VK8), VR512:$src)>;
3708
3709def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3710                           (v16i32 VR512:$src))),
3711                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3712
3713// These patterns exist to prevent the above patterns from introducing a second
3714// mask inversion when one already exists.
3715def : Pat<(v8i64 (vselect (v8i1 (vnot VK8:$mask)),
3716                          (v8i64 immAllZerosV),
3717                          (v8i64 VR512:$src))),
3718                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
// NOTE(review): the source pattern below binds $mask as VK16 while the result
// spells it VK16WM; the v8i64 pattern above uses VK8 in both places. Confirm
// whether the difference is intentional.
3719def : Pat<(v16i32 (vselect (v16i1 (vnot VK16:$mask)),
3720                           (v16i32 immAllZerosV),
3721                           (v16i32 VR512:$src))),
3722                  (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3723
// Lowers a masked select on a Narrow vector type by performing the masked
// move on the Wide type and extracting the narrow result:
//   1. each Narrow register operand is placed into an IMPLICIT_DEF Wide
//      register with INSERT_SUBREG at Narrow.SubRegIdx,
//   2. the mask is copied from Narrow.KRCWM to Wide.KRCWM,
//   3. the Wide instruction InstrStr#"rrk" (register false operand) or
//      InstrStr#"rrkz" (all-zeros false operand) is used,
//   4. the result is taken back out with EXTRACT_SUBREG at Narrow.SubRegIdx.
// InstrStr is the name of the Wide instruction without the rrk/rrkz suffix.
3724multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3725                              X86VectorVTInfo Wide> {
 // Select between two registers.
3726 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3727                               Narrow.RC:$src1, Narrow.RC:$src0)),
3728           (EXTRACT_SUBREG
3729            (Wide.VT
3730             (!cast<Instruction>(InstrStr#"rrk")
3731              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3732              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3733              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3734            Narrow.SubRegIdx)>;
3735
 // Select between a register and all-zeros.
3736 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3737                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3738           (EXTRACT_SUBREG
3739            (Wide.VT
3740             (!cast<Instruction>(InstrStr#"rrkz")
3741              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3742              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3743            Narrow.SubRegIdx)>;
3744}
3745
3746// Patterns for handling masked selects of 128-bit and 256-bit vectors when
3747// VLX isn't available. Use a 512-bit operation and extract.
3748let Predicates = [HasAVX512, NoVLX] in {
  // 32-bit elements.
3749  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3750  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3751  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3752  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3753
  // 64-bit elements.
3754  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3755  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3756  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3757  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3758}
3759
// The 8- and 16-bit element variants use VMOVDQU8Z/VMOVDQU16Z and are
// predicated on HasBWI.
3760let Predicates = [HasBWI, NoVLX] in {
3761  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3762  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3763
3764  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3765  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
3766}
3767
// Unmasked loads and stores of v16i32, v32i16 and v64i8 are all selected to
// the VMOVDQA64Z (aligned) or VMOVDQU64Z (unaligned) instructions. The
// VMOVDQA32, VMOVDQU8, VMOVDQU16 and VMOVDQU32 instantiations in this file
// pass NoRMPattern/NoMRPattern = 1, so they carry no unmasked load/store
// patterns of their own.
3767let Predicates = [HasAVX512] in {
3768  // 512-bit load.
3769  def : Pat<(alignedloadv16i32 addr:$src),
3770            (VMOVDQA64Zrm addr:$src)>;
3771  def : Pat<(alignedloadv32i16 addr:$src),
3772            (VMOVDQA64Zrm addr:$src)>;
3773  def : Pat<(alignedloadv64i8 addr:$src),
3774            (VMOVDQA64Zrm addr:$src)>;
3775  def : Pat<(loadv16i32 addr:$src),
3776            (VMOVDQU64Zrm addr:$src)>;
3777  def : Pat<(loadv32i16 addr:$src),
3778            (VMOVDQU64Zrm addr:$src)>;
3779  def : Pat<(loadv64i8 addr:$src),
3780            (VMOVDQU64Zrm addr:$src)>;
3781
3782  // 512-bit store.
3783  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3784            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3785  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3786            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3787  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3788            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3789  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3790            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3791  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3792            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3793  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3794            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3795}
3797
// 128-bit and 256-bit counterparts of the 512-bit integer load/store
// patterns: unmasked loads and stores of the 32-, 16- and 8-bit element
// integer vector types are selected to the VMOVDQA64Z128/Z256 (aligned) or
// VMOVDQU64Z128/Z256 (unaligned) instructions.
3797let Predicates = [HasVLX] in {
3798  // 128-bit load.
3799  def : Pat<(alignedloadv4i32 addr:$src),
3800            (VMOVDQA64Z128rm addr:$src)>;
3801  def : Pat<(alignedloadv8i16 addr:$src),
3802            (VMOVDQA64Z128rm addr:$src)>;
3803  def : Pat<(alignedloadv16i8 addr:$src),
3804            (VMOVDQA64Z128rm addr:$src)>;
3805  def : Pat<(loadv4i32 addr:$src),
3806            (VMOVDQU64Z128rm addr:$src)>;
3807  def : Pat<(loadv8i16 addr:$src),
3808            (VMOVDQU64Z128rm addr:$src)>;
3809  def : Pat<(loadv16i8 addr:$src),
3810            (VMOVDQU64Z128rm addr:$src)>;
3811
3812  // 128-bit store.
3813  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3814            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3815  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3816            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3817  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3818            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3819  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3820            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3821  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3822            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3823  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3824            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3825
3826  // 256-bit load.
3827  def : Pat<(alignedloadv8i32 addr:$src),
3828            (VMOVDQA64Z256rm addr:$src)>;
3829  def : Pat<(alignedloadv16i16 addr:$src),
3830            (VMOVDQA64Z256rm addr:$src)>;
3831  def : Pat<(alignedloadv32i8 addr:$src),
3832            (VMOVDQA64Z256rm addr:$src)>;
3833  def : Pat<(loadv8i32 addr:$src),
3834            (VMOVDQU64Z256rm addr:$src)>;
3835  def : Pat<(loadv16i16 addr:$src),
3836            (VMOVDQU64Z256rm addr:$src)>;
3837  def : Pat<(loadv32i8 addr:$src),
3838            (VMOVDQU64Z256rm addr:$src)>;
3839
3840  // 256-bit store.
3841  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3842            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3843  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3844            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3845  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3846            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3847  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3848            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3849  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3850            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3851  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3852            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3853}
// v32f16 move, load and store patterns. Unmasked loads/stores are selected to
// VMOVAPSZ (aligned) or VMOVUPSZ (unaligned). Every masked form - register
// select, select-of-load, masked_load and masked_store - is selected to
// VMOVDQU16Z, including the select-of-aligned-load patterns.
3855let Predicates = [HasFP16] in {
  // Masked register moves.
3856  def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))),
3857            (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
3858  def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
3859            (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
  // Aligned loads, unmasked and under a select.
3860  def : Pat<(v32f16 (alignedloadv32f16 addr:$src)),
3861            (VMOVAPSZrm addr:$src)>;
3862  def : Pat<(v32f16 (vselect VK32WM:$mask,
3863                     (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
3864            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3865  def : Pat<(v32f16 (vselect VK32WM:$mask,
3866                     (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
3867            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  // Unaligned loads, unmasked and under a select.
3868  def : Pat<(v32f16 (loadv32f16 addr:$src)),
3869            (VMOVUPSZrm addr:$src)>;
3870  def : Pat<(v32f16 (vselect VK32WM:$mask,
3871                     (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
3872            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3873  def : Pat<(v32f16 (vselect VK32WM:$mask,
3874                     (v32f16 (loadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
3875            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
  // masked_load with register, undef and all-zeros passthru.
3876  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, (v32f16 VR512:$src0))),
3877            (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
3878  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, undef)),
3879            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3880  def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
3881            (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
3882
  // Stores: aligned, unaligned and masked.
3883  def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
3884            (VMOVAPSZmr addr:$dst, VR512:$src)>;
3885  def : Pat<(store (v32f16 VR512:$src), addr:$dst),
3886            (VMOVUPSZmr addr:$dst, VR512:$src)>;
3887  def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
3888            (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
3889}
// v16f16 and v8f16 counterparts of the v32f16 patterns. Unmasked loads/stores
// are selected to VMOVAPSZ256/Z128 (aligned) or VMOVUPSZ256/Z128 (unaligned);
// every masked form is selected to VMOVDQU16Z256/Z128, including the
// select-of-aligned-load patterns.
3890let Predicates = [HasFP16, HasVLX] in {
  // ---- v16f16 (VR256X, VK16WM) ----
  // Masked register moves.
3891  def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))),
3892            (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
3893  def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
3894            (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
  // Aligned loads, unmasked and under a select.
3895  def : Pat<(v16f16 (alignedloadv16f16 addr:$src)),
3896            (VMOVAPSZ256rm addr:$src)>;
3897  def : Pat<(v16f16 (vselect VK16WM:$mask,
3898                     (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
3899            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3900  def : Pat<(v16f16 (vselect VK16WM:$mask,
3901                     (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
3902            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  // Unaligned loads, unmasked and under a select.
3903  def : Pat<(v16f16 (loadv16f16 addr:$src)),
3904            (VMOVUPSZ256rm addr:$src)>;
3905  def : Pat<(v16f16 (vselect VK16WM:$mask,
3906                     (v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
3907            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3908  def : Pat<(v16f16 (vselect VK16WM:$mask,
3909                     (v16f16 (loadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
3910            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
  // masked_load with register, undef and all-zeros passthru.
3911  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, (v16f16 VR256X:$src0))),
3912            (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
3913  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, undef)),
3914            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3915  def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
3916            (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
3917
  // Stores: aligned, unaligned and masked.
3918  def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
3919            (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
3920  def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
3921            (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
3922  def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
3923            (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
3924
  // ---- v8f16 (VR128X, VK8WM) ----
  // Masked register moves.
3925  def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))),
3926            (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
3927  def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
3928            (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
  // Aligned loads, unmasked and under a select.
3929  def : Pat<(v8f16 (alignedloadv8f16 addr:$src)),
3930            (VMOVAPSZ128rm addr:$src)>;
3931  def : Pat<(v8f16 (vselect VK8WM:$mask,
3932                     (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
3933            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3934  def : Pat<(v8f16 (vselect VK8WM:$mask,
3935                     (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
3936            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  // Unaligned loads, unmasked and under a select.
3937  def : Pat<(v8f16 (loadv8f16 addr:$src)),
3938            (VMOVUPSZ128rm addr:$src)>;
3939  def : Pat<(v8f16 (vselect VK8WM:$mask,
3940                     (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
3941            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3942  def : Pat<(v8f16 (vselect VK8WM:$mask,
3943                     (v8f16 (loadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
3944            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
  // masked_load with register, undef and all-zeros passthru.
3945  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, (v8f16 VR128X:$src0))),
3946            (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
3947  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, undef)),
3948            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3949  def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
3950            (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
3951
  // Stores: aligned, unaligned and masked.
3952  def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
3953            (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
3954  def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
3955            (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
3956  def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
3957            (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
3958}
3959
3960// Move Int Doubleword to Packed Double Int
3961//
// Moves of a 32-bit or 64-bit integer (from a GPR or from memory) into a
// vector or scalar-FP register, plus the FR64X-to-GR64 bitconvert move. All
// are tagged with the SSEPackedInt execution domain.
3962let ExeDomain = SSEPackedInt in {
// GR32 -> VR128X as scalar_to_vector (v4i32).
3963def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3964                      "vmovd\t{$src, $dst|$dst, $src}",
3965                      [(set VR128X:$dst,
3966                        (v4i32 (scalar_to_vector GR32:$src)))]>,
3967                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// i32 load -> VR128X as scalar_to_vector (v4i32).
3968def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3969                      "vmovd\t{$src, $dst|$dst, $src}",
3970                      [(set VR128X:$dst,
3971                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3972                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// GR64 -> VR128X as scalar_to_vector (v2i64).
3973def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3974                      "vmovq\t{$src, $dst|$dst, $src}",
3975                        [(set VR128X:$dst,
3976                          (v2i64 (scalar_to_vector GR64:$src)))]>,
3977                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory-source form of the 64-bit move. It has no selection pattern and is
// isCodeGenOnly with ForceDisassemble set.
3978let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3979def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3980                      (ins i64mem:$src),
3981                      "vmovq\t{$src, $dst|$dst, $src}", []>,
3982                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// Codegen-only bitconverts between GR64 and FR64X, in both directions.
3983let isCodeGenOnly = 1 in {
3984def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3985                       "vmovq\t{$src, $dst|$dst, $src}",
3986                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3987                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// NOTE(review): this def writes a GR64 but is scheduled as
// WriteVecMoveFromGpr, whereas VMOVPDI2DIZrr (xmm -> GR32) later in this file
// uses WriteVecMoveToGpr. Confirm which scheduling class is intended.
3988def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3989                         "vmovq\t{$src, $dst|$dst, $src}",
3990                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3991                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3992}
3993} // ExeDomain = SSEPackedInt
3994
3995// Move Int Doubleword to Single Scalar
3996//
// Codegen-only form: selected for a GR32 -> FR32X bitconvert and printed as
// "vmovd". Shares opcode 0x6E / MRMSrcReg with VMOVDI2PDIZrr.
3997let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3998def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3999                      "vmovd\t{$src, $dst|$dst, $src}",
4000                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
4001                      EVEX, Sched<[WriteVecMoveFromGpr]>;
4002} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4003
4004// Move doubleword from xmm register to r/m32
4005//
// Both forms extract element 0 of a v4i32 (opcode 0x7E): the rr form writes it
// to a GR32, the mr form stores it to a 32-bit memory location.
4006let ExeDomain = SSEPackedInt in {
4007def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
4008                       "vmovd\t{$src, $dst|$dst, $src}",
4009                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
4010                                        (iPTR 0)))]>,
4011                       EVEX, Sched<[WriteVecMoveToGpr]>;
4012def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
4013                       (ins i32mem:$dst, VR128X:$src),
4014                       "vmovd\t{$src, $dst|$dst, $src}",
4015                       [(store (i32 (extractelt (v4i32 VR128X:$src),
4016                                     (iPTR 0))), addr:$dst)]>,
4017                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
4018} // ExeDomain = SSEPackedInt
4019
4020// Move quadword from xmm1 register to r/m64
4021//
// Two encodings of the low-quadword move are defined here: opcode 0x7E
// (VMOVPQIto64Z*) and opcode 0xD6 (VMOVPQI2QIZ*).
4022let ExeDomain = SSEPackedInt in {
// Element 0 of a v2i64 -> GR64.
4023def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
4024                      "vmovq\t{$src, $dst|$dst, $src}",
4025                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
4026                                                   (iPTR 0)))]>,
4027                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
4028                      Requires<[HasAVX512]>;
4029
// Pattern-less 0x7E store form, kept for the disassembler (ForceDisassemble).
// EVEX_CD8<64, CD8VT1> gives it the same 64-bit compressed-displacement
// scaling that the other 64-bit memory forms in this group declare
// (VMOVPQI2QIZmr below, VMOV64toPQIZrm above).
4030let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
4031def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
4032                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
4033                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
4034                      Requires<[HasAVX512, In64BitMode]>;
4035
// 0xD6 store form: stores element 0 of a v2i64 to a 64-bit memory location.
4036def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
4037                      (ins i64mem:$dst, VR128X:$src),
4038                      "vmovq\t{$src, $dst|$dst, $src}",
4039                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
4040                              addr:$dst)]>,
4041                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
4042                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
4043
// Pattern-less register-to-register 0xD6 form (MRMDestReg); it is the target
// of the "vmovq.s" assembler alias.
4044let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
4045def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
4046                             (ins VR128X:$src),
4047                             "vmovq\t{$src, $dst|$dst, $src}", []>,
4048                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
4049} // ExeDomain = SSEPackedInt
4050
// Assembler-only spelling "vmovq.s" for the 0xD6 / MRMDestReg register form.
// The trailing 0 is the InstAlias emit flag; presumably it keeps the printer
// from using this spelling - confirm against InstAlias in Target.td.
4051def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
4052                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
4053
// Select an X86vextractstore64 of a v2i64 source to the 64-bit store form
// VMOVPQI2QIZmr when AVX512 is available.
4054let Predicates = [HasAVX512] in {
4055  def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
4056            (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
4057}
4058
4059// Move Scalar Single to Double Int
4060//
// Codegen-only form: selected for an FR32X -> GR32 bitconvert and printed as
// "vmovd". Shares opcode 0x7E / MRMDestReg with VMOVPDI2DIZrr.
4061let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
4062def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
4063                      (ins FR32X:$src),
4064                      "vmovd\t{$src, $dst|$dst, $src}",
4065                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
4066                      EVEX, Sched<[WriteVecMoveToGpr]>;
4067} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
4068
4069// Move Quadword Int to Packed Quadword Int
4070//
// Loads a 64-bit integer from memory into a v2i64 via scalar_to_vector, using
// the XS-prefixed 0x7E encoding.
// NOTE(review): EVEX_CD8<8, CD8VT8> presumably yields the same 8-byte disp8
// scale as the EVEX_CD8<64, CD8VT1> used by the other 64-bit memory forms in
// this file - confirm against the EVEX_CD8 class definition.
4071let ExeDomain = SSEPackedInt in {
4072def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
4073                      (ins i64mem:$src),
4074                      "vmovq\t{$src, $dst|$dst, $src}",
4075                      [(set VR128X:$dst,
4076                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
4077                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
4078} // ExeDomain = SSEPackedInt
4079
4080// Allow "vmovd" but print "vmovq".
// Covers both directions of the 64-bit GPR <-> XMM register move: the
// instructions themselves carry the "vmovq" mnemonic, and these aliases accept
// the "vmovd" spelling for the same operand shapes.
4081def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4082                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
4083def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
4084                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
4085
4086// Conversions between masks and scalar fp.
// Each pattern is a two-instruction sequence through a GPR: a KMOVD/KMOVQ
// paired with one of the codegen-only FR32X/FR64X <-> GR32/GR64 bitconvert
// moves defined earlier in this file.
// v32i1 <-> f32 (32 bits):
4087def : Pat<(v32i1 (bitconvert FR32X:$src)),
4088          (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
4089def : Pat<(f32 (bitconvert VK32:$src)),
4090          (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
4091
// v64i1 <-> f64 (64 bits):
4092def : Pat<(v64i1 (bitconvert FR64X:$src)),
4093          (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
4094def : Pat<(f64 (bitconvert VK64:$src)),
4095          (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
4096
4097//===----------------------------------------------------------------------===//
4098// AVX-512  MOVSH, MOVSS, MOVSD
4099//===----------------------------------------------------------------------===//
4100
// Defines the full family of scalar move instructions for one element type.
//
// Parameters:
//   asm         - mnemonic string prepended to every operand string.
//   OpNode      - SDNode matched by the register-register forms.
//   vzload_frag - PatFrag matched by the plain vector load form (rm).
//   _           - X86VectorVTInfo supplying RC, FRC, VT, KRCWM, ScalarMemOp,
//                 ScalarLdFrag, ImmAllZerosV and ExeDomain.
//   prd         - predicate list; note it is attached to the rr def only,
//                 because the `let Predicates = prd in` has no braces.
//
// Records produced: rr, rrkz, rrk (opcode 0x10, MRMSrcReg), rm, rm_alt, rmk,
// rmkz (opcode 0x10, MRMSrcMem), and mr, mrk (opcode 0x11, MRMDestMem).
4101multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
4102                              X86VectorVTInfo _,
4103                              list<Predicate> prd = [HasAVX512, OptForSize]> {
  // Unmasked register form: $dst = OpNode($src1, $src2).
4104  let Predicates = prd in
4105  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4106             (ins _.RC:$src1, _.RC:$src2),
4107             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4108             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
4109             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  // Zero-masked register form: X86selects between the OpNode result and an
  // all-zeros vector.
4110  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4111              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4112              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
4113              "$dst {${mask}} {z}, $src1, $src2}"),
4114              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4115                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4116                                      _.ImmAllZerosV)))],
4117              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masked register form: X86selects between the OpNode result and the
  // pass-through $src0, which is tied to $dst.
4118  let Constraints = "$src0 = $dst"  in
4119  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
4120             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
4121             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
4122             "$dst {${mask}}, $src1, $src2}"),
4123             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
4124                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4125                                     (_.VT _.RC:$src0))))],
4126             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  // Unmasked loads; both are marked foldable-as-load and rematerializable.
4127  let canFoldAsLoad = 1, isReMaterializable = 1 in {
  // Vector-register destination, matched through vzload_frag.
4128  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
4129             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4130             [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
4131             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4132  // _alt version uses FR32/FR64 register class.
  // It matches the plain scalar load fragment (_.ScalarLdFrag) and is
  // codegen-only.
4133  let isCodeGenOnly = 1 in
4134  def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
4135                 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4136                 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
4137                 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
4138  }
  // Masked loads carry no selection pattern here, so mayLoad/hasSideEffects
  // are stated explicitly.
4139  let mayLoad = 1, hasSideEffects = 0 in {
4140    let Constraints = "$src0 = $dst" in
4141    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4142               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
4143               !strconcat(asm, "\t{$src, $dst {${mask}}|",
4144               "$dst {${mask}}, $src}"),
4145               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
4146    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4147               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
4148               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
4149               "$dst {${mask}} {z}, $src}"),
4150               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
4151  }
  // Unmasked scalar store from the scalar register class (_.FRC).
4152  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4153             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4154             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
4155             EVEX, Sched<[WriteFStore]>;
  // Masked store: pattern-less, takes the vector register class and a VK1WM
  // mask, and is flagged NotMemoryFoldable.
4156  let mayStore = 1, hasSideEffects = 0 in
4157  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4158              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4159              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4160              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
4161              NotMemoryFoldable;
4162}
4163
// Instantiate avx512_move_scalar for f32 (vmovss), f64 (vmovsd) and f16
// (vmovsh). The f32 and f64 variants use the default predicate list
// [HasAVX512, OptForSize]; the f16 variant overrides it with [HasFP16].
4164defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4165                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4166
4167defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4168                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4169
4170defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
4171                                  [HasFP16]>,
4172                                  VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
4173
// Folds a scalar X86selects feeding a scalar move into the masked register
// forms of that move.
//
// Parameters:
//   InstrStr - instruction name prefix; "rrk" / "rrkz" are appended.
//   OpNode   - the scalar move node being matched.
//   ZeroFP   - PatLeaf matching the floating-point zero of the element type.
//   _        - X86VectorVTInfo for the vector type involved.
4174multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4175                                       PatLeaf ZeroFP, X86VectorVTInfo _> {
4176
// OpNode($src0, scalar_to_vector(select($mask, $src1, $src2))) -> rrk with
// $src2 as the tied pass-through operand, $src0 and $src1 as the two sources.
4177def : Pat<(_.VT (OpNode _.RC:$src0,
4178                        (_.VT (scalar_to_vector
4179                                  (_.EltVT (X86selects VK1WM:$mask,
4180                                                       (_.EltVT _.FRC:$src1),
4181                                                       (_.EltVT _.FRC:$src2))))))),
4182          (!cast<Instruction>(InstrStr#rrk)
4183                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4184                        VK1WM:$mask,
4185                        (_.VT _.RC:$src0),
4186                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4187
// Same shape with the false operand equal to ZeroFP -> zero-masked rrkz.
4188def : Pat<(_.VT (OpNode _.RC:$src0,
4189                        (_.VT (scalar_to_vector
4190                                  (_.EltVT (X86selects VK1WM:$mask,
4191                                                       (_.EltVT _.FRC:$src1),
4192                                                       (_.EltVT ZeroFP))))))),
4193          (!cast<Instruction>(InstrStr#rrkz)
4194                        VK1WM:$mask,
4195                        (_.VT _.RC:$src0),
4196                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
4197}
4198
// Selects a 512-bit masked_store whose data is a 128-bit value inserted at
// index 0 of an undef vector, and whose mask matches the dag `Mask`, to the
// scalar masked store InstrStr#mrk.
//
// Parameters:
//   InstrStr - instruction name prefix; "mrk" is appended.
//   _        - AVX512VLVectorVTInfo providing info512 / info128.
//   Mask     - mask dag to match; the instantiations bind $mask inside it.
//   MaskRC   - register class of $mask, copied directly into VK1WM.
4199multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4200                                        dag Mask, RegisterClass MaskRC> {
4201
4202def : Pat<(masked_store
4203             (_.info512.VT (insert_subvector undef,
4204                               (_.info128.VT _.info128.RC:$src),
4205                               (iPTR 0))), addr:$dst, Mask),
4206          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4207                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4208                      _.info128.RC:$src)>;
4209
4210}
4211
// Variant of avx512_store_scalar_lowering for a $mask narrower than 32 bits:
// $mask is first inserted into sub-register `subreg` of an IMPLICIT_DEF i32
// value, and that value is then copied into VK1WM.
//
// Parameters:
//   InstrStr - instruction name prefix; "mrk" is appended.
//   _        - AVX512VLVectorVTInfo providing info512 / info128.
//   Mask     - mask dag to match; the instantiations bind $mask inside it.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index that MaskRC occupies within the i32 value.
4212multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4213                                               AVX512VLVectorVTInfo _,
4214                                               dag Mask, RegisterClass MaskRC,
4215                                               SubRegIndex subreg> {
4216
4217def : Pat<(masked_store
4218             (_.info512.VT (insert_subvector undef,
4219                               (_.info128.VT _.info128.RC:$src),
4220                               (iPTR 0))), addr:$dst, Mask),
4221          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4222                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4223                      _.info128.RC:$src)>;
4224
4225}
4226
4227// This matches the more recent codegen from clang that avoids emitting a 512
4228// bit masked store directly. Codegen will widen 128-bit masked store to 512
4229// bits on AVX512F only targets.
//
// Two mask dags are taken: Mask512 for the widened 512-bit store and Mask128
// for the native 128-bit store. Both patterns select the same InstrStr#mrk
// instruction with the mask built via INSERT_SUBREG + COPY_TO_REGCLASS.
4230multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4231                                               AVX512VLVectorVTInfo _,
4232                                               dag Mask512, dag Mask128,
4233                                               RegisterClass MaskRC,
4234                                               SubRegIndex subreg> {
4235
4236// AVX512F pattern.
4237def : Pat<(masked_store
4238             (_.info512.VT (insert_subvector undef,
4239                               (_.info128.VT _.info128.RC:$src),
4240                               (iPTR 0))), addr:$dst, Mask512),
4241          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4242                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4243                      _.info128.RC:$src)>;
4244
4245// AVX512VL pattern.
4246def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4247          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4248                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4249                      _.info128.RC:$src)>;
4250}
4251
// Selects the low 128 bits (extract_subvector at index 0) of a 512-bit
// masked_load, whose mask matches the dag `Mask`, to a scalar masked load.
//
// Parameters:
//   InstrStr - instruction name prefix; "rmkz" / "rmk" are appended.
//   _        - AVX512VLVectorVTInfo providing info512 / info128.
//   Mask     - mask dag to match; the instantiations bind $mask inside it.
//   MaskRC   - register class of $mask, copied directly into VK1WM.
4252multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4253                                       dag Mask, RegisterClass MaskRC> {
4254
// All-zeros pass-through -> zero-masked load (rmkz).
4255def : Pat<(_.info128.VT (extract_subvector
4256                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4257                                        _.info512.ImmAllZerosV)),
4258                           (iPTR 0))),
4259          (!cast<Instruction>(InstrStr#rmkz)
4260                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4261                      addr:$srcAddr)>;
4262
// Pass-through is X86vzmovl($src) inserted at index 0 of an undef 512-bit
// vector -> merge-masked load (rmk) with $src as the pass-through operand.
4263def : Pat<(_.info128.VT (extract_subvector
4264                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4265                      (_.info512.VT (insert_subvector undef,
4266                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4267                            (iPTR 0))))),
4268                (iPTR 0))),
4269          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4270                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4271                      addr:$srcAddr)>;
4272
4273}
4274
// Variant of avx512_load_scalar_lowering for a $mask narrower than 32 bits:
// $mask is first inserted into sub-register `subreg` of an IMPLICIT_DEF i32
// value, and that value is then copied into VK1WM.
//
// Parameters:
//   InstrStr - instruction name prefix; "rmkz" / "rmk" are appended.
//   _        - AVX512VLVectorVTInfo providing info512 / info128.
//   Mask     - mask dag to match; the instantiations bind $mask inside it.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index that MaskRC occupies within the i32 value.
4275multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4276                                              AVX512VLVectorVTInfo _,
4277                                              dag Mask, RegisterClass MaskRC,
4278                                              SubRegIndex subreg> {
4279
// All-zeros pass-through -> zero-masked load (rmkz).
4280def : Pat<(_.info128.VT (extract_subvector
4281                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
4282                                        _.info512.ImmAllZerosV)),
4283                           (iPTR 0))),
4284          (!cast<Instruction>(InstrStr#rmkz)
4285                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4286                      addr:$srcAddr)>;
4287
// X86vzmovl($src) pass-through -> merge-masked load (rmk).
4288def : Pat<(_.info128.VT (extract_subvector
4289                (_.info512.VT (masked_load addr:$srcAddr, Mask,
4290                      (_.info512.VT (insert_subvector undef,
4291                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4292                            (iPTR 0))))),
4293                (iPTR 0))),
4294          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4295                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4296                      addr:$srcAddr)>;
4297
4298}
4299
4300// This matches the more recent codegen from clang that avoids emitting a 512
4301// bit masked load directly. Codegen will widen 128-bit masked load to 512
4302// bits on AVX512F only targets.
//
// Two mask dags are taken: Mask512 for the widened 512-bit load and Mask128
// for the native 128-bit load. Each width gets a zero-masked (rmkz) and a
// merge-masked (rmk) pattern; all four build the mask via INSERT_SUBREG +
// COPY_TO_REGCLASS.
4303multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4304                                              AVX512VLVectorVTInfo _,
4305                                              dag Mask512, dag Mask128,
4306                                              RegisterClass MaskRC,
4307                                              SubRegIndex subreg> {
4308// AVX512F patterns.
4309def : Pat<(_.info128.VT (extract_subvector
4310                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4311                                        _.info512.ImmAllZerosV)),
4312                           (iPTR 0))),
4313          (!cast<Instruction>(InstrStr#rmkz)
4314                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4315                      addr:$srcAddr)>;
4316
4317def : Pat<(_.info128.VT (extract_subvector
4318                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4319                      (_.info512.VT (insert_subvector undef,
4320                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4321                            (iPTR 0))))),
4322                (iPTR 0))),
4323          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4324                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4325                      addr:$srcAddr)>;
4326
4327// AVX512VL patterns.
4328def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4329                         _.info128.ImmAllZerosV)),
4330          (!cast<Instruction>(InstrStr#rmkz)
4331                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4332                      addr:$srcAddr)>;
4333
4334def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4335                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4336          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4337                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4338                      addr:$srcAddr)>;
4339}
4340
// Instantiate the select-into-masked-move lowering for f16, f32 and f64, each
// with its matching move node, zero leaf and 128-bit vector type info.
4341defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
4342defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4343defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4344
// Masked scalar store lowerings. In every instantiation the mask dag is a
// GPR ANDed with 1 and bitconverted to a mask vector type sized for the
// 512-bit form (v32i1 for f16, v16i1 for f32, v8i1 for f64).
4345defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4346                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4347defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4348                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4349defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4350                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4351defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4352                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4353defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4354                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4355
// subreg2 store lowerings: the mask always originates from a GR8 ANDed with 1
// and bitconverted to v8i1. The first dag argument is the 512-bit-store mask
// shape and the second is the 128-bit-store mask shape.
// f16: v8i1 inserted into an all-zeros v32i1 / the v8i1 itself.
4356defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4357                   (v32i1 (insert_subvector
4358                           (v32i1 immAllZerosV),
4359                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4360                           (iPTR 0))),
4361                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4362                   GR8, sub_8bit>;
// f32: low v4i1 of the v8i1 inserted into an all-zeros v16i1 / the v4i1.
4363defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4364                   (v16i1 (insert_subvector
4365                           (v16i1 immAllZerosV),
4366                           (v4i1 (extract_subvector
4367                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4368                                  (iPTR 0))),
4369                           (iPTR 0))),
4370                   (v4i1 (extract_subvector
4371                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4372                          (iPTR 0))), GR8, sub_8bit>;
// f64: low v2i1 of the v8i1 inserted into an all-zeros v16i1, of which the
// low v8i1 is then extracted / the v2i1.
4373defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4374                   (v8i1
4375                    (extract_subvector
4376                     (v16i1
4377                      (insert_subvector
4378                       (v16i1 immAllZerosV),
4379                       (v2i1 (extract_subvector
4380                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4381                              (iPTR 0))),
4382                       (iPTR 0))),
4383                     (iPTR 0))),
4384                   (v2i1 (extract_subvector
4385                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4386                          (iPTR 0))), GR8, sub_8bit>;
4387
// Masked scalar load lowerings; the mask dags and register classes mirror the
// store instantiations (GPR ANDed with 1, bitconverted to v32i1 / v16i1 /
// v8i1 for f16 / f32 / f64).
4388defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
4389                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
4390defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
4391                   (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
4392defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4393                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4394defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4395                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4396defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4397                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4398
// subreg2 load lowerings: the mask always originates from a GR8 ANDed with 1
// and bitconverted to v8i1. The first dag argument is the 512-bit-load mask
// shape and the second is the 128-bit-load mask shape.
// f16: v8i1 inserted into an all-zeros v32i1 / the v8i1 itself.
4399defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
4400                   (v32i1 (insert_subvector
4401                           (v32i1 immAllZerosV),
4402                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4403                           (iPTR 0))),
4404                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4405                   GR8, sub_8bit>;
// f32: low v4i1 of the v8i1 inserted into an all-zeros v16i1 / the v4i1.
4406defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4407                   (v16i1 (insert_subvector
4408                           (v16i1 immAllZerosV),
4409                           (v4i1 (extract_subvector
4410                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4411                                  (iPTR 0))),
4412                           (iPTR 0))),
4413                   (v4i1 (extract_subvector
4414                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4415                          (iPTR 0))), GR8, sub_8bit>;
// f64: low v2i1 of the v8i1 inserted into an all-zeros v16i1, of which the
// low v8i1 is then extracted / the v2i1.
4416defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4417                   (v8i1
4418                    (extract_subvector
4419                     (v16i1
4420                      (insert_subvector
4421                       (v16i1 immAllZerosV),
4422                       (v2i1 (extract_subvector
4423                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4424                              (iPTR 0))),
4425                       (iPTR 0))),
4426                     (iPTR 0))),
4427                   (v2i1 (extract_subvector
4428                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4429                          (iPTR 0))), GR8, sub_8bit>;
4430
// Scalar X86selects lowering. Register operands are copied into VR128X, the
// select is performed with a masked scalar move, and the result is copied
// back to the scalar register class. In the register forms the false value
// is the tied pass-through ($src0 slot) and the $src1 slot is IMPLICIT_DEF.

// f16: select between two registers -> merge-masked VMOVSHZrrk.
4431def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
4432          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
4433           (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
4434           VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4435           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4436
// f16: select against zero -> zero-masked VMOVSHZrrkz.
4437def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
4438          (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
4439           (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
4440
// f32: select between two registers -> merge-masked VMOVSSZrrk.
4441def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4442          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4443           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4444           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4445           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4446
// f32: select against zero -> zero-masked VMOVSSZrrkz.
4447def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4448          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4449           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4450
// f32: true value is a load -> masked load, merge (rmk) or zero (rmkz).
4451def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4452          (COPY_TO_REGCLASS
4453           (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4454                                                       VK1WM:$mask, addr:$src)),
4455           FR32X)>;
4456def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4457          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4458
// f64: select between two registers -> merge-masked VMOVSDZrrk.
4459def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4460          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4461           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4462           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4463           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4464
// f64: select against zero -> zero-masked VMOVSDZrrkz.
4465def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4466          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4467           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4468
// f64: true value is a load -> masked load, merge (rmk) or zero (rmkz).
4469def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4470          (COPY_TO_REGCLASS
4471           (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4472                                                       VK1WM:$mask, addr:$src)),
4473           FR64X)>;
4474def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4475          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
4476
4477
// X86selects on whole 128-bit vectors. No register-class copies are needed:
// $src2 is the tied pass-through of the merge-masked form, and $src1 is
// passed for both source operands of the move.
4478def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4479          (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4480def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4481          (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4482
// False value is all-zeros -> zero-masked form.
4483def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4484          (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4485def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4486          (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4487
// Alternate register-register encodings of vmovsh/vmovss/vmovsd on opcode
// 0x11 with the MRMDestReg form (the main rr/rrk/rrkz definitions use 0x10
// with MRMSrcReg). They carry no selection patterns, are codegen-only but
// force-disassembled, and FoldGenData names the corresponding 0x10
// instruction. Each type gets an unmasked, a merge-masked (k) and a
// zero-masked (kz) variant.
4488let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  // f16 variants additionally require HasFP16.
4489  let Predicates = [HasFP16] in {
4490    def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4491        (ins VR128X:$src1, VR128X:$src2),
4492        "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4493        []>, T_MAP5XS, EVEX_4V, VEX_LIG,
4494        FoldGenData<"VMOVSHZrr">,
4495        Sched<[SchedWriteFShuffle.XMM]>;
4496
4497    let Constraints = "$src0 = $dst" in
4498    def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4499        (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
4500         VR128X:$src1, VR128X:$src2),
4501        "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
4502          "$dst {${mask}}, $src1, $src2}",
4503        []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
4504        FoldGenData<"VMOVSHZrrk">,
4505        Sched<[SchedWriteFShuffle.XMM]>;
4506
4507    def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4508        (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4509        "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
4510          "$dst {${mask}} {z}, $src1, $src2}",
4511        []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
4512        FoldGenData<"VMOVSHZrrkz">,
4513        Sched<[SchedWriteFShuffle.XMM]>;
4514  }
  // f32 variants (XS prefix).
4515  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4516                           (ins VR128X:$src1, VR128X:$src2),
4517                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4518                           []>, XS, EVEX_4V, VEX_LIG,
4519                           FoldGenData<"VMOVSSZrr">,
4520                           Sched<[SchedWriteFShuffle.XMM]>;
4521
4522  let Constraints = "$src0 = $dst" in
4523  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4524                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4525                                                   VR128X:$src1, VR128X:$src2),
4526                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4527                                        "$dst {${mask}}, $src1, $src2}",
4528                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4529                             FoldGenData<"VMOVSSZrrk">,
4530                             Sched<[SchedWriteFShuffle.XMM]>;
4531
4532  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4533                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4534                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4535                                    "$dst {${mask}} {z}, $src1, $src2}",
4536                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4537                         FoldGenData<"VMOVSSZrrkz">,
4538                         Sched<[SchedWriteFShuffle.XMM]>;
4539
  // f64 variants (XD prefix, VEX_W).
4540  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4541                           (ins VR128X:$src1, VR128X:$src2),
4542                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4543                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4544                           FoldGenData<"VMOVSDZrr">,
4545                           Sched<[SchedWriteFShuffle.XMM]>;
4546
4547  let Constraints = "$src0 = $dst" in
4548  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4549                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4550                                                   VR128X:$src1, VR128X:$src2),
4551                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4552                                        "$dst {${mask}}, $src1, $src2}",
4553                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4554                             VEX_W, FoldGenData<"VMOVSDZrrk">,
4555                             Sched<[SchedWriteFShuffle.XMM]>;
4556
4557  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4558                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4559                                                          VR128X:$src2),
4560                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4561                                         "$dst {${mask}} {z}, $src1, $src2}",
4562                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4563                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
4564                              Sched<[SchedWriteFShuffle.XMM]>;
4565}
4566
// Assembler aliases mapping the ".s"-suffixed mnemonics (vmovsh.s, vmovss.s,
// vmovsd.s) onto the corresponding *_REV records. Each mnemonic gets three
// aliases: unmasked, masked ({mask}) and zero-masked ({mask} {z}).
// NOTE(review): the trailing 0 is presumably InstAlias's emit/print priority
// (0 = accept when parsing, never use when printing) — confirm against the
// InstAlias class definition.
4567def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4568                (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4569def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
4570                             "$dst {${mask}}, $src1, $src2}",
4571                (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
4572                                VR128X:$src1, VR128X:$src2), 0>;
4573def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4574                             "$dst {${mask}} {z}, $src1, $src2}",
4575                (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
4576                                 VR128X:$src1, VR128X:$src2), 0>;
// Single-precision (vmovss.s) aliases.
4577def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4578                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4579def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4580                             "$dst {${mask}}, $src1, $src2}",
4581                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4582                                VR128X:$src1, VR128X:$src2), 0>;
4583def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4584                             "$dst {${mask}} {z}, $src1, $src2}",
4585                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4586                                 VR128X:$src1, VR128X:$src2), 0>;
// Double-precision (vmovsd.s) aliases.
4587def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4588                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4589def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4590                             "$dst {${mask}}, $src1, $src2}",
4591                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4592                                VR128X:$src1, VR128X:$src2), 0>;
4593def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4594                             "$dst {${mask}} {z}, $src1, $src2}",
4595                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4596                                 VR128X:$src1, VR128X:$src2), 0>;
4597
// Under HasAVX512 + OptForSize, select X86vzmovl on 32-bit-element vectors as
// VMOVSSZrr whose first operand is AVX512_128_SET0. The 256- and 512-bit
// forms apply the same instruction to the sub_xmm subregister of the source
// and wrap the result back to the wide type with SUBREG_TO_REG.
4598let Predicates = [HasAVX512, OptForSize] in {
  // 128-bit float and integer forms.
4599  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4600            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4601  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4602            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4603
4604  // Move low f32 and clear high bits.
4605  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4606            (SUBREG_TO_REG (i32 0),
4607             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4608              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4609  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4610            (SUBREG_TO_REG (i32 0),
4611             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4612              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4613
  // 512-bit float and integer forms.
4614  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4615            (SUBREG_TO_REG (i32 0),
4616             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4617              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4618  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4619            (SUBREG_TO_REG (i32 0),
4620             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4621              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4622}
4623
4624// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4625// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Only the 512-bit X86vzmovl forms are given here; each blends V_SET0 with the
// sub_xmm subregister of the source and rewraps the result via SUBREG_TO_REG.
4626let Predicates = [HasAVX512, OptForSpeed] in {
  // Float form: VBLENDPSrri with immediate 1.
4627  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4628            (SUBREG_TO_REG (i32 0),
4629             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4630                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4631                          (i8 1))), sub_xmm)>;
  // Integer form: VPBLENDWrri with immediate 3.
  // NOTE(review): presumably 3 selects the two low 16-bit lanes, i.e. the low
  // 32-bit element — confirm against the VPBLENDW immediate semantics.
4632  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4633            (SUBREG_TO_REG (i32 0),
4634             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4635                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4636                          (i8 3))), sub_xmm)>;
4637}
4638
// Under HasAVX512, select scalar f32/f64 loads that populate a vector
// (scalar_to_vector of a load, or X86vzload32/X86vzload64) as VMOVSSZrm /
// VMOVSDZrm. Results wider than 128 bits are produced by wrapping the XMM
// result with SUBREG_TO_REG at sub_xmm.
4639let Predicates = [HasAVX512] in {
4640  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4641            (VMOVSSZrm addr:$src)>;
4642  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4643            (VMOVSDZrm addr:$src)>;
4644
4645  // Represent the same patterns above but in the form they appear for
4646  // 256-bit types
4647  def : Pat<(v8f32 (X86vzload32 addr:$src)),
4648            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4649  def : Pat<(v4f64 (X86vzload64 addr:$src)),
4650            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4651
4652  // Represent the same patterns above but in the form they appear for
4653  // 512-bit types
4654  def : Pat<(v16f32 (X86vzload32 addr:$src)),
4655            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4656  def : Pat<(v8f64 (X86vzload64 addr:$src)),
4657            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4658}
// Under HasFP16, select X86vzmovl on f16 vectors as VMOVSHZrr with an
// AVX512_128_SET0 first operand, and X86vzload16 as VMOVSHZrm. The 256- and
// 512-bit forms work on sub_xmm and rewrap the result with SUBREG_TO_REG.
4659let Predicates = [HasFP16] in {
4660  def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
4661            (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
4662
4663  // FIXME we need better canonicalization in dag combine
4664  def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
4665            (SUBREG_TO_REG (i32 0),
4666             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4667              (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
4668  def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
4669            (SUBREG_TO_REG (i32 0),
4670             (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
4671              (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
4672
  // Zero-extending 16-bit loads at 128, 256 and 512 bits.
4673  def : Pat<(v8f16 (X86vzload16 addr:$src)),
4674            (VMOVSHZrm addr:$src)>;
4675
4676  def : Pat<(v16f16 (X86vzload16 addr:$src)),
4677            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4678
4679  def : Pat<(v32f16 (X86vzload16 addr:$src)),
4680            (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
4681}
4682
// Register-to-register vmovq (opcode 0x7E, MRMSrcReg, EVEX with VEX_W) that
// directly selects X86vzmovl on v2i64. The surrounding let places it in the
// SSEPackedInt execution domain and schedules it as SchedWriteVecLogic.XMM.
4683let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4684def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4685                                (ins VR128X:$src),
4686                                "vmovq\t{$src, $dst|$dst, $src}",
4687                                [(set VR128X:$dst, (v2i64 (X86vzmovl
4688                                                   (v2i64 VR128X:$src))))]>,
4689                                EVEX, VEX_W;
4690}
4691
// Under HasAVX512, select zero-extending moves of a 32/64-bit scalar into a
// vector (from a GPR, from memory, or from the low element of another vector)
// as the EVEX movd/movq records. Types wider than 128 bits are handled by
// operating on sub_xmm and rewrapping with SUBREG_TO_REG.
4692let Predicates = [HasAVX512] in {
  // GPR source: scalar_to_vector followed by X86vzmovl.
4693  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4694            (VMOVDI2PDIZrr GR32:$src)>;
4695
4696  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4697            (VMOV64toPQIZrr GR64:$src)>;
4698
4699  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4700  def : Pat<(v4i32 (X86vzload32 addr:$src)),
4701            (VMOVDI2PDIZrm addr:$src)>;
4702  def : Pat<(v8i32 (X86vzload32 addr:$src)),
4703            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4704  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4705            (VMOVZPQILo2PQIZrr VR128X:$src)>;
4706  def : Pat<(v2i64 (X86vzload64 addr:$src)),
4707            (VMOVQI2PQIZrm addr:$src)>;
4708  def : Pat<(v4i64 (X86vzload64 addr:$src)),
4709            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4710
4711  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4712  def : Pat<(v16i32 (X86vzload32 addr:$src)),
4713            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4714  def : Pat<(v8i64 (X86vzload64 addr:$src)),
4715            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4716
  // 256-bit X86vzmovl on 64-bit elements via VMOVZPQILo2PQIZrr on sub_xmm.
4717  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4718            (SUBREG_TO_REG (i32 0),
4719             (v2f64 (VMOVZPQILo2PQIZrr
4720                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4721             sub_xmm)>;
4722  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4723            (SUBREG_TO_REG (i32 0),
4724             (v2i64 (VMOVZPQILo2PQIZrr
4725                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4726             sub_xmm)>;
4727
  // 512-bit X86vzmovl on 64-bit elements via VMOVZPQILo2PQIZrr on sub_xmm.
4728  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4729            (SUBREG_TO_REG (i32 0),
4730             (v2f64 (VMOVZPQILo2PQIZrr
4731                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4732             sub_xmm)>;
4733  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4734            (SUBREG_TO_REG (i32 0),
4735             (v2i64 (VMOVZPQILo2PQIZrr
4736                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4737             sub_xmm)>;
4738}
4739
4740//===----------------------------------------------------------------------===//
4741// AVX-512 - Non-temporals
4742//===----------------------------------------------------------------------===//
4743
// 512-bit vmovntdqa load (opcode 0x2A, MRMSrcMem, T8PD, EVEX_V512). The record
// itself has an empty pattern list; the alignednontemporalload patterns later
// in this file select it for each 512-bit vector type.
4744def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4745                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4746                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4747                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4748
// 256-bit and 128-bit vmovntdqa loads, available under HasVLX. They mirror the
// 512-bit record (opcode 0x2A, MRMSrcMem, T8PD) with the matching memory
// operand, register class, vector-length bit and scheduling class. Both have
// empty pattern lists.
4749let Predicates = [HasVLX] in {
4750  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4751                       (ins i256mem:$src),
4752                       "vmovntdqa\t{$src, $dst|$dst, $src}",
4753                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4754                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4755
4756  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4757                      (ins i128mem:$src),
4758                      "vmovntdqa\t{$src, $dst|$dst, $src}",
4759                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4760                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
4761}
4762
// Defines one non-temporal store record, `mr`, for the vector type described
// by `_`.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic; the operand string is appended to it.
//   _         - vector type info supplying MemOp, RC, VT, ExeDomain, EltSize.
//   Sched     - move/load/store scheduling info; its MR member is used.
//   st_frag   - store fragment to match (defaults to alignednontemporalstore).
// The record is given AddedComplexity = 400.
4763multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4764                        X86SchedWriteMoveLS Sched,
4765                        PatFrag st_frag = alignednontemporalstore> {
4766  let SchedRW = [Sched.MR], AddedComplexity = 400 in
4767  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4768                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4769                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
4770                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
4771}
4772
// Instantiates avx512_movnt at all three vector lengths: Z (512-bit) under
// HasAVX512, and Z256 / Z128 under HasAVX512 + HasVLX. Each instantiation
// takes the matching VTInfo member and Sched width and adds the
// corresponding EVEX_V* length marker.
4773multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4774                           AVX512VLVectorVTInfo VTInfo,
4775                           X86SchedWriteMoveLSWidths Sched> {
4776  let Predicates = [HasAVX512] in
4777    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4778
4779  let Predicates = [HasAVX512, HasVLX] in {
4780    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4781    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4782  }
4783}
4784
// Non-temporal store families built from avx512_movnt_vl:
//   VMOVNTDQ - opcode 0xE7, i64 element info, PD prefix.
//   VMOVNTPD - opcode 0x2B, f64 element info, PD prefix with VEX_W.
//   VMOVNTPS - opcode 0x2B, f32 element info, PS prefix.
4785defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4786                                SchedWriteVecMoveLSNT>, PD;
4787defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4788                                SchedWriteFMoveLSNT>, PD, VEX_W;
4789defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4790                                SchedWriteFMoveLSNT>, PS;
4791
// 512-bit non-temporal patterns (HasAVX512, AddedComplexity = 400).
// Stores of the v16i32, v32i16 and v64i8 types are routed to VMOVNTDQZmr.
// Aligned non-temporal loads of every 512-bit vector type listed below
// (floating-point and integer) are routed to VMOVNTDQAZrm.
4792let Predicates = [HasAVX512], AddedComplexity = 400 in {
4793  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4794            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4795  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4796            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4797  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4798            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4799
4800  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4801            (VMOVNTDQAZrm addr:$src)>;
4802  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4803            (VMOVNTDQAZrm addr:$src)>;
4804  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4805            (VMOVNTDQAZrm addr:$src)>;
4806  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4807            (VMOVNTDQAZrm addr:$src)>;
4808  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4809            (VMOVNTDQAZrm addr:$src)>;
4810  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4811            (VMOVNTDQAZrm addr:$src)>;
4812}
4813
// 256-bit and 128-bit non-temporal patterns (HasVLX, AddedComplexity = 400).
// They mirror the 512-bit set: stores of the 32/16/8-bit integer element
// types go to VMOVNTDQZ256mr / VMOVNTDQZ128mr, and aligned non-temporal loads
// of every listed type go to VMOVNTDQAZ256rm / VMOVNTDQAZ128rm.
4814let Predicates = [HasVLX], AddedComplexity = 400 in {
  // 256-bit stores.
4815  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4816            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4817  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4818            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4819  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4820            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4821
  // 256-bit loads.
4822  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4823            (VMOVNTDQAZ256rm addr:$src)>;
4824  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4825            (VMOVNTDQAZ256rm addr:$src)>;
4826  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4827            (VMOVNTDQAZ256rm addr:$src)>;
4828  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4829            (VMOVNTDQAZ256rm addr:$src)>;
4830  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4831            (VMOVNTDQAZ256rm addr:$src)>;
4832  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4833            (VMOVNTDQAZ256rm addr:$src)>;
4834
  // 128-bit stores.
4835  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4836            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4837  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4838            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4839  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4840            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4841
  // 128-bit loads.
4842  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4843            (VMOVNTDQAZ128rm addr:$src)>;
4844  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4845            (VMOVNTDQAZ128rm addr:$src)>;
4846  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4847            (VMOVNTDQAZ128rm addr:$src)>;
4848  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4849            (VMOVNTDQAZ128rm addr:$src)>;
4850  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4851            (VMOVNTDQAZ128rm addr:$src)>;
4852  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4853            (VMOVNTDQAZ128rm addr:$src)>;
4854}
4855
4856//===----------------------------------------------------------------------===//
4857// AVX-512 - Integer arithmetic
4858//
// Two-operand integer operation in register-register (rr) and register-memory
// (rm) forms, both expanded through AVX512_maskable.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SelectionDAG node matched by the patterns.
//   _            - vector type info (RC, VT, MemOp, LdFrag).
//   sched        - scheduling class; rm uses its Folded / ReadAfterFold members.
//   IsCommutable - forwarded to the rr form only (default 0).
// NOTE(review): IsCommutable is passed twice to rr, presumably covering both
// the unmasked and the masked variants — confirm against AVX512_maskable.
4859multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4860                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
4861                           bit IsCommutable = 0> {
4862  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4863                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4864                    "$src2, $src1", "$src1, $src2",
4865                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4866                    IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4867                    Sched<[sched]>;
4868
  // Memory form: the second operand is loaded through _.LdFrag.
4869  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4870                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4871                  "$src2, $src1", "$src1, $src2",
4872                  (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4873                  AVX512BIBase, EVEX_4V,
4874                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4875}
4876
// Extends avx512_binop_rm (rr and rm) with an embedded-broadcast memory form,
// rmb. The second operand is a scalar memory operand loaded through
// _.BroadcastLdFrag, the assembly string appends _.BroadcastStr to $src2, and
// the encoding sets EVEX_B.
4877multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4878                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
4879                            bit IsCommutable = 0> :
4880           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4881  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4882                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4883                  "${src2}"#_.BroadcastStr#", $src1",
4884                  "$src1, ${src2}"#_.BroadcastStr,
4885                  (_.VT (OpNode _.RC:$src1,
4886                                (_.BroadcastLdFrag addr:$src2)))>,
4887                  AVX512BIBase, EVEX_4V, EVEX_B,
4888                  Sched<[sched.Folded, sched.ReadAfterFold]>;
4889}
4890
// Instantiates avx512_binop_rm (no broadcast form) at all three vector
// lengths: Z under [prd], and Z256 / Z128 under [prd, HasVLX]. Each picks the
// matching VTInfo member and sched width and adds the EVEX_V* length marker.
4891multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4892                              AVX512VLVectorVTInfo VTInfo,
4893                              X86SchedWriteWidths sched, Predicate prd,
4894                              bit IsCommutable = 0> {
4895  let Predicates = [prd] in
4896    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4897                             IsCommutable>, EVEX_V512;
4898
4899  let Predicates = [prd, HasVLX] in {
4900    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4901                                sched.YMM, IsCommutable>, EVEX_V256;
4902    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4903                                sched.XMM, IsCommutable>, EVEX_V128;
4904  }
4905}
4906
// Same three-length expansion as avx512_binop_rm_vl, but built on
// avx512_binop_rmb so each length also gets the broadcast (rmb) form.
// Z is guarded by [prd]; Z256 and Z128 by [prd, HasVLX].
4907multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4908                               AVX512VLVectorVTInfo VTInfo,
4909                               X86SchedWriteWidths sched, Predicate prd,
4910                               bit IsCommutable = 0> {
4911  let Predicates = [prd] in
4912    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4913                             IsCommutable>, EVEX_V512;
4914
4915  let Predicates = [prd, HasVLX] in {
4916    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4917                                 sched.YMM, IsCommutable>, EVEX_V256;
4918    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4919                                 sched.XMM, IsCommutable>, EVEX_V128;
4920  }
4921}
4922
// 64-bit-element wrapper: expands avx512_binop_rmb_vl with avx512vl_i64_info
// and adds VEX_W plus EVEX_CD8<64, CD8VF>.
4923multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4924                                X86SchedWriteWidths sched, Predicate prd,
4925                                bit IsCommutable = 0> {
4926  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4927                                  sched, prd, IsCommutable>,
4928                                  VEX_W, EVEX_CD8<64, CD8VF>;
4929}
4930
// 32-bit-element wrapper: expands avx512_binop_rmb_vl with avx512vl_i32_info
// and adds EVEX_CD8<32, CD8VF>.
4931multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4932                                X86SchedWriteWidths sched, Predicate prd,
4933                                bit IsCommutable = 0> {
4934  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4935                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
4936}
4937
// 16-bit-element wrapper: expands avx512_binop_rm_vl (rr and rm only, no
// broadcast form) with avx512vl_i16_info and adds EVEX_CD8<16, CD8VF> and
// VEX_WIG.
4938multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4939                                X86SchedWriteWidths sched, Predicate prd,
4940                                bit IsCommutable = 0> {
4941  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4942                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
4943                                 VEX_WIG;
4944}
4945
// 8-bit-element wrapper: expands avx512_binop_rm_vl (rr and rm only, no
// broadcast form) with avx512vl_i8_info and adds EVEX_CD8<8, CD8VF> and
// VEX_WIG.
4946multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4947                                X86SchedWriteWidths sched, Predicate prd,
4948                                bit IsCommutable = 0> {
4949  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4950                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
4951                                 VEX_WIG;
4952}
4953
// Emits the dword (D) and qword (Q) variants of one operation. Each variant
// has its own opcode (opc_d / opc_q) and gets "d" or "q" appended to
// OpcodeStr; OpNode, sched, prd and IsCommutable are shared.
4954multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4955                                 SDNode OpNode, X86SchedWriteWidths sched,
4956                                 Predicate prd, bit IsCommutable = 0> {
4957  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4958                                   IsCommutable>;
4959
4960  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
4961                                   IsCommutable>;
4962}
4963
// Emits the byte (B) and word (W) variants of one operation. Each variant
// has its own opcode (opc_b / opc_w) and gets "b" or "w" appended to
// OpcodeStr; OpNode, sched, prd and IsCommutable are shared.
4964multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4965                                 SDNode OpNode, X86SchedWriteWidths sched,
4966                                 Predicate prd, bit IsCommutable = 0> {
4967  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4968                                   IsCommutable>;
4969
4970  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
4971                                   IsCommutable>;
4972}
4973
// Emits all four element widths of one operation: the D/Q variants are
// guarded by HasAVX512 and the B/W variants by HasBWI. This multiclass takes
// no predicate parameter of its own.
4974multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4975                                  bits<8> opc_d, bits<8> opc_q,
4976                                  string OpcodeStr, SDNode OpNode,
4977                                  X86SchedWriteWidths sched,
4978                                  bit IsCommutable = 0> {
4979  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4980                                    sched, HasAVX512, IsCommutable>,
4981              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4982                                    sched, HasBWI, IsCommutable>;
4983}
4984
// Binary operation whose source, destination and broadcast types may differ.
//   _Src   - type info for both register sources and the full memory operand.
//   _Dst   - type info for the result (and for the AVX512_maskable expansion).
//   _Brdct - type info for the broadcast memory form; the broadcast load is
//            produced as _Brdct.VT and then bitconverted for OpNode.
// Defines rr, rm and rmb. IsCommutable is forwarded once, to rr only.
4985multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4986                            X86FoldableSchedWrite sched,
4987                            SDNode OpNode,X86VectorVTInfo _Src,
4988                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4989                            bit IsCommutable = 0> {
4990  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4991                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4992                            "$src2, $src1","$src1, $src2",
4993                            (_Dst.VT (OpNode
4994                                         (_Src.VT _Src.RC:$src1),
4995                                         (_Src.VT _Src.RC:$src2))),
4996                            IsCommutable>,
4997                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  // Full-width memory form: $src2 is loaded through _Src.LdFrag.
4998  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4999                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5000                        "$src2, $src1", "$src1, $src2",
5001                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5002                                      (_Src.LdFrag addr:$src2)))>,
5003                        AVX512BIBase, EVEX_4V,
5004                        Sched<[sched.Folded, sched.ReadAfterFold]>;
5005
  // Broadcast memory form (EVEX_B): scalar operand and broadcast string come
  // from _Brdct rather than _Src.
5006  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5007                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
5008                    OpcodeStr,
5009                    "${src2}"#_Brdct.BroadcastStr#", $src1",
5010                     "$src1, ${src2}"#_Brdct.BroadcastStr,
5011                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5012                                 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
5013                    AVX512BIBase, EVEX_4V, EVEX_B,
5014                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5015}
5016
// Integer add/subtract/multiply/average instruction families. The last
// argument of each instantiation is IsCommutable: 1 for the add, multiply and
// average families, 0 for the subtract families.
// Plain add/sub in all four element widths (opcodes listed as b, w, d, q).
5017defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
5018                                    SchedWriteVecALU, 1>;
5019defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
5020                                    SchedWriteVecALU, 0>;
// Signed and unsigned saturating add/sub, byte and word only (HasBWI).
5021defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
5022                                    SchedWriteVecALU, HasBWI, 1>;
5023defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
5024                                    SchedWriteVecALU, HasBWI, 0>;
5025defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
5026                                     SchedWriteVecALU, HasBWI, 1>;
5027defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
5028                                     SchedWriteVecALU, HasBWI, 0>;
// Multiplies matched on `mul`: dword (HasAVX512), word (HasBWI), qword
// (HasDQI). VPMULLQ is additionally marked NotEVEX2VEXConvertible.
5029defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
5030                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
5031defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
5032                                    SchedWriteVecIMul, HasBWI, 1>;
5033defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
5034                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
5035                                    NotEVEX2VEXConvertible;
// Word multiplies matched on mulhs / mulhu / X86mulhrs (HasBWI).
5036defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
5037                                    HasBWI, 1>;
5038defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
5039                                     HasBWI, 1>;
5040defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
5041                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
// Byte/word average matched on X86avg (HasBWI).
5042defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
5043                                   SchedWriteVecALU, HasBWI, 1>;
// Multiplies matched on X86pmuldq / X86pmuludq, using the qword wrapper.
5044defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
5045                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
5046defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
5047                                     SchedWriteVecIMul, HasAVX512, 1>;
5048
// Instantiates avx512_binop_rm2 at 512, 256 and 128 bits with separate source
// and destination type families. The broadcast type is fixed to the 64-bit
// integer info of each length (v8i64_info, v4i64x_info, v2i64x_info), and
// every length gets EVEX_CD8<64, CD8VF> and VEX_W. The 512-bit form is guarded
// by [prd]; the 256/128-bit forms by [HasVLX, prd].
5049multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
5050                            X86SchedWriteWidths sched,
5051                            AVX512VLVectorVTInfo _SrcVTInfo,
5052                            AVX512VLVectorVTInfo _DstVTInfo,
5053                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
5054  let Predicates = [prd] in
5055    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
5056                                 _SrcVTInfo.info512, _DstVTInfo.info512,
5057                                 v8i64_info, IsCommutable>,
5058                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
5059  let Predicates = [HasVLX, prd] in {
5060    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
5061                                      _SrcVTInfo.info256, _DstVTInfo.info256,
5062                                      v4i64x_info, IsCommutable>,
5063                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
5064    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
5065                                      _SrcVTInfo.info128, _DstVTInfo.info128,
5066                                      v2i64x_info, IsCommutable>,
5067                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
5068  }
5069}
5070
// vpmultishiftqb (opcode 0x83, T8PD), guarded by HasVBMI and not commutable.
// Source and destination both use the i8 type family; the broadcast form uses
// the 64-bit element info supplied by avx512_binop_all.
5071defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
5072                                avx512vl_i8_info, avx512vl_i8_info,
5073                                X86multishift, HasVBMI, 0>, T8PD;
5074
// Broadcast memory form (rmb) for operations whose result type (_Dst) differs
// from the source type (_Src). The scalar operand is broadcast-loaded as
// _Src.VT and bitconverted before being passed to OpNode. EVEX_CD8 uses the
// source element size, and the encoding sets EVEX_B.
5075multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5076                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
5077                            X86FoldableSchedWrite sched> {
5078  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5079                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
5080                    OpcodeStr,
5081                    "${src2}"#_Src.BroadcastStr#", $src1",
5082                     "$src1, ${src2}"#_Src.BroadcastStr,
5083                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
5084                                 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
5085                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
5086                    Sched<[sched.Folded, sched.ReadAfterFold]>;
5087}
5088
// Register-register (rr) and register-memory (rm) forms for operations whose
// result type (_Dst) differs from the source type (_Src). EVEX_CD8 uses the
// source element size. IsCommutable (default 0) is forwarded twice, to rr only.
// NOTE(review): the two IsCommutable arguments presumably cover the unmasked
// and masked variants — confirm against AVX512_maskable.
5089multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
5090                            SDNode OpNode,X86VectorVTInfo _Src,
5091                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
5092                            bit IsCommutable = 0> {
5093  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
5094                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
5095                            "$src2, $src1","$src1, $src2",
5096                            (_Dst.VT (OpNode
5097                                         (_Src.VT _Src.RC:$src1),
5098                                         (_Src.VT _Src.RC:$src2))),
5099                            IsCommutable, IsCommutable>,
5100                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  // Memory form: $src2 is loaded through _Src.LdFrag.
5101  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
5102                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
5103                        "$src2, $src1", "$src1, $src2",
5104                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
5105                                      (_Src.LdFrag addr:$src2)))>,
5106                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
5107                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5108}
5109
// Pack from i32 sources to i16 results at 512, 256 and 128 bits. Each length
// combines avx512_packs_rm (rr, rm) with avx512_packs_rmb (broadcast form) and
// is scheduled as SchedWriteShuffle. The 512-bit form requires HasBWI; the
// 256/128-bit forms require HasBWI and HasVLX.
5110multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
5111                                    SDNode OpNode> {
5112  let Predicates = [HasBWI] in
5113  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
5114                                 v32i16_info, SchedWriteShuffle.ZMM>,
5115                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
5116                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
5117  let Predicates = [HasBWI, HasVLX] in {
5118    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
5119                                     v16i16x_info, SchedWriteShuffle.YMM>,
5120                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
5121                                      v16i16x_info, SchedWriteShuffle.YMM>,
5122                                      EVEX_V256;
5123    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
5124                                     v8i16x_info, SchedWriteShuffle.XMM>,
5125                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
5126                                      v8i16x_info, SchedWriteShuffle.XMM>,
5127                                      EVEX_V128;
5128  }
5129}
// Pack from i16 sources to i8 results at 512, 256 and 128 bits. Only
// avx512_packs_rm (rr, rm) is used, so no broadcast form is defined, and every
// length is marked VEX_WIG. The 512-bit form requires HasBWI; the 256/128-bit
// forms require HasBWI and HasVLX.
5130multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
5131                            SDNode OpNode> {
5132  let Predicates = [HasBWI] in
5133  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
5134                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
5135  let Predicates = [HasBWI, HasVLX] in {
5136    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
5137                                     v32i8x_info, SchedWriteShuffle.YMM>,
5138                                     EVEX_V256, VEX_WIG;
5139    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
5140                                     v16i8x_info, SchedWriteShuffle.XMM>,
5141                                     EVEX_V128, VEX_WIG;
5142  }
5143}
5144
// Instantiates a multiply-add style instruction family at 512-, 256- and
// 128-bit widths by reusing avx512_packs_rm with the SchedWriteVecIMul
// scheduling classes.
//   opc          - opcode byte forwarded to every variant.
//   OpcodeStr    - mnemonic forwarded to every variant.
//   OpNode       - SelectionDAG node forwarded to every variant.
//   _Src         - per-width type info for the source vectors.
//   _Dst         - per-width type info for the destination vectors.
//   IsCommutable - forwarded to avx512_packs_rm; defaults to 0.
5145multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
5146                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
5147                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  // 512-bit form: predicated on HasBWI only.
5148  let Predicates = [HasBWI] in
5149  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
5150                                _Dst.info512, SchedWriteVecIMul.ZMM,
5151                                IsCommutable>, EVEX_V512;
  // 256-bit and 128-bit forms: additionally predicated on HasVLX.
5152  let Predicates = [HasBWI, HasVLX] in {
5153    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
5154                                     _Dst.info256, SchedWriteVecIMul.YMM,
5155                                     IsCommutable>, EVEX_V256;
5156    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
5157                                     _Dst.info128, SchedWriteVecIMul.XMM,
5158                                     IsCommutable>, EVEX_V128;
5159  }
5160}
5161
// Pack instruction instantiations. The *DW forms use the i32->i16 family and
// the *WB forms use the i16->i8 family; the SS/US mnemonics are matched
// through X86Packss / X86Packus respectively. VPACKUSDW is the only one that
// uses AVX5128IBase instead of AVX512BIBase.
5162defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
5163defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
5164defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
5165defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
5166
// Multiply-add instantiations built on avx512_vpmadd.
// VPMADDUBSW: i8 sources, i16 results; IsCommutable is left at its default 0.
5167defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
5168                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
// VPMADDWD: i16 sources, i32 results; explicitly marked commutable.
5169defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
5170                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
5171
// Signed integer maximum (smax) for byte/word/dword/qword elements.
// Byte and word forms are predicated on HasBWI, dword and qword forms on
// HasAVX512. The trailing 1 is presumably the IsCommutable flag of the
// avx512_binop_rm_vl_* multiclasses (defined outside this view) - confirm.
// The qword form is additionally tagged NotEVEX2VEXConvertible.
5172defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
5173                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5174defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
5175                                    SchedWriteVecALU, HasBWI, 1>;
5176defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
5177                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5178defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
5179                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5180                                    NotEVEX2VEXConvertible;
5181
// Unsigned integer maximum (umax) for byte/word/dword/qword elements.
// Byte and word forms are predicated on HasBWI, dword and qword forms on
// HasAVX512. The trailing 1 is presumably the IsCommutable flag of the
// avx512_binop_rm_vl_* multiclasses (defined outside this view) - confirm.
// The qword form is additionally tagged NotEVEX2VEXConvertible.
5182defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
5183                                    SchedWriteVecALU, HasBWI, 1>;
5184defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
5185                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5186defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
5187                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5188defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
5189                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5190                                    NotEVEX2VEXConvertible;
5191
// Signed integer minimum (smin) for byte/word/dword/qword elements.
// Byte and word forms are predicated on HasBWI, dword and qword forms on
// HasAVX512. The trailing 1 is presumably the IsCommutable flag of the
// avx512_binop_rm_vl_* multiclasses (defined outside this view) - confirm.
// The qword form is additionally tagged NotEVEX2VEXConvertible.
5192defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
5193                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5194defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
5195                                    SchedWriteVecALU, HasBWI, 1>;
5196defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
5197                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5198defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
5199                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5200                                    NotEVEX2VEXConvertible;
5201
// Unsigned integer minimum (umin) for byte/word/dword/qword elements.
// Byte and word forms are predicated on HasBWI, dword and qword forms on
// HasAVX512. The trailing 1 is presumably the IsCommutable flag of the
// avx512_binop_rm_vl_* multiclasses (defined outside this view) - confirm.
// The qword form is additionally tagged NotEVEX2VEXConvertible.
5202defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
5203                                    SchedWriteVecALU, HasBWI, 1>;
5204defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
5205                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
5206defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
5207                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
5208defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
5209                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
5210                                    NotEVEX2VEXConvertible;
5211
5212// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern widens the narrow operand(s) into an undefined v8i64 register
// with INSERT_SUBREG, runs the 512-bit VPMULLQZ instruction, and takes the
// low sub-register of the result with EXTRACT_SUBREG. Only the register and
// broadcast-load operand forms are covered here.
5213let Predicates = [HasDQI, NoVLX] in {
  // v4i64 multiply, register-register.
5214  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5215            (EXTRACT_SUBREG
5216                (VPMULLQZrr
5217                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5218                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5219             sub_ymm)>;
  // v4i64 multiply by a 64-bit broadcast load; the address is passed as is.
5220  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5221            (EXTRACT_SUBREG
5222                (VPMULLQZrmb
5223                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5224                    addr:$src2),
5225             sub_ymm)>;
5226
  // v2i64 multiply, register-register.
5227  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5228            (EXTRACT_SUBREG
5229                (VPMULLQZrr
5230                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5231                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5232             sub_xmm)>;
  // v2i64 multiply by a 64-bit broadcast load; the address is passed as is.
5233  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5234            (EXTRACT_SUBREG
5235                (VPMULLQZrmb
5236                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5237                    addr:$src2),
5238             sub_xmm)>;
5239}
5240
// Emits patterns that select a v4i64 / v2i64 OpNode onto a 512-bit
// instruction: operands are inserted into an undefined v8i64 register, the
// wide instruction runs, and the low sub-register is extracted.
//   Instr  - name prefix of the 512-bit instruction; "rr" and "rmb" are
//            appended to pick the register and broadcast-memory variants.
//   OpNode - SelectionDAG node being lowered.
// No predicates are set here; the instantiation site supplies them.
5241multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
  // v4i64, register-register.
5242  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
5243            (EXTRACT_SUBREG
5244                (!cast<Instruction>(Instr#"rr")
5245                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5246                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5247             sub_ymm)>;
  // v4i64, second operand is a 64-bit broadcast load.
5248  def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
5249            (EXTRACT_SUBREG
5250                (!cast<Instruction>(Instr#"rmb")
5251                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5252                    addr:$src2),
5253             sub_ymm)>;
5254
  // v2i64, register-register.
5255  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5256            (EXTRACT_SUBREG
5257                (!cast<Instruction>(Instr#"rr")
5258                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5259                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5260             sub_xmm)>;
  // v2i64, second operand is a 64-bit broadcast load.
5261  def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5262            (EXTRACT_SUBREG
5263                (!cast<Instruction>(Instr#"rmb")
5264                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5265                    addr:$src2),
5266             sub_xmm)>;
5267}
5268
// With AVX512 but without VLX, route 128/256-bit qword min/max through the
// 512-bit VPMAX*QZ / VPMIN*QZ instructions.
5269let Predicates = [HasAVX512, NoVLX] in {
5270  defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5271  defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5272  defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5273  defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5274}
5275
5276//===----------------------------------------------------------------------===//
5277// AVX-512  Logical Instructions
5278//===----------------------------------------------------------------------===//
5279
// Bitwise logic instruction families. Each passes the same opcode twice to
// avx512_binop_rm_vl_dq (presumably one for the D form and one for the Q
// form - confirm against the multiclass, which is outside this view).
// VPAND/VPOR/VPXOR pass a trailing 1 (presumably IsCommutable); VPANDN omits
// it and so takes that parameter's default.
5280defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5281                                   SchedWriteVecLogic, HasAVX512, 1>;
5282defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5283                                  SchedWriteVecLogic, HasAVX512, 1>;
5284defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5285                                   SchedWriteVecLogic, HasAVX512, 1>;
5286defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5287                                    SchedWriteVecLogic, HasAVX512>;
5288
// Unmasked and/or/xor/andnp on byte and word element vectors (128- and
// 256-bit) are selected onto the Q-suffixed logic instructions. The result
// of a bitwise operation does not depend on the element width, so the Q
// form can serve these types.
5289let Predicates = [HasVLX] in {
  // 128-bit, register-register.
5290  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5291            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5292  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5293            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5294
5295  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5296            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5297  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5298            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5299
5300  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5301            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5302  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5303            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5304
5305  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5306            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5307  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5308            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5309
  // 128-bit, register-memory (second operand is a full-vector load).
5310  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5311            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5312  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5313            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5314
5315  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5316            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5317  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5318            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5319
5320  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5321            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5322  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5323            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5324
5325  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5326            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5327  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5328            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5329
  // 256-bit, register-register.
5330  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5331            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5332  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5333            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5334
5335  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5336            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5337  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5338            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5339
5340  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5341            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5342  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5343            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5344
5345  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5346            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5347  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5348            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5349
  // 256-bit, register-memory (second operand is a full-vector load).
5350  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5351            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5352  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5353            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5354
5355  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5356            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5357  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5358            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5359
5360  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5361            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5362  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5363            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5364
5365  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5366            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5367  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5368            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5369}
5370
// 512-bit counterpart of the byte/word logic patterns: unmasked
// and/or/xor/andnp on v64i8 and v32i16 are selected onto the Q-suffixed
// 512-bit logic instructions. Only HasAVX512 is required.
5371let Predicates = [HasAVX512] in {
  // Register-register.
5372  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5373            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5374  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5375            (VPANDQZrr VR512:$src1, VR512:$src2)>;
5376
5377  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5378            (VPORQZrr VR512:$src1, VR512:$src2)>;
5379  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5380            (VPORQZrr VR512:$src1, VR512:$src2)>;
5381
5382  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5383            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5384  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5385            (VPXORQZrr VR512:$src1, VR512:$src2)>;
5386
5387  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5388            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5389  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5390            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5391
  // Register-memory (second operand is a full-vector load).
5392  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5393            (VPANDQZrm VR512:$src1, addr:$src2)>;
5394  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5395            (VPANDQZrm VR512:$src1, addr:$src2)>;
5396
5397  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5398            (VPORQZrm VR512:$src1, addr:$src2)>;
5399  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5400            (VPORQZrm VR512:$src1, addr:$src2)>;
5401
5402  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5403            (VPXORQZrm VR512:$src1, addr:$src2)>;
5404  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5405            (VPXORQZrm VR512:$src1, addr:$src2)>;
5406
5407  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5408            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5409  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5410            (VPANDNQZrm VR512:$src1, addr:$src2)>;
5411}
5412
5413// Patterns to catch vselect with different type than logic op.
//   InstrStr - instruction name prefix; rrk/rrkz/rmk/rmkz are appended.
//   OpNode   - the logic SelectionDAG node.
//   _        - type info of the vselect (result type, mask class, register
//              class).
//   IntInfo  - type info of the logic op, whose result is bitconverted to
//              _.VT before the select.
// The "k" variants merge into $src0; the "kz" variants match a select
// against an all-zeros vector.
5414multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5415                                    X86VectorVTInfo _,
5416                                    X86VectorVTInfo IntInfo> {
5417  // Masked register-register logical operations.
5418  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5419                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5420                   _.RC:$src0)),
5421            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5422             _.RC:$src1, _.RC:$src2)>;
5423
5424  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5425                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5426                   _.ImmAllZerosV)),
5427            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5428             _.RC:$src2)>;
5429
5430  // Masked register-memory logical operations.
5431  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5432                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5433                                            (load addr:$src2)))),
5434                   _.RC:$src0)),
5435            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5436             _.RC:$src1, addr:$src2)>;
5437  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5438                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5439                                            (load addr:$src2)))),
5440                   _.ImmAllZerosV)),
5441            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5442             addr:$src2)>;
5443}
5444
// Masked logic-op patterns whose second operand is a broadcast load, for the
// case where the vselect type (_) differs from the logic op type (IntInfo).
//   InstrStr - instruction name prefix; rmbk / rmbkz are appended.
//   OpNode   - the logic SelectionDAG node.
//   _        - type info of the vselect.
//   IntInfo  - type info of the logic op; its BroadcastLdFrag is matched.
5445multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5446                                         X86VectorVTInfo _,
5447                                         X86VectorVTInfo IntInfo> {
5448  // Register-broadcast logical operations.
  // Merge-masking form: unselected elements come from $src0.
5449  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5450                   (bitconvert
5451                    (IntInfo.VT (OpNode _.RC:$src1,
5452                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5453                   _.RC:$src0)),
5454            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5455             _.RC:$src1, addr:$src2)>;
  // Zero-masking form: the select's false operand is an all-zeros vector.
5456  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5457                   (bitconvert
5458                    (IntInfo.VT (OpNode _.RC:$src1,
5459                                 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5460                   _.ImmAllZerosV)),
5461            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
5462             _.RC:$src1, addr:$src2)>;
5463}
5464
// Instantiates avx512_logical_lowering for the 128-, 256- and 512-bit
// members of a (select type, logic type) pair.
//   InstrStr   - instruction name prefix; "Z128", "Z256" or "Z" is appended.
//   OpNode     - the logic SelectionDAG node.
//   SelectInfo - per-width type info for the vselect.
//   IntInfo    - per-width type info for the logic op.
5465multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5466                                         AVX512VLVectorVTInfo SelectInfo,
5467                                         AVX512VLVectorVTInfo IntInfo> {
// 128- and 256-bit patterns are predicated on HasVLX.
5468let Predicates = [HasVLX] in {
5469  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5470                                 IntInfo.info128>;
5471  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5472                                 IntInfo.info256>;
5473}
// 512-bit patterns are predicated on HasAVX512.
5474let Predicates = [HasAVX512] in {
5475  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
5476                                 IntInfo.info512>;
5477}
5478}
5479
// Instantiates avx512_logical_lowering_bcast for the 128-, 256- and 512-bit
// members of a (select type, logic type) pair.
//   InstrStr   - instruction name prefix; "Z128", "Z256" or "Z" is appended.
//   OpNode     - the logic SelectionDAG node.
//   SelectInfo - per-width type info for the vselect.
//   IntInfo    - per-width type info for the logic op.
5480multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5481                                               AVX512VLVectorVTInfo SelectInfo,
5482                                               AVX512VLVectorVTInfo IntInfo> {
// 128- and 256-bit patterns are predicated on HasVLX.
5483let Predicates = [HasVLX] in {
5484  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5485                                       SelectInfo.info128, IntInfo.info128>;
5486  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5487                                       SelectInfo.info256, IntInfo.info256>;
5488}
// 512-bit patterns are predicated on HasAVX512.
5489let Predicates = [HasAVX512] in {
5490  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5491                                       SelectInfo.info512, IntInfo.info512>;
5492}
5493}
5494
// Enumerates every (vselect type, logic op type) combination that needs a
// cross-type masked pattern for one logic operation.
//   InstrStr - instruction name prefix; "Q" is appended when the select has
//              64-bit elements (i64/f64) and "D" when it has 32-bit elements
//              (i32/f32).
//   OpNode   - the logic SelectionDAG node.
// Integer selects skip the combination where select and logic types are the
// same; floating-point selects pair with all four integer logic types.
5495multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5496  // i64 vselect with i32/i16/i8 logic op
5497  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5498                                       avx512vl_i32_info>;
5499  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5500                                       avx512vl_i16_info>;
5501  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5502                                       avx512vl_i8_info>;
5503
5504  // i32 vselect with i64/i16/i8 logic op
5505  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5506                                       avx512vl_i64_info>;
5507  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5508                                       avx512vl_i16_info>;
5509  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5510                                       avx512vl_i8_info>;
5511
5512  // f32 vselect with i64/i32/i16/i8 logic op
5513  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5514                                       avx512vl_i64_info>;
5515  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5516                                       avx512vl_i32_info>;
5517  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5518                                       avx512vl_i16_info>;
5519  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5520                                       avx512vl_i8_info>;
5521
5522  // f64 vselect with i64/i32/i16/i8 logic op
5523  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5524                                       avx512vl_i64_info>;
5525  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5526                                       avx512vl_i32_info>;
5527  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5528                                       avx512vl_i16_info>;
5529  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5530                                       avx512vl_i8_info>;
5531
  // Broadcast-load forms: only f32 select with i32 logic op and f64 select
  // with i64 logic op, i.e. matching element widths.
5532  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5533                                             avx512vl_f32_info,
5534                                             avx512vl_i32_info>;
5535  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
5536                                             avx512vl_f64_info,
5537                                             avx512vl_i64_info>;
5538}
5539
// Emit the cross-type masked patterns for each of the four logic operations.
5540defm : avx512_logical_lowering_types<"VPAND", and>;
5541defm : avx512_logical_lowering_types<"VPOR",  or>;
5542defm : avx512_logical_lowering_types<"VPXOR", xor>;
5543defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5544
5545//===----------------------------------------------------------------------===//
5546// AVX-512  FP arithmetic
5547//===----------------------------------------------------------------------===//
5548
// Defines the basic scalar FP binary-op variants for one element type.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic (already including the type suffix).
//   _            - type info for the scalar type.
//   OpNode       - node matched by the FRC-based rr/rm forms.
//   VecNode      - node matched by the vector-register rr_Int/rm_Int forms.
//   sched        - scheduling class; .Folded/.ReadAfterFold are used for the
//                  memory forms.
//   IsCommutable - applied to the rr form only.
// All four definitions use MXCSR and are marked mayRaiseFPException.
5549multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5550                            SDPatternOperator OpNode, SDNode VecNode,
5551                            X86FoldableSchedWrite sched, bit IsCommutable> {
5552  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Maskable form operating on the vector register class, register source.
5553  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5554                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5555                           "$src2, $src1", "$src1, $src2",
5556                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5557                           Sched<[sched]>;
5558
  // Maskable form operating on the vector register class, memory source.
5559  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5560                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5561                         "$src2, $src1", "$src1, $src2",
5562                         (_.VT (VecNode _.RC:$src1,
5563                                        (_.ScalarIntMemFrags addr:$src2)))>,
5564                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Codegen-only forms on the scalar FP register class (_.FRC), matching
  // OpNode directly.
5565  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5566  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5567                         (ins _.FRC:$src1, _.FRC:$src2),
5568                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5569                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5570                          Sched<[sched]> {
5571    let isCommutable = IsCommutable;
5572  }
5573  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5574                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5575                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5576                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5577                         (_.ScalarLdFrag addr:$src2)))]>,
5578                         Sched<[sched.Folded, sched.ReadAfterFold]>;
5579  }
5580  }
5581}
5582
// Adds the register-register variant that takes an explicit rounding-control
// operand ($rc, an i32 target immediate in the pattern). The definition is
// tagged EVEX_B and EVEX_RC and uses MXCSR.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic (already including the type suffix).
//   _         - type info for the scalar type.
//   VecNode   - node matched with the two sources plus the rounding immediate.
//   sched     - scheduling class.
5583multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5584                                  SDNode VecNode, X86FoldableSchedWrite sched> {
5585  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5586  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5587                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5588                          "$rc, $src2, $src1", "$src1, $src2, $rc",
5589                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5590                          (i32 timm:$rc))>,
5591                          EVEX_B, EVEX_RC, Sched<[sched]>;
5592}
// Defines scalar FP binary-op variants for operations that have a {sae}
// form instead of a rounding-control form.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic (already including the type suffix).
//   _            - type info for the scalar type.
//   OpNode       - node matched by the FRC-based rr/rm forms.
//   VecNode      - node matched by the rr_Int/rm_Int forms.
//   SaeNode      - node matched by the {sae} rrb_Int form.
//   sched        - scheduling class.
//   IsCommutable - applied to the rr form only.
//   EVEX2VexOvrd - name prefix used for EVEX2VEXOverride on rr ("rr"
//                  appended) and rm ("rm" appended).
5593multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5594                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5595                                X86FoldableSchedWrite sched, bit IsCommutable,
5596                                string EVEX2VexOvrd> {
5597  let ExeDomain = _.ExeDomain in {
  // Maskable vector-register forms; FP exception behaviour comes from the
  // SIMD_EXC mixin.
5598  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5599                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5600                           "$src2, $src1", "$src1, $src2",
5601                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5602                           Sched<[sched]>, SIMD_EXC;
5603
5604  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5605                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5606                         "$src2, $src1", "$src1, $src2",
5607                         (_.VT (VecNode _.RC:$src1,
5608                                        (_.ScalarIntMemFrags addr:$src2)))>,
5609                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5610
  // Codegen-only forms on the scalar FP register class; MXCSR use and
  // mayRaiseFPException are set explicitly here.
5611  let isCodeGenOnly = 1, Predicates = [HasAVX512],
5612      Uses = [MXCSR], mayRaiseFPException = 1 in {
5613  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5614                         (ins _.FRC:$src1, _.FRC:$src2),
5615                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5616                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5617                          Sched<[sched]>,
5618                          EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5619    let isCommutable = IsCommutable;
5620  }
5621  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5622                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5623                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5624                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5625                         (_.ScalarLdFrag addr:$src2)))]>,
5626                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5627                         EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5628  }
5629
  // {sae} form: tagged EVEX_B, uses MXCSR, and is not marked
  // mayRaiseFPException in this definition.
5630  let Uses = [MXCSR] in
5631  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5632                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5633                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5634                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5635                            EVEX_B, Sched<[sched]>;
5636  }
5637}
5638
// Instantiates a scalar FP binary op with rounding-control support for the
// ss (f32), sd (f64) and sh (f16) element types. Each type combines
// avx512_fp_scalar with avx512_fp_scalar_round.
//   opc          - opcode byte shared by all three types.
//   OpcodeStr    - mnemonic stem; "ss"/"sd"/"sh" is appended.
//   OpNode       - node for the FRC-based forms.
//   VecNode      - node for the vector-register forms.
//   RndNode      - node for the rounding-control form.
//   sched        - per-type scheduling classes (PS/PD/PH .Scl are used).
//   IsCommutable - forwarded to avx512_fp_scalar.
5639multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5640                                SDNode VecNode, SDNode RndNode,
5641                                X86SchedWriteSizes sched, bit IsCommutable> {
5642  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5643                              sched.PS.Scl, IsCommutable>,
5644             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5645                              sched.PS.Scl>,
5646                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5647  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5648                              sched.PD.Scl, IsCommutable>,
5649             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5650                              sched.PD.Scl>,
5651                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  // The f16 variant is only available with HasFP16.
5652  let Predicates = [HasFP16] in
5653    defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
5654                                VecNode, sched.PH.Scl, IsCommutable>,
5655               avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
5656                                sched.PH.Scl>,
5657                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
5658}
5659
// Instantiates a scalar FP binary op with {sae} support for the ss (f32),
// sd (f64) and sh (f16) element types via avx512_fp_scalar_sae.
//   opc          - opcode byte shared by all three types.
//   OpcodeStr    - mnemonic stem; "ss"/"sd"/"sh" is appended.
//   OpNode       - node for the FRC-based forms.
//   VecNode      - node for the vector-register forms.
//   SaeNode      - node for the {sae} form.
//   sched        - per-type scheduling classes (PS/PD/PH .Scl are used).
//   IsCommutable - forwarded to avx512_fp_scalar_sae.
// The EVEX2VEX override prefix is built from NAME plus "SS"/"SD"/"SH".
5660multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5661                              SDNode VecNode, SDNode SaeNode,
5662                              X86SchedWriteSizes sched, bit IsCommutable> {
5663  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5664                              VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5665                              NAME#"SS">,
5666                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
5667  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5668                              VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5669                              NAME#"SD">,
5670                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
  // The f16 variant is only available with HasFP16 and is tagged
  // NotEVEX2VEXConvertible.
5671  let Predicates = [HasFP16] in {
5672    defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
5673                                VecNode, SaeNode, sched.PH.Scl, IsCommutable,
5674                                NAME#"SH">,
5675                                T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
5676                                NotEVEX2VEXConvertible;
5677  }
5678}
// Scalar FP arithmetic instantiations. Add/mul/sub/div use the
// rounding-control family; min/max use the {sae} family. Only add and mul
// are passed IsCommutable = 1.
5679defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5680                                 SchedWriteFAddSizes, 1>;
5681defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5682                                 SchedWriteFMulSizes, 1>;
5683defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5684                                 SchedWriteFAddSizes, 0>;
5685defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5686                                 SchedWriteFDivSizes, 0>;
5687defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5688                               SchedWriteFCmpSizes, 0>;
5689defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5690                               SchedWriteFCmpSizes, 0>;
5691
5692// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5693// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
// This multiclass defines only the codegen-only FRC-based rr/rm forms; the
// rr form is unconditionally marked commutable.
//   opc          - opcode byte.
//   OpcodeStr    - full mnemonic.
//   _            - type info for the scalar type.
//   OpNode       - node matched by both forms.
//   sched        - scheduling class.
//   EVEX2VEXOvrd - name prefix for EVEX2VEXOverride ("rr"/"rm" appended).
5694multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5695                                    X86VectorVTInfo _, SDNode OpNode,
5696                                    X86FoldableSchedWrite sched,
5697                                    string EVEX2VEXOvrd> {
5698  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5699  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5700                         (ins _.FRC:$src1, _.FRC:$src2),
5701                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5702                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5703                          Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5704    let isCommutable = 1;
5705  }
5706  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5707                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5708                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5709                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5710                         (_.ScalarLdFrag addr:$src2)))]>,
5711                         Sched<[sched.Folded, sched.ReadAfterFold]>,
5712                         EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
5713  }
5714}
// Commutable scalar min/max instantiations for f32 (SS), f64 (SD) and f16
// (SH). They share opcodes 0x5D (min) and 0x5F (max) and the mnemonics of
// the non-commutable forms, and match X86fminc / X86fmaxc. All carry
// SIMD_EXC; the SH variants are additionally NotEVEX2VEXConvertible.
5715defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5716                                         SchedWriteFCmp.Scl, "VMINCSS">, XS,
5717                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5718
5719defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5720                                         SchedWriteFCmp.Scl, "VMINCSD">, XD,
5721                                         VEX_W, EVEX_4V, VEX_LIG,
5722                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5723
5724defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5725                                         SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5726                                         EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5727
5728defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5729                                         SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5730                                         VEX_W, EVEX_4V, VEX_LIG,
5731                                         EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5732
5733defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
5734                                         SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
5735                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5736                                         NotEVEX2VEXConvertible;
5737defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
5738                                         SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
5739                                         EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
5740                                         NotEVEX2VEXConvertible;
5741
// avx512_fp_packed: packed FP binary operation for one vector type, built on
// AVX512_maskable_split (defined outside this part of the file).
//   opc                 - opcode byte.
//   OpcodeStr           - mnemonic stem; `suffix` is appended to it.
//   OpNode / MaskOpNode - operators for the two patterns handed to
//                         AVX512_maskable_split (presumably the unmasked and
//                         the masked pattern respectively -- TODO confirm).
//   _                   - X86VectorVTInfo describing the vector type.
//   sched               - scheduling class; memory forms use sched.Folded and
//                         sched.ReadAfterFold.
//   IsCommutable        - forwarded to the rr form only.
//   IsKCommutable       - forwarded (twice) to the rr form only; defaults to
//                         IsCommutable.
//   suffix              - mnemonic suffix; defaults to _.Suffix.
//   ClobberConstraint   - forwarded to every form; defaults to "".
//   MayRaiseFPException - value for mayRaiseFPException; defaults to 1.
// All forms are defined with hasSideEffects = 0 and Uses = [MXCSR].
// Defines:
//   rr  - register/register.
//   rm  - second operand loaded with _.LdFrag (mayLoad = 1).
//   rmb - second operand is a broadcast load via _.BroadcastLdFrag, tagged
//         EVEX_B (mayLoad = 1).
5742multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5743                            SDPatternOperator MaskOpNode,
5744                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
5745                            bit IsCommutable,
5746                            bit IsKCommutable = IsCommutable,
5747                            string suffix = _.Suffix,
5748                            string ClobberConstraint = "",
5749                            bit MayRaiseFPException = 1> {
5750  let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5751      Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
5752  defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5753                                 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
5754                                 "$src2, $src1", "$src1, $src2",
5755                                 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5756                                 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
5757                                 IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
5758  let mayLoad = 1 in {
5759    defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5760                                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
5761                                   "$src2, $src1", "$src1, $src2",
5762                                   (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5763                                   (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5764                                   ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5765    defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5766                                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
5767                                    "${src2}"#_.BroadcastStr#", $src1",
5768                                    "$src1, ${src2}"#_.BroadcastStr,
5769                                    (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5770                                    (MaskOpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5771                                    ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5772    }
5773  }
5774}
5775
// avx512_fp_round_packed: register/register packed FP operation that takes an
// additional rounding-control operand.
//   opc               - opcode byte.
//   OpcodeStr         - mnemonic stem; `suffix` is appended to it.
//   OpNodeRnd         - operator matched as (OpNodeRnd src1, src2, (i32 timm:$rc)).
//   sched             - scheduling class.
//   _                 - X86VectorVTInfo describing the vector type.
//   suffix            - mnemonic suffix; defaults to _.Suffix.
//   ClobberConstraint - forwarded to AVX512_maskable; defaults to "".
// Defines a single form, rrb, with an AVX512RC:$rc input, Uses = [MXCSR], and
// tagged EVEX_B and EVEX_RC.
5776multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5777                                  SDPatternOperator OpNodeRnd,
5778                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
5779                                  string suffix = _.Suffix,
5780                                  string ClobberConstraint = ""> {
5781  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5782  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5783                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
5784                  "$rc, $src2, $src1", "$src1, $src2, $rc",
5785                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
5786                  0, 0, 0, vselect_mask, ClobberConstraint>,
5787                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5788}
5789
// avx512_fp_sae_packed: register/register packed FP operation whose assembly
// string carries a literal "{sae}" token.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   OpNodeSAE - operator matched as (OpNodeSAE src1, src2).
//   sched     - scheduling class.
//   _         - X86VectorVTInfo describing the vector type.
// Defines a single form, rrb, with Uses = [MXCSR] and tagged EVEX_B. Unlike
// avx512_fp_round_packed there is no $rc operand and no EVEX_RC tag.
5790multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5791                                SDPatternOperator OpNodeSAE,
5792                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5793  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5794  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5795                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5796                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5797                  (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5798                  EVEX_4V, EVEX_B, Sched<[sched]>;
5799}
5800
// avx512_fp_binop_p: instantiates avx512_fp_packed for every f32/f64 vector
// width.
//   opc, OpcodeStr, OpNode, MaskOpNode - forwarded to avx512_fp_packed.
//   prd               - feature predicate required by every variant.
//   sched             - X86SchedWriteSizes; the .PS/.PD member of matching
//                       width (XMM/YMM/ZMM) is selected per variant.
//   IsCommutable      - forwarded as avx512_fp_packed's IsCommutable; default 0.
//   IsPD128Commutable - used in place of IsCommutable for PDZ128 only; defaults
//                       to IsCommutable.
// PSZ/PDZ (512-bit) require [prd]; the 128/256-bit variants also require HasVLX.
// For PDZ128, IsPD128Commutable is passed as IsCommutable and IsCommutable is
// passed explicitly as IsKCommutable.
5801multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5802                             SDPatternOperator MaskOpNode,
5803                             Predicate prd, X86SchedWriteSizes sched,
5804                             bit IsCommutable = 0,
5805                             bit IsPD128Commutable = IsCommutable> {
5806  let Predicates = [prd] in {
5807  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5808                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5809                              EVEX_CD8<32, CD8VF>;
5810  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5811                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5812                              EVEX_CD8<64, CD8VF>;
5813  }
5814
5815    // Define only if AVX512VL feature is present.
5816  let Predicates = [prd, HasVLX] in {
5817    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5818                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5819                                   EVEX_CD8<32, CD8VF>;
5820    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5821                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5822                                   EVEX_CD8<32, CD8VF>;
5823    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5824                                   sched.PD.XMM, IsPD128Commutable,
5825                                   IsCommutable>, EVEX_V128, PD, VEX_W,
5826                                   EVEX_CD8<64, CD8VF>;
5827    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5828                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5829                                   EVEX_CD8<64, CD8VF>;
5830  }
5831}
5832
// avx512_fp_binop_ph: f16 counterpart of avx512_fp_binop_p. Instantiates
// avx512_fp_packed for v32f16 (PHZ), v8f16 (PHZ128) and v16f16 (PHZ256) using
// the sched.PH scheduling entries, T_MAP5PS and EVEX_CD8<16, CD8VF>.
//   opc, OpcodeStr, OpNode, MaskOpNode - forwarded to avx512_fp_packed.
//   sched        - X86SchedWriteSizes; only its .PH member is used here.
//   IsCommutable - forwarded to every variant; default 0.
// PHZ requires HasFP16; the 128/256-bit variants require HasVLX and HasFP16.
5833multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5834                              SDPatternOperator MaskOpNode,
5835                              X86SchedWriteSizes sched, bit IsCommutable = 0> {
5836  let Predicates = [HasFP16] in {
5837    defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
5838                                sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
5839                                EVEX_CD8<16, CD8VF>;
5840  }
5841  let Predicates = [HasVLX, HasFP16] in {
5842    defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
5843                                   sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
5844                                   EVEX_CD8<16, CD8VF>;
5845    defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
5846                                   sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
5847                                   EVEX_CD8<16, CD8VF>;
5848  }
5849}
5850
// avx512_fp_binop_p_round: 512-bit rounding-control forms for f16/f32/f64,
// each an instantiation of avx512_fp_round_packed. The whole multiclass is
// wrapped in `let Uses = [MXCSR]`.
//   opc, OpcodeStr - opcode byte and mnemonic stem.
//   OpNodeRnd      - rounding-aware node forwarded to avx512_fp_round_packed.
//   sched          - X86SchedWriteSizes; the .PH/.PS/.PD ZMM entries are used.
// Only PHZ sets a predicate here (HasFP16); PSZ and PDZ set none in this
// multiclass. No 128/256-bit variants are defined.
5851let Uses = [MXCSR] in
5852multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5853                                   X86SchedWriteSizes sched> {
5854  let Predicates = [HasFP16] in {
5855    defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5856                                      v32f16_info>,
5857                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5858  }
5859  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5860                                    v16f32_info>,
5861                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5862  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5863                                    v8f64_info>,
5864                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5865}
5866
// avx512_fp_binop_p_sae: 512-bit "{sae}" forms for f16/f32/f64, each an
// instantiation of avx512_fp_sae_packed. The whole multiclass is wrapped in
// `let Uses = [MXCSR]`.
//   opc, OpcodeStr - opcode byte and mnemonic stem.
//   OpNodeRnd      - node forwarded as avx512_fp_sae_packed's OpNodeSAE (the
//                    parameter keeps the "Rnd" name used by the sibling
//                    avx512_fp_binop_p_round).
//   sched          - X86SchedWriteSizes; the .PH/.PS/.PD ZMM entries are used.
// Only PHZ sets a predicate here (HasFP16); PSZ and PDZ set none in this
// multiclass. No 128/256-bit variants are defined.
5867let Uses = [MXCSR] in
5868multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5869                                 X86SchedWriteSizes sched> {
5870  let Predicates = [HasFP16] in {
5871    defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
5872                                    v32f16_info>,
5873                                    EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
5874  }
5875  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5876                                  v16f32_info>,
5877                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5878  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5879                                  v8f64_info>,
5880                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5881}
5882
// Packed FP arithmetic. Each defm combines three multiclasses:
//   avx512_fp_binop_p   - f32/f64 forms at all widths (predicate HasAVX512),
//   avx512_fp_binop_ph  - f16 forms,
//   avx512_fp_binop_p_round / _sae - 512-bit rounding-control or {sae} forms.
// Opcodes: VADD 0x58, VMUL 0x59, VSUB 0x5C, VDIV 0x5E, VMIN 0x5D, VMAX 0x5F.
// VADD and VMUL pass IsCommutable = 1; VSUB and VDIV leave it at the default
// (0); VMIN and VMAX pass 0 explicitly.
// VADD/VMUL/VSUB/VDIV use the any_* node for OpNode and the plain node for
// MaskOpNode, and get the rounding-control forms. VMIN/VMAX use the same X86
// node for both and get the {sae} forms instead.
5883defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5884                              SchedWriteFAddSizes, 1>,
5885            avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
5886            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5887defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5888                              SchedWriteFMulSizes, 1>,
5889            avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
5890            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5891defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5892                              SchedWriteFAddSizes>,
5893            avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
5894            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5895defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5896                              SchedWriteFDivSizes>,
5897            avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
5898            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5899defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5900                              SchedWriteFCmpSizes, 0>,
5901            avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
5902            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5903defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5904                              SchedWriteFCmpSizes, 0>,
5905            avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
5906            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Codegen-only commutable packed min/max. These reuse the VMIN/VMAX opcodes
// (0x5D / 0x5F) and mnemonics but match X86fminc / X86fmaxc and pass
// IsCommutable = 1. No rounding-control or {sae} forms are instantiated for
// them.
5907let isCodeGenOnly = 1 in {
5908  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5909                                 SchedWriteFCmpSizes, 1>,
5910               avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
5911                                 SchedWriteFCmpSizes, 1>;
5912  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5913                                 SchedWriteFCmpSizes, 1>,
5914               avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
5915                                 SchedWriteFCmpSizes, 1>;
5916}
// Packed FP-domain logic instructions (VAND 0x54, VANDN 0x55, VOR 0x56,
// VXOR 0x57), predicated on HasDQI. They are declared with an empty Uses list
// and mayRaiseFPException = 0, and both pattern operators are null_frag, so no
// selection patterns come from these definitions. VANDN is the only one
// declared non-commutable.
5917let Uses = []<Register>, mayRaiseFPException = 0 in {
5918defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5919                               SchedWriteFLogicSizes, 1>;
5920defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5921                               SchedWriteFLogicSizes, 0>;
5922defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5923                               SchedWriteFLogicSizes, 1>;
5924defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5925                               SchedWriteFLogicSizes, 1>;
5926}
5927
// avx512_fp_scalef_p: packed scalef-style binary operation for one vector
// type, built on AVX512_maskable with a single pattern per form.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   OpNode    - SDNode matched by every form.
//   sched     - scheduling class; memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - X86VectorVTInfo describing the vector type.
// All forms are defined with Uses = [MXCSR] and mayRaiseFPException = 1.
// Defines:
//   rr  - register/register.
//   rm  - second operand loaded with _.LdFrag.
//   rmb - second operand is a broadcast load via _.BroadcastLdFrag, tagged
//         EVEX_B.
5928multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5929                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5930  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5931  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5932                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5933                  "$src2, $src1", "$src1, $src2",
5934                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5935                  EVEX_4V, Sched<[sched]>;
5936  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5937                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5938                  "$src2, $src1", "$src1, $src2",
5939                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5940                  EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5941  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5942                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5943                   "${src2}"#_.BroadcastStr#", $src1",
5944                   "$src1, ${src2}"#_.BroadcastStr,
5945                   (OpNode  _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5946                   EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5947  }
5948}
5949
// avx512_fp_scalef_scalar: scalar counterpart of avx512_fp_scalef_p, built on
// AVX512_maskable_scalar.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to it.
//   OpNode    - SDNode matched by both forms.
//   sched     - scheduling class; rm uses sched.Folded and sched.ReadAfterFold.
//   _         - X86VectorVTInfo describing the scalar type.
// Both forms are defined with Uses = [MXCSR] and mayRaiseFPException = 1.
// Defines:
//   rr - register/register on _.RC.
//   rm - second operand is _.IntScalarMemOp matched via _.ScalarIntMemFrags.
// No encoding modifiers (EVEX_4V etc.) are attached here; the visible
// instantiations in this file add them at the defm site.
5950multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5951                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5952  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5953  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5954                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5955                  "$src2, $src1", "$src1, $src2",
5956                  (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5957                  Sched<[sched]>;
5958  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5959                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5960                  "$src2, $src1", "$src1, $src2",
5961                  (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5962                  Sched<[sched.Folded, sched.ReadAfterFold]>;
5963  }
5964}
5965
// avx512_fp_scalef_all: every scalef variant (packed and scalar; f16, f32 and
// f64).
//   opc       - opcode byte for the packed forms.
//   opcScaler - opcode byte for the scalar forms.
//   OpcodeStr - mnemonic stem.
//   sched     - X86SchedWriteWidths; .ZMM/.YMM/.XMM/.Scl are selected per
//               variant.
// 512-bit packed variants combine avx512_fp_scalef_p with
// avx512_fp_round_packed; scalar variants combine avx512_fp_scalef_scalar with
// avx512_fp_scalar_round (defined outside this part of the file). The
// 128/256-bit packed variants have no rounding-control form.
// Predicates set here: HasFP16 for PHZ/SHZ, HasVLX for the 128/256-bit f32/f64
// variants, and [HasFP16, HasVLX] for the 128/256-bit f16 variants. PSZ, PDZ,
// SSZ and SDZ set no predicate in this multiclass.
// NOTE(review): SHZ does not list VEX_LIG, whereas SSZ and SDZ do -- confirm
// this difference is intentional.
5966multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5967                                X86SchedWriteWidths sched> {
5968  let Predicates = [HasFP16] in {
5969    defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
5970               avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
5971                                EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
5972    defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
5973               avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
5974                             EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
5975  }
5976  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5977             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5978                              EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
5979  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5980             avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5981                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5982  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5983             avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5984                                    X86scalefsRnd, sched.Scl>,
5985                                    EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
5986  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5987             avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5988                                    X86scalefsRnd, sched.Scl>,
5989                                    EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
5990
5991  // Define only if AVX512VL feature is present.
5992  let Predicates = [HasVLX] in {
5993    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5994                                   EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
5995    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5996                                   EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
5997    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5998                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
5999    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
6000                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
6001  }
6002
6003  let Predicates = [HasFP16, HasVLX] in {
6004    defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
6005                                   EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6006    defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
6007                                   EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
6008  }
6009}
// VSCALEF: packed opcode 0x2C, scalar opcode 0x2D, scheduled as
// SchedWriteFAdd. Every variant is tagged NotEVEX2VEXConvertible.
6010defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
6011                                    SchedWriteFAdd>, NotEVEX2VEXConvertible;
6012
6013//===----------------------------------------------------------------------===//
6014// AVX-512  VPTESTM instructions
6015//===----------------------------------------------------------------------===//
6016
// avx512_vptest: register and full-vector-memory forms of a vector test
// instruction that writes a mask register (_.KRC).
//   opc       - opcode byte.
//   OpcodeStr - complete mnemonic (used as-is).
//   sched     - scheduling class; rm uses sched.Folded and sched.ReadAfterFold.
//   _         - X86VectorVTInfo describing the vector type.
// Both forms use null_frag patterns (see the NOTE below) and are declared with
// hasSideEffects = 0.
// Defines:
//   rr - register/register; the trailing 1 is presumably the commutable flag
//        of AVX512_maskable_cmp -- TODO confirm.
//   rm - register/memory (mayLoad = 1); carries EVEX_CD8<_.EltSize, CD8VF>.
6017multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
6018                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6019  // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
6020  // There are just too many permutations due to commutability and bitcasts.
6021  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6022  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
6023                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6024                      "$src2, $src1", "$src1, $src2",
6025                   (null_frag), (null_frag), 1>,
6026                   EVEX_4V, Sched<[sched]>;
6027  let mayLoad = 1 in
6028  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6029                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6030                       "$src2, $src1", "$src1, $src2",
6031                   (null_frag), (null_frag)>,
6032                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6033                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6034  }
6035}
6036
// avx512_vptest_mb: broadcast-memory form (rmb) of a vector test instruction.
// The second operand is _.ScalarMemOp printed with _.BroadcastStr, and the
// record is tagged EVEX_B. As in avx512_vptest, the patterns are null_frag and
// the record is declared mayLoad = 1, hasSideEffects = 0.
//   opc       - opcode byte.
//   OpcodeStr - complete mnemonic (used as-is).
//   sched     - scheduling class (sched.Folded / sched.ReadAfterFold are used).
//   _         - X86VectorVTInfo describing the vector type.
6037multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
6038                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6039  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
6040  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
6041                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6042                    "${src2}"#_.BroadcastStr#", $src1",
6043                    "$src1, ${src2}"#_.BroadcastStr,
6044                    (null_frag), (null_frag)>,
6045                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6046                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6047}
6048
// avx512_vptest_dq_sizes: instantiates avx512_vptest plus avx512_vptest_mb at
// 512, 256 and 128 bits for one element type.
//   opc       - opcode byte.
//   OpcodeStr - complete mnemonic (used as-is).
//   sched     - X86SchedWriteWidths; .ZMM/.YMM/.XMM are selected per width.
//   _         - AVX512VLVectorVTInfo supplying info512/info256/info128.
// Z requires HasAVX512; Z256 and Z128 additionally require HasVLX.
6049multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
6050                                  X86SchedWriteWidths sched,
6051                                  AVX512VLVectorVTInfo _> {
6052  let Predicates  = [HasAVX512] in
6053  defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
6054           avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
6055
6056  let Predicates = [HasAVX512, HasVLX] in {
6057  defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
6058              avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
6059  defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
6060              avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
6061  }
6062}
6063
// avx512_vptest_dq: dword ("d" suffix, avx512vl_i32_info) and qword ("q"
// suffix, avx512vl_i64_info, VEX_W) variants of a vector test instruction,
// each expanded by avx512_vptest_dq_sizes.
//   opc       - opcode byte shared by both element sizes.
//   OpcodeStr - mnemonic stem; "d"/"q" is appended.
//   sched     - X86SchedWriteWidths forwarded unchanged.
6064multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
6065                            X86SchedWriteWidths sched> {
6066  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
6067                                 avx512vl_i32_info>;
6068  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
6069                                 avx512vl_i64_info>, VEX_W;
6070}
6071
// avx512_vptest_wb: word ("w" suffix, VEX_W) and byte ("b" suffix) variants of
// a vector test instruction at 512/256/128 bits. Only avx512_vptest is
// instantiated, so these get rr and rm forms but no broadcast (rmb) form.
//   opc       - opcode byte shared by both element sizes.
//   OpcodeStr - mnemonic stem; "w"/"b" is appended.
//   sched     - X86SchedWriteWidths; .ZMM/.YMM/.XMM are selected per width.
// 512-bit variants require HasBWI; 256/128-bit variants require HasVLX and
// HasBWI.
6072multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
6073                            X86SchedWriteWidths sched> {
6074  let Predicates = [HasBWI] in {
6075  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
6076                            v32i16_info>, EVEX_V512, VEX_W;
6077  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
6078                            v64i8_info>, EVEX_V512;
6079  }
6080
6081  let Predicates = [HasVLX, HasBWI] in {
6082  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
6083                            v16i16x_info>, EVEX_V256, VEX_W;
6084  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
6085                            v8i16x_info>, EVEX_V128, VEX_W;
6086  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
6087                            v32i8x_info>, EVEX_V256;
6088  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
6089                            v16i8x_info>, EVEX_V128;
6090  }
6091}
6092
// avx512_vptest_all_forms: union of the byte/word variants (avx512_vptest_wb,
// opcode opc_wb) and the dword/qword variants (avx512_vptest_dq, opcode
// opc_dq). It has no body of its own; it only inherits from the two
// multiclasses.
6093multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
6094                                   X86SchedWriteWidths sched> :
6095  avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
6096  avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
6097
// VPTESTM and VPTESTNM share opcode bytes (0x26 for b/w, 0x27 for d/q) and the
// SchedWriteVecLogic scheduling class; they are distinguished by the prefix
// class applied here: T8PD for VPTESTM, T8XS for VPTESTNM.
6098defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
6099                                         SchedWriteVecLogic>, T8PD;
6100defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
6101                                         SchedWriteVecLogic>, T8XS;
6102
6103//===----------------------------------------------------------------------===//
6104// AVX-512  Shift instructions
6105//===----------------------------------------------------------------------===//
6106
// avx512_shift_rmi: shift/rotate by an 8-bit immediate for one vector type.
//   opc       - opcode byte.
//   ImmFormR  - instruction Format used for the register-source form.
//   ImmFormM  - instruction Format used for the memory-source form.
//   OpcodeStr - complete mnemonic (used as-is).
//   OpNode    - SDNode matched as (OpNode src, (i8 timm:$src2)).
//   sched     - scheduling class; mi uses sched.Folded only.
//   _         - X86VectorVTInfo describing the vector type.
// Defines:
//   ri - source vector in a register.
//   mi - source vector loaded from memory with _.LdFrag.
6107multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
6108                            string OpcodeStr, SDNode OpNode,
6109                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6110  let ExeDomain = _.ExeDomain in {
6111  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
6112                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
6113                      "$src2, $src1", "$src1, $src2",
6114                   (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
6115                   Sched<[sched]>;
6116  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6117                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
6118                       "$src2, $src1", "$src1, $src2",
6119                   (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
6120                          (i8 timm:$src2)))>,
6121                   Sched<[sched.Folded]>;
6122  }
6123}
6124
// avx512_shift_rmbi: broadcast-memory variant (mbi) of the shift-by-immediate
// instruction. The source is a scalar memory operand matched through
// _.BroadcastLdFrag and printed with _.BroadcastStr; the record is tagged
// EVEX_B and scheduled as sched.Folded.
//   opc       - opcode byte.
//   ImmFormM  - instruction Format for the memory-source form.
//   OpcodeStr - complete mnemonic (used as-is).
//   OpNode    - SDNode matched as (OpNode broadcast-load, (i8 timm:$src2)).
//   sched     - scheduling class.
//   _         - X86VectorVTInfo describing the vector type.
6125multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
6126                             string OpcodeStr, SDNode OpNode,
6127                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6128  let ExeDomain = _.ExeDomain in
6129  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6130                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
6131      "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
6132     (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
6133     EVEX_B, Sched<[sched.Folded]>;
6134}
6135
// avx512_shift_rrm: shift whose count comes from a 128-bit vector operand,
// independent of the width of the value being shifted.
//   opc       - opcode byte.
//   OpcodeStr - complete mnemonic (used as-is).
//   OpNode    - SDNode matched as (OpNode src1, (SrcVT count)).
//   sched     - scheduling class; rm uses sched.Folded and sched.ReadAfterFold.
//   SrcVT     - value type given to the 128-bit count operand.
//   _         - X86VectorVTInfo describing the shifted vector type.
// Defines:
//   rr - count in a VR128X register.
//   rm - count loaded from an i128mem operand.
6136multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6137                            X86FoldableSchedWrite sched, ValueType SrcVT,
6138                            X86VectorVTInfo _> {
6139   // src2 is always 128-bit
6140  let ExeDomain = _.ExeDomain in {
6141  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6142                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
6143                      "$src2, $src1", "$src1, $src2",
6144                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
6145                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
6146  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6147                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
6148                       "$src2, $src1", "$src1, $src2",
6149                   (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
6150                   AVX512BIBase,
6151                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6152  }
6153}
6154
// avx512_shift_sizes: instantiates avx512_shift_rrm at 512, 256 and 128 bits.
//   opc, OpcodeStr, OpNode, SrcVT - forwarded to avx512_shift_rrm.
//   sched  - X86SchedWriteWidths; .ZMM/.YMM/.XMM are selected per width.
//   VTInfo - AVX512VLVectorVTInfo supplying info512/info256/info128.
//   prd    - feature predicate required by every width.
// Z requires [prd]; Z256 and Z128 additionally require HasVLX.
// The second EVEX_CD8 argument differs per width: CD8VQ for 512-bit, CD8VH for
// 256-bit and CD8VF for 128-bit.
6155multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6156                              X86SchedWriteWidths sched, ValueType SrcVT,
6157                              AVX512VLVectorVTInfo VTInfo,
6158                              Predicate prd> {
6159  let Predicates = [prd] in
6160  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
6161                               VTInfo.info512>, EVEX_V512,
6162                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
6163  let Predicates = [prd, HasVLX] in {
6164  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
6165                               VTInfo.info256>, EVEX_V256,
6166                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
6167  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
6168                               VTInfo.info128>, EVEX_V128,
6169                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
6170  }
6171}
6172
// avx512_shift_types: dword, qword and word variants of a shift whose count is
// a 128-bit vector, each expanded by avx512_shift_sizes.
//   opcd / opcq / opcw      - opcode bytes for the d, q and w variants.
//   OpcodeStr               - mnemonic stem; "d"/"q"/"w" is appended.
//   OpNode                  - SDNode forwarded to every variant.
//   sched                   - X86SchedWriteWidths forwarded unchanged.
//   NotEVEX2VEXConvertibleQ - value assigned to notEVEX2VEXConvertible for the
//                             Q variant only; default 0.
// Count types are v4i32 (D), v2i64 (Q) and v8i16 (W). D and Q require
// HasAVX512, W requires HasBWI; Q is also tagged VEX_W.
6173multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
6174                              string OpcodeStr, SDNode OpNode,
6175                              X86SchedWriteWidths sched,
6176                              bit NotEVEX2VEXConvertibleQ = 0> {
6177  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
6178                              avx512vl_i32_info, HasAVX512>;
6179  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6180  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
6181                              avx512vl_i64_info, HasAVX512>, VEX_W;
6182  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
6183                              avx512vl_i16_info, HasBWI>;
6184}
6185
// avx512_shift_rmi_sizes: shift-by-immediate at 512, 256 and 128 bits for one
// element type. Each width combines avx512_shift_rmi (ri, mi) with
// avx512_shift_rmbi (mbi).
//   opc, ImmFormR, ImmFormM, OpcodeStr, OpNode - forwarded to the per-width
//            multiclasses.
//   sched  - X86SchedWriteWidths; .ZMM/.YMM/.XMM are selected per width.
//   VTInfo - AVX512VLVectorVTInfo supplying info512/info256/info128.
// Z requires HasAVX512; Z256 and Z128 additionally require HasVLX.
6186multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6187                                  string OpcodeStr, SDNode OpNode,
6188                                  X86SchedWriteWidths sched,
6189                                  AVX512VLVectorVTInfo VTInfo> {
6190  let Predicates = [HasAVX512] in
6191  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6192                              sched.ZMM, VTInfo.info512>,
6193             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
6194                               VTInfo.info512>, EVEX_V512;
6195  let Predicates = [HasAVX512, HasVLX] in {
6196  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6197                              sched.YMM, VTInfo.info256>,
6198             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
6199                               VTInfo.info256>, EVEX_V256;
6200  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6201                              sched.XMM, VTInfo.info128>,
6202             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6203                               VTInfo.info128>, EVEX_V128;
6204  }
6205}
6206
// avx512_shift_rmi_w: word-element shift-by-immediate at 512/256/128 bits.
// Only avx512_shift_rmi is instantiated, so there are ri and mi forms but no
// broadcast (mbi) form. Every variant is tagged VEX_WIG.
//   opcw      - opcode byte.
//   ImmFormR / ImmFormM - instruction Formats for register/memory source.
//   OpcodeStr - complete mnemonic (used as-is; callers pass the "w" suffix).
//   OpNode    - SDNode forwarded to avx512_shift_rmi.
//   sched     - X86SchedWriteWidths; .ZMM/.YMM/.XMM are selected per width.
// WZ requires HasBWI; WZ256 and WZ128 require HasVLX and HasBWI.
6207multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6208                              string OpcodeStr, SDNode OpNode,
6209                              X86SchedWriteWidths sched> {
6210  let Predicates = [HasBWI] in
6211  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6212                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
6213  let Predicates = [HasVLX, HasBWI] in {
6214  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6215                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
6216  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6217                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
6218  }
6219}
6220
// avx512_shift_rmi_dq: dword and qword shift-by-immediate variants, each
// expanded by avx512_shift_rmi_sizes.
//   opcd / opcq             - opcode bytes for the d and q variants.
//   ImmFormR / ImmFormM     - instruction Formats for register/memory source.
//   OpcodeStr               - mnemonic stem; "d"/"q" is appended.
//   OpNode                  - SDNode forwarded to both variants.
//   sched                   - X86SchedWriteWidths forwarded unchanged.
//   NotEVEX2VEXConvertibleQ - value assigned to notEVEX2VEXConvertible for the
//                             Q variant only; default 0.
// D carries EVEX_CD8<32, CD8VF>; Q carries EVEX_CD8<64, CD8VF> and VEX_W.
6221multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6222                               Format ImmFormR, Format ImmFormM,
6223                               string OpcodeStr, SDNode OpNode,
6224                               X86SchedWriteWidths sched,
6225                               bit NotEVEX2VEXConvertibleQ = 0> {
6226  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6227                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6228  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6229  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6230                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
6231}
6232
6233defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6234                                 SchedWriteVecShiftImm>,
6235             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6236                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6237
6238defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6239                                 SchedWriteVecShiftImm>,
6240             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6241                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6242
6243defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6244                                 SchedWriteVecShiftImm, 1>,
6245             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6246                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6247
6248defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6249                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6250defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6251                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6252
// Shift families selected on X86vshl / X86vsra / X86vsrl, built from
// avx512_shift_types (defined outside this chunk). They reuse the
// VPSLL / VPSRA / VPSRL defm prefixes of the immediate-count families.
// NOTE(review): the three opcode arguments are presumably the D, Q and W
// opcodes, and the trailing 1 on VPSRA presumably plays the same role as
// NotEVEX2VEXConvertibleQ in avx512_shift_rmi_dq - confirm against
// avx512_shift_types.
6253defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6254                                SchedWriteVecShift>;
6255defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6256                                SchedWriteVecShift, 1>;
6257defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6258                                SchedWriteVecShift>;
6259
6260// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern places the 128/256-bit source into the low part of an otherwise
// undefined v8i64 (INSERT_SUBREG over IMPLICIT_DEF), runs the 512-bit
// instruction, and takes the low sub_xmm / sub_ymm of the result back out.
// The count operand (the v2i64 register for X86vsra, the i8 immediate for
// X86vsrai) is passed through unchanged and is not widened.
6261let Predicates = [HasAVX512, NoVLX] in {
  // Register-count forms (VPSRAQZrr).
6262  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6263            (EXTRACT_SUBREG (v8i64
6264              (VPSRAQZrr
6265                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6266                 VR128X:$src2)), sub_ymm)>;
6267
6268  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6269            (EXTRACT_SUBREG (v8i64
6270              (VPSRAQZrr
6271                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6272                 VR128X:$src2)), sub_xmm)>;
6273
  // Immediate-count forms (VPSRAQZri).
6274  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
6275            (EXTRACT_SUBREG (v8i64
6276              (VPSRAQZri
6277                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6278                 timm:$src2)), sub_ymm)>;
6279
6280  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
6281            (EXTRACT_SUBREG (v8i64
6282              (VPSRAQZri
6283                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6284                 timm:$src2)), sub_xmm)>;
6285}
6286
6287//===-------------------------------------------------------------------===//
6288// Variable Bit Shifts
6289//===-------------------------------------------------------------------===//
6290
// Two-source vector operation where the second source is a full vector of the
// same type as the first. Despite the name, this multiclass is also reused by
// the permute and byte-shuffle multiclasses later in this file.
//   rr - register/register form.
//   rm - second source loaded as a whole vector through _.LdFrag; adds
//        EVEX_CD8<_.EltSize, CD8VF> and the folded-load scheduling classes.
// Both forms go through AVX512_maskable and carry AVX5128IBase and EVEX_4V.
// The two operand strings list the operands in opposite orders (presumably
// AT&T order first, then Intel order).
6291multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6292                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6293  let ExeDomain = _.ExeDomain in {
6294  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6295                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6296                      "$src2, $src1", "$src1, $src2",
6297                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6298                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
6299  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6300                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6301                       "$src2, $src1", "$src1, $src2",
6302                   (_.VT (OpNode _.RC:$src1,
6303                   (_.VT (_.LdFrag addr:$src2))))>,
6304                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6305                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6306  }
6307}
6308
// Broadcast-from-memory companion of avx512_var_shift.
//   rmb - the second source is a scalar memory operand (_.ScalarMemOp) that is
//         matched through _.BroadcastLdFrag; the instruction carries EVEX_B and
//         _.BroadcastStr is appended to the memory operand in the asm string.
6309multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6310                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6311  let ExeDomain = _.ExeDomain in
6312  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6313                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6314                    "${src2}"#_.BroadcastStr#", $src1",
6315                    "$src1, ${src2}"#_.BroadcastStr,
6316                    (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
6317                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6318                    Sched<[sched.Folded, sched.ReadAfterFold]>;
6319}
6320
// Instantiates the rr/rm/rmb forms for all three vector widths of one element
// type:
//   Z    - 512-bit, requires HasAVX512, uses sched.ZMM.
//   Z256 - 256-bit, requires HasAVX512 and HasVLX, uses sched.YMM.
//   Z128 - 128-bit, requires HasAVX512 and HasVLX, uses sched.XMM.
6321multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6322                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6323  let Predicates  = [HasAVX512] in
6324  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6325           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6326
6327  let Predicates = [HasAVX512, HasVLX] in {
6328  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6329              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6330  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6331              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6332  }
6333}
6334
// Instantiates avx512_var_shift_sizes for dword (D, mnemonic suffix "d") and
// qword (Q, mnemonic suffix "q") elements. Both share the same opcode; the
// Q variant additionally carries VEX_W.
6335multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6336                                  SDNode OpNode, X86SchedWriteWidths sched> {
6337  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6338                                 avx512vl_i32_info>;
6339  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6340                                 avx512vl_i64_info>, VEX_W;
6341}
6342
6343// Use 512bit version to implement 128/256 bit in case NoVLX.
//   _         - type-info triple; info256/info128 give the matched types and
//               info512 the type of the widened operands.
//   OpcodeStr - here the *record name* prefix of the 512-bit instruction (for
//               example "VPSRAVQ"), not an assembly mnemonic; "Zrr" is appended
//               and the result is resolved with !cast<Instruction>.
//   p         - predicate list under which the two patterns are active.
// Both sources are widened with INSERT_SUBREG over IMPLICIT_DEF, and only the
// low sub_ymm / sub_xmm part of the 512-bit result is extracted.
6344multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6345                                     SDNode OpNode, list<Predicate> p> {
6346  let Predicates = p in {
6347  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6348                                  (_.info256.VT _.info256.RC:$src2))),
6349            (EXTRACT_SUBREG
6350                (!cast<Instruction>(OpcodeStr#"Zrr")
6351                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6352                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6353             sub_ymm)>;
6354
6355  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6356                                  (_.info128.VT _.info128.RC:$src2))),
6357            (EXTRACT_SUBREG
6358                (!cast<Instruction>(OpcodeStr#"Zrr")
6359                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6360                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6361             sub_xmm)>;
6362  }
6363}
// Word-element variant of the variable shifts. Only avx512_var_shift (rr/rm)
// is instantiated, so no broadcast-memory form is created here.
//   WZ    - v32i16, requires HasBWI.
//   WZ256 - v16i16, requires HasVLX and HasBWI.
//   WZ128 - v8i16,  requires HasVLX and HasBWI.
// All three carry VEX_W.
6364multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6365                              SDNode OpNode, X86SchedWriteWidths sched> {
6366  let Predicates = [HasBWI] in
6367  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6368              EVEX_V512, VEX_W;
6369  let Predicates = [HasVLX, HasBWI] in {
6370
6371  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6372              EVEX_V256, VEX_W;
6373  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6374              EVEX_V128, VEX_W;
6375  }
6376}
6377
// Variable-count shift families. The dword/qword forms come from
// avx512_var_shift_types and the word forms from avx512_var_shift_w, which
// uses a different opcode (0x12 / 0x11 / 0x10) and the full mnemonic.
6378defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6379              avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6380
6381defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6382              avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6383
6384defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6385              avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6386
// Variable rotates select on the generic rotr / rotl nodes and have no
// word-element form.
6387defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6388defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6389
// Fallbacks that widen 128/256-bit operations to the 512-bit instruction when
// VLX is unavailable: the qword arithmetic shift under HasAVX512, and the three
// word shifts under HasBWI.
6390defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6391defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6392defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6393defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6394
6395
6396// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// The variable-count patterns (rotl) use VPROLVQZrr / VPROLVDZrr and widen both
// operands; the immediate patterns (X86vrotli) use VPROLQZri / VPROLDZri and
// widen only the data operand. In every case the sources are inserted into an
// undefined 512-bit register and only the low sub_xmm / sub_ymm of the result
// is extracted.
6397let Predicates = [HasAVX512, NoVLX] in {
6398  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6399            (EXTRACT_SUBREG (v8i64
6400              (VPROLVQZrr
6401                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6402                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6403                       sub_xmm)>;
6404  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6405            (EXTRACT_SUBREG (v8i64
6406              (VPROLVQZrr
6407                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6408                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6409                       sub_ymm)>;
6410
6411  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6412            (EXTRACT_SUBREG (v16i32
6413              (VPROLVDZrr
6414                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6415                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6416                        sub_xmm)>;
6417  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6418            (EXTRACT_SUBREG (v16i32
6419              (VPROLVDZrr
6420                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6421                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6422                        sub_ymm)>;
6423
6424  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6425            (EXTRACT_SUBREG (v8i64
6426              (VPROLQZri
6427                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6428                        timm:$src2)), sub_xmm)>;
6429  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6430            (EXTRACT_SUBREG (v8i64
6431              (VPROLQZri
6432                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6433                       timm:$src2)), sub_ymm)>;
6434
6435  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6436            (EXTRACT_SUBREG (v16i32
6437              (VPROLDZri
6438                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6439                        timm:$src2)), sub_xmm)>;
6440  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6441            (EXTRACT_SUBREG (v16i32
6442              (VPROLDZri
6443                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6444                        timm:$src2)), sub_ymm)>;
6445}
6446
6447// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// The variable-count patterns (rotr) use VPRORVQZrr / VPRORVDZrr and widen both
// operands; the immediate patterns (X86vrotri) use VPRORQZri / VPRORDZri and
// widen only the data operand. In every case the sources are inserted into an
// undefined 512-bit register and only the low sub_xmm / sub_ymm of the result
// is extracted.
6448let Predicates = [HasAVX512, NoVLX] in {
6449  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6450            (EXTRACT_SUBREG (v8i64
6451              (VPRORVQZrr
6452                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6453                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6454                       sub_xmm)>;
6455  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6456            (EXTRACT_SUBREG (v8i64
6457              (VPRORVQZrr
6458                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6459                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6460                       sub_ymm)>;
6461
6462  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6463            (EXTRACT_SUBREG (v16i32
6464              (VPRORVDZrr
6465                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6466                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6467                        sub_xmm)>;
6468  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6469            (EXTRACT_SUBREG (v16i32
6470              (VPRORVDZrr
6471                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6472                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6473                        sub_ymm)>;
6474
6475  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6476            (EXTRACT_SUBREG (v8i64
6477              (VPRORQZri
6478                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6479                        timm:$src2)), sub_xmm)>;
6480  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6481            (EXTRACT_SUBREG (v8i64
6482              (VPRORQZri
6483                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6484                       timm:$src2)), sub_ymm)>;
6485
6486  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6487            (EXTRACT_SUBREG (v16i32
6488              (VPRORDZri
6489                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6490                        timm:$src2)), sub_xmm)>;
6491  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6492            (EXTRACT_SUBREG (v16i32
6493              (VPRORDZri
6494                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6495                        timm:$src2)), sub_ymm)>;
6496}
6497
6498//===-------------------------------------------------------------------===//
6499// 1-src variable permutation VPERMW/D/Q
6500//===-------------------------------------------------------------------===//
6501
// Variable permute for dword/qword-sized elements, built by reusing the
// rr/rm/rmb forms of avx512_var_shift and avx512_var_shift_mb.
// Only the 512-bit (Z, HasAVX512) and 256-bit (Z256, HasAVX512 + HasVLX)
// widths are instantiated; there is no 128-bit variant.
// Unlike avx512_var_shift_sizes, a single X86FoldableSchedWrite is used for
// both widths.
6502multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6503                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6504  let Predicates  = [HasAVX512] in
6505  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6506           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6507
6508  let Predicates = [HasAVX512, HasVLX] in
6509  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6510              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6511}
6512
// Immediate-controlled permute, built by reusing avx512_shift_rmi and
// avx512_shift_rmbi (both defined outside this chunk).
// Only the 512-bit (Z, HasAVX512) and 256-bit (Z256, HasAVX512 + HasVLX)
// widths are instantiated; there is no 128-bit variant.
// Note that avx512_shift_rmbi receives ImmFormM but not ImmFormR.
6513multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6514                                 string OpcodeStr, SDNode OpNode,
6515                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6516  let Predicates = [HasAVX512] in
6517  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6518                              sched, VTInfo.info512>,
6519             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6520                               sched, VTInfo.info512>, EVEX_V512;
6521  let Predicates = [HasAVX512, HasVLX] in
6522  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6523                              sched, VTInfo.info256>,
6524             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6525                               sched, VTInfo.info256>, EVEX_V256;
6526}
6527
// Variable permute for byte/word-sized elements.
//   prd - feature predicate required by all widths; the 256- and 128-bit
//         variants additionally require HasVLX.
// Only avx512_var_shift (rr/rm) is instantiated, so no broadcast-memory form is
// created. All three widths (Z, Z256, Z128) are provided and share one
// X86FoldableSchedWrite.
6528multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6529                              Predicate prd, SDNode OpNode,
6530                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6531  let Predicates = [prd] in
6532  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6533              EVEX_V512 ;
6534  let Predicates = [HasVLX, prd] in {
6535  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6536              EVEX_V256 ;
6537  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
6538              EVEX_V128 ;
6539  }
6540}
6541
// Byte/word variable permutes. VPERMW and VPERMB share opcode 0x8D; they differ
// in predicate (HasBWI vs. HasVBMI), element type info, and VEX_W on VPERMW.
6542defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6543                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6544defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6545                               WriteVarShuffle256, avx512vl_i8_info>;
6546
// Dword/qword variable permutes (vector-controlled, X86VPermv). The integer
// pair shares opcode 0x36 and the floating-point pair shares 0x16; the 64-bit
// element variants add VEX_W.
6547defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6548                                    WriteVarShuffle256, avx512vl_i32_info>;
6549defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6550                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6551defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6552                                     WriteFVarShuffle256, avx512vl_f32_info>;
6553defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6554                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6555
// Immediate-controlled forms (X86VPermi) of VPERMQ and VPERMPD. These reuse the
// VPERMQ / VPERMPD defm prefixes of the vector-controlled forms, with opcodes
// 0x00 / 0x01 and AVX512AIi8Base.
6556defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6557                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6558                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6559defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6560                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6561                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6562
6563//===----------------------------------------------------------------------===//
6564// AVX-512 - VPERMIL
6565//===----------------------------------------------------------------------===//
6566
// Vector-controlled VPERMIL forms for one vector width.
//   _    - type info of the data vector ($src1 and the result).
//   Ctrl - type info of the control vector ($src2); its register class, memory
//          operand and load fragments are used for the second source.
//   rr   - control vector in a register.
//   rm   - control vector loaded as a whole vector (Ctrl.LdFrag).
//   rmb  - control vector broadcast from a scalar in memory. The pattern uses
//          Ctrl.BroadcastLdFrag, while the operand class (_.ScalarMemOp) and the
//          asm broadcast suffix (_.BroadcastStr) are taken from the data type.
// No ExeDomain is set here; the instantiation site is expected to supply it.
6567multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6568                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
6569                             X86VectorVTInfo Ctrl> {
6570  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6571                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6572                  "$src2, $src1", "$src1, $src2",
6573                  (_.VT (OpNode _.RC:$src1,
6574                               (Ctrl.VT Ctrl.RC:$src2)))>,
6575                  T8PD, EVEX_4V, Sched<[sched]>;
6576  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6577                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6578                  "$src2, $src1", "$src1, $src2",
6579                  (_.VT (OpNode
6580                           _.RC:$src1,
6581                           (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6582                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6583                  Sched<[sched.Folded, sched.ReadAfterFold]>;
6584  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6585                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6586                   "${src2}"#_.BroadcastStr#", $src1",
6587                   "$src1, ${src2}"#_.BroadcastStr,
6588                   (_.VT (OpNode
6589                            _.RC:$src1,
6590                            (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6591                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6592                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6593}
6594
// Instantiates avx512_permil_vec with the X86VPermilpv node for all three
// widths: Z under HasAVX512, Z128 and Z256 under HasAVX512 + HasVLX. The data
// and control type infos are narrowed in lockstep (info512/info128/info256).
6595multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6596                                    X86SchedWriteWidths sched,
6597                                    AVX512VLVectorVTInfo _,
6598                                    AVX512VLVectorVTInfo Ctrl> {
6599  let Predicates = [HasAVX512] in {
6600    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6601                                  _.info512, Ctrl.info512>, EVEX_V512;
6602  }
6603  let Predicates = [HasAVX512, HasVLX] in {
6604    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6605                                  _.info128, Ctrl.info128>, EVEX_V128;
6606    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6607                                  _.info256, Ctrl.info256>, EVEX_V256;
6608  }
6609}
6610
// Complete VPERMIL family for one element type. Both defms use NAME as their
// prefix, so the two sets of records share the instantiation's name:
//   - vector-controlled forms (opcode OpcVar) via avx512_permil_vec_common;
//   - immediate-controlled forms (opcode OpcImm, node X86VPermilpi) via
//     avx512_shift_rmi_sizes, with AVX512AIi8Base.
6611multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6612                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6613  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6614                                      _, Ctrl>;
6615  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6616                                    X86VPermilpi, SchedWriteFShuffle, _>,
6617                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6618}
6619
// VPERMILPS / VPERMILPD. Arguments are (mnemonic, immediate-form opcode,
// vector-form opcode, data type info, control type info); the control vector
// is the integer type of matching element width. The execution domain is
// supplied here by the enclosing let. VPERMILPD additionally carries VEX_W1X.
6620let ExeDomain = SSEPackedSingle in
6621defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6622                               avx512vl_i32_info>;
6623let ExeDomain = SSEPackedDouble in
6624defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6625                               avx512vl_i64_info>, VEX_W1X;
6626
6627//===----------------------------------------------------------------------===//
6628// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6629//===----------------------------------------------------------------------===//
6630
// Immediate-controlled shuffles, built from the immediate-shift multiclasses.
// All three use opcode 0x70 and differ in their base class (AVX512BIi8Base,
// AVX512XSIi8Base, AVX512XDIi8Base) and selection node.
// Note the defm prefixes VPSHUFH / VPSHUFL omit the trailing "W" that the
// mnemonics "vpshufhw" / "vpshuflw" carry.
// NOTE(review): presumably avx512_shift_rmi_w contributes the "W" to the
// record names - confirm against its definition.
6631defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6632                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6633                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6634defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6635                                  X86PShufhw, SchedWriteShuffle>,
6636                                  EVEX, AVX512XSIi8Base;
6637defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6638                                  X86PShuflw, SchedWriteShuffle>,
6639                                  EVEX, AVX512XDIi8Base;
6640
6641//===----------------------------------------------------------------------===//
6642// AVX-512 - VPSHUFB
6643//===----------------------------------------------------------------------===//
6644
// Byte shuffle for all three vector widths, built by reusing the rr/rm forms of
// avx512_var_shift (no broadcast-memory form).
//   Z    - v64i8, requires HasBWI.
//   Z256 - v32i8, requires HasVLX and HasBWI.
//   Z128 - v16i8, requires HasVLX and HasBWI.
6645multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6646                               X86SchedWriteWidths sched> {
6647  let Predicates = [HasBWI] in
6648  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6649                              EVEX_V512;
6650
6651  let Predicates = [HasVLX, HasBWI] in {
6652  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6653                              EVEX_V256;
6654  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6655                              EVEX_V128;
6656  }
6657}
6658
// VPSHUFB: opcode 0x00, selected on X86pshufb, marked VEX_WIG.
6659defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6660                                  SchedWriteVarShuffle>, VEX_WIG;
6661
6662//===----------------------------------------------------------------------===//
6663// Move Low to High and High to Low packed FP Instructions
6664//===----------------------------------------------------------------------===//
6665
// Register-only EVEX forms of VMOVLHPS (opcode 0x16, node X86Movlhps) and
// VMOVHLPS (opcode 0x12, node X86Movhlps) on 128-bit registers.
6666def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6667          (ins VR128X:$src1, VR128X:$src2),
6668          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6669          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6670          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// Only VMOVHLPSZrr is marked commutable and NotMemoryFoldable; the let below
// has no braces and so covers just this one def.
// NOTE(review): swapping the operands of a high-to-low move is not a plain
// commute, so this presumably relies on custom commute handling in the C++
// instruction info - confirm.
6671let isCommutable = 1 in
6672def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6673          (ins VR128X:$src1, VR128X:$src2),
6674          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6675          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6676          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6677
6678//===----------------------------------------------------------------------===//
6679// VMOVHPS/PD VMOVLPS Instructions
6680// All patterns were taken from the SSE implementation.
6681//===----------------------------------------------------------------------===//
6682
// Load form (rm) of the half-vector moves: combines a vector register
// ($src1) with a 64-bit memory operand ($src2, f64mem).
//   OpNode - node combining the register with the loaded value; the pattern
//            models the load as an f64 load placed in a v2f64 via
//            scalar_to_vector and bitconverted to _.VT. Callers pass null_frag
//            for the forms that should have no selection pattern.
//   _      - type info of the 128-bit vector.
// hasSideEffects = 0 and mayLoad = 1 are set explicitly, which keeps the flags
// correct for the instantiations that pass null_frag.
6683multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6684                                  SDPatternOperator OpNode,
6685                                  X86VectorVTInfo _> {
6686  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6687  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6688                  (ins _.RC:$src1, f64mem:$src2),
6689                  !strconcat(OpcodeStr,
6690                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6691                  [(set _.RC:$dst,
6692                     (OpNode _.RC:$src1,
6693                       (_.VT (bitconvert
6694                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6695                  Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6696}
6697
6698// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6699// SSE1. And MOVLPS pattern is even more complex.
// The high-half loads use opcode 0x16 and the low-half loads 0x12. The PS forms
// pass null_frag and use EVEX_CD8<32, CD8VT2>; the PD forms select on
// X86Unpckl / X86Movsd, use EVEX_CD8<64, CD8VT1> and add VEX_W.
6700defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6701                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6702defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6703                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6704defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6705                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6706defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6707                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6708
// Extra selection patterns that fold an X86vzload64 second operand into the
// VMOVHPD / VMOVLPD load forms, in addition to the scalar_to_vector(loadf64)
// shape matched by the instruction definitions themselves.
6709let Predicates = [HasAVX512] in {
6710  // VMOVHPD patterns
6711  def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6712            (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6713
6714  // VMOVLPD patterns
6715  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6716            (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6717}
6718
// Store forms of the half-vector moves (64-bit memory destination). The
// high-half stores use opcode 0x17 and the low-half stores 0x13.
// The PS forms have an empty pattern list, so mayStore / hasSideEffects are
// given explicitly; each such let has no braces and covers only the def that
// immediately follows it. The PD forms carry a store pattern instead.
6719let SchedRW = [WriteFStore] in {
6720let mayStore = 1, hasSideEffects = 0 in
6721def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6722                       (ins f64mem:$dst, VR128X:$src),
6723                       "vmovhps\t{$src, $dst|$dst, $src}",
6724                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Stores element 0 of unpckh($src, $src), i.e. the upper f64 of $src.
6725def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6726                       (ins f64mem:$dst, VR128X:$src),
6727                       "vmovhpd\t{$src, $dst|$dst, $src}",
6728                       [(store (f64 (extractelt
6729                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6730                                     (iPTR 0))), addr:$dst)]>,
6731                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6732let mayStore = 1, hasSideEffects = 0 in
6733def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6734                       (ins f64mem:$dst, VR128X:$src),
6735                       "vmovlps\t{$src, $dst|$dst, $src}",
6736                       []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Stores element 0 (the lower f64) of $src.
6737def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6738                       (ins f64mem:$dst, VR128X:$src),
6739                       "vmovlpd\t{$src, $dst|$dst, $src}",
6740                       [(store (f64 (extractelt (v2f64 VR128X:$src),
6741                                     (iPTR 0))), addr:$dst)]>,
6742                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6743} // SchedRW
6744
// Additional way to select the VMOVHPD store: storing element 0 of
// X86VPermilpi($src, 1) is mapped to VMOVHPDZ128mr on the original $src,
// alongside the X86Unpckh shape matched by the instruction definition.
6745let Predicates = [HasAVX512] in {
6746  // VMOVHPD patterns
6747  def : Pat<(store (f64 (extractelt
6748                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6749                           (iPTR 0))), addr:$dst),
6750           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6751}
6752//===----------------------------------------------------------------------===//
6753// FMA - Fused Multiply Operations
6754//
6755
// Packed FMA, "213" operand order, without embedded rounding.
// $src1 is tied to $dst; the selection dags pass the operands to the node in
// the order ($src2, $src1, $src3), matching the "213" in the mnemonic.
//   OpNode     - node used for the first (unmasked) dag.
//   MaskOpNode - node used for the second (masked) dag.
//   r  - all operands in registers.
//   m  - $src3 loaded as a whole vector (_.LdFrag).
//   mb - $src3 broadcast from a scalar in memory (_.BroadcastLdFrag, EVEX_B).
// All forms read MXCSR and are marked mayRaiseFPException.
// NOTE(review): the trailing "1, 1" / "1, 0" arguments are forwarded to
// AVX512_maskable_fma, whose parameter list is outside this chunk; they
// presumably flag commutability / memory-foldability - confirm there.
6756multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6757                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6758                               X86VectorVTInfo _> {
6759  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6760      Uses = [MXCSR], mayRaiseFPException = 1 in {
6761  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6762          (ins _.RC:$src2, _.RC:$src3),
6763          OpcodeStr, "$src3, $src2", "$src2, $src3",
6764          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6765          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6766          EVEX_4V, Sched<[sched]>;
6767
6768  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6769          (ins _.RC:$src2, _.MemOp:$src3),
6770          OpcodeStr, "$src3, $src2", "$src2, $src3",
6771          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6772          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6773          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6774
6775  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6776            (ins _.RC:$src2, _.ScalarMemOp:$src3),
6777            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
6778            !strconcat("$src2, ${src3}", _.BroadcastStr ),
6779            (OpNode _.RC:$src2,
6780             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6781            (MaskOpNode _.RC:$src2,
6782             _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6783            EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6784  }
6785}
6786
// Packed FMA, "213" operand order, register form with an explicit rounding
// control operand (rb). The rounding mode is the extra AVX512RC:$rc operand,
// passed to OpNode as an i32 target immediate; the instruction carries EVEX_B
// and EVEX_RC. The same dag is supplied for both the unmasked and the masked
// pattern slot.
// Note that only Uses = [MXCSR] is set here; mayRaiseFPException is not.
6787multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6788                                 X86FoldableSchedWrite sched,
6789                                 X86VectorVTInfo _> {
6790  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6791      Uses = [MXCSR] in
6792  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6793          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6794          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6795          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6796          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6797          EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6798}
6799
// Instantiates the "213" packed FMA forms for all three widths of one element
// type.
//   prd  - base feature predicate (defaults to HasAVX512).
//   Z    - 512-bit, under [prd]: r/m/mb forms plus the embedded-rounding rb
//          form (OpNodeRnd).
//   Z256 / Z128 - under [HasVLX, prd]: r/m/mb forms only; no rounding form is
//          created for these widths.
// Each width sets EVEX_CD8 from its own element size.
6800multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6801                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6802                                   X86SchedWriteWidths sched,
6803                                   AVX512VLVectorVTInfo _,
6804                                   Predicate prd = HasAVX512> {
6805  let Predicates = [prd] in {
6806    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6807                                      sched.ZMM, _.info512>,
6808                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6809                                        _.info512>,
6810                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6811  }
6812  let Predicates = [HasVLX, prd] in {
6813    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6814                                    sched.YMM, _.info256>,
6815                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6816    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6817                                    sched.XMM, _.info128>,
6818                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6819  }
6820}
6821
// Instantiates the "213" packed FMA family for the three floating-point
// element types, appending "ph" / "ps" / "pd" to the mnemonic:
//   PH - f16, requires HasFP16, uses T_MAP6PD.
//   PS - f32, default predicate, uses T8PD.
//   PD - f64, default predicate, uses T8PD and VEX_W.
// All three share the opcode and the SchedWriteFMA scheduling class.
6822multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6823                              SDNode MaskOpNode, SDNode OpNodeRnd> {
6824    defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6825                                      OpNodeRnd, SchedWriteFMA,
6826                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
6827    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6828                                      OpNodeRnd, SchedWriteFMA,
6829                                      avx512vl_f32_info>, T8PD;
6830    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6831                                      OpNodeRnd, SchedWriteFMA,
6832                                      avx512vl_f64_info>, T8PD, VEX_W;
6833}
6834
// The six "213" packed FMA families. The node arguments are, in order, the
// unmasked node, the masked node and the embedded-rounding node.
// VFMADD, VFMSUB, VFNMADD and VFNMSUB use an any_* / X86any_* node for the
// unmasked pattern and the plain node for the masked one; VFMADDSUB and
// VFMSUBADD pass the same node for both.
6835defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
6836                                       fma, X86FmaddRnd>;
6837defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6838                                       X86Fmsub, X86FmsubRnd>;
6839defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6840                                       X86Fmaddsub, X86FmaddsubRnd>;
6841defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6842                                       X86Fmsubadd, X86FmsubaddRnd>;
6843defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6844                                       X86Fnmadd, X86FnmaddRnd>;
6845defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6846                                       X86Fnmsub, X86FnmsubRnd>;
6847
6848
// Packed 231-form FMA: register (r), full-vector load (m) and broadcast-load
// (mb) variants for one vector type `_`.
// $src1 is tied to $dst and does not appear in the `ins` list given here. In
// every pattern below the operand order is ($src2, $src3, $src1), i.e. the
// tied operand is passed as the third node operand.
// All three variants read MXCSR and are marked mayRaiseFPException.
6849multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6850                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6851                               X86VectorVTInfo _> {
6852  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6853      Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register form. null_frag is passed in the slot where m/mb pass their
  // OpNode pattern, so only the MaskOpNode pattern is supplied here.
  // NOTE(review): the trailing "1, 1" / "1, 0" arguments are presumably
  // commutability flags of AVX512_maskable_fma - confirm against its
  // definition.
6854  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6855          (ins _.RC:$src2, _.RC:$src3),
6856          OpcodeStr, "$src3, $src2", "$src2, $src3",
6857          (null_frag),
6858          (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6859          EVEX_4V, Sched<[sched]>;
6860
  // Memory form: $src3 is loaded through _.LdFrag.
6861  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6862          (ins _.RC:$src2, _.MemOp:$src3),
6863          OpcodeStr, "$src3, $src2", "$src2, $src3",
6864          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6865          (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6866          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6867
  // Broadcast form: $src3 is a scalar memory operand loaded through
  // _.BroadcastLdFrag; the assembly string carries _.BroadcastStr and the
  // encoding adds EVEX_B.
6868  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6869         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6870         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6871         "$src2, ${src3}"#_.BroadcastStr,
6872         (_.VT (OpNode _.RC:$src2,
6873                      (_.VT (_.BroadcastLdFrag addr:$src3)),
6874                      _.RC:$src1)),
6875         (_.VT (MaskOpNode _.RC:$src2,
6876                           (_.VT (_.BroadcastLdFrag addr:$src3)),
6877                           _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
6878         Sched<[sched.Folded, sched.ReadAfterFold]>;
6879  }
6880}
6881
// Packed 231-form FMA, register form with an explicit rounding-control
// operand (rb). The extra AVX512RC:$rc input is matched as (i32 timm:$rc),
// the fourth operand of OpNode; the first three operands keep the
// ($src2, $src3, $src1) order used by avx512_fma3p_231_rm.
// null_frag is passed in the pattern slot preceding the OpNode pattern.
// Unlike avx512_fma3p_231_rm, this `let` does not set mayRaiseFPException.
6882multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6883                                 X86FoldableSchedWrite sched,
6884                                 X86VectorVTInfo _> {
6885  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6886      Uses = [MXCSR] in
6887  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6888          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6889          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6890          (null_frag),
6891          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6892          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6893}
6894
// Expands the packed 231-form FMA over the three vector widths described by
// the AVX512VLVectorVTInfo `_`.
//   prd - feature predicate for this element type (defaults to HasAVX512).
// Each width picks the matching member of `sched` (ZMM/YMM/XMM) and sets
// EVEX_CD8 from that width's EltSize.
6895multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6896                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6897                                   X86SchedWriteWidths sched,
6898                                   AVX512VLVectorVTInfo _,
6899                                   Predicate prd = HasAVX512> {
  // 512-bit: the only width that also gets the rounding-control variant.
6900  let Predicates = [prd] in {
6901    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6902                                      sched.ZMM, _.info512>,
6903                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6904                                        _.info512>,
6905                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6906  }
  // 256-bit and 128-bit: additionally require HasVLX; r/m/mb variants only.
6907  let Predicates = [HasVLX, prd] in {
6908    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6909                                    sched.YMM, _.info256>,
6910                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6911    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6912                                    sched.XMM, _.info128>,
6913                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6914  }
6915}
6916
// Instantiates the packed 231-form FMA multiclass once per element type,
// appending "ph", "ps" or "pd" to the mnemonic stem. All three variants use
// the SchedWriteFMA scheduling class.
6917multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6918                              SDNode MaskOpNode, SDNode OpNodeRnd > {
    // f16 elements: gated on HasFP16, T_MAP6PD encoding mix-in.
6919    defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
6920                                      OpNodeRnd, SchedWriteFMA,
6921                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
    // f32 elements: default predicate (HasAVX512), T8PD.
6922    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6923                                      OpNodeRnd, SchedWriteFMA,
6924                                      avx512vl_f32_info>, T8PD;
    // f64 elements: default predicate (HasAVX512), T8PD plus VEX_W.
6925    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6926                                      OpNodeRnd, SchedWriteFMA,
6927                                      avx512vl_f64_info>, T8PD, VEX_W;
6928}
6929
// Packed 231-form FMA families. Arguments, in order: opcode byte, mnemonic
// stem, unmasked pattern operator, masked node, rounding-control node.
// The fmaddsub/fmsubadd families pass the same node for the unmasked and the
// masked operator.
6930defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
6931                                       fma, X86FmaddRnd>;
6932defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6933                                       X86Fmsub, X86FmsubRnd>;
6934defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6935                                       X86Fmaddsub, X86FmaddsubRnd>;
6936defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6937                                       X86Fmsubadd, X86FmsubaddRnd>;
6938defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6939                                       X86Fnmadd, X86FnmaddRnd>;
6940defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6941                                       X86Fnmsub, X86FnmsubRnd>;
6942
// Packed 132-form FMA: register (r), full-vector load (m) and broadcast-load
// (mb) variants for one vector type `_`.
// $src1 is tied to $dst. The register pattern uses the operand order
// ($src1, $src3, $src2); the two memory patterns use ($src3, $src1, $src2)
// with $src3 being the load (see the comments on m and mb).
// All three variants read MXCSR and are marked mayRaiseFPException.
6943multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6944                               SDNode MaskOpNode, X86FoldableSchedWrite sched,
6945                               X86VectorVTInfo _> {
6946  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6947      Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register form. null_frag is passed in the slot where m/mb pass their
  // OpNode pattern, so only the MaskOpNode pattern is supplied here.
6948  defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6949          (ins _.RC:$src2, _.RC:$src3),
6950          OpcodeStr, "$src3, $src2", "$src2, $src3",
6951          (null_frag),
6952          (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6953          EVEX_4V, Sched<[sched]>;
6954
6955  // The pattern is written in 312 order so that the load is in a different
6956  // place from the 213 and 231 patterns; this helps tablegen's duplicate
  // pattern detection.
6957  defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6958          (ins _.RC:$src2, _.MemOp:$src3),
6959          OpcodeStr, "$src3, $src2", "$src2, $src3",
6960          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6961          (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6962          EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
6963
6964  // The pattern is written in 312 order so that the load is in a different
6965  // place from the 213 and 231 patterns; this helps tablegen's duplicate
  // pattern detection.
6966  defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6967         (ins _.RC:$src2, _.ScalarMemOp:$src3),
6968         OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6969         "$src2, ${src3}"#_.BroadcastStr,
6970         (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6971                       _.RC:$src1, _.RC:$src2)),
6972         (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6973                           _.RC:$src1, _.RC:$src2)), 1, 0>,
6974         EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
6975  }
6976}
6977
// Packed 132-form FMA, register form with an explicit rounding-control
// operand (rb). The extra AVX512RC:$rc input is matched as (i32 timm:$rc),
// the fourth operand of OpNode; the first three operands are
// ($src1, $src3, $src2), matching the register form of avx512_fma3p_132_rm.
// null_frag is passed in the pattern slot preceding the OpNode pattern.
// Unlike avx512_fma3p_132_rm, this `let` does not set mayRaiseFPException.
6978multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6979                                 X86FoldableSchedWrite sched,
6980                                 X86VectorVTInfo _> {
6981  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6982      Uses = [MXCSR] in
6983  defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6984          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6985          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6986          (null_frag),
6987          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6988          1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
6989}
6990
// Expands the packed 132-form FMA over the three vector widths described by
// the AVX512VLVectorVTInfo `_`.
//   prd - feature predicate for this element type (defaults to HasAVX512).
// Each width picks the matching member of `sched` (ZMM/YMM/XMM) and sets
// EVEX_CD8 from that width's EltSize.
6991multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
6992                                   SDNode MaskOpNode, SDNode OpNodeRnd,
6993                                   X86SchedWriteWidths sched,
6994                                   AVX512VLVectorVTInfo _,
6995                                   Predicate prd = HasAVX512> {
  // 512-bit: the only width that also gets the rounding-control variant.
6996  let Predicates = [prd] in {
6997    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6998                                      sched.ZMM, _.info512>,
6999                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
7000                                        _.info512>,
7001                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7002  }
  // 256-bit and 128-bit: additionally require HasVLX; r/m/mb variants only.
7003  let Predicates = [HasVLX, prd] in {
7004    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7005                                    sched.YMM, _.info256>,
7006                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7007    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
7008                                    sched.XMM, _.info128>,
7009                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7010  }
7011}
7012
// Instantiates the packed 132-form FMA multiclass once per element type,
// appending "ph", "ps" or "pd" to the mnemonic stem. All three variants use
// the SchedWriteFMA scheduling class.
7013multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
7014                              SDNode MaskOpNode, SDNode OpNodeRnd > {
    // f16 elements: gated on HasFP16, T_MAP6PD encoding mix-in.
7015    defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
7016                                      OpNodeRnd, SchedWriteFMA,
7017                                      avx512vl_f16_info, HasFP16>, T_MAP6PD;
    // f32 elements: default predicate (HasAVX512), T8PD.
7018    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
7019                                      OpNodeRnd, SchedWriteFMA,
7020                                      avx512vl_f32_info>, T8PD;
    // f64 elements: default predicate (HasAVX512), T8PD plus VEX_W.
7021    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
7022                                      OpNodeRnd, SchedWriteFMA,
7023                                      avx512vl_f64_info>, T8PD, VEX_W;
7024}
7025
// Packed 132-form FMA families. Arguments, in order: opcode byte, mnemonic
// stem, unmasked pattern operator, masked node, rounding-control node.
// The fmaddsub/fmsubadd families pass the same node for the unmasked and the
// masked operator.
7026defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
7027                                       fma, X86FmaddRnd>;
7028defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
7029                                       X86Fmsub, X86FmsubRnd>;
7030defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
7031                                       X86Fmaddsub, X86FmaddsubRnd>;
7032defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
7033                                       X86Fmsubadd, X86FmsubaddRnd>;
7034defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
7035                                       X86Fnmadd, X86FnmaddRnd>;
7036defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
7037                                       X86Fnmsub, X86FnmsubRnd>;
7038
7039// Scalar FMA
// Defines the six scalar FMA instruction variants for one operand form.
//   RHS_r / RHS_m / RHS_b - selection patterns for the codegen-only register,
//                           memory and rounding-control definitions below.
//   MaskOnlyReg           - when set, `r` and `rb` are created with an empty
//                           pattern list; `m` always receives [RHS_m].
// The *_Int variants operate on the vector register class _.RC and are given
// null_frag here; the codegen-only variants operate on the scalar class
// _.FRC. $src1 is tied to $dst for everything in this multiclass.
7040multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7041                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
7042let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
7043  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7044          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
7045          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7046          EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
7047
  // No pattern is attached, so mayLoad is stated explicitly for the memory
  // form.
7048  let mayLoad = 1 in
7049  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7050          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
7051          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7052          EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
7053
  // Rounding-control form: marked Uses = [MXCSR] and, unlike r_Int/m_Int,
  // does not carry SIMD_EXC.
7054  let Uses = [MXCSR] in
7055  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7056         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7057         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
7058         EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
7059
  // Codegen-only scalar-register (_.FRC) variants; these list $src1
  // explicitly in `ins`.
7060  let isCodeGenOnly = 1, isCommutable = 1 in {
7061    def r     : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7062                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
7063                     !strconcat(OpcodeStr,
7064                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7065                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
7066    def m     : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
7067                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
7068                    !strconcat(OpcodeStr,
7069                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7070                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
7071
7072    let Uses = [MXCSR] in
7073    def rb    : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
7074                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
7075                     !strconcat(OpcodeStr,
7076                              "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
7077                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
7078                     Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
7079  }// isCodeGenOnly = 1
7080}// Constraints = "$src1 = $dst"
7081}
7082
// Creates the 213, 231 and 132 scalar FMA forms for one element type `_`.
// Record names are NAME#<form>#SUFF#"Z"; mnemonics are
// OpcodeStr#<form>#_.Suffix. For each form the three dags are the register,
// load and rounding-control patterns handed to avx512_fma3s_common, and the
// final argument is its MaskOnlyReg flag (0 for 213, 1 for 231 and 132).
// Operand orders used in the patterns:
//   213: ($src2, $src1, $src3)
//   231: ($src2, $src3, $src1)
//   132: ($src1, $src3, $src2), except the load pattern, which is
//        ($src3, $src1, $src2).
7083multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7084                            string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd,
7085                            X86VectorVTInfo _, string SUFF> {
7086  let ExeDomain = _.ExeDomain in {
7087  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
7088                // Operands for intrinsic are in 123 order to preserve passthru
7089                // semantics.
7090                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7091                         _.FRC:$src3))),
7092                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7093                         (_.ScalarLdFrag addr:$src3)))),
7094                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
7095                         _.FRC:$src3, (i32 timm:$rc)))), 0>;
7096
7097  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
7098                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
7099                                          _.FRC:$src1))),
7100                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
7101                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
7102                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
7103                         _.FRC:$src1, (i32 timm:$rc)))), 1>;
7104
7105  // One pattern is in 312 order so that the load is in a different place from
7106  // the 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
7107  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
7108                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
7109                         _.FRC:$src2))),
7110                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
7111                                 _.FRC:$src1, _.FRC:$src2))),
7112                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
7113                         _.FRC:$src2, (i32 timm:$rc)))), 1>;
7114  }
7115}
7116
// Instantiates the scalar FMA forms for the three scalar element types.
// SS (f32x_info) and SD (f64x_info) are gated on HasAVX512; SH (f16x_info) is
// gated on HasFP16 and uses T_MAP6PD instead of T8PD. The EVEX_CD8 width
// follows the element size (32, 64, 16), and SD additionally sets VEX_W.
7117multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7118                        string OpcodeStr, SDPatternOperator OpNode, SDNode OpNodeRnd> {
7119  let Predicates = [HasAVX512] in {
7120    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7121                                 OpNodeRnd, f32x_info, "SS">,
7122                                 EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
7123    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7124                                 OpNodeRnd, f64x_info, "SD">,
7125                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
7126  }
7127  let Predicates = [HasFP16] in {
7128    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7129                                 OpNodeRnd, f16x_info, "SH">,
7130                                 EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
7131  }
7132}
7133
// Scalar FMA families. Arguments, in order: the 213, 231 and 132 opcode
// bytes, the mnemonic stem, the pattern operator and the rounding-control
// node.
7134defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
7135defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
7136defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
7137defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
7138
// Selection patterns that map "scalar FMA whose result is inserted into
// element 0 of $src1" onto the *_Int scalar FMA instructions.
//   Op       - operator matched by the unmasked patterns.
//   MaskedOp - node matched under X86selects_mask.
//   RndOp    - node matched by the rounding-control patterns; it takes an
//              extra (i32 timm:$rc) operand.
//   Prefix / Suffix - used to look up the instruction record
//              Prefix#<form>#Suffix#"Z"<variant>.
//   Move     - node that merges the scalar result back into $src1.
//   ZeroFP   - zero constant used as the false value of the zeroing patterns.
//   prd      - feature predicate (defaults to HasAVX512).
// Every pattern has the shape
//   (Move $src1, (scalar_to_vector <scalar expression>))
// where the scalar expression reads element 0 of $src1 via extractelt.
// The position of that extractelt and of the load decides the form:
//   213: ($src2, extract($src1), $src3 or load)
//   231: ($src2, $src3 or load, extract($src1))
//   132: (extract($src1), load, $src2)
// Scalar register operands are moved to VR128X with COPY_TO_REGCLASS because
// the *_Int instructions take vector-register operands.
7139multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
7140                                      SDNode RndOp, string Prefix,
7141                                      string Suffix, SDNode Move,
7142                                      X86VectorVTInfo _, PatLeaf ZeroFP,
7143                                      Predicate prd = HasAVX512> {
7144  let Predicates = [prd] in {
    // Unmasked patterns: 213r, 231r, 213m, 132m, 231m.
7145    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7146                (Op _.FRC:$src2,
7147                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7148                    _.FRC:$src3))))),
7149              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
7150               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7151               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7152
7153    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7154                (Op _.FRC:$src2, _.FRC:$src3,
7155                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7156              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
7157               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7158               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7159
7160    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7161                (Op _.FRC:$src2,
7162                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7163                    (_.ScalarLdFrag addr:$src3)))))),
7164              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
7165               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7166               addr:$src3)>;
7167
7168    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7169                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7170                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
7171              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
7172               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7173               addr:$src3)>;
7174
7175    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7176                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7177                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7178              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
7179               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7180               addr:$src3)>;
7181
    // Masked patterns whose false value is element 0 of $src1 ("k" variants):
    // 213r, 213m, 132m, 231r, 231m.
7182    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7183               (X86selects_mask VK1WM:$mask,
7184                (MaskedOp _.FRC:$src2,
7185                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7186                    _.FRC:$src3),
7187                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7188              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
7189               VR128X:$src1, VK1WM:$mask,
7190               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7191               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7192
7193    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7194               (X86selects_mask VK1WM:$mask,
7195                (MaskedOp _.FRC:$src2,
7196                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7197                    (_.ScalarLdFrag addr:$src3)),
7198                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7199              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
7200               VR128X:$src1, VK1WM:$mask,
7201               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7202
7203    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7204               (X86selects_mask VK1WM:$mask,
7205                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7206                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7207                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7208              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7209               VR128X:$src1, VK1WM:$mask,
7210               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7211
7212    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7213               (X86selects_mask VK1WM:$mask,
7214                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7215                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7216                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7217              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7218               VR128X:$src1, VK1WM:$mask,
7219               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7220               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7221
7222    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7223               (X86selects_mask VK1WM:$mask,
7224                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7225                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7226                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7227              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7228               VR128X:$src1, VK1WM:$mask,
7229               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7230
    // Masked patterns whose false value is ZeroFP ("kz" variants):
    // 213r, 231r, 213m, 132m, 231m.
7231    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7232               (X86selects_mask VK1WM:$mask,
7233                (MaskedOp _.FRC:$src2,
7234                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7235                          _.FRC:$src3),
7236                (_.EltVT ZeroFP)))))),
7237              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7238               VR128X:$src1, VK1WM:$mask,
7239               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7240               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7241
7242    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7243               (X86selects_mask VK1WM:$mask,
7244                (MaskedOp _.FRC:$src2, _.FRC:$src3,
7245                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7246                (_.EltVT ZeroFP)))))),
7247              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7248               VR128X:$src1, VK1WM:$mask,
7249               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7250               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7251
7252    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7253               (X86selects_mask VK1WM:$mask,
7254                (MaskedOp _.FRC:$src2,
7255                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7256                          (_.ScalarLdFrag addr:$src3)),
7257                (_.EltVT ZeroFP)))))),
7258              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7259               VR128X:$src1, VK1WM:$mask,
7260               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7261
    // 132 form: the load must be the second operand and $src2 the third,
    // exactly as in the Zm_Int and Zm_Intk patterns above. With the load in
    // the third position the pattern would describe the 213 operand order
    // while still selecting the 132 instruction.
7262    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7263               (X86selects_mask VK1WM:$mask,
7264                (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7265                          (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7266                (_.EltVT ZeroFP)))))),
7267              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7268               VR128X:$src1, VK1WM:$mask,
7269               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7270
7271    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7272               (X86selects_mask VK1WM:$mask,
7273                (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7274                          (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7275                (_.EltVT ZeroFP)))))),
7276              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7277               VR128X:$src1, VK1WM:$mask,
7278               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7279
7280    // Patterns with rounding mode.
    // Register forms only (213 and 231), each in unmasked, "k" and "kz"
    // flavours; $rc is forwarded to the instruction as AVX512RC:$rc.
7281    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7282                (RndOp _.FRC:$src2,
7283                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7284                       _.FRC:$src3, (i32 timm:$rc)))))),
7285              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7286               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7287               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7288
7289    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7290                (RndOp _.FRC:$src2, _.FRC:$src3,
7291                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7292                       (i32 timm:$rc)))))),
7293              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7294               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7295               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7296
7297    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7298               (X86selects_mask VK1WM:$mask,
7299                (RndOp _.FRC:$src2,
7300                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7301                       _.FRC:$src3, (i32 timm:$rc)),
7302                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7303              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7304               VR128X:$src1, VK1WM:$mask,
7305               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7306               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7307
7308    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7309               (X86selects_mask VK1WM:$mask,
7310                (RndOp _.FRC:$src2, _.FRC:$src3,
7311                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7312                       (i32 timm:$rc)),
7313                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7314              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7315               VR128X:$src1, VK1WM:$mask,
7316               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7317               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7318
7319    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7320               (X86selects_mask VK1WM:$mask,
7321                (RndOp _.FRC:$src2,
7322                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7323                       _.FRC:$src3, (i32 timm:$rc)),
7324                (_.EltVT ZeroFP)))))),
7325              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7326               VR128X:$src1, VK1WM:$mask,
7327               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7328               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7329
7330    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7331               (X86selects_mask VK1WM:$mask,
7332                (RndOp _.FRC:$src2, _.FRC:$src3,
7333                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7334                       (i32 timm:$rc)),
7335                (_.EltVT ZeroFP)))))),
7336              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7337               VR128X:$src1, VK1WM:$mask,
7338               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7339               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
7340  }
7341}
// Instantiate the scalar FMA selection patterns for each operation
// (VFMADD, VFMSUB, VFNMADD, VFNMSUB) and each scalar element type.
// Arguments, in order: unmasked operator, masked node, rounding-control node,
// instruction-name prefix, instruction-name suffix, move node, vector type
// info, zero constant and (optionally) the predicate.

// Half precision: SH instructions, X86Movsh, v8f16x_info, gated on HasFP16.
7342defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
7343                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7344defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
7345                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7346defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
7347                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7348defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
7349                                  X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
7350
// Single precision: SS instructions, X86Movss, v4f32x_info, default predicate.
7351defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7352                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7353defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7354                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7355defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7356                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7357defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7358                                  "SS", X86Movss, v4f32x_info, fp32imm0>;
7359
// Double precision: SD instructions, X86Movsd, v2f64x_info, default predicate.
7360defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
7361                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7362defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
7363                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7364defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
7365                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7366defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7367                                  "SD", X86Movsd, v2f64x_info, fp64imm0>;
7368
7369//===----------------------------------------------------------------------===//
7370// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7371//===----------------------------------------------------------------------===//
// Register (r), full-vector load (m) and broadcast-load (mb) variants of the
// 52-bit multiply-add instructions for one vector type `_`. $src1 is tied to
// $dst by the enclosing `let` and is passed as the last operand of OpNode in
// every pattern.
7372let Constraints = "$src1 = $dst" in {
7373multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7374                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7375  // NOTE: The SDNode has the multiply operands first with the add last.
7376  // This enables commuted load patterns to be autogenerated by tablegen.
7377  let ExeDomain = _.ExeDomain in {
  // Only the register form passes the trailing "1, 1" arguments; m and mb
  // leave them at the defaults of AVX512_maskable_3src.
7378  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7379          (ins _.RC:$src2, _.RC:$src3),
7380          OpcodeStr, "$src3, $src2", "$src2, $src3",
7381          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7382          T8PD, EVEX_4V, Sched<[sched]>;
7383
7384  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7385          (ins _.RC:$src2, _.MemOp:$src3),
7386          OpcodeStr, "$src3, $src2", "$src2, $src3",
7387          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7388          T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7389
  // Broadcast form: scalar memory operand loaded through _.BroadcastLdFrag;
  // the assembly string carries _.BroadcastStr and the encoding adds EVEX_B.
7390  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7391            (ins _.RC:$src2, _.ScalarMemOp:$src3),
7392            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
7393            !strconcat("$src2, ${src3}", _.BroadcastStr ),
7394            (OpNode _.RC:$src2,
7395                    (_.VT (_.BroadcastLdFrag addr:$src3)),
7396                    _.RC:$src1)>,
7397            T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7398  }
7399}
7400} // Constraints = "$src1 = $dst"
7401
// avx512_pmadd52_common - Instantiates avx512_pmadd52_rm for the three vector
// widths described by the AVX512VLVectorVTInfo argument:
//   Z    - 512-bit (info512, sched.ZMM, EVEX_V512), requires HasIFMA.
//   Z256 - 256-bit (info256, sched.YMM, EVEX_V256), requires HasVLX + HasIFMA.
//   Z128 - 128-bit (info128, sched.XMM, EVEX_V128), requires HasVLX + HasIFMA.
// Each width uses a full-vector compressed displacement (CD8VF) with the
// element size taken from the corresponding info record.
7402multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7403                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7404  let Predicates = [HasIFMA] in {
7405    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7406                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7407  }
7408  let Predicates = [HasVLX, HasIFMA] in {
7409    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7410                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7411    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7412                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
7413  }
7414}
7415
// The two IFMA instructions: opcode 0xb4 ("vpmadd52luq", node x86vpmadd52l)
// and opcode 0xb5 ("vpmadd52huq", node x86vpmadd52h). Both are instantiated
// on the i64 vector types (avx512vl_i64_info) with VEX_W set and use the
// SchedWriteVecIMul scheduling classes.
7416defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7417                                         SchedWriteVecIMul, avx512vl_i64_info>,
7418                                         VEX_W;
7419defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7420                                         SchedWriteVecIMul, avx512vl_i64_info>,
7421                                         VEX_W;
7422
7423//===----------------------------------------------------------------------===//
7424// AVX-512  Scalar convert from signed/unsigned integer to float/double
7425//===----------------------------------------------------------------------===//
7426
// avx512_vcvtsi - Scalar integer-to-FP conversion without an explicit
// rounding operand. Defines four instructions plus one assembler alias:
//   rr, rm         - codegen-only forms on the scalar FP register class
//                    (DstVT.FRC) with empty pattern lists.
//   rr_Int, rm_Int - forms on the vector register class (DstVT.RC) that carry
//                    the OpNode selection pattern.
// Parameters:
//   opc, asm             - opcode byte and base mnemonic (without the "v"
//                          that the alias below prepends).
//   OpNode               - pattern operator for the _Int forms.
//   sched                - scheduling class.
//   SrcRC                - integer source register class.
//   DstVT                - destination vector type info.
//   x86memop, ld_frag    - memory operand and load fragment for the rm forms.
//   mem                  - operand-size suffix string (e.g. "l" or "q").
//   _Uses                - implicit register uses, default [MXCSR].
//   _mayRaiseFPException - default 1; callers may pass 0.
7427multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7428                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
7429                    X86MemOperand x86memop, PatFrag ld_frag, string asm,
7430                    string mem, list<Register> _Uses = [MXCSR],
7431                    bit _mayRaiseFPException = 1> {
7432let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7433    mayRaiseFPException = _mayRaiseFPException in {
  // FRC-based forms: no patterns here, so they are only selected through
  // explicit Pat<> records that name them.
7434  let hasSideEffects = 0, isCodeGenOnly = 1 in {
7435    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7436              (ins DstVT.FRC:$src1, SrcRC:$src),
7437              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7438              EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
    // The memory forms put the size suffix inside braces after the mnemonic.
7439    let mayLoad = 1 in
7440      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7441              (ins DstVT.FRC:$src1, x86memop:$src),
7442              asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7443              EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7444  } // hasSideEffects = 0
  // Vector-register forms: $src1 supplies the vector operand passed to
  // OpNode alongside the integer source.
7445  def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7446                (ins DstVT.RC:$src1, SrcRC:$src2),
7447                !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7448                [(set DstVT.RC:$dst,
7449                      (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7450               EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7451
7452  def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7453                (ins DstVT.RC:$src1, x86memop:$src2),
7454                asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7455                [(set DstVT.RC:$dst,
7456                      (OpNode (DstVT.VT DstVT.RC:$src1),
7457                               (ld_frag addr:$src2)))]>,
7458                EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7459}
  // AT&T-only alias ("att") spelling the register form with an explicit size
  // suffix: "v" # asm # mem maps to rr_Int.
7460  def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7461                  (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7462                  DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
7463}
7464
// avx512_vcvtsi_round - Register-only integer-to-FP conversion with an
// explicit rounding-control operand. Defines rrb_Int, which takes an extra
// AVX512RC:$rc operand, passes it to OpNode as (i32 timm:$rc), and is encoded
// with EVEX_B and EVEX_RC. It always lists MXCSR in Uses. An AT&T-only alias
// with the explicit size suffix ("v" # asm # mem) maps to the same
// instruction.
7465multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7466                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
7467                               X86VectorVTInfo DstVT, string asm,
7468                               string mem> {
  // This "let" has no braces, so it applies only to the rrb_Int def below.
7469  let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7470  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7471              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7472              !strconcat(asm,
7473                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7474              [(set DstVT.RC:$dst,
7475                    (OpNode (DstVT.VT DstVT.RC:$src1),
7476                             SrcRC:$src2,
7477                             (i32 timm:$rc)))]>,
7478              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7479  def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7480                  (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7481                  DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
7482}
7483
// avx512_vcvtsi_common - Convenience wrapper that defines, under the caller's
// NAME, both the rounding-control form (avx512_vcvtsi_round using OpNodeRnd)
// and the plain register/memory forms (avx512_vcvtsi using OpNode, with its
// default _Uses and _mayRaiseFPException), all marked VEX_LIG.
7484multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7485                                X86FoldableSchedWrite sched,
7486                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
7487                                X86MemOperand x86memop, PatFrag ld_frag,
7488                                string asm, string mem> {
7489  defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7490              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7491                            ld_frag, asm, mem>, VEX_LIG;
7492}
7493
// Signed and unsigned scalar integer-to-FP conversion instructions, their
// suffix-less memory aliases, and the selection patterns for the scalar
// (f32/f64) any_sint_to_fp / any_uint_to_fp nodes. Everything in this scope
// requires HasAVX512.
7494let Predicates = [HasAVX512] in {
// Signed conversions, opcode 0x2A. The 64-bit source variants add VEX_W.
7495defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7496                                 WriteCvtI2SS, GR32,
7497                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7498                                 XS, EVEX_CD8<32, CD8VT1>;
7499defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7500                                 WriteCvtI2SS, GR64,
7501                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7502                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// The i32 -> f64 variant uses avx512_vcvtsi directly, so it gets no
// rounding-control (rrb_Int) form; it passes null_frag as OpNode, an empty
// Uses list and mayRaiseFPException = 0.
// NOTE(review): presumably because every 32-bit integer converts to f64
// exactly, so rounding and FP exceptions cannot occur - confirm.
7503defm VCVTSI2SDZ  : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7504                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7505                                 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7506defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7507                                 WriteCvtI2SD, GR64,
7508                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7509                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7510
// AT&T-only aliases: a suffix-less mnemonic with a memory operand maps to the
// 32-bit (i32mem) rm_Int form.
7511def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7512              (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7513def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7514              (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7515
// Scalar any_sint_to_fp from a loaded integer: select the codegen-only rm
// forms, feeding IMPLICIT_DEF as the $src1 operand.
7516def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7517          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7518def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7519          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7520def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7521          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7522def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7523          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7524
// Scalar any_sint_to_fp from a general-purpose register: select the rr forms.
7525def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7526          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7527def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7528          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7529def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7530          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7531def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7532          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7533
// Unsigned conversions, opcode 0x7B; same structure as the signed set above,
// including the i32 -> f64 variant that omits the rounding-control form.
7534defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7535                                  WriteCvtI2SS, GR32,
7536                                  v4f32x_info, i32mem, loadi32,
7537                                  "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7538defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7539                                  WriteCvtI2SS, GR64,
7540                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7541                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7542defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7543                                  i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7544                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7545defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7546                                  WriteCvtI2SD, GR64,
7547                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7548                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7549
// AT&T-only suffix-less memory aliases for the unsigned mnemonics, again
// mapping to the i32mem rm_Int forms.
7550def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7551              (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7552def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7553              (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7554
// Scalar any_uint_to_fp from a loaded integer.
7555def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7556          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7557def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7558          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7559def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7560          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7561def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7562          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7563
// Scalar any_uint_to_fp from a general-purpose register.
7564def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7565          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7566def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7567          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7568def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7569          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7570def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7571          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7572}
7573
7574//===----------------------------------------------------------------------===//
7575// AVX-512  Scalar convert from float/double to integer
7576//===----------------------------------------------------------------------===//
7577
// avx512_cvt_s_int_round - Scalar FP-to-integer conversion on the vector
// register class of the source type. Defines:
//   rr_Int  - register source, pattern on OpNode.
//   rrb_Int - register source plus AVX512RC:$rc rounding operand, pattern on
//             OpNodeRnd with (i32 timm:$rc); encoded with EVEX_B, EVEX_RC.
//   rm_Int  - memory source matched via SrcVT.ScalarIntMemFrags.
// plus three AT&T-only aliases that append aliasStr to "v" # asm.
// Parameters:
//   SrcVT / DstVT - source FP and destination integer type info.
//   aliasStr      - suffix text for the aliases (callers pass "{l}" or "{q}").
//   prd           - required predicate, default HasAVX512.
7578multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7579                                  X86VectorVTInfo DstVT, SDNode OpNode,
7580                                  SDNode OpNodeRnd,
7581                                  X86FoldableSchedWrite sched, string asm,
7582                                  string aliasStr, Predicate prd = HasAVX512> {
7583  let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
7584    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7585                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7586                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7587                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    // The rounding form lists MXCSR in Uses explicitly and, unlike rr_Int and
    // rm_Int, does not inherit from SIMD_EXC.
    // NOTE(review): SIMD_EXC is defined elsewhere; presumably it adds the
    // MXCSR use together with mayRaiseFPException - confirm.
7588    let Uses = [MXCSR] in
7589    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7590                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7591                 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7592                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7593                 Sched<[sched]>;
7594    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7595                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7596                [(set DstVT.RC:$dst, (OpNode
7597                      (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7598                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7599  } // Predicates = [prd]
7600
  // The aliases are defined outside the predicate scope above.
7601  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7602          (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7603  def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7604          (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7605  def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7606          (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7607                                          SrcVT.IntScalarMemOp:$src), 0, "att">;
7608}
7609
7610// Convert float/double to signed/unsigned int 32/64
// Signed conversions use opcode 0x2D with X86cvts2si / X86cvts2siRnd;
// unsigned conversions use opcode 0x79 with X86cvts2usi / X86cvts2usiRnd.
// f32 sources take the XS prefix and EVEX_CD8<32, CD8VT1>; f64 sources take
// XD and EVEX_CD8<64, CD8VT1>. 64-bit destinations add VEX_W and pass "{q}"
// as the alias suffix instead of "{l}".
7611defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7612                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7613                                   XS, EVEX_CD8<32, CD8VT1>;
7614defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7615                                   X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7616                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7617defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7618                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7619                                   XS, EVEX_CD8<32, CD8VT1>;
7620defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7621                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7622                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7623defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7624                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7625                                   XD, EVEX_CD8<64, CD8VT1>;
7626defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7627                                   X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7628                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7629defm VCVTSD2USIZ:   avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7630                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7631                                   XD, EVEX_CD8<64, CD8VT1>;
7632defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7633                                   X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7634                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7635
// avx512_cvt_s - Codegen-only scalar FP-to-integer forms that operate on the
// scalar FP register class (SrcVT.FRC) instead of the vector class:
//   rr - register source, pattern (OpNode SrcVT.FRC:$src).
//   rm - memory source loaded via SrcVT.ScalarLdFrag.
// Unlike the sibling multiclasses, asm is used as-is (callers pass the full
// "v..." mnemonic) and the predicate is fixed to HasAVX512.
7636multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7637                        X86VectorVTInfo DstVT, SDNode OpNode,
7638                        X86FoldableSchedWrite sched> {
7639  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7640    let isCodeGenOnly = 1 in {
7641    def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7642                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7643                [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7644                EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7645    def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7646                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7647                [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7648                EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7649    }
7650  } // Predicates = [HasAVX512]
7651}
7652
// rr/rm forms for lrint / llrint. These reuse the VCVTSS2SI* / VCVTSD2SI*
// names of the avx512_cvt_s_int_round instantiations above; the record
// suffixes differ (rr/rm here versus rr_Int/rrb_Int/rm_Int there). The
// 32-bit destination variants match lrint, the 64-bit ones match llrint.
7653defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7654                       lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
7655defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7656                       llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7657defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7658                       lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
7659defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7660                       llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7661
// lrint producing an i64 result: the 64-bit instructions above only carry an
// llrint pattern, so these extra patterns map (i64 (lrint ...)) onto the same
// VCVTSS2SI64Z / VCVTSD2SI64Z rr and rm forms.
7662let Predicates = [HasAVX512] in {
7663  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7664  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7665
7666  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7667  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7668}
7669
7670// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7671// which produce unnecessary vmovs{s,d} instructions
// Each pattern matches X86Movss/X86Movsd of $dst with a scalar_to_vector of
// an integer-to-FP conversion and selects the corresponding *_Int instruction
// directly, passing $dst as the first vector operand. The first eight
// patterns cover the signed conversions (any_sint_to_fp); the last eight
// cover the unsigned vcvtusi2s{s,d} conversions (any_uint_to_fp). Within each
// group the order is: GR64, loadi64, GR32, loadi32, first for v4f32 and then
// for v2f64.
7672let Predicates = [HasAVX512] in {
7673def : Pat<(v4f32 (X86Movss
7674                   (v4f32 VR128X:$dst),
7675                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7676          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7677
7678def : Pat<(v4f32 (X86Movss
7679                   (v4f32 VR128X:$dst),
7680                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7681          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7682
7683def : Pat<(v4f32 (X86Movss
7684                   (v4f32 VR128X:$dst),
7685                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7686          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7687
7688def : Pat<(v4f32 (X86Movss
7689                   (v4f32 VR128X:$dst),
7690                   (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7691          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7692
7693def : Pat<(v2f64 (X86Movsd
7694                   (v2f64 VR128X:$dst),
7695                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7696          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7697
7698def : Pat<(v2f64 (X86Movsd
7699                   (v2f64 VR128X:$dst),
7700                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7701          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7702
7703def : Pat<(v2f64 (X86Movsd
7704                   (v2f64 VR128X:$dst),
7705                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7706          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7707
7708def : Pat<(v2f64 (X86Movsd
7709                   (v2f64 VR128X:$dst),
7710                   (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7711          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7712
// Unsigned variants.
7713def : Pat<(v4f32 (X86Movss
7714                   (v4f32 VR128X:$dst),
7715                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7716          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7717
7718def : Pat<(v4f32 (X86Movss
7719                   (v4f32 VR128X:$dst),
7720                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7721          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7722
7723def : Pat<(v4f32 (X86Movss
7724                   (v4f32 VR128X:$dst),
7725                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7726          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7727
7728def : Pat<(v4f32 (X86Movss
7729                   (v4f32 VR128X:$dst),
7730                   (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7731          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7732
7733def : Pat<(v2f64 (X86Movsd
7734                   (v2f64 VR128X:$dst),
7735                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7736          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7737
7738def : Pat<(v2f64 (X86Movsd
7739                   (v2f64 VR128X:$dst),
7740                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7741          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7742
7743def : Pat<(v2f64 (X86Movsd
7744                   (v2f64 VR128X:$dst),
7745                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7746          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7747
7748def : Pat<(v2f64 (X86Movsd
7749                   (v2f64 VR128X:$dst),
7750                   (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7751          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7752} // Predicates = [HasAVX512]
7753
7754// Convert float/double to signed/unsigned int 32/64 with truncation
// avx512_cvt_s_all defines, for one source/destination type pair:
//   rr, rm  - codegen-only forms on the scalar FP class (_SrcRC.FRC) with
//             patterns on OpNode.
//   rr_Int  - vector-register form, pattern on OpNodeInt.
//   rrb_Int - vector-register form printed with "{sae}", pattern on
//             OpNodeSAE, encoded with EVEX_B; it has no $rc operand.
//   rm_Int  - memory form matched via _SrcRC.ScalarIntMemFrags.
// plus three AT&T-only aliases built from asm # aliasStr. asm is used as-is
// (callers pass the full "v..." mnemonic). prd defaults to HasAVX512.
7755multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7756                            X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
7757                            SDNode OpNodeInt, SDNode OpNodeSAE,
7758                            X86FoldableSchedWrite sched, string aliasStr,
7759                            Predicate prd = HasAVX512> {
7760let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
7761  let isCodeGenOnly = 1 in {
7762  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7763              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7764              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7765              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7766  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7767              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7768              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7769              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7770  }
7771
7772  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7773            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7774           [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7775           EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  // The {sae} form lists MXCSR in Uses explicitly and does not inherit from
  // SIMD_EXC, unlike the other forms in this multiclass.
7776  let Uses = [MXCSR] in
7777  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7778            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7779            [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7780                                  EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7781  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7782              (ins _SrcRC.IntScalarMemOp:$src),
7783              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7784              [(set _DstRC.RC:$dst,
7785                (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7786              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7787} // Predicates = [prd]
7788
  // Aliases with the size suffix appended; defined outside the predicate
  // scope above.
7789  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7790          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7791  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7792          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7793  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7794          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7795                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
7796}
7797
// Truncating FP-to-signed-integer conversions: opcode 0x2C, generic node
// any_fp_to_sint, intrinsic nodes X86cvtts2Int / X86cvtts2IntSAE. 64-bit
// destinations add VEX_W and use the "{q}" alias suffix.
7798defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7799                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7800                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7801defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7802                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7803                        "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7804defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7805                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7806                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7807defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7808                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7809                        "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7810
// Truncating FP-to-unsigned-integer conversions: opcode 0x78, generic node
// any_fp_to_uint, intrinsic nodes X86cvtts2UInt / X86cvtts2UIntSAE.
7811defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7812                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7813                        "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7814defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7815                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7816                        "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7817defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7818                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7819                        "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7820defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7821                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7822                        "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7823
7824//===----------------------------------------------------------------------===//
7825// AVX-512  Convert from float to double and back
7826//===----------------------------------------------------------------------===//
7827
// avx512_cvt_fp_scalar - Scalar FP-to-FP conversion between the element types
// of _Src (source) and _ (destination). Defines:
//   rr_Int, rm_Int - maskable forms on the vector register classes, pattern
//                    (OpNode $src1, $src2); the memory form matches
//                    _Src.ScalarIntMemFrags.
//   rr, rm         - codegen-only forms on the scalar FP register classes
//                    with empty pattern lists.
// Every record defined here lists MXCSR in Uses and has
// mayRaiseFPException = 1, from the "let" wrapped around the multiclass.
7828let Uses = [MXCSR], mayRaiseFPException = 1 in
7829multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7830                                X86VectorVTInfo _Src, SDNode OpNode,
7831                                X86FoldableSchedWrite sched> {
7832  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7833                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7834                         "$src2, $src1", "$src1, $src2",
7835                         (_.VT (OpNode (_.VT _.RC:$src1),
7836                                       (_Src.VT _Src.RC:$src2)))>,
7837                         EVEX_4V, VEX_LIG, Sched<[sched]>;
7838  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7839                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7840                         "$src2, $src1", "$src1, $src2",
7841                         (_.VT (OpNode (_.VT _.RC:$src1),
7842                                  (_Src.ScalarIntMemFrags addr:$src2)))>,
7843                         EVEX_4V, VEX_LIG,
7844                         Sched<[sched.Folded, sched.ReadAfterFold]>;
7845
  // FRC-based forms: no patterns, so they are only selected through explicit
  // Pat<> records that name them.
7846  let isCodeGenOnly = 1, hasSideEffects = 0 in {
7847    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7848               (ins _.FRC:$src1, _Src.FRC:$src2),
7849               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7850               EVEX_4V, VEX_LIG, Sched<[sched]>;
7851    let mayLoad = 1 in
7852    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7853               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7854               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7855               EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7856  }
7857}
7858
7859// Scalar Conversion with SAE - suppress all exceptions
// Defines rrb_Int: a maskable register-only form printed with "{sae}",
// matched on OpNodeSAE and encoded with EVEX_B. It lists MXCSR in Uses; this
// multiclass itself does not set mayRaiseFPException.
7860multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7861                                    X86VectorVTInfo _Src, SDNode OpNodeSAE,
7862                                    X86FoldableSchedWrite sched> {
7863  let Uses = [MXCSR] in
7864  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7865                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7866                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7867                        (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7868                                         (_Src.VT _Src.RC:$src2)))>,
7869                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7870}
7871
7872// Scalar Conversion with rounding control (RC)
// Defines rrb_Int: a maskable register-only form that takes an extra
// AVX512RC:$rc operand, passes it to OpNodeRnd as (i32 timm:$rc), and is
// encoded with EVEX_B and EVEX_RC. It lists MXCSR in Uses.
7873multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7874                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
7875                                   X86FoldableSchedWrite sched> {
7876  let Uses = [MXCSR] in
7877  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7878                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7879                        "$rc, $src2, $src1", "$src1, $src2, $rc",
7880                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7881                                         (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7882                        EVEX_4V, VEX_LIG, Sched<[sched]>,
7883                        EVEX_B, EVEX_RC;
7884}
// avx512_cvt_fp_scalar_trunc - Narrowing scalar FP conversion. Defines "Z"
// records combining the base forms (avx512_cvt_fp_scalar, OpNode) with the
// rounding-control form (avx512_cvt_fp_rc_scalar, OpNodeRnd). Note the
// parameter order: _src comes before _dst here, while the inner multiclasses
// take the destination info first. The compressed displacement uses the
// source element size. prd defaults to HasAVX512.
7885multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
7886                                      SDNode OpNode, SDNode OpNodeRnd,
7887                                      X86FoldableSchedWrite sched,
7888                                      X86VectorVTInfo _src, X86VectorVTInfo _dst,
7889                                      Predicate prd = HasAVX512> {
  // ExeDomain is SSEPackedSingle regardless of the source/destination types.
7890  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7891    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7892             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7893                               OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
7894  }
7895}
7896
// avx512_cvt_fp_scalar_extend - Widening scalar FP conversion. Defines "Z"
// records combining the base forms (avx512_cvt_fp_scalar, OpNode) with the
// {sae} form (avx512_cvt_fp_sae_scalar, OpNodeSAE) in place of a
// rounding-control form. As in the trunc variant, _src precedes _dst in this
// parameter list, and the compressed displacement uses the source element
// size. prd defaults to HasAVX512.
7897multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
7898                                       SDNode OpNode, SDNode OpNodeSAE,
7899                                       X86FoldableSchedWrite sched,
7900                                       X86VectorVTInfo _src, X86VectorVTInfo _dst,
7901                                       Predicate prd = HasAVX512> {
  // ExeDomain is SSEPackedSingle regardless of the source/destination types.
7902  let Predicates = [prd], ExeDomain = SSEPackedSingle in {
7903    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7904             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7905             EVEX_CD8<_src.EltSize, CD8VT1>;
7906  }
7907}
// Scalar FP-to-FP conversion instructions. Narrowing conversions use the
// trunc multiclass with X86frounds / X86froundsRnd; widening conversions use
// the extend multiclass with X86fpexts / X86fpextsSAE. The f64 <-> f32 pair
// uses the default HasAVX512 predicate; the four half-precision conversions
// pass HasFP16 and use the T_MAP5* / T_MAP6* opcode-map prefixes. The FP16
// forms reuse the WriteCvtSD2SS / WriteCvtSS2SD scheduling classes.
7908defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
7909                                         X86froundsRnd, WriteCvtSD2SS, f64x_info,
7910                                         f32x_info>, XD, VEX_W;
7911defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
7912                                          X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7913                                          f64x_info>, XS;
7914defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
7915                                          X86froundsRnd, WriteCvtSD2SS, f64x_info,
7916                                          f16x_info, HasFP16>, T_MAP5XD, VEX_W;
7917defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
7918                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7919                                          f64x_info, HasFP16>, T_MAP5XS;
7920defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
7921                                          X86froundsRnd, WriteCvtSD2SS, f32x_info,
7922                                          f16x_info, HasFP16>, T_MAP5PS;
7923defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
7924                                          X86fpextsSAE, WriteCvtSS2SD, f16x_info,
7925                                          f32x_info, HasFP16>, T_MAP6PS;
7926
// Selection patterns mapping generic scalar any_fpextend / any_fpround on
// FR16X/FR32X/FR64X values to the EVEX scalar convert instructions defined
// above. Every pattern passes IMPLICIT_DEF as the first input operand of the
// selected instruction and the value being converted as the second.
// Load-folding (rm) variants are provided only for the extends, and they
// additionally require OptForSize.

// f32 -> f64
7927def : Pat<(f64 (any_fpextend FR32X:$src)),
7928          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7929          Requires<[HasAVX512]>;
7930def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7931          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7932          Requires<[HasAVX512, OptForSize]>;
7933
// f64 -> f32 (register form only)
7934def : Pat<(f32 (any_fpround FR64X:$src)),
7935          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7936           Requires<[HasAVX512]>;
7937
// f16 -> f32
7938def : Pat<(f32 (any_fpextend FR16X:$src)),
7939          (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
7940          Requires<[HasFP16]>;
7941def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
7942          (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
7943          Requires<[HasFP16, OptForSize]>;
7944
// f16 -> f64
7945def : Pat<(f64 (any_fpextend FR16X:$src)),
7946          (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
7947          Requires<[HasFP16]>;
7948def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
7949          (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7950          Requires<[HasFP16, OptForSize]>;
7951
// f32 -> f16 and f64 -> f16 (register forms only)
7952def : Pat<(f16 (any_fpround FR32X:$src)),
7953          (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
7954           Requires<[HasFP16]>;
7955def : Pat<(f16 (any_fpround FR64X:$src)),
7956          (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
7957           Requires<[HasFP16]>;
7958
// Vector-register ("_Int") forms. Each pattern matches: extract element 0 of
// $src, convert it, rebuild a vector with scalar_to_vector, and merge that
// into $dst with X86Movss / X86Movsd. The whole DAG is selected to a single
// *_Int convert taking ($dst, $src).

// f64 element 0 rounded to f32 and merged into a v4f32.
7959def : Pat<(v4f32 (X86Movss
7960                   (v4f32 VR128X:$dst),
7961                   (v4f32 (scalar_to_vector
7962                     (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7963          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7964          Requires<[HasAVX512]>;
7965
// f32 element 0 extended to f64 and merged into a v2f64.
7966def : Pat<(v2f64 (X86Movsd
7967                   (v2f64 VR128X:$dst),
7968                   (v2f64 (scalar_to_vector
7969                     (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7970          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7971          Requires<[HasAVX512]>;
7972
7973//===----------------------------------------------------------------------===//
7974// AVX-512  Vector convert from signed/unsigned integer to float/double
7975//          and from float/double to signed/unsigned integer
7976//===----------------------------------------------------------------------===//
7977
// Common building block for the packed conversions in this section. For a
// destination type info `_` and a source type info `_Src` it emits three
// maskable forms through AVX512_maskable_cvt:
//   rr  - register source (_Src.RC).
//   rm  - full memory source (MemOp); the only form whose mnemonic is
//         OpcodeStr#Alias.
//   rmb - memory source of type _Src.ScalarMemOp matched through
//         _Src.BroadcastLdFrag, tagged EVEX_B, with the Broadcast string
//         appended to the source operand in the asm text.
// Parameters:
//   OpNode     - node used in the unmasked pattern.
//   MaskOpNode - node used in the merge-masked (vselect_mask against $src0)
//                and zero-masked (vselect_mask against ImmAllZerosV) patterns.
//   Broadcast  - broadcast decoration string, defaults to _.BroadcastStr.
//   Alias      - mnemonic suffix for the rm form, empty by default.
//   MemOp      - memory operand of the rm form, defaults to _Src.MemOp.
//   MaskRC     - write-mask register class, defaults to _.KRCWM.
//   LdDAG / MaskLdDAG - unmasked / masked source DAGs for the rm form; the
//                defaults apply OpNode / MaskOpNode to a plain _Src.LdFrag load.
// All three forms are declared as reading MXCSR and as possibly raising an FP
// exception.
7978multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7979                          X86VectorVTInfo _Src, SDPatternOperator OpNode, SDPatternOperator MaskOpNode,
7980                          X86FoldableSchedWrite sched,
7981                          string Broadcast = _.BroadcastStr,
7982                          string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7983                          RegisterClass MaskRC = _.KRCWM,
7984                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7985                          dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7986let Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source.
7987  defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7988                         (ins _Src.RC:$src),
7989                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7990                         (ins MaskRC:$mask, _Src.RC:$src),
7991                          OpcodeStr, "$src", "$src",
7992                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7993                         (vselect_mask MaskRC:$mask,
7994                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7995                                       _.RC:$src0),
7996                         (vselect_mask MaskRC:$mask,
7997                                       (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7998                                       _.ImmAllZerosV)>,
7999                         EVEX, Sched<[sched]>;
8000
  // Full memory source; patterns come from LdDAG / MaskLdDAG.
8001  defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8002                         (ins MemOp:$src),
8003                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
8004                         (ins MaskRC:$mask, MemOp:$src),
8005                         OpcodeStr#Alias, "$src", "$src",
8006                         LdDAG,
8007                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
8008                         (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
8009                         EVEX, Sched<[sched.Folded]>;
8010
  // Broadcast memory source (EVEX_B).
8011  defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
8012                         (ins _Src.ScalarMemOp:$src),
8013                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
8014                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
8015                         OpcodeStr,
8016                         "${src}"#Broadcast, "${src}"#Broadcast,
8017                         (_.VT (OpNode (_Src.VT
8018                                  (_Src.BroadcastLdFrag addr:$src))
8019                            )),
8020                         (vselect_mask MaskRC:$mask,
8021                                       (_.VT
8022                                        (MaskOpNode
8023                                         (_Src.VT
8024                                          (_Src.BroadcastLdFrag addr:$src)))),
8025                                       _.RC:$src0),
8026                         (vselect_mask MaskRC:$mask,
8027                                       (_.VT
8028                                        (MaskOpNode
8029                                         (_Src.VT
8030                                          (_Src.BroadcastLdFrag addr:$src)))),
8031                                       _.ImmAllZerosV)>,
8032                         EVEX, EVEX_B, Sched<[sched.Folded]>;
8033  }
8034}
8035// Conversion with SAE - suppress all exceptions
// Emits a single register-to-register "rrb" form through AVX512_maskable. The
// asm operand text carries "{sae}", the pattern applies OpNodeSAE to the
// source register, and the encoding is tagged EVEX_B. The form is declared as
// reading MXCSR; unlike avx512_vcvt_fp, this multiclass does not set
// mayRaiseFPException itself.
8036multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8037                              X86VectorVTInfo _Src, SDNode OpNodeSAE,
8038                              X86FoldableSchedWrite sched> {
8039  let Uses = [MXCSR] in
8040  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8041                        (ins _Src.RC:$src), OpcodeStr,
8042                        "{sae}, $src", "$src, {sae}",
8043                        (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
8044                        EVEX, EVEX_B, Sched<[sched]>;
8045}
8046
8047// Conversion with rounding control (RC)
// Emits a single register-to-register "rrb" form through AVX512_maskable. It
// takes an extra AVX512RC:$rc input, which is printed in the asm string and
// passed to OpNodeRnd as an (i32 timm) operand. The encoding is tagged EVEX_B
// and EVEX_RC. The form is declared as reading MXCSR; this multiclass does not
// set mayRaiseFPException itself.
8048multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8049                         X86VectorVTInfo _Src, SDPatternOperator OpNodeRnd,
8050                         X86FoldableSchedWrite sched> {
8051  let Uses = [MXCSR] in
8052  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8053                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
8054                        "$rc, $src", "$src, $rc",
8055                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
8056                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8057}
8058
8059// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// This multiclass has no body of its own: it inherits avx512_vcvt_fp and only
// overrides LdDAG / MaskLdDAG. Both are set to the same DAG, the PatFrag named
// "extload" # _Src.VTName applied to addr:$src, so neither OpNode nor
// MaskOpNode appears in the rm-form patterns. The rr and rmb forms still use
// OpNode / MaskOpNode as in avx512_vcvt_fp.
8060multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8061                                X86VectorVTInfo _Src, SDPatternOperator OpNode,
8062                                SDNode MaskOpNode,
8063                                X86FoldableSchedWrite sched,
8064                                string Broadcast = _.BroadcastStr,
8065                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8066                                RegisterClass MaskRC = _.KRCWM>
8067  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
8068                   Alias, MemOp, MaskRC,
8069                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
8070                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
8071
8072// Extend [Float to Double, Half to Float]
// Packed widening conversion, instantiated at three widths:
//   Z    - _src.info256 -> _dst.info512, plus an SAE register form
//          (X86vfpextSAE). Gated on `prd` only.
//   Z128 - _src.info128 -> _dst.info128 using the X86any_vfpext / X86vfpext
//          nodes, with an explicit f64mem memory operand.
//   Z256 - _src.info128 -> _dst.info256.
// Z128 and Z256 additionally require HasVLX. Z and Z256 use the generic
// any_fpextend / fpextend nodes.
8073multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
8074                             AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8075                             X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
8076  let Predicates = [prd] in {
8077    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr,  _dst.info512, _src.info256,
8078                            any_fpextend, fpextend, sched.ZMM>,
8079             avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
8080                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
8081  }
8082  let Predicates = [prd, HasVLX] in {
8083    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
8084                               X86any_vfpext, X86vfpext, sched.XMM,
8085                               _dst.info128.BroadcastStr,
8086                               "", f64mem>, EVEX_V128;
8087    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
8088                               any_fpextend, fpextend, sched.YMM>, EVEX_V256;
8089  }
8090}
8091
8092// Truncate [Double to Float, Float to Half]
// Packed narrowing conversion, instantiated at three widths:
//   Z    - _src.info512 -> _dst.info256, plus a rounding-control register form
//          (X86vfproundRnd). Gated on `prd` only.
//   Z128 - _src.info128 -> _dst.info128. Instruction patterns are disabled
//          with null_frag and supplied by the explicit Pats below. The memory
//          form mnemonic gets the "{x}" Alias suffix.
//   Z256 - _src.info256 -> _dst.info128, memory form mnemonic gets "{y}".
// Z128 and Z256 additionally require HasVLX.
// bcast128 / loadVT128 / maskRC128 default to the 128-bit source info's
// broadcast-load fragment, load fragment and write-mask class; they are used
// by the Z128 definition and its explicit patterns.
8093multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
8094                            AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8095                            X86SchedWriteWidths sched, Predicate prd = HasAVX512,
8096                            PatFrag bcast128 = _src.info128.BroadcastLdFrag,
8097                            PatFrag loadVT128 = _src.info128.LdFrag,
8098                            RegisterClass maskRC128 = _src.info128.KRCWM> {
8099  let Predicates = [prd] in {
8100    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
8101                            X86any_vfpround, X86vfpround, sched.ZMM>,
8102             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8103                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8104  }
8105  let Predicates = [prd, HasVLX] in {
8106    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
8107                               null_frag, null_frag, sched.XMM,
8108                               _src.info128.BroadcastStr, "{x}",
8109                               f128mem, maskRC128>, EVEX_V128;
8110    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
8111                               X86any_vfpround, X86vfpround,
8112                               sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
8113
8114    // Special patterns to allow use of X86vmfpround for masking. Instruction
8115    // patterns have been disabled with null_frag.
    // Register source: unmasked, merge-masked (rrk), zero-masked (rrkz).
8116    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
8117              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8118    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8119                            maskRC128:$mask),
8120              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
8121    def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8122                            maskRC128:$mask),
8123              (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
8124
    // Full-load source (loadVT128).
8125    def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
8126              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8127    def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
8128                            maskRC128:$mask),
8129              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8130    def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
8131                            maskRC128:$mask),
8132              (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
8133
    // Broadcast-load source (bcast128).
8134    def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
8135              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8136    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8137                            (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
8138              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
8139    def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
8140                            _dst.info128.ImmAllZerosV, maskRC128:$mask),
8141              (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
8142  }
8143
  // "att"-variant aliases that spell the Z128 register and broadcast forms
  // with an explicit "x" mnemonic suffix. They sit outside the Predicates
  // blocks above.
  // NOTE(review): the alias operands hard-code VK2WM, f64mem and {1to2} (and
  // VK4WM / {1to4} for the "y" group below) instead of deriving them from
  // _src. That matches an f64 source; this multiclass is also instantiated
  // with an f32 source (VCVTPS2PHX) - confirm the aliases are intended to be
  // shared as-is.
8144  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8145                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8146  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8147                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8148                  VK2WM:$mask, VR128X:$src), 0, "att">;
8149  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
8150                  "$dst {${mask}} {z}, $src}",
8151                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8152                  VK2WM:$mask, VR128X:$src), 0, "att">;
8153  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8154                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8155  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8156                  "$dst {${mask}}, ${src}{1to2}}",
8157                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8158                  VK2WM:$mask, f64mem:$src), 0, "att">;
8159  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8160                  "$dst {${mask}} {z}, ${src}{1to2}}",
8161                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8162                  VK2WM:$mask, f64mem:$src), 0, "att">;
8163
  // Same set of aliases for the Z256 forms, using the "y" mnemonic suffix.
8164  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8165                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8166  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8167                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8168                  VK4WM:$mask, VR256X:$src), 0, "att">;
8169  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8170                  "$dst {${mask}} {z}, $src}",
8171                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8172                  VK4WM:$mask, VR256X:$src), 0, "att">;
8173  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8174                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
8175  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8176                  "$dst {${mask}}, ${src}{1to4}}",
8177                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8178                  VK4WM:$mask, f64mem:$src), 0, "att">;
8179  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8180                  "$dst {${mask}} {z}, ${src}{1to4}}",
8181                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8182                  VK4WM:$mask, f64mem:$src), 0, "att">;
8183}
8184
// Packed double <-> single conversions. Both share opcode 0x5A and differ in
// prefix / VEX_W: VCVTPD2PS uses PD with VEX_W and a 64-bit CD8VF tuple,
// VCVTPS2PD uses PS with a 32-bit CD8VH tuple. The type infos are passed in
// (destination, source) order and the predicate defaults to HasAVX512.
8185defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
8186                                  avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
8187                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
8188defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
8189                                   avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
8190                                   PS, EVEX_CD8<32, CD8VH>;
8191
8192// Extend Half to Double
// All three widths use v8f16x_info as the source type info:
//   Z    - v8f16x_info -> v8f64_info with any_fpextend / fpextend, plus an SAE
//          register form, plus an extra Pat selecting (v8f64 (extloadv8f16))
//          to the Zrm instruction. Requires HasFP16.
//   Z128 - v8f16x_info -> v2f64x_info, explicit "{1to2}" broadcast string and
//          f32mem memory operand.
//   Z256 - v8f16x_info -> v4f64x_info, explicit "{1to4}" broadcast string and
//          f64mem memory operand.
// Z128 and Z256 use X86any_vfpext / X86vfpext and require HasFP16 and HasVLX.
8193multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
8194                            X86SchedWriteWidths sched> {
8195  let Predicates = [HasFP16] in {
8196    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
8197                                  any_fpextend, fpextend, sched.ZMM>,
8198             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
8199                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
8200    def : Pat<(v8f64 (extloadv8f16 addr:$src)),
8201                (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
8202  }
8203  let Predicates = [HasFP16, HasVLX] in {
8204    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
8205                                     X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
8206                                     f32mem>, EVEX_V128;
8207    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
8208                                     X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
8209                                     f64mem>, EVEX_V256;
8210  }
8211}
8212
8213// Truncate Double to Half
// All three widths produce v8f16x_info:
//   Z    - from v8f64_info with X86any_vfpround / X86vfpround, "{1to8}"
//          broadcast string and "{z}" memory-form suffix, plus a
//          rounding-control register form. Requires HasFP16.
//   Z128 - from v2f64x_info, "{1to2}", "{x}", f128mem, VK2WM.
//   Z256 - from v4f64x_info, "{1to4}", "{y}", f256mem, VK4WM.
// Z128 and Z256 require HasFP16 and HasVLX and have their instruction
// patterns disabled with null_frag; this multiclass defines no replacement
// patterns itself.
// The InstAliases that follow are "att"-variant spellings of the register and
// broadcast forms with an explicit x / y / z mnemonic suffix. They sit outside
// the Predicates blocks, and the broadcast aliases name the memory operand as
// i64mem.
8214multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8215  let Predicates = [HasFP16] in {
8216    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
8217                            X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
8218             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
8219                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
8220  }
8221  let Predicates = [HasFP16, HasVLX] in {
8222    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
8223                               null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8224                               VK2WM>, EVEX_V128;
8225    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
8226                               null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
8227                               VK4WM>, EVEX_V256;
8228  }
  // "x" suffix: Z128 forms.
8229  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8230                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8231                  VR128X:$src), 0, "att">;
8232  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8233                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8234                  VK2WM:$mask, VR128X:$src), 0, "att">;
8235  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8236                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8237                  VK2WM:$mask, VR128X:$src), 0, "att">;
8238  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8239                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8240                  i64mem:$src), 0, "att">;
8241  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8242                  "$dst {${mask}}, ${src}{1to2}}",
8243                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8244                  VK2WM:$mask, i64mem:$src), 0, "att">;
8245  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8246                  "$dst {${mask}} {z}, ${src}{1to2}}",
8247                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8248                  VK2WM:$mask, i64mem:$src), 0, "att">;
8249
  // "y" suffix: Z256 forms.
8250  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8251                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8252                  VR256X:$src), 0, "att">;
8253  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8254                  "$dst {${mask}}, $src}",
8255                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8256                  VK4WM:$mask, VR256X:$src), 0, "att">;
8257  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8258                  "$dst {${mask}} {z}, $src}",
8259                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8260                  VK4WM:$mask, VR256X:$src), 0, "att">;
8261  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8262                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8263                  i64mem:$src), 0, "att">;
8264  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8265                  "$dst {${mask}}, ${src}{1to4}}",
8266                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8267                  VK4WM:$mask, i64mem:$src), 0, "att">;
8268  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8269                  "$dst {${mask}} {z}, ${src}{1to4}}",
8270                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8271                  VK4WM:$mask, i64mem:$src), 0, "att">;
8272
  // "z" suffix: 512-bit (Z) forms.
8273  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
8274                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
8275                  VR512:$src), 0, "att">;
8276  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
8277                  "$dst {${mask}}, $src}",
8278                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
8279                  VK8WM:$mask, VR512:$src), 0, "att">;
8280  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
8281                  "$dst {${mask}} {z}, $src}",
8282                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
8283                  VK8WM:$mask, VR512:$src), 0, "att">;
8284  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
8285                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
8286                  i64mem:$src), 0, "att">;
8287  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
8288                  "$dst {${mask}}, ${src}{1to8}}",
8289                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
8290                  VK8WM:$mask, i64mem:$src), 0, "att">;
8291  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
8292                  "$dst {${mask}} {z}, ${src}{1to8}}",
8293                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
8294                  VK8WM:$mask, i64mem:$src), 0, "att">;
8295}
8296
// Packed half-precision conversions.
// VCVTPS2PHX / VCVTPH2PSX reuse the generic trunc / extend multiclasses with
// HasFP16 passed as the predicate; VCVTPD2PH / VCVTPH2PD use the dedicated
// multiclasses above, which hard-code HasFP16. Each instantiation supplies
// its own opcode map / prefix modifier and EVEX_CD8 tuple.
8297defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
8298                                   avx512vl_f32_info, SchedWriteCvtPD2PS,
8299                                   HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
8300defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
8301                                    avx512vl_f16_info, SchedWriteCvtPS2PD,
8302                                    HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
8303defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
8304                                 VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
8305defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
8306                                 T_MAP5PS, EVEX_CD8<16, CD8VQ>;
8307
// Explicit selection patterns for the 128-bit and 256-bit VCVTPD2PH forms.
// For each source kind (register, full load, 64-bit broadcast load) there are
// three patterns: unmasked via X86any_vfpround, merge-masked via X86vmfpround
// with a $src0 pass-through, and zero-masked via X86vmfpround with
// ImmAllZerosV. The v4f64 (Z256) group uses VK4WM; the v2f64 (Z128) group uses
// VK2WM.
8308let Predicates = [HasFP16, HasVLX] in {
8309  // Special patterns to allow use of X86vmfpround for masking. Instruction
8310  // patterns have been disabled with null_frag.
  // v4f64 source, register.
8311  def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
8312            (VCVTPD2PHZ256rr VR256X:$src)>;
8313  def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
8314                          VK4WM:$mask)),
8315            (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
8316  def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
8317                          VK4WM:$mask),
8318            (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
8319
  // v4f64 source, full load.
8320  def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
8321            (VCVTPD2PHZ256rm addr:$src)>;
8322  def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
8323                          VK4WM:$mask),
8324            (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8325  def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
8326                          VK4WM:$mask),
8327            (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
8328
  // v4f64 source, broadcast load.
8329  def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
8330            (VCVTPD2PHZ256rmb addr:$src)>;
8331  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8332                          (v8f16 VR128X:$src0), VK4WM:$mask),
8333            (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
8334  def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
8335                          v8f16x_info.ImmAllZerosV, VK4WM:$mask),
8336            (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
8337
  // v2f64 source, register.
8338  def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
8339            (VCVTPD2PHZ128rr VR128X:$src)>;
8340  def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
8341                          VK2WM:$mask),
8342            (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8343  def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
8344                          VK2WM:$mask),
8345            (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
8346
  // v2f64 source, full load.
8347  def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
8348            (VCVTPD2PHZ128rm addr:$src)>;
8349  def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
8350                          VK2WM:$mask),
8351            (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8352  def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
8353                          VK2WM:$mask),
8354            (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
8355
  // v2f64 source, broadcast load.
8356  def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
8357            (VCVTPD2PHZ128rmb addr:$src)>;
8358  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8359                          (v8f16 VR128X:$src0), VK2WM:$mask),
8360            (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8361  def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
8362                          v8f16x_info.ImmAllZerosV, VK2WM:$mask),
8363            (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
8364}
8365
8366// Convert Signed/Unsigned Doubleword to Double
// The multiclass is wrapped in a `let` that sets Uses to an empty register
// list and mayRaiseFPException to 0.
// NOTE(review): presumably this is meant to undo the MXCSR use / FP-exception
// marking that avx512_vcvt_fp applies, in line with the "No rounding in this
// op" remark below - confirm how the outer and inner `let`s interact.
//   Z    - v8i32x_info -> v8f64_info, requires HasAVX512.
//   Z128 - v4i32x_info -> v2f64x_info using OpNode128 / MaskOpNode128, with
//          "{1to2}", i64mem and VK2WM. Its memory-form DAGs are overridden to
//          match a loadi64 that goes through scalar_to_vector (v2i64) and
//          bc_v4i32.
//   Z256 - v4i32x_info -> v4f64x_info.
// Z128 and Z256 are gated on HasVLX only.
8367let Uses = []<Register>, mayRaiseFPException = 0 in
8368multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8369                           SDNode MaskOpNode, SDPatternOperator OpNode128,
8370                           SDNode MaskOpNode128,
8371                           X86SchedWriteWidths sched> {
8372  // No rounding in this op
8373  let Predicates = [HasAVX512] in
8374    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8375                            MaskOpNode, sched.ZMM>, EVEX_V512;
8376
8377  let Predicates = [HasVLX] in {
8378    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8379                               OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
8380                               "", i64mem, VK2WM,
8381                               (v2f64 (OpNode128 (bc_v4i32
8382                                (v2i64
8383                                 (scalar_to_vector (loadi64 addr:$src)))))),
8384                               (v2f64 (MaskOpNode128 (bc_v4i32
8385                                (v2i64
8386                                 (scalar_to_vector (loadi64 addr:$src))))))>,
8387                               EVEX_V128;
8388    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8389                               MaskOpNode, sched.YMM>, EVEX_V256;
8390  }
8391}
8392
8393// Convert Signed/Unsigned Doubleword to Float
// Source and destination have the same element count at every width:
//   Z    - v16i32_info -> v16f32_info, plus a rounding-control register form
//          driven by OpNodeRnd. Requires HasAVX512.
//   Z128 - v4i32x_info -> v4f32x_info.
//   Z256 - v8i32x_info -> v8f32x_info.
// Z128 and Z256 are gated on HasVLX only and use the avx512_vcvt_fp defaults
// for broadcast string, alias, memory operand and mask class.
8394multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8395                           SDNode MaskOpNode, SDNode OpNodeRnd,
8396                           X86SchedWriteWidths sched> {
8397  let Predicates = [HasAVX512] in
8398    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8399                            MaskOpNode, sched.ZMM>,
8400             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8401                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8402
8403  let Predicates = [HasVLX] in {
8404    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8405                               MaskOpNode, sched.XMM>, EVEX_V128;
8406    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8407                               MaskOpNode, sched.YMM>, EVEX_V256;
8408  }
8409}
8410
8411// Convert Float to Signed/Unsigned Doubleword with truncation
//   Z    - v16f32_info -> v16i32_info, plus an SAE register form driven by
//          OpNodeSAE. Requires HasAVX512.
//   Z128 - v4f32x_info -> v4i32x_info.
//   Z256 - v8f32x_info -> v8i32x_info.
// Z128 and Z256 are gated on HasVLX only. This multiclass adds no
// rounding-control form, only the SAE form.
8412multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8413                            SDNode MaskOpNode,
8414                            SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8415  let Predicates = [HasAVX512] in {
8416    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8417                            MaskOpNode, sched.ZMM>,
8418             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8419                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8420  }
8421  let Predicates = [HasVLX] in {
8422    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8423                               MaskOpNode, sched.XMM>, EVEX_V128;
8424    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8425                               MaskOpNode, sched.YMM>, EVEX_V256;
8426  }
8427}
8428
8429// Convert Float to Signed/Unsigned Doubleword
// Same shape as avx512_cvttps2dq, except that the 512-bit form is paired with
// a rounding-control register form (OpNodeRnd) instead of an SAE form:
//   Z    - v16f32_info -> v16i32_info, requires HasAVX512.
//   Z128 - v4f32x_info -> v4i32x_info.
//   Z256 - v8f32x_info -> v8i32x_info.
// Z128 and Z256 are gated on HasVLX only.
8430multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8431                           SDNode MaskOpNode, SDNode OpNodeRnd,
8432                           X86SchedWriteWidths sched> {
8433  let Predicates = [HasAVX512] in {
8434    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8435                            MaskOpNode, sched.ZMM>,
8436             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8437                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8438  }
8439  let Predicates = [HasVLX] in {
8440    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8441                               MaskOpNode, sched.XMM>, EVEX_V128;
8442    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8443                               MaskOpNode, sched.YMM>, EVEX_V256;
8444  }
8445}
8446
8447// Convert Double to Signed/Unsigned Doubleword with truncation
//  * Z    (HasAVX512): v8f64 -> v8i32, avx512_vcvt_fp plus avx512_vcvt_fp_sae
//                      (driven by OpNodeSAE), EVEX_V512.
//  * Z128 (HasVLX):    v2f64 source, v4i32 dest. Both pattern nodes are
//                      null_frag, so this multiclass attaches no selection
//                      patterns to the 128-bit form; they are written out as
//                      standalone Pat records elsewhere in this file.
//  * Z256 (HasVLX):    v4f64 -> v4i32, EVEX_V256.
// The InstAlias records at the end are declared outside the Predicates blocks.
8448multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8449                            SDNode MaskOpNode, SDNode OpNodeSAE,
8450                            X86SchedWriteWidths sched> {
8451  let Predicates = [HasAVX512] in {
8452    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8453                            MaskOpNode, sched.ZMM>,
8454             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8455                                OpNodeSAE, sched.ZMM>, EVEX_V512;
8456  }
8457  let Predicates = [HasVLX] in {
8458    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8459    // memory forms of these instructions in Asm Parser. They have the same
8460    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8461    // due to the same reason.
8462    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8463                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8464                               VK2WM>, EVEX_V128;
8465    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8466                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8467  }
8468
  // Aliases spelling the mnemonic with an explicit "x" suffix, mapped onto the
  // Z128 register forms (rr/rrk/rrkz) and broadcast-memory forms
  // (rmb/rmbk/rmbkz, {1to2}). The trailing `0, "att"` is passed straight to
  // InstAlias (presumably emit priority 0 and the AT&T variant - confirm
  // against the InstAlias definition in Target.td).
8469  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8470                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8471                  VR128X:$src), 0, "att">;
8472  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8473                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8474                  VK2WM:$mask, VR128X:$src), 0, "att">;
8475  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8476                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8477                  VK2WM:$mask, VR128X:$src), 0, "att">;
8478  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8479                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8480                  f64mem:$src), 0, "att">;
8481  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8482                  "$dst {${mask}}, ${src}{1to2}}",
8483                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8484                  VK2WM:$mask, f64mem:$src), 0, "att">;
8485  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8486                  "$dst {${mask}} {z}, ${src}{1to2}}",
8487                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8488                  VK2WM:$mask, f64mem:$src), 0, "att">;
8489
  // Same set of aliases with a "y" suffix, mapped onto the Z256 forms
  // (VR256X source, VK4WM mask, {1to4} broadcast).
8490  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8491                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8492                  VR256X:$src), 0, "att">;
8493  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8494                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8495                  VK4WM:$mask, VR256X:$src), 0, "att">;
8496  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8497                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8498                  VK4WM:$mask, VR256X:$src), 0, "att">;
8499  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8500                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8501                  f64mem:$src), 0, "att">;
8502  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8503                  "$dst {${mask}}, ${src}{1to4}}",
8504                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8505                  VK4WM:$mask, f64mem:$src), 0, "att">;
8506  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8507                  "$dst {${mask}} {z}, ${src}{1to4}}",
8508                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8509                  VK4WM:$mask, f64mem:$src), 0, "att">;
8510}
8511
8512// Convert Double to Signed/Unsigned Doubleword
//  * Z    (HasAVX512): v8f64 -> v8i32, avx512_vcvt_fp plus avx512_vcvt_fp_rc
//                      (driven by OpNodeRnd), EVEX_V512.
//  * Z128 (HasVLX):    v2f64 source, v4i32 dest, both pattern nodes null_frag
//                      (no selection patterns attached by this multiclass).
//  * Z256 (HasVLX):    v4f64 -> v4i32, EVEX_V256.
// The InstAlias records at the end are declared outside the Predicates blocks.
8513multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8514                           SDNode MaskOpNode, SDNode OpNodeRnd,
8515                           X86SchedWriteWidths sched> {
8516  let Predicates = [HasAVX512] in {
8517    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8518                            MaskOpNode, sched.ZMM>,
8519             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8520                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8521  }
8522  let Predicates = [HasVLX] in {
8523    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8524    // memory forms of these instructions in Asm Parser. They have the same
8525    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8526    // due to the same reason.
8527    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8528                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8529                               VK2WM>, EVEX_V128;
8530    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8531                               MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8532  }
8533
  // "x"-suffixed aliases for the Z128 register (rr/rrk/rrkz) and
  // broadcast-memory (rmb/rmbk/rmbkz, {1to2}) forms.
8534  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8535                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
8536  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8537                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8538                  VK2WM:$mask, VR128X:$src), 0, "att">;
8539  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8540                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8541                  VK2WM:$mask, VR128X:$src), 0, "att">;
8542  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8543                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8544                  f64mem:$src), 0, "att">;
8545  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8546                  "$dst {${mask}}, ${src}{1to2}}",
8547                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8548                  VK2WM:$mask, f64mem:$src), 0, "att">;
8549  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8550                  "$dst {${mask}} {z}, ${src}{1to2}}",
8551                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8552                  VK2WM:$mask, f64mem:$src), 0, "att">;
8553
  // "y"-suffixed aliases for the Z256 forms (VR256X source, VK4WM mask,
  // {1to4} broadcast).
8554  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8555                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
8556  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8557                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8558                  VK4WM:$mask, VR256X:$src), 0, "att">;
8559  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8560                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8561                  VK4WM:$mask, VR256X:$src), 0, "att">;
8562  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8563                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8564                  f64mem:$src), 0, "att">;
8565  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8566                  "$dst {${mask}}, ${src}{1to4}}",
8567                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8568                  VK4WM:$mask, f64mem:$src), 0, "att">;
8569  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8570                  "$dst {${mask}} {z}, ${src}{1to4}}",
8571                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8572                  VK4WM:$mask, f64mem:$src), 0, "att">;
8573}
8574
8575// Convert Double to Signed/Unsigned Quadword
//  * Z    (HasDQI):         v8f64 -> v8i64, avx512_vcvt_fp plus
//                           avx512_vcvt_fp_rc (driven by OpNodeRnd), EVEX_V512.
//  * Z128 (HasDQI, HasVLX): v2f64 -> v2i64, EVEX_V128.
//  * Z256 (HasDQI, HasVLX): v4f64 -> v4i64, EVEX_V256.
8576multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8577                           SDNode MaskOpNode, SDNode OpNodeRnd,
8578                           X86SchedWriteWidths sched> {
8579  let Predicates = [HasDQI] in {
8580    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8581                            MaskOpNode, sched.ZMM>,
8582             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8583                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8584  }
8585  let Predicates = [HasDQI, HasVLX] in {
8586    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8587                               MaskOpNode, sched.XMM>, EVEX_V128;
8588    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8589                               MaskOpNode, sched.YMM>, EVEX_V256;
8590  }
8591}
8592
8593// Convert Double to Signed/Unsigned Quadword with truncation
//  * Z    (HasDQI):         v8f64 -> v8i64, avx512_vcvt_fp plus
//                           avx512_vcvt_fp_sae, EVEX_V512.
//  * Z128 (HasDQI, HasVLX): v2f64 -> v2i64, EVEX_V128.
//  * Z256 (HasDQI, HasVLX): v4f64 -> v4i64, EVEX_V256.
// Note: despite its name, OpNodeRnd is forwarded to avx512_vcvt_fp_sae here;
// avx512_cvttpd2dq names the parameter in the same position OpNodeSAE.
8594multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8595                            SDNode MaskOpNode, SDNode OpNodeRnd,
8596                            X86SchedWriteWidths sched> {
8597  let Predicates = [HasDQI] in {
8598    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8599                            MaskOpNode, sched.ZMM>,
8600             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8601                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8602  }
8603  let Predicates = [HasDQI, HasVLX] in {
8604    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8605                               MaskOpNode, sched.XMM>, EVEX_V128;
8606    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8607                               MaskOpNode, sched.YMM>, EVEX_V256;
8608  }
8609}
8610
8611// Convert Signed/Unsigned Quadword to Double
//  * Z    (HasDQI):         v8i64 -> v8f64, avx512_vcvt_fp plus
//                           avx512_vcvt_fp_rc (driven by OpNodeRnd), EVEX_V512.
//  * Z128 (HasDQI, HasVLX): v2i64 -> v2f64, EVEX_V128.
//  * Z256 (HasDQI, HasVLX): v4i64 -> v4f64, EVEX_V256.
// The two VLX forms are additionally tagged NotEVEX2VEXConvertible.
8612multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8613                           SDNode MaskOpNode, SDNode OpNodeRnd,
8614                           X86SchedWriteWidths sched> {
8615  let Predicates = [HasDQI] in {
8616    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8617                            MaskOpNode, sched.ZMM>,
8618             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8619                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8620  }
8621  let Predicates = [HasDQI, HasVLX] in {
8622    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8623                               MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8624    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8625                               MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8626  }
8627}
8628
8629// Convert Float to Signed/Unsigned Quadword
//  * Z    (HasDQI):         v8f32 -> v8i64, avx512_vcvt_fp plus
//                           avx512_vcvt_fp_rc (driven by OpNodeRnd), EVEX_V512.
//  * Z128 (HasDQI, HasVLX): v4f32 source, v2i64 dest, EVEX_V128.
//  * Z256 (HasDQI, HasVLX): v4f32 -> v4i64, EVEX_V256.
8630multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8631                           SDNode MaskOpNode, SDNode OpNodeRnd,
8632                           X86SchedWriteWidths sched> {
8633  let Predicates = [HasDQI] in {
8634    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8635                            MaskOpNode, sched.ZMM>,
8636             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8637                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8638  }
8639  let Predicates = [HasDQI, HasVLX] in {
8640    // Explicitly specified broadcast string, since we take only 2 elements
8641    // from v4f32x_info source
    // For the same reason the memory operand is f64mem, the mask class is
    // VK2WM, and the two trailing dags describe the memory source as a 64-bit
    // scalar load (loadf64) placed in a v2f64 and bitcast to v4f32, for the
    // unmasked (OpNode) and masked (MaskOpNode) cases respectively.
8642    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8643                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8644                               (v2i64 (OpNode (bc_v4f32
8645                                (v2f64
8646                                 (scalar_to_vector (loadf64 addr:$src)))))),
8647                               (v2i64 (MaskOpNode (bc_v4f32
8648                                (v2f64
8649                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8650                               EVEX_V128;
8651    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8652                               MaskOpNode, sched.YMM>, EVEX_V256;
8653  }
8654}
8655
8656// Convert Float to Signed/Unsigned Quadword with truncation
//  * Z    (HasDQI):         v8f32 -> v8i64, avx512_vcvt_fp plus
//                           avx512_vcvt_fp_sae, EVEX_V512.
//  * Z128 (HasDQI, HasVLX): v4f32 source, v2i64 dest, EVEX_V128.
//  * Z256 (HasDQI, HasVLX): v4f32 -> v4i64, EVEX_V256.
// Note: despite its name, OpNodeRnd is forwarded to avx512_vcvt_fp_sae here.
8657multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8658                            SDNode MaskOpNode, SDNode OpNodeRnd,
8659                            X86SchedWriteWidths sched> {
8660  let Predicates = [HasDQI] in {
8661    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8662                            MaskOpNode, sched.ZMM>,
8663             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8664                                OpNodeRnd, sched.ZMM>, EVEX_V512;
8665  }
8666  let Predicates = [HasDQI, HasVLX] in {
8667    // Explicitly specified broadcast string, since we take only 2 elements
8668    // from v4f32x_info source
    // For the same reason the memory operand is f64mem, the mask class is
    // VK2WM, and the two trailing dags describe the memory source as a 64-bit
    // scalar load (loadf64) placed in a v2f64 and bitcast to v4f32, for the
    // unmasked (OpNode) and masked (MaskOpNode) cases respectively.
8669    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8670                               MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8671                               (v2i64 (OpNode (bc_v4f32
8672                                (v2f64
8673                                 (scalar_to_vector (loadf64 addr:$src)))))),
8674                               (v2i64 (MaskOpNode (bc_v4f32
8675                                (v2f64
8676                                 (scalar_to_vector (loadf64 addr:$src))))))>,
8677                               EVEX_V128;
8678    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8679                               MaskOpNode, sched.YMM>, EVEX_V256;
8680  }
8681}
8682
8683// Convert Signed/Unsigned Quadword to Float
8684// Also Convert Signed/Unsigned Doubleword to Half
// Shared by the quadword->float and doubleword->half conversions; the
// destination and source element types come from the _dst/_src
// AVX512VLVectorVTInfo arguments, and `prd` (default HasDQI) selects the
// feature predicate.
//  * Z    ([prd]):         _src.info512 -> _dst.info256, avx512_vcvt_fp plus
//                          avx512_vcvt_fp_rc (driven by OpNodeRnd), EVEX_V512.
//  * Z128 ([prd, HasVLX]): _src.info128 -> _dst.info128, both pattern nodes
//                          null_frag; selection patterns are the explicit Pat
//                          records below using OpNode128 / OpNode128M.
//  * Z256 ([prd, HasVLX]): _src.info256 -> _dst.info128, EVEX_V256.
// NOTE(review): the Z128 broadcast patterns use X86VBroadcastld64 and the
// aliases at the end hard-code VK2WM/VK4WM, i64mem and {1to2}/{1to4}. That
// matches a 64-bit-element source; this multiclass is also instantiated with
// avx512vl_i32_info as _src (VCVTDQ2PH/VCVTUDQ2PH), where those widths would
// presumably differ - confirm this is intended.
8685multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
8686                                 SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
8687                                 SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
8688                                 AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
8689                                 X86SchedWriteWidths sched, Predicate prd = HasDQI> {
8690  let Predicates = [prd] in {
8691    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
8692                            MaskOpNode, sched.ZMM>,
8693             avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
8694                               OpNodeRnd, sched.ZMM>, EVEX_V512;
8695  }
8696  let Predicates = [prd, HasVLX] in {
8697    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8698    // memory forms of these instructions in Asm Parser. They have the same
8699    // dest type - '_dst.info128'. We also specify the broadcast string explicitly
8700    // due to the same reason.
8701    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
8702                               null_frag, sched.XMM, _src.info128.BroadcastStr,
8703                               "{x}", i128mem, _src.info128.KRCWM>,
8704                               EVEX_V128, NotEVEX2VEXConvertible;
8705    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
8706                               MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
8707                               "{y}">, EVEX_V256,
8708                               NotEVEX2VEXConvertible;
8709
8710    // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
8711    // patterns have been disabled with null_frag.
    // Register source: unmasked, merge-masked ($src0 passthru), zero-masked.
8712    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
8713              (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
8714    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
8715                             _src.info128.KRCWM:$mask),
8716              (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
8717    def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
8718                             _src.info128.KRCWM:$mask),
8719              (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
8720
    // Full-vector memory source (via _src.info128.LdFrag): same three variants.
8721    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
8722              (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
8723    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
8724                             _src.info128.KRCWM:$mask),
8725              (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8726    def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
8727                             _src.info128.KRCWM:$mask),
8728              (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
8729
    // Broadcast memory source (X86VBroadcastld64): same three variants.
8730    def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
8731              (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
8732    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8733                             (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
8734              (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
8735    def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
8736                             _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
8737              (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
8738  }
8739
  // "x"-suffixed aliases for the Z128 register and broadcast-memory forms.
8740  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8741                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8742                  VR128X:$src), 0, "att">;
8743  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8744                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8745                  VK2WM:$mask, VR128X:$src), 0, "att">;
8746  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8747                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8748                  VK2WM:$mask, VR128X:$src), 0, "att">;
8749  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8750                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8751                  i64mem:$src), 0, "att">;
8752  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8753                  "$dst {${mask}}, ${src}{1to2}}",
8754                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8755                  VK2WM:$mask, i64mem:$src), 0, "att">;
8756  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8757                  "$dst {${mask}} {z}, ${src}{1to2}}",
8758                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8759                  VK2WM:$mask, i64mem:$src), 0, "att">;
8760
  // "y"-suffixed aliases for the Z256 register and broadcast-memory forms.
8761  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8762                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8763                  VR256X:$src), 0, "att">;
8764  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8765                  "$dst {${mask}}, $src}",
8766                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8767                  VK4WM:$mask, VR256X:$src), 0, "att">;
8768  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8769                  "$dst {${mask}} {z}, $src}",
8770                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8771                  VK4WM:$mask, VR256X:$src), 0, "att">;
8772  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8773                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8774                  i64mem:$src), 0, "att">;
8775  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8776                  "$dst {${mask}}, ${src}{1to4}}",
8777                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8778                  VK4WM:$mask, i64mem:$src), 0, "att">;
8779  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8780                  "$dst {${mask}} {z}, ${src}{1to4}}",
8781                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8782                  VK4WM:$mask, i64mem:$src), 0, "att">;
8783}
8784
// Instantiations of the packed integer<->floating-point conversion
// multiclasses. Each defm passes the opcode byte, the mnemonic, the selection
// nodes and a scheduling-width record, then attaches the encoding classes
// (prefix such as PS/PD/XS/XD or T_MAP5*, optional VEX_W, and EVEX_CD8).

// Signed doubleword -> double / single.
8785defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8786                                 X86any_VSintToFP, X86VSintToFP,
8787                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8788
8789defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8790                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8791                                PS, EVEX_CD8<32, CD8VF>;
8792
// Truncating single/double -> signed and unsigned doubleword.
8793defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8794                                 X86cvttp2si, X86cvttp2siSAE,
8795                                 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8796
8797defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8798                                 X86cvttp2si, X86cvttp2siSAE,
8799                                 SchedWriteCvtPD2DQ>,
8800                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8801
8802defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8803                                 X86cvttp2ui, X86cvttp2uiSAE,
8804                                 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8805
8806defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8807                                 X86cvttp2ui, X86cvttp2uiSAE,
8808                                 SchedWriteCvtPD2DQ>,
8809                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8810
// Unsigned doubleword -> double / single.
8811defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8812                                  uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8813                                  SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8814
8815defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8816                                 uint_to_fp, X86VUintToFpRnd,
8817                                 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8818
// Non-truncating single/double -> signed and unsigned doubleword/quadword.
// The same node (X86cvtp2Int or X86cvtp2UInt) is passed as both OpNode and
// MaskOpNode.
8819defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8820                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8821                                 EVEX_CD8<32, CD8VF>;
8822
8823defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8824                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8825                                 VEX_W, EVEX_CD8<64, CD8VF>;
8826
8827defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8828                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8829                                 PS, EVEX_CD8<32, CD8VF>;
8830
8831defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8832                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8833                                 PS, EVEX_CD8<64, CD8VF>;
8834
8835defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8836                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8837                                 PD, EVEX_CD8<64, CD8VF>;
8838
8839defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8840                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8841                                 EVEX_CD8<32, CD8VH>;
8842
8843defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8844                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8845                                 PD, EVEX_CD8<64, CD8VF>;
8846
8847defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8848                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8849                                 EVEX_CD8<32, CD8VH>;
8850
// Truncating single/double -> signed and unsigned quadword.
8851defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8852                                 X86cvttp2si, X86cvttp2siSAE,
8853                                 SchedWriteCvtPD2DQ>, VEX_W,
8854                                 PD, EVEX_CD8<64, CD8VF>;
8855
8856defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8857                                 X86cvttp2si, X86cvttp2siSAE,
8858                                 SchedWriteCvtPS2DQ>, PD,
8859                                 EVEX_CD8<32, CD8VH>;
8860
8861defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8862                                 X86cvttp2ui, X86cvttp2uiSAE,
8863                                 SchedWriteCvtPD2DQ>, VEX_W,
8864                                 PD, EVEX_CD8<64, CD8VF>;
8865
8866defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8867                                 X86cvttp2ui, X86cvttp2uiSAE,
8868                                 SchedWriteCvtPS2DQ>, PD,
8869                                 EVEX_CD8<32, CD8VH>;
8870
// Signed and unsigned quadword -> double.
8871defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8872                            sint_to_fp, X86VSintToFpRnd,
8873                            SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8874
8875defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8876                            uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8877                            VEX_W, XS, EVEX_CD8<64, CD8VF>;
8878
// Doubleword -> half. These pass HasFP16 as the predicate, overriding the
// multiclass default of HasDQI, and use avx512vl_i32_info as the source.
8879defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
8880                            X86any_VSintToFP, X86VMSintToFP,
8881                            X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8882                            SchedWriteCvtDQ2PS, HasFP16>,
8883                            T_MAP5PS, EVEX_CD8<32, CD8VF>;
8884
8885defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
8886                            X86any_VUintToFP, X86VMUintToFP,
8887                            X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
8888                            SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
8889                            EVEX_CD8<32, CD8VF>;
8890
// Quadword -> single. These rely on the default predicate (HasDQI).
8891defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
8892                            X86any_VSintToFP, X86VMSintToFP,
8893                            X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8894                            SchedWriteCvtDQ2PS>, VEX_W, PS,
8895                            EVEX_CD8<64, CD8VF>;
8896
8897defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
8898                            X86any_VUintToFP, X86VMUintToFP,
8899                            X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
8900                            SchedWriteCvtDQ2PS>, VEX_W, XD,
8901                            EVEX_CD8<64, CD8VF>;
8902
8903let Predicates = [HasVLX] in {
  // Selection patterns for the 128-bit (v2f64 source, v4i32 result) forms of
  // VCVTPD2DQ, VCVTTPD2DQ, VCVTPD2UDQ and VCVTTPD2UDQ. Each of the four groups
  // covers register, full-vector load (loadv2f64) and 64-bit broadcast
  // (X86VBroadcastld64) sources, each in unmasked, merge-masked ($src0
  // passthru) and zero-masked (ImmAllZerosV) variants, all with a VK2WM mask.
8904  // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8905  // patterns have been disabled with null_frag.
8906  def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8907            (VCVTPD2DQZ128rr VR128X:$src)>;
8908  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8909                          VK2WM:$mask),
8910            (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8911  def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8912                          VK2WM:$mask),
8913            (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8914
8915  def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8916            (VCVTPD2DQZ128rm addr:$src)>;
8917  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8918                          VK2WM:$mask),
8919            (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8920  def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8921                          VK2WM:$mask),
8922            (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8923
8924  def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8925            (VCVTPD2DQZ128rmb addr:$src)>;
8926  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8927                          (v4i32 VR128X:$src0), VK2WM:$mask),
8928            (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8929  def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8930                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8931            (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8932
8933  // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8934  // patterns have been disabled with null_frag.
8935  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8936            (VCVTTPD2DQZ128rr VR128X:$src)>;
8937  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8938                          VK2WM:$mask),
8939            (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8940  def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8941                          VK2WM:$mask),
8942            (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8943
8944  def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8945            (VCVTTPD2DQZ128rm addr:$src)>;
8946  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8947                          VK2WM:$mask),
8948            (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8949  def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8950                          VK2WM:$mask),
8951            (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8952
8953  def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8954            (VCVTTPD2DQZ128rmb addr:$src)>;
8955  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8956                          (v4i32 VR128X:$src0), VK2WM:$mask),
8957            (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8958  def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8959                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8960            (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8961
8962  // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8963  // patterns have been disabled with null_frag.
8964  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8965            (VCVTPD2UDQZ128rr VR128X:$src)>;
8966  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8967                           VK2WM:$mask),
8968            (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8969  def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8970                           VK2WM:$mask),
8971            (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8972
8973  def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8974            (VCVTPD2UDQZ128rm addr:$src)>;
8975  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8976                           VK2WM:$mask),
8977            (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8978  def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8979                           VK2WM:$mask),
8980            (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8981
8982  def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8983            (VCVTPD2UDQZ128rmb addr:$src)>;
8984  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8985                           (v4i32 VR128X:$src0), VK2WM:$mask),
8986            (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8987  def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8988                           v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8989            (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8990
8991  // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8992  // patterns have been disabled with null_frag.
8993  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8994            (VCVTTPD2UDQZ128rr VR128X:$src)>;
8995  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8996                          VK2WM:$mask),
8997            (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8998  def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8999                          VK2WM:$mask),
9000            (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
9001
9002  def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
9003            (VCVTTPD2UDQZ128rm addr:$src)>;
9004  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
9005                          VK2WM:$mask),
9006            (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9007  def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
9008                          VK2WM:$mask),
9009            (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
9010
9011  def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
9012            (VCVTTPD2UDQZ128rmb addr:$src)>;
9013  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9014                          (v4i32 VR128X:$src0), VK2WM:$mask),
9015            (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9016  def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
9017                          v4i32x_info.ImmAllZerosV, VK2WM:$mask),
9018            (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
9019}
9020
// Selection patterns for the 128-bit VCVT(T)PS2(U)QQ memory forms when the
// f32 source is an X86vzload64 of the address, reinterpreted as v4f32 through
// bc_v4f32. Every conversion node gets three patterns:
//   * unmasked                                  -> ...rm
//   * vselect_mask with $src0 as the passthru   -> ...rmk
//   * vselect_mask with ImmAllZerosV passthru   -> ...rmkz
// The unmasked truncating patterns match X86any_cvttp2si / X86any_cvttp2ui,
// while their masked counterparts match only X86cvttp2si / X86cvttp2ui.
// All patterns require both HasDQI and HasVLX.
9021let Predicates = [HasDQI, HasVLX] in {
  // Signed conversion (X86cvtp2Int) -> VCVTPS2QQZ128.
9022  def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9023            (VCVTPS2QQZ128rm addr:$src)>;
9024  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9025                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9026                                 VR128X:$src0)),
9027            (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9028  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9029                                 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9030                                 v2i64x_info.ImmAllZerosV)),
9031            (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9032
  // Unsigned conversion (X86cvtp2UInt) -> VCVTPS2UQQZ128.
9033  def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9034            (VCVTPS2UQQZ128rm addr:$src)>;
9035  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9036                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9037                                 VR128X:$src0)),
9038            (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9039  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9040                                 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9041                                 v2i64x_info.ImmAllZerosV)),
9042            (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9043
  // Truncating signed conversion -> VCVTTPS2QQZ128.
9044  def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9045            (VCVTTPS2QQZ128rm addr:$src)>;
9046  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9047                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9048                                 VR128X:$src0)),
9049            (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9050  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9051                                 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9052                                 v2i64x_info.ImmAllZerosV)),
9053            (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
9054
  // Truncating unsigned conversion -> VCVTTPS2UQQZ128.
9055  def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
9056            (VCVTTPS2UQQZ128rm addr:$src)>;
9057  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9058                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9059                                 VR128X:$src0)),
9060            (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9061  def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
9062                                 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
9063                                 v2i64x_info.ImmAllZerosV)),
9064            (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
9065}
9066
// Selection patterns for the 128-bit VCVTDQ2PD / VCVTUDQ2PD memory forms when
// the i32 source is an X86vzload64 of the address, reinterpreted as v4i32
// through bc_v4i32. As above, each conversion has an unmasked (rm), a
// merge-masked (rmk, passthru $src0) and a zero-masked (rmkz, passthru
// ImmAllZerosV) pattern. The unmasked patterns match the X86any_* nodes, the
// masked ones match X86VSintToFP / X86VUintToFP. Only HasVLX is listed here.
9067let Predicates = [HasVLX] in {
  // Signed i32 -> f64.
9068  def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9069            (VCVTDQ2PDZ128rm addr:$src)>;
9070  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9071                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9072                                 VR128X:$src0)),
9073            (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9074  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9075                                 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9076                                 v2f64x_info.ImmAllZerosV)),
9077            (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9078
  // Unsigned i32 -> f64.
9079  def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9080            (VCVTUDQ2PDZ128rm addr:$src)>;
9081  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9082                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9083                                 VR128X:$src0)),
9084            (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9085  def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
9086                                 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
9087                                 v2f64x_info.ImmAllZerosV)),
9088            (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
9089}
9090
9091//===----------------------------------------------------------------------===//
9092// Half precision conversion instructions
9093//===----------------------------------------------------------------------===//
9094
// avx512_cvtph2ps - register (rr) and memory (rm) forms of vcvtph2ps,
// opcode 0x13, both built through AVX512_maskable_split.
//   _dest    - vector info of the f32 result (supplies the output RC).
//   _src     - vector info of the half-precision source (supplies RC and VT).
//   x86memop - memory operand type used by the rm form.
//   ld_dag   - dag that produces the source value for the rm form; the
//              instantiations pass either a plain load or a bitconverted
//              X86vzload64.
//   sched    - scheduling class; rm uses sched.Folded.
// Two pattern dags are passed per form: one on X86any_cvtph2ps and one on
// X86cvtph2ps. NOTE(review): presumably the first feeds the unmasked pattern
// and the second the masked ones - confirm against AVX512_maskable_split.
// Every record defined here reads MXCSR and is marked mayRaiseFPException.
9095let Uses = [MXCSR], mayRaiseFPException = 1 in
9096multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9097                           X86MemOperand x86memop, dag ld_dag,
9098                           X86FoldableSchedWrite sched> {
9099  defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
9100                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
9101                            (X86any_cvtph2ps (_src.VT _src.RC:$src)),
9102                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
9103                            T8PD, Sched<[sched]>;
9104  defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
9105                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
9106                            (X86any_cvtph2ps (_src.VT ld_dag)),
9107                            (X86cvtph2ps (_src.VT ld_dag))>,
9108                            T8PD, Sched<[sched.Folded]>;
9109}
9110
// avx512_cvtph2ps_sae - register-only "{sae}" form (rrb) of vcvtph2ps,
// opcode 0x13, selected from X86cvtph2psSAE and encoded with EVEX_B.
// Unlike avx512_cvtph2ps, only Uses = [MXCSR] is set here;
// mayRaiseFPException is not.
9111multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9112                               X86FoldableSchedWrite sched> {
9113  let Uses = [MXCSR] in
9114  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
9115                             (ins _src.RC:$src), "vcvtph2ps",
9116                             "{sae}, $src", "$src, {sae}",
9117                             (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
9118                             T8PD, EVEX_B, Sched<[sched]>;
9119}
9120
// 512-bit vcvtph2ps: v16i16 source -> v16f32 result, memory form reading
// through f256mem with a plain load. Combines the rr/rm forms with the {sae}
// form. Requires HasAVX512.
9121let Predicates = [HasAVX512] in
9122  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
9123                                    (load addr:$src), WriteCvtPH2PSZ>,
9124                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
9125                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9126
// 256-bit and 128-bit vcvtph2ps (HasVLX). Neither gets a {sae} form.
//   * Z256: v8i16 source loaded through f128mem with a plain load.
//   * Z128: v8i16 source info, but the memory form uses f64mem and feeds the
//     conversion from a bitconverted v2i64 X86vzload64.
9127let Predicates = [HasVLX] in {
9128  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
9129                       (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
9130                       EVEX_CD8<32, CD8VH>;
9131  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
9132                       (bitconvert (v2i64 (X86vzload64 addr:$src))),
9133                       WriteCvtPH2PS>, EVEX, EVEX_V128,
9134                       EVEX_CD8<32, CD8VH>;
9135
9136  // Pattern match vcvtph2ps of a scalar i64 load.
  // This covers the scalar_to_vector(loadi64) spelling of the source, which
  // the X86vzload64-based rm pattern above does not match.
9137  def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
9138              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
9139            (VCVTPH2PSZ128rm addr:$src)>;
9140}
9141
// avx512_cvtps2ph - vcvtps2ph (opcode 0x1D) in five forms:
//   rr   - unmasked register form, selected from X86any_cvtps2ph.
//   rrk  - merge-masked register form ($src0 tied to $dst), selected from
//          X86mcvtps2ph with $src0 as passthru.
//   rrkz - zero-masked register form, selected from X86mcvtps2ph with
//          _dest.ImmAllZerosV as passthru.
//   mr   - unmasked store form; no pattern here (see the store patterns that
//          follow the instantiations).
//   mrk  - masked store form; no pattern, marked NotMemoryFoldable.
// Parameters:
//   _dest    - vector info of the half-precision result.
//   _src     - vector info of the f32 source.
//   x86memop - memory operand type for the store forms.
//   RR / MR  - scheduling classes for the register / store forms.
// All forms use GenericDomain, read MXCSR and are marked mayRaiseFPException.
9142multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9143                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
9144let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9145  def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9146             (ins _src.RC:$src1, i32u8imm:$src2),
9147             "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9148             [(set _dest.RC:$dst,
9149                   (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
9150             Sched<[RR]>;
  // The write-mask class for the register forms comes from _src (one mask bit
  // per source f32 element), not from _dest.
9151  let Constraints = "$src0 = $dst" in
9152  def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9153             (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9154             "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
9155             [(set _dest.RC:$dst,
9156                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9157                                 _dest.RC:$src0, _src.KRCWM:$mask))]>,
9158             Sched<[RR]>, EVEX_K;
9159  def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
9160             (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9161             "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
9162             [(set _dest.RC:$dst,
9163                   (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
9164                                 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
9165             Sched<[RR]>, EVEX_KZ;
9166  let hasSideEffects = 0, mayStore = 1 in {
9167    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
9168               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
9169               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9170               Sched<[MR]>;
    // NOTE(review): mrk takes its mask class from _dest.KRCWM whereas rrk and
    // rrkz use _src.KRCWM. For the 128-bit instantiation (_dest = v8i16x_info,
    // _src = v4f32x_info) these are different classes - confirm whether the
    // difference is intentional.
9171    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
9172               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
9173               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
9174                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
9175  }
9176}
9177}
9178
// avx512_cvtps2ph_sae - register-only "{sae}" form (rrb) of vcvtps2ph,
// opcode 0x1D, built through AVX512_maskable_in_asm with an empty pattern
// list, so no selection pattern is attached here. hasSideEffects is cleared
// and MXCSR is listed in Uses.
// Note that the template argument `Sched` has the same name as the Sched<>
// class it is passed to on the last line.
9179multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
9180                               SchedWrite Sched> {
9181  let hasSideEffects = 0, Uses = [MXCSR] in
9182  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
9183                   (outs _dest.RC:$dst),
9184                   (ins _src.RC:$src1, i32u8imm:$src2),
9185                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
9186                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
9187}
9188
// 512-bit vcvtps2ph: v16f32 source -> v16i16 result, store forms writing
// through f256mem. Combines the rr/rrk/rrkz/mr/mrk forms with the {sae} form.
// Requires HasAVX512.
9189let Predicates = [HasAVX512] in {
9190  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
9191                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
9192                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
9193                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
9194
  // Select the pattern-less mr form for a store of the full v16i16 result.
9195  def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
9196            (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
9197}
9198
// 256-bit and 128-bit vcvtps2ph (HasVLX), without a {sae} form.
//   * Z256: v8f32 source, v8i16 result, stores through f128mem.
//   * Z128: v4f32 source, v8i16 result info, stores through f64mem.
// The trailing patterns select the pattern-less mr store forms.
9199let Predicates = [HasVLX] in {
9200  defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
9201                                       WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
9202                                       EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
9203  defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
9204                                       WriteCvtPS2PH, WriteCvtPS2PHSt>,
9205                                       EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
9206
  // 128-bit: a store of element 0 of the result viewed as v2f64 or as v2i64
  // (i.e. the low 64 bits of the v8i16 value) maps to VCVTPS2PHZ128mr.
9207  def : Pat<(store (f64 (extractelt
9208                         (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9209                         (iPTR 0))), addr:$dst),
9210            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
9211  def : Pat<(store (i64 (extractelt
9212                         (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
9213                         (iPTR 0))), addr:$dst),
9214            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
  // 256-bit: a store of the whole v8i16 result maps to VCVTPS2PHZ256mr.
9215  def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
9216            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
9217}
9218
9219//  Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Defines a single register-register "{sae}" form (rrb) with no outputs and
// an empty pattern list. The record itself only sets Uses = [MXCSR] and
// clears hasSideEffects; Defs = [EFLAGS] is applied by the enclosing `let`
// at the instantiation sites.
//   opc       - opcode byte.
//   _         - vector info supplying the register class of both operands.
//   OpcodeStr - mnemonic.
//   d         - execution domain.
//   sched     - scheduling class, WriteFComX by default.
9220multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9221                              string OpcodeStr, Domain d,
9222                              X86FoldableSchedWrite sched = WriteFComX> {
9223  let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
9224  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9225                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9226                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
9227}
9228
// {sae} forms of the single/double precision scalar compares. All define
// EFLAGS and require HasAVX512. Opcode 0x2E is used for the vucomis* pair and
// 0x2F for the vcomis* pair; the double-precision variants add VEX_W and a
// 64-bit EVEX_CD8 element size.
9229let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9230  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
9231                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9232  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
9233                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9234  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
9235                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9236  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
9237                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9238}
9239
// EVEX-encoded scalar compares built from the shared sse12_ord_cmp and
// sse12_ord_cmp_int multiclasses (defined outside this section). They reuse
// the VUCOMISSZ/VUCOMISDZ/VCOMISSZ/VCOMISDZ defm prefixes of the {sae} forms
// above. All define EFLAGS and require HasAVX512.
//   * FR32X/FR64X forms: the ucomis* pair is selected from X86any_fcmp, the
//     comis* pair from X86strict_fcmps.
//   * isCodeGenOnly VR128X forms: selected from X86ucomi / X86comi, with the
//     memory operand matched through sse_load_f32 / sse_load_f64.
9240let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9241  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
9242                                 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9243                                 EVEX_CD8<32, CD8VT1>;
9244  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
9245                                  "ucomisd", SSEPackedDouble>, PD, EVEX,
9246                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9247  defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
9248                                 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9249                                 EVEX_CD8<32, CD8VT1>;
9250  defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
9251                                 "comisd", SSEPackedDouble>, PD, EVEX,
9252                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9253  let isCodeGenOnly = 1 in {
9254    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9255                          sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9256                          EVEX_CD8<32, CD8VT1>;
9257    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9258                          sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
9259                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9260
9261    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9262                          sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
9263                          EVEX_CD8<32, CD8VT1>;
9264    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9265                          sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
9266                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9267  }
9268}
9269
// Half-precision scalar compares (vucomish / vcomish), gated on HasFP16 and
// defining EFLAGS. Mirrors the single/double precision sets above: a {sae}
// form, an FR16X form (X86any_fcmp for ucomish, X86strict_fcmps for comish)
// and an isCodeGenOnly VR128X form (X86ucomi / X86comi). All use the T_MAP5PS
// opcode map and a 16-bit EVEX_CD8 element size.
9270let Defs = [EFLAGS], Predicates = [HasFP16] in {
9271  defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
9272                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9273                                EVEX_CD8<16, CD8VT1>;
9274  defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
9275                                SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
9276                                EVEX_CD8<16, CD8VT1>;
9277  defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
9278                                "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
9279                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9280  defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
9281                                "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
9282                                VEX_LIG, EVEX_CD8<16, CD8VT1>;
9283  let isCodeGenOnly = 1 in {
9284    defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
9285                                sse_load_f16, "ucomish", SSEPackedSingle>,
9286                                T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9287
9288    defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
9289                                sse_load_f16, "comish", SSEPackedSingle>,
9290                                T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
9291  }
9292}
9293
9294/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
/// Two-source scalar forms built through AVX512_maskable_scalar:
///   rr - both sources in registers.
///   rm - second source from memory, matched through _.ScalarIntMemFrags.
/// OpNode is applied to ($src1, $src2) in both forms. `prd` is the predicate
/// attached to the defined records (HasAVX512 unless overridden).
9295multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9296                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
9297                         Predicate prd = HasAVX512> {
9298  let Predicates = [prd], ExeDomain = _.ExeDomain in {
9299  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9300                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9301                           "$src2, $src1", "$src1, $src2",
9302                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9303                           EVEX_4V, VEX_LIG, Sched<[sched]>;
9304  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9305                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9306                         "$src2, $src1", "$src1, $src2",
9307                         (OpNode (_.VT _.RC:$src1),
9308                          (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
9309                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9310}
9311}
9312
// Half-precision scalar reciprocal (0x4D) and reciprocal square root (0x4F),
// gated on HasFP16 and encoded in T_MAP6PD. They reuse the X86rcp14s /
// X86rsqrt14s nodes. Unlike the 14ss/14sd instantiations that follow, these
// two are not wrapped in `let Uses = [MXCSR]`.
9313defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
9314                               f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
9315                               T_MAP6PD;
9316defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
9317                                 SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
9318                                 EVEX_CD8<16, CD8VT1>, T_MAP6PD;
// Single/double precision scalar vrcp14 (0x4D) and vrsqrt14 (0x4F). They use
// the default HasAVX512 predicate of avx512_fp14_s, are encoded in T8PD and
// list MXCSR in Uses. The sd variants add VEX_W and a 64-bit EVEX_CD8 size.
9319let Uses = [MXCSR] in {
9320defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9321                               f32x_info>, EVEX_CD8<32, CD8VT1>,
9322                               T8PD;
9323defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9324                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
9325                               T8PD;
9326defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9327                                 SchedWriteFRsqrt.Scl, f32x_info>,
9328                                 EVEX_CD8<32, CD8VT1>, T8PD;
9329defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9330                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
9331                                 EVEX_CD8<64, CD8VT1>, T8PD;
9332}
9333
9334/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd, rcpph, rsqrtph
/// One-source packed forms built through AVX512_maskable:
///   r  - register source.
///   m  - full-vector memory source (_.LdFrag, bitconverted to _.VT).
///   mb - broadcast memory source (_.BroadcastLdFrag), encoded with EVEX_B.
/// All three forms carry T8PD here; the half-precision instantiations in
/// avx512_fp14_p_vl_all additionally append T_MAP6PD.
9335multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9336                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9337  let ExeDomain = _.ExeDomain in {
9338  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9339                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9340                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
9341                         Sched<[sched]>;
9342  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9343                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9344                         (OpNode (_.VT
9345                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
9346                         Sched<[sched.Folded, sched.ReadAfterFold]>;
9347  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9348                          (ins _.ScalarMemOp:$src), OpcodeStr,
9349                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9350                          (OpNode (_.VT
9351                            (_.BroadcastLdFrag addr:$src)))>,
9352                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9353  }
9354}
9355
// avx512_fp14_p_vl_all - instantiates avx512_fp14_p for every packed width
// and element type. OpcodeStr is the mnemonic stem; "14ps", "14pd" or "ph"
// is appended to it.
//   * 512-bit ps/pd: no extra predicate set here, Uses = [MXCSR].
//   * 512-bit ph:    HasFP16, T_MAP6PD; not inside a `let Uses = [MXCSR]`.
//   * 128/256-bit ps/pd: HasVLX, Uses = [MXCSR].
//   * 128/256-bit ph:    HasFP16 + HasVLX, T_MAP6PD; no Uses set here.
9356multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9357                                X86SchedWriteWidths sched> {
9358  let Uses = [MXCSR] in {
9359  defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
9360                             v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9361  defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
9362                             v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9363  }
9364  let Predicates = [HasFP16] in
9365  defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
9366                           v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9367
9368  // Define only if AVX512VL feature is present.
9369  let Predicates = [HasVLX], Uses = [MXCSR] in {
9370    defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9371                                  OpNode, sched.XMM, v4f32x_info>,
9372                                  EVEX_V128, EVEX_CD8<32, CD8VF>;
9373    defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
9374                                  OpNode, sched.YMM, v8f32x_info>,
9375                                  EVEX_V256, EVEX_CD8<32, CD8VF>;
9376    defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9377                                  OpNode, sched.XMM, v2f64x_info>,
9378                                  EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
9379    defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
9380                                  OpNode, sched.YMM, v4f64x_info>,
9381                                  EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
9382  }
9383  let Predicates = [HasFP16, HasVLX] in {
9384    defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9385                                OpNode, sched.XMM, v8f16x_info>,
9386                                EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9387    defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
9388                                OpNode, sched.YMM, v16f16x_info>,
9389                                EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9390  }
9391}
9392
// Packed reciprocal square root (opcode 0x4E) and reciprocal (opcode 0x4C)
// for all widths/element types produced by avx512_fp14_p_vl_all. The "14ps",
// "14pd" and "ph" suffixes are appended to the "vrsqrt" / "vrcp" stems.
9393defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
9394defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
9395
9396/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Also instantiated for the scalar getexp forms (see avx512_eri_s and
/// avx512_vgetexpsh). Two-source scalar forms via AVX512_maskable_scalar:
///   r  - register sources, selected from OpNode.
///   rb - register sources with "{sae}", selected from OpNodeSAE, EVEX_B.
///   m  - second source from memory (_.ScalarIntMemFrags), selected from
///        OpNode.
/// All forms list MXCSR in Uses; r and m carry SIMD_EXC, rb does not.
9397multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9398                         SDNode OpNode, SDNode OpNodeSAE,
9399                         X86FoldableSchedWrite sched> {
9400  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
9401  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9402                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9403                           "$src2, $src1", "$src1, $src2",
9404                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9405                           Sched<[sched]>, SIMD_EXC;
9406
9407  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9408                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9409                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9410                            (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9411                            EVEX_B, Sched<[sched]>;
9412
9413  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9414                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9415                         "$src2, $src1", "$src1, $src2",
9416                         (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
9417                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9418  }
9419}
9420
// avx512_eri_s - single (SSZ, "ss") and double (SDZ, "sd") precision scalar
// instantiations of avx512_fp28_s. Both are VEX_LIG, T8PD and EVEX_4V; the
// double variant adds VEX_W and a 64-bit EVEX_CD8 element size. No predicate
// is set here; callers supply it.
9421multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9422                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9423  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
9424                           sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
9425  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
9426                           sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
9427}
9428
// avx512_vgetexpsh - half-precision scalar (SHZ, "sh") instantiation of
// avx512_fp28_s, gated on HasFP16 and encoded in T_MAP6PD with a 16-bit
// EVEX_CD8 element size. Unlike avx512_eri_s, VEX_LIG is not listed here.
9429multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
9430                        SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
9431  let Predicates = [HasFP16] in
9432  defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode,  OpNodeSAE, sched>,
9433               EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
9434}
9435
// Scalar vrcp28ss/sd (opcode 0xCB) and vrsqrt28ss/sd (opcode 0xCD), gated on
// HasERI. Each has a regular node and a separate SAE node.
9436let Predicates = [HasERI] in {
9437  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
9438                               SchedWriteFRcp.Scl>;
9439  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
9440                               SchedWriteFRsqrt.Scl>;
9441}
9442
// Scalar vgetexpss/sd/sh (opcode 0x43). Not wrapped in the HasERI block
// above; the ss/sd forms have no predicate set at this level, while the sh
// form gets HasFP16 from avx512_vgetexpsh.
9443defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9444                              SchedWriteFRnd.Scl>,
9445                 avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
9446                                  SchedWriteFRnd.Scl>;
9447/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Also instantiated for the packed exp2 and getexp forms (see avx512_eri,
/// avx512_fp_unaryop_packed and avx512_vgetexp_fp16). One-source packed
/// forms built through AVX512_maskable:
///   r  - register source.
///   m  - full-vector memory source (_.LdFrag, bitconverted to _.VT).
///   mb - broadcast memory source (_.BroadcastLdFrag), encoded with EVEX_B.
/// All forms list MXCSR in Uses and are marked mayRaiseFPException.
9448
9449multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9450                         SDNode OpNode, X86FoldableSchedWrite sched> {
9451  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9452  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9453                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9454                         (OpNode (_.VT _.RC:$src))>,
9455                         Sched<[sched]>;
9456
9457  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9458                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9459                         (OpNode (_.VT
9460                             (bitconvert (_.LdFrag addr:$src))))>,
9461                          Sched<[sched.Folded, sched.ReadAfterFold]>;
9462
9463  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9464                         (ins _.ScalarMemOp:$src), OpcodeStr,
9465                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9466                         (OpNode (_.VT
9467                                  (_.BroadcastLdFrag addr:$src)))>,
9468                         EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9469  }
9470}
// avx512_fp28_p_sae - register-only "{sae}" packed form (rb), encoded with
// EVEX_B. OpNode here is the SAE variant of the node passed by the caller.
// Only Uses = [MXCSR] is set; mayRaiseFPException is not.
9471multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9472                         SDNode OpNode, X86FoldableSchedWrite sched> {
9473  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
9474  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9475                        (ins _.RC:$src), OpcodeStr,
9476                        "{sae}, $src", "$src, {sae}",
9477                        (OpNode (_.VT _.RC:$src))>,
9478                        EVEX_B, Sched<[sched]>;
9479}
9480
// avx512_eri - 512-bit packed single (PSZ) and double (PDZ) instantiations,
// each combining the r/m/mb forms (OpNode) with the {sae} form (OpNodeSAE).
// Both are T8PD and EVEX_V512; the pd variant adds VEX_W and a 64-bit
// EVEX_CD8 element size. No predicate is set here; callers supply it.
9481multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9482                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9483   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9484              avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
9485              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9486   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9487              avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
9488              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9489}
9490
// avx512_fp_unaryop_packed - 128-bit and 256-bit ps/pd instantiations of
// avx512_fp28_p, gated on HasVLX. There is no {sae} form at these widths.
9491multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9492                                  SDNode OpNode, X86SchedWriteWidths sched> {
9493  // Define only if AVX512VL feature is present.
9494  let Predicates = [HasVLX] in {
9495    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
9496                                sched.XMM>,
9497                                EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
9498    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
9499                                sched.YMM>,
9500                                EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
9501    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
9502                                sched.XMM>,
9503                                EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9504    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
9505                                sched.YMM>,
9506                                EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9507  }
9508}
9509
// avx512_vgetexp_fp16 - half-precision packed instantiations of
// avx512_fp28_p, all encoded in T_MAP6PD with a 16-bit EVEX_CD8 element size.
//   * PHZ (512-bit): HasFP16; includes the {sae} form (OpNodeSAE).
//   * PHZ128 / PHZ256: HasFP16 + HasVLX; no {sae} form.
9510multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
9511                       SDNode OpNodeSAE, X86SchedWriteWidths sched> {
9512  let Predicates = [HasFP16] in
9513  defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
9514              avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
9515              T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
9516  let Predicates = [HasFP16, HasVLX] in {
9517    defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
9518                                     EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9519    defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
9520                                     EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
9521  }
9522}
// 512-bit packed ERI instructions, gated on HasERI: vrsqrt28 (0xCC),
// vrcp28 (0xCA) and vexp2 (0xC8), each in ps and pd flavours with a {sae}
// form. Only the 512-bit avx512_eri multiclass is used, so no 128/256-bit
// variants are defined here.
9523let Predicates = [HasERI] in {
9524 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
9525                            SchedWriteFRsqrt>, EVEX;
9526 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
9527                            SchedWriteFRcp>, EVEX;
9528 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
9529                            SchedWriteFAdd>, EVEX;
9530}
// Packed vgetexp (opcode 0x42) at every width: 512-bit ps/pd with {sae}
// (avx512_eri), ph at all widths (avx512_vgetexp_fp16, HasFP16 gated) and
// 128/256-bit ps/pd (avx512_fp_unaryop_packed, HasVLX gated). Not wrapped in
// the HasERI block above.
9531defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9532                            SchedWriteFRnd>,
9533                 avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
9534                                     SchedWriteFRnd>,
9535                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
9536                                          SchedWriteFRnd>, EVEX;
9537
// avx512_sqrt_packed_round - register-only packed sqrt form (rb) that takes
// an explicit rounding-control operand ($rc, AVX512RC). It is selected from
// X86fsqrtRnd with $rc as an i32 target immediate and encoded with EVEX_B and
// EVEX_RC. No Uses / mayRaiseFPException is set inside this multiclass.
9538multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9539                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9540  let ExeDomain = _.ExeDomain in
9541  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9542                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9543                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
9544                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
9545}
9546
// Packed sqrt for one vector type: register (r), full-width memory (m) and
// broadcast-from-scalar memory (mb) forms. Every record reads MXCSR and is
// marked as possibly raising an FP exception.
// Each form hands AVX512_maskable_split two patterns, one on any_fsqrt and
// one on fsqrt.
// NOTE(review): presumably the any_fsqrt pattern serves the unmasked record
// and the fsqrt pattern the masked ones - confirm against the definition of
// AVX512_maskable_split.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic (suffix already applied by the caller).
//   sched     - scheduling class; .Folded/.ReadAfterFold used for memory forms.
//   _         - vector type info (RC, VT, MemOp, LdFrag, broadcast helpers).
9547multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9548                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9549  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source.
9550  defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
9551                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
9552                         (_.VT (any_fsqrt _.RC:$src)),
9553                         (_.VT (fsqrt _.RC:$src))>, EVEX,
9554                         Sched<[sched]>;
  // Memory source loaded through _.LdFrag.
9555  defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9556                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9557                         (any_fsqrt (_.VT (_.LdFrag addr:$src))),
9558                         (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
9559                         Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Scalar memory source loaded through _.BroadcastLdFrag; the asm operand
  // gets _.BroadcastStr appended and the record is tagged EVEX_B.
9560  defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
9561                          (ins _.ScalarMemOp:$src), OpcodeStr,
9562                          "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
9563                          (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
9564                          (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
9565                          EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
9566  }
9567}
9568
// Instantiates avx512_sqrt_packed for every packed element type and width:
// "ph" (f16), "ps" (f32) and "pd" (f64), each at 512, 128 and 256 bits.
// The whole multiclass sits under Uses = [MXCSR], mayRaiseFPException = 1.
//   opc       - opcode byte shared by all records.
//   OpcodeStr - mnemonic stem; the "ph"/"ps"/"pd" suffix is appended here.
//   sched     - scheduling info indexed by element kind (.PH/.PS/.PD) and
//               width (.XMM/.YMM/.ZMM).
9569let Uses = [MXCSR], mayRaiseFPException = 1 in
9570multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9571                                  X86SchedWriteSizes sched> {
  // Half precision, 512-bit: HasFP16.
9572  let Predicates = [HasFP16] in
9573  defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9574                                sched.PH.ZMM, v32f16_info>,
9575                                EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
  // Half precision, 128/256-bit: HasFP16 and HasVLX.
9576  let Predicates = [HasFP16, HasVLX] in {
9577    defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9578                                     sched.PH.XMM, v8f16x_info>,
9579                                     EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9580    defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
9581                                     sched.PH.YMM, v16f16x_info>,
9582                                     EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9583  }
  // Single/double precision, 512-bit: no Predicates override at this level.
9584  defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9585                                sched.PS.ZMM, v16f32_info>,
9586                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9587  defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9588                                sched.PD.ZMM, v8f64_info>,
9589                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9590  // Define only if AVX512VL feature is present.
9591  let Predicates = [HasVLX] in {
9592    defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9593                                     sched.PS.XMM, v4f32x_info>,
9594                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
9595    defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9596                                     sched.PS.YMM, v8f32x_info>,
9597                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
9598    defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9599                                     sched.PD.XMM, v2f64x_info>,
9600                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9601    defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9602                                     sched.PD.YMM, v4f64x_info>,
9603                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9604  }
9605}
9606
// Instantiates avx512_sqrt_packed_round (explicit rounding control) for the
// "ph", "ps" and "pd" element types. Only 512-bit records are defined here.
// The multiclass sits under Uses = [MXCSR]; unlike avx512_sqrt_packed_all it
// does not set mayRaiseFPException at this level.
//   opc       - opcode byte shared by all records.
//   OpcodeStr - mnemonic stem; the "ph"/"ps"/"pd" suffix is appended here.
//   sched     - scheduling info; only the .ZMM entries are used.
9607let Uses = [MXCSR] in
9608multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9609                                        X86SchedWriteSizes sched> {
  // The let below has no braces, so HasFP16 applies to PHZ only.
9610  let Predicates = [HasFP16] in
9611  defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
9612                                      sched.PH.ZMM, v32f16_info>,
9613                                      EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
9614  defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9615                                      sched.PS.ZMM, v16f32_info>,
9616                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9617  defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9618                                      sched.PD.ZMM, v8f64_info>,
9619                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9620}
9621
// Scalar sqrt for one element type. Defines:
//   r_Int / m_Int - maskable vector-register forms matched via X86fsqrts;
//   rb_Int        - maskable form with a rounding-control operand, matched
//                   via X86fsqrtRnds;
//   r / m         - codegen-only forms on the scalar register class (_.FRC)
//                   with empty pattern lists;
// followed by standalone patterns that select r / m for scalar any_fsqrt.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic (suffix already applied by the caller).
//   sched     - scheduling class; .Folded/.ReadAfterFold for memory forms.
//   _         - type info (RC, FRC, VT, EltVT, memory operands/fragments).
//   Name      - record-name stem; the patterns look up Name#Zr and Name#Zm.
//   prd       - predicate guarding the records and patterns (default
//               HasAVX512).
9622multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9623                              X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
9624  let ExeDomain = _.ExeDomain, Predicates = [prd] in {
9625    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9626                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9627                         "$src2, $src1", "$src1, $src2",
9628                         (X86fsqrts (_.VT _.RC:$src1),
9629                                    (_.VT _.RC:$src2))>,
9630                         Sched<[sched]>, SIMD_EXC;
9631    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9632                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9633                         "$src2, $src1", "$src1, $src2",
9634                         (X86fsqrts (_.VT _.RC:$src1),
9635                                    (_.ScalarIntMemFrags addr:$src2))>,
9636                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
    // Rounding-control form: sets Uses = [MXCSR] directly and is tagged
    // EVEX_B/EVEX_RC; it does not carry SIMD_EXC like its siblings.
9637    let Uses = [MXCSR] in
9638    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9639                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9640                         "$rc, $src2, $src1", "$src1, $src2, $rc",
9641                         (X86fsqrtRnds (_.VT _.RC:$src1),
9642                                     (_.VT _.RC:$src2),
9643                                     (i32 timm:$rc))>,
9644                         EVEX_B, EVEX_RC, Sched<[sched]>;
9645
    // Scalar-register-class forms. They have no patterns of their own; the
    // Pat records at the end of this multiclass select them.
9646    let isCodeGenOnly = 1, hasSideEffects = 0 in {
9647      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9648                (ins _.FRC:$src1, _.FRC:$src2),
9649                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9650                Sched<[sched]>, SIMD_EXC;
9651      let mayLoad = 1 in
9652        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9653                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9654                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9655                  Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9656    }
9657  }
9658
  // Scalar any_fsqrt on a register: first instruction operand is IMPLICIT_DEF,
  // second is the actual source.
9659  let Predicates = [prd] in {
9660    def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9661              (!cast<Instruction>(Name#Zr)
9662                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9663  }
9664
  // The load-folding variant is only enabled together with OptForSize.
9665  let Predicates = [prd, OptForSize] in {
9666    def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9667              (!cast<Instruction>(Name#Zm)
9668                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
9669  }
9670}
9671
// Instantiates avx512_sqrt_scalar for the three scalar element types:
// "sh" (f16x_info, guarded by HasFP16), "ss" (f32x_info) and "sd"
// (f64x_info). The ss/sd records use avx512_sqrt_scalar's default predicate.
// NAME#"SH"/"SS"/"SD" is passed as the Name stem used for pattern lookups.
//   opc       - opcode byte shared by all three.
//   OpcodeStr - mnemonic stem; the "sh"/"ss"/"sd" suffix is appended here.
//   sched     - scheduling info; the .PH/.PS/.PD .Scl entries are used.
9672multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9673                                  X86SchedWriteSizes sched> {
9674  defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
9675                        EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
9676  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9677                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9678  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9679                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
9680}
9681
// VSQRT, opcode 0x51. First defm: all packed forms plus the 512-bit
// rounding-control forms. Second defm: the scalar sh/ss/sd forms, which are
// additionally tagged VEX_LIG. Both use SchedWriteFSqrtSizes.
9682defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9683             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9684
9685defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
9686
// Scalar round-to-scale for one element type. Defines:
//   r_Int / m_Int - maskable forms matched via X86RndScales with an 8-bit
//                   immediate ($src3);
//   rb_Int        - maskable form matched via X86RndScalesSAE; its asm
//                   string contains "{sae}" and it is tagged EVEX_B;
//   r / m         - codegen-only forms on _.FRC with empty pattern lists;
// followed by standalone patterns selecting r / m for X86any_VRndScale.
// The predicate on r / m and on the standalone patterns is hard-coded to
// HasAVX512 inside this multiclass.
//   opc       - opcode byte.
//   OpcodeStr - full mnemonic.
//   sched     - scheduling class; .Folded/.ReadAfterFold for memory forms.
//   _         - type info (RC, FRC, VT, EltVT, memory operands/fragments).
9687multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9688                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9689  let ExeDomain = _.ExeDomain in {
9690  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9691                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9692                           "$src3, $src2, $src1", "$src1, $src2, $src3",
9693                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9694                           (i32 timm:$src3)))>,
9695                           Sched<[sched]>, SIMD_EXC;
9696
  // SAE form: Uses = [MXCSR] is set directly; no SIMD_EXC on this record.
9697  let Uses = [MXCSR] in
9698  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9699                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9700                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9701                         (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9702                         (i32 timm:$src3)))>, EVEX_B,
9703                         Sched<[sched]>;
9704
9705  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9706                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9707                         OpcodeStr,
9708                         "$src3, $src2, $src1", "$src1, $src2, $src3",
9709                         (_.VT (X86RndScales _.RC:$src1,
9710                                (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9711                         Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9712
  // Scalar-register-class forms; selected only through the Pat records below.
9713  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9714    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9715               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9716               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9717               []>, Sched<[sched]>, SIMD_EXC;
9718
9719    let mayLoad = 1 in
9720      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9721                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9722                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9723                 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9724  }
9725  }
9726
  // Register source: IMPLICIT_DEF fills the instruction's first source.
9727  let Predicates = [HasAVX512] in {
9728    def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9729              (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9730               _.FRC:$src1, timm:$src2))>;
9731  }
9732
  // Load-folding variant, enabled only together with OptForSize.
9733  let Predicates = [HasAVX512, OptForSize] in {
9734    def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9735              (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9736               addr:$src1, timm:$src2))>;
9737  }
9738}
9739
// Scalar VRNDSCALE instantiations. All use SchedWriteFRnd.Scl.
// The f16 record is wrapped in HasFP16, uses AVX512PSIi8Base with TA, and is
// not tagged VEX_LIG here; the f32/f64 records use AVX512AIi8Base and are
// tagged VEX_LIG. The f64 record uses opcode 0x0B and VEX_W; the others 0x0A.
9740let Predicates = [HasFP16] in
9741defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
9742                                           SchedWriteFRnd.Scl, f16x_info>,
9743                                           AVX512PSIi8Base, TA, EVEX_4V,
9744                                           EVEX_CD8<16, CD8VT1>;
9745
9746defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9747                                           SchedWriteFRnd.Scl, f32x_info>,
9748                                           AVX512AIi8Base, EVEX_4V, VEX_LIG,
9749                                           EVEX_CD8<32, CD8VT1>;
9750
9751defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9752                                           SchedWriteFRnd.Scl, f64x_info>,
9753                                           VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9754                                           EVEX_CD8<64, CD8VT1>;
9755
// Selection patterns that fold a masked scalar select around a unary scalar
// op into the masked "r_Intk" / "r_Intkz" instruction records.
// Matched shape: Move($src1, scalar_to_vector(X86selects_mask(Mask,
//   OpNode(element 0 of $src2), <fallback>))).
//   OpNode        - scalar operation applied to element 0 of $src2.
//   OpcPrefix     - instruction stem; records are looked up as
//                   "V"#OpcPrefix#r_Intk and "V"#OpcPrefix#r_Intkz.
//   Move          - node that merges the scalar result into $src1.
//   Mask          - dag matched as the select condition.
//   _             - vector type info supplying VT.
//   ZeroFP        - leaf matched as the fallback in the zeroing pattern.
//   OutMask       - dag emitted as the instruction's mask operand.
//   BasePredicate - predicate guarding both patterns.
9756multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9757                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9758                                dag OutMask, Predicate BasePredicate> {
9759  let Predicates = [BasePredicate] in {
    // Merge form: fallback is element 0 of $dst. Output operands are emitted
    // in the order $dst, OutMask, $src2, $src1.
9760    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9761               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9762               (extractelt _.VT:$dst, (iPTR 0))))),
9763              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9764               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9765
    // Zeroing form: fallback is ZeroFP. Output operands are emitted in the
    // order OutMask, $src2, $src1.
9766    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9767               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9768               ZeroFP))),
9769              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9770               OutMask, _.VT:$src2, _.VT:$src1)>;
9771  }
9772}
9773
// Masked scalar fsqrt patterns for f16 (HasFP16), f32 and f64 (HasAVX512).
// In each case the matched mask is a v1i1 built from the i8 truncation of a
// GR32 $mask, and the emitted mask operand is $mask copied to VK1WM.
9774defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
9775                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
9776                            fp16imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasFP16>;
9777defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9778                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9779                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9780defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9781                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9782                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
9783
9784
9785//-------------------------------------------------
9786// Integer truncate and extend operations
9787//-------------------------------------------------
9788
9789// PatFrags that contain a select and a truncate op. They take operands in the
9790// same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9791// either to the multiclasses.
// Each fragment takes (src, src0, mask) and expands to
// vselect_mask(mask, <truncate-op>(src), src0), where the truncate op is
// trunc, X86vtruncs or X86vtruncus respectively.
9792def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9793                           (vselect_mask node:$mask,
9794                                         (trunc node:$src), node:$src0)>;
9795def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9796                            (vselect_mask node:$mask,
9797                                          (X86vtruncs node:$src), node:$src0)>;
9798def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9799                             (vselect_mask node:$mask,
9800                                           (X86vtruncus node:$src), node:$src0)>;
9801
// Instruction records for one truncating move at one vector width:
//   rr   - register to register, pattern OpNode(src);
//   rrk  - merge-masked, pattern MaskNode(src, $src0, mask), with $src0 tied
//          to $dst;
//   rrkz - zero-masked, pattern MaskNode(src, all-zeros, mask);
//   mr   - store to memory, no pattern;
//   mrk  - masked store to memory, no pattern, marked NotMemoryFoldable.
// The mask register class always comes from SrcInfo (SrcInfo.KRCWM).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - node used for the unmasked register pattern.
//   MaskNode  - operator used for both masked register patterns.
//   sched     - scheduling class (.Folded for the store forms).
//   SrcInfo   - type info of the wide source vector.
//   DestInfo  - type info of the narrow destination vector.
//   x86memop  - memory operand type for the store forms.
9802multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9803                              SDPatternOperator MaskNode,
9804                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9805                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9806  let ExeDomain = DestInfo.ExeDomain in {
9807  def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9808             (ins SrcInfo.RC:$src),
9809             OpcodeStr # "\t{$src, $dst|$dst, $src}",
9810             [(set DestInfo.RC:$dst,
9811                   (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9812             EVEX, Sched<[sched]>;
  // $src0 supplies the pass-through value and is tied to the destination.
9813  let Constraints = "$src0 = $dst" in
9814  def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9815             (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9816             OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9817             [(set DestInfo.RC:$dst,
9818                   (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9819                             (DestInfo.VT DestInfo.RC:$src0),
9820                             SrcInfo.KRCWM:$mask))]>,
9821             EVEX, EVEX_K, Sched<[sched]>;
9822  def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9823             (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9824             OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9825             [(set DestInfo.RC:$dst,
9826                   (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9827                             DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9828             EVEX, EVEX_KZ, Sched<[sched]>;
9829  }
9830
  // Store forms carry no selection patterns here; avx512_trunc_mr_lowering
  // contains Pat records that select records named "...mr" and "...mrk".
9831  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9832    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9833               (ins x86memop:$dst, SrcInfo.RC:$src),
9834               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9835               EVEX, Sched<[sched.Folded]>;
9836
9837    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9838               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9839               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9840               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9841  }//mayStore = 1, hasSideEffects = 0
9842}
9843
// Selection patterns that map truncating stores onto the "mr" and "mrk"
// records of a truncating-move instruction.
//   SrcInfo    - type info of the source vector; its ZSuffix is spliced into
//                the instruction name (Name # SrcInfo.ZSuffix # mr / mrk).
//   truncFrag  - fragment matched for the unmasked truncating store.
//   mtruncFrag - fragment matched for the masked truncating store.
//   Name       - instruction-name stem.
9844multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9845                                    PatFrag truncFrag, PatFrag mtruncFrag,
9846                                    string Name> {
9847
9848  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9849            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9850                                    addr:$dst, SrcInfo.RC:$src)>;
9851
9852  def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9853                        SrcInfo.KRCWM:$mask),
9854            (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9855                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
9856}
9857
// Instantiates avx512_trunc_common plus avx512_trunc_mr_lowering at the
// three vector widths, producing the Z128, Z256 and Z records.
//   opc, OpcodeStr        - opcode byte and mnemonic.
//   OpNode128/256/512     - unmasked node per source width.
//   MaskNode128/256/512   - masked operator per source width.
//   sched                 - scheduling class shared by all widths.
//   VTSrcInfo             - source type info; .info128/.info256/.info512.
//   DestInfoZ128/Z256/Z   - destination type info per width.
//   x86memopZ128/Z256/Z   - store memory operand per width.
//   truncFrag, mtruncFrag - store fragments forwarded to the mr lowering.
//   prd                   - base predicate (default HasAVX512).
9858multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9859                        SDNode OpNode256, SDNode OpNode512,
9860                        SDPatternOperator MaskNode128,
9861                        SDPatternOperator MaskNode256,
9862                        SDPatternOperator MaskNode512,
9863                        X86FoldableSchedWrite sched,
9864                        AVX512VLVectorVTInfo VTSrcInfo,
9865                        X86VectorVTInfo DestInfoZ128,
9866                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9867                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9868                        X86MemOperand x86memopZ, PatFrag truncFrag,
9869                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9870
  // 128/256-bit source records require HasVLX in addition to prd.
9871  let Predicates = [HasVLX, prd] in {
9872    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9873                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9874                avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
9875                                         mtruncFrag, NAME>, EVEX_V128;
9876
9877    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9878                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9879                avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
9880                                         mtruncFrag, NAME>, EVEX_V256;
9881  }
  // 512-bit source record requires only prd.
9882  let Predicates = [prd] in
9883    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9884                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9885                avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
9886                                         mtruncFrag, NAME>, EVEX_V512;
9887}
9888
// i64-element source (avx512vl_i64_info) truncated to i8 elements.
// All three widths use InVecNode / InVecMaskNode and a v16i8x_info
// destination; store operands are i16mem, i32mem and i64mem.
// Unlike the sibling wrappers there are no separate OpNode/MaskNode
// parameters.
9889multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
9890                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9891                           PatFrag MaskedStoreNode, SDNode InVecNode,
9892                           SDPatternOperator InVecMaskNode> {
9893  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9894                          InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9895                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
9896                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9897                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
9898}
9899
// i64-element source (avx512vl_i64_info) truncated to i16 elements.
// The 128- and 256-bit sources use InVecNode / InVecMaskNode; the 512-bit
// source uses OpNode / MaskNode. The destination is v8i16x_info at every
// width; store operands are i32mem, i64mem and i128mem.
9900multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9901                           SDPatternOperator MaskNode,
9902                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9903                           PatFrag MaskedStoreNode, SDNode InVecNode,
9904                           SDPatternOperator InVecMaskNode> {
9905  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9906                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9907                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
9908                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9909                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
9910}
9911
// i64-element source (avx512vl_i64_info) truncated to i32 elements.
// Only the 128-bit source uses InVecNode / InVecMaskNode; the 256- and
// 512-bit sources use OpNode / MaskNode. Destinations are v4i32x_info,
// v4i32x_info and v8i32x_info; store operands are i64mem, i128mem, i256mem.
9912multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9913                           SDPatternOperator MaskNode,
9914                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9915                           PatFrag MaskedStoreNode, SDNode InVecNode,
9916                           SDPatternOperator InVecMaskNode> {
9917  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9918                          InVecMaskNode, MaskNode, MaskNode, sched,
9919                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
9920                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9921                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
9922}
9923
// i32-element source (avx512vl_i32_info) truncated to i8 elements.
// The 128- and 256-bit sources use InVecNode / InVecMaskNode; the 512-bit
// source uses OpNode / MaskNode. The destination is v16i8x_info at every
// width; store operands are i32mem, i64mem and i128mem.
9924multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9925                           SDPatternOperator MaskNode,
9926                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9927                           PatFrag MaskedStoreNode, SDNode InVecNode,
9928                           SDPatternOperator InVecMaskNode> {
9929  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9930                          InVecMaskNode, InVecMaskNode, MaskNode, sched,
9931                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
9932                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9933                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
9934}
9935
// i32-element source (avx512vl_i32_info) truncated to i16 elements.
// Only the 128-bit source uses InVecNode / InVecMaskNode; the 256- and
// 512-bit sources use OpNode / MaskNode. Destinations are v8i16x_info,
// v8i16x_info and v16i16x_info; store operands are i64mem, i128mem, i256mem.
9936multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9937                           SDPatternOperator MaskNode,
9938                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9939                           PatFrag MaskedStoreNode, SDNode InVecNode,
9940                           SDPatternOperator InVecMaskNode> {
9941  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9942                          InVecMaskNode, MaskNode, MaskNode, sched,
9943                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
9944                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9945                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
9946}
9947
// i16-element source (avx512vl_i16_info) truncated to i8 elements.
// Only the 128-bit source uses InVecNode / InVecMaskNode; the 256- and
// 512-bit sources use OpNode / MaskNode. Destinations are v16i8x_info,
// v16i8x_info and v32i8x_info; store operands are i64mem, i128mem, i256mem.
// This is the only wrapper that overrides the base predicate (HasBWI).
9948multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9949                           SDPatternOperator MaskNode,
9950                           X86FoldableSchedWrite sched, PatFrag StoreNode,
9951                           PatFrag MaskedStoreNode, SDNode InVecNode,
9952                           SDPatternOperator InVecMaskNode> {
9953  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9954                          InVecMaskNode, MaskNode, MaskNode, sched,
9955                          avx512vl_i16_info, v16i8x_info, v16i8x_info,
9956                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9957                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
9958}
9959
// i64 -> i8 truncating moves: plain (0x32), "s" (0x22) and "us" (0x12)
// variants, paired with the X86vtrunc / X86vtruncs / X86vtruncus nodes and
// the matching truncstore fragments. All use WriteShuffle256.
9960defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",
9961                                  WriteShuffle256, truncstorevi8,
9962                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9963defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",
9964                                  WriteShuffle256, truncstore_s_vi8,
9965                                  masked_truncstore_s_vi8, X86vtruncs,
9966                                  X86vmtruncs>;
9967defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb",
9968                                  WriteShuffle256, truncstore_us_vi8,
9969                                  masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
9970
// i64 -> i16 truncating moves: plain (0x34), "s" (0x24) and "us" (0x14).
// The first node pair (trunc/select_trunc etc.) is the OpNode/MaskNode
// argument; the trailing X86vtrunc*/X86vmtrunc* pair is the
// InVecNode/InVecMaskNode argument.
9971defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9972                                  WriteShuffle256, truncstorevi16,
9973                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9974defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, select_truncs,
9975                                  WriteShuffle256, truncstore_s_vi16,
9976                                  masked_truncstore_s_vi16, X86vtruncs,
9977                                  X86vmtruncs>;
9978defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9979                                  select_truncus, WriteShuffle256,
9980                                  truncstore_us_vi16, masked_truncstore_us_vi16,
9981                                  X86vtruncus, X86vmtruncus>;
9982
// i64 -> i32 truncating moves: plain (0x35), "s" (0x25) and "us" (0x15).
// Argument layout matches the VPMOVQW group: OpNode/MaskNode first,
// InVecNode/InVecMaskNode last.
9983defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9984                                  WriteShuffle256, truncstorevi32,
9985                                  masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9986defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, select_truncs,
9987                                  WriteShuffle256, truncstore_s_vi32,
9988                                  masked_truncstore_s_vi32, X86vtruncs,
9989                                  X86vmtruncs>;
9990defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9991                                  select_truncus, WriteShuffle256,
9992                                  truncstore_us_vi32, masked_truncstore_us_vi32,
9993                                  X86vtruncus, X86vmtruncus>;
9994
// i32 -> i8 truncating moves: plain (0x31), "s" (0x21) and "us" (0x11).
// OpNode/MaskNode come first, InVecNode/InVecMaskNode last.
9995defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9996                                  WriteShuffle256, truncstorevi8,
9997                                  masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9998defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9999                                  WriteShuffle256, truncstore_s_vi8,
10000                                  masked_truncstore_s_vi8, X86vtruncs,
10001                                  X86vmtruncs>;
10002defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus,
10003                                  select_truncus, WriteShuffle256,
10004                                  truncstore_us_vi8, masked_truncstore_us_vi8,
10005                                  X86vtruncus, X86vmtruncus>;
10006
// i32 -> i16 truncating moves: plain (0x33), "s" (0x23) and "us" (0x13).
// OpNode/MaskNode come first, InVecNode/InVecMaskNode last.
10007defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
10008                                  WriteShuffle256, truncstorevi16,
10009                                  masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
10010defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
10011                                  WriteShuffle256, truncstore_s_vi16,
10012                                  masked_truncstore_s_vi16, X86vtruncs,
10013                                  X86vmtruncs>;
10014defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
10015                                  select_truncus, WriteShuffle256,
10016                                  truncstore_us_vi16, masked_truncstore_us_vi16,
10017                                  X86vtruncus, X86vmtruncus>;
10018
// i16 -> i8 truncating moves: plain (0x30), "s" (0x20) and "us" (0x10).
// avx512_trunc_wb passes HasBWI as the base predicate for these records.
10019defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
10020                                  WriteShuffle256, truncstorevi8,
10021                                  masked_truncstorevi8, X86vtrunc,
10022                                  X86vmtrunc>;
10023defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
10024                                  WriteShuffle256, truncstore_s_vi8,
10025                                  masked_truncstore_s_vi8, X86vtruncs,
10026                                  X86vmtruncs>;
10027defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
10028                                  select_truncus, WriteShuffle256,
10029                                  truncstore_us_vi8, masked_truncstore_us_vi8,
10030                                  X86vtruncus, X86vmtruncus>;
10031
// Without VLX, a 256-bit trunc is selected onto the 512-bit instruction:
// the YMM source is inserted into an undefined 512-bit value (sub_ymm), the
// Z-form instruction is applied, and the low XMM (sub_xmm) is extracted.
10032let Predicates = [HasAVX512, NoVLX] in {
10033def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
10034         (v8i16 (EXTRACT_SUBREG
10035                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
10036                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
10037def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
10038         (v4i32 (EXTRACT_SUBREG
10039                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
10040                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
10041}
10042
// Same widening scheme for v16i16 -> v16i8, using VPMOVWBZrr; requires
// HasBWI with NoVLX.
10043let Predicates = [HasBWI, NoVLX] in {
10044def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10045         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
10046                                            VR256X:$src, sub_ymm))), sub_xmm))>;
10047}
10048
10049// Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Two patterns per instantiation: OpNode(src, $src0, mask) selects the
// "rrk" record and OpNode(src, all-zeros, mask) selects the "rrkz" record.
//   InstrName - full instruction stem including width suffix; "rrk"/"rrkz"
//               is appended.
//   OpNode    - masked truncate node being matched.
//   DestInfo  - type info of the narrow result vector.
//   SrcInfo   - type info of the wide source vector (also supplies KRCWM).
10050multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
10051                           X86VectorVTInfo DestInfo,
10052                           X86VectorVTInfo SrcInfo> {
10053  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10054                                 DestInfo.RC:$src0,
10055                                 SrcInfo.KRCWM:$mask)),
10056            (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
10057                                                 SrcInfo.KRCWM:$mask,
10058                                                 SrcInfo.RC:$src)>;
10059
10060  def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
10061                                 DestInfo.ImmAllZerosV,
10062                                 SrcInfo.KRCWM:$mask)),
10063            (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
10064                                                  SrcInfo.RC:$src)>;
10065}
10066
// Masked v8i32 -> v8i16 truncation on the 256-bit DW instructions (plain,
// "S" and "US" variants); requires HasVLX.
10067let Predicates = [HasVLX] in {
10068defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
10069defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
10070defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
10071}
10072
// Masked truncation on the 512-bit instructions; requires HasAVX512.
// Three groups, each with plain / "S" / "US" variants:
//   v16i32 -> v16i16 (DW), v16i32 -> v16i8 (DB), v8i64 -> v8i16 (QW).
10073let Predicates = [HasAVX512] in {
10074defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
10075defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
10076defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
10077
10078defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
10079defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
10080defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
10081
10082defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
10083defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
10084defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
10085}
10086
// Maskable register (rr) and memory (rm) records for one element-widening
// conversion at one width. The rr pattern is OpNode applied to the source
// register; the rm pattern is LdFrag applied directly to the address.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   sched     - scheduling class (.Folded for rm).
//   DestInfo  - type info of the result vector.
//   SrcInfo   - type info of the source vector.
//   x86memop  - memory operand type for rm.
//   LdFrag    - load fragment used as the whole rm pattern.
//   OpNode    - node used for the rr pattern.
10087multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
10088              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
10089              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
10090  let ExeDomain = DestInfo.ExeDomain in {
10091  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10092                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
10093                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
10094                  EVEX, Sched<[sched]>;
10095
10096  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10097                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
10098                  (DestInfo.VT (LdFrag addr:$src))>,
10099                EVEX, Sched<[sched.Folded]>;
10100  }
10101}
10102
// Byte -> word extend-move instantiations (Z128, Z256, Z).
//   OpNode    - node used when source and result have the same element count
//               (Z256: v16i8 -> v16i16, Z: v32i8 -> v32i16).
//   InVecNode - node used for Z128, where only part of the v16i8 source
//               register feeds the v8i16 result.
//   ExtTy     - prefix used to pick the default LdFrag, ExtTy#"extloadvi8".
// Z128/Z256 require HasVLX and HasBWI; Z requires HasBWI.
10103multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
10104          SDNode OpNode, SDNode InVecNode, string ExtTy,
10105          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10106  let Predicates = [HasVLX, HasBWI] in {
10107    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
10108                    v16i8x_info, i64mem, LdFrag, InVecNode>,
10109                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
10110
10111    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
10112                    v16i8x_info, i128mem, LdFrag, OpNode>,
10113                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
10114  }
10115  let Predicates = [HasBWI] in {
10116    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
10117                    v32i8x_info, i256mem, LdFrag, OpNode>,
10118                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
10119  }
10120}
10121
// Byte -> dword extend-move instantiations (Z128, Z256, Z).
// The source is always a v16i8 register; Z128 (v4i32) and Z256 (v8i32) use
// InVecNode, while Z (v16i32) uses OpNode because element counts match.
// Memory operands are i32mem / i64mem / i128mem respectively.
// The default LdFrag is ExtTy#"extloadvi8".
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
10122multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
10123          SDNode OpNode, SDNode InVecNode, string ExtTy,
10124          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10125  let Predicates = [HasVLX, HasAVX512] in {
10126    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
10127                   v16i8x_info, i32mem, LdFrag, InVecNode>,
10128                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
10129
10130    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
10131                   v16i8x_info, i64mem, LdFrag, InVecNode>,
10132                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
10133  }
10134  let Predicates = [HasAVX512] in {
10135    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
10136                   v16i8x_info, i128mem, LdFrag, OpNode>,
10137                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
10138  }
10139}
10140
// Byte -> qword extend-move instantiations (Z128, Z256, Z).
// Unlike the sibling multiclasses there is no OpNode parameter: even the
// 512-bit result (v8i64) uses fewer elements than the v16i8 source register,
// so every width uses InVecNode.
// Memory operands are i16mem / i32mem / i64mem respectively.
// The default LdFrag is ExtTy#"extloadvi8".
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
10141multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
10142                              SDNode InVecNode, string ExtTy,
10143                              X86FoldableSchedWrite sched,
10144                              PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
10145  let Predicates = [HasVLX, HasAVX512] in {
10146    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
10147                   v16i8x_info, i16mem, LdFrag, InVecNode>,
10148                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
10149
10150    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
10151                   v16i8x_info, i32mem, LdFrag, InVecNode>,
10152                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
10153  }
10154  let Predicates = [HasAVX512] in {
10155    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
10156                   v16i8x_info, i64mem, LdFrag, InVecNode>,
10157                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
10158  }
10159}
10160
// Word -> dword extend-move instantiations (Z128, Z256, Z).
// Z128 (v8i16 -> v4i32) uses InVecNode; Z256 (v8i16 -> v8i32) and
// Z (v16i16 -> v16i32) use OpNode because element counts match.
// Memory operands are i64mem / i128mem / i256mem respectively.
// The default LdFrag is ExtTy#"extloadvi16".
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
10161multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
10162         SDNode OpNode, SDNode InVecNode, string ExtTy,
10163         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10164  let Predicates = [HasVLX, HasAVX512] in {
10165    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
10166                   v8i16x_info, i64mem, LdFrag, InVecNode>,
10167                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
10168
10169    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
10170                   v8i16x_info, i128mem, LdFrag, OpNode>,
10171                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
10172  }
10173  let Predicates = [HasAVX512] in {
10174    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
10175                   v16i16x_info, i256mem, LdFrag, OpNode>,
10176                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
10177  }
10178}
10179
// Word -> qword extend-move instantiations (Z128, Z256, Z).
// The source is always a v8i16 register; Z128 (v2i64) and Z256 (v4i64) use
// InVecNode, while Z (v8i64) uses OpNode because element counts match.
// Memory operands are i32mem / i64mem / i128mem respectively.
// The default LdFrag is ExtTy#"extloadvi16".
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
10180multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
10181         SDNode OpNode, SDNode InVecNode, string ExtTy,
10182         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
10183  let Predicates = [HasVLX, HasAVX512] in {
10184    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
10185                   v8i16x_info, i32mem, LdFrag, InVecNode>,
10186                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
10187
10188    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
10189                   v8i16x_info, i64mem, LdFrag, InVecNode>,
10190                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
10191  }
10192  let Predicates = [HasAVX512] in {
10193    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
10194                   v8i16x_info, i128mem, LdFrag, OpNode>,
10195                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
10196  }
10197}
10198
// Dword -> qword extend-move instantiations (Z128, Z256, Z).
// Z128 (v4i32 -> v2i64) uses InVecNode; Z256 (v4i32 -> v4i64) and
// Z (v8i32 -> v8i64) use OpNode because element counts match.
// Memory operands are i64mem / i128mem / i256mem respectively.
// The default LdFrag is ExtTy#"extloadvi32".
// Note: unlike the byte/word source multiclasses, these defs do not add
// VEX_WIG.
// Z128/Z256 require HasVLX and HasAVX512; Z requires HasAVX512.
10199multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
10200         SDNode OpNode, SDNode InVecNode, string ExtTy,
10201         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
10202
10203  let Predicates = [HasVLX, HasAVX512] in {
10204    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
10205                   v4i32x_info, i64mem, LdFrag, InVecNode>,
10206                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
10207
10208    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
10209                   v4i32x_info, i128mem, LdFrag, OpNode>,
10210                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
10211  }
10212  let Predicates = [HasAVX512] in {
10213    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
10214                   v8i32x_info, i256mem, LdFrag, OpNode>,
10215                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
10216  }
10217}
10218
// Zero-extending moves (opcodes 0x30-0x35): zext / zext_invec nodes and the
// "z" load-fragment prefix. The BQ form takes no full-width node.
10219defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
10220defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
10221defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq",       zext_invec, "z", WriteShuffle256>;
10222defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
10223defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
10224defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
10225
// Sign-extending moves (opcodes 0x20-0x25): sext / sext_invec nodes and the
// "s" load-fragment prefix.
10226defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
10227defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
10228defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq",       sext_invec, "s", WriteShuffle256>;
10229defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
10230defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
10231defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
10232
10233
10234// Patterns that we also need any extend versions of. aext_vector_inreg
10235// is currently legalized to zext_vector_inreg.
//
// Folds a full-vector load into the "rm" form of the extend-move instruction
// when ExtOp is applied directly to that load.
//   OpcPrefix - instruction name prefix; the width suffix and "rm" are
//               appended here (e.g. OpcPrefix#BWZ256rm).
//   ExtOp     - the extend node applied to the loaded vector.
// Predicates: byte->word patterns additionally require HasBWI; 256-bit
// patterns require HasVLX; the remaining 512-bit patterns require HasAVX512.
10236multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
10237  // 256-bit patterns
10238  let Predicates = [HasVLX, HasBWI] in {
10239    def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10240              (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10241  }
10242
10243  let Predicates = [HasVLX] in {
10244    def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10245              (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10246
10247    def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10248              (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10249  }
10250
10251  // 512-bit patterns
10252  let Predicates = [HasBWI] in {
10253    def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10254              (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10255  }
10256  let Predicates = [HasAVX512] in {
10257    def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10258              (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10259    def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10260              (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10261
10262    def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10263              (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10264
10265    def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10266              (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
10267  }
10268}
10269
// Load-folding patterns for the in-register extend nodes, layered on top of
// AVX512_pmovx_patterns_base.
//   OpcPrefix - instruction name prefix; the width suffix and "rm" are
//               appended here (e.g. OpcPrefix#BWZ128rm).
//   ExtOp     - full-vector extend node, forwarded to the base multiclass.
//   InVecOp   - in-register extend node matched by the patterns below.
// Each group matches the narrow scalar load feeding the extend in the shapes
// it can take in the DAG: an integer scalar_to_vector, a floating-point
// scalar_to_vector of the same width (64-bit loads only), and an X86vzload.
// All shapes in a group select the same "rm" instruction.
10270multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10271                                 SDNode InVecOp> :
10272    AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10273  // 128-bit patterns
10274  let Predicates = [HasVLX, HasBWI] in {
10275  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10276            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10277  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10278            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10279  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10280            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10281  }
10282  let Predicates = [HasVLX] in {
10283  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10284            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10285  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10286            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10287
10288  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10289            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10290
10291  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10292            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10293  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10294            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10295  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10296            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10297
10298  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10299            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10300  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
10301            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10302
10303  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10304            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10305  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10306            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10307  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
10308            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10309  }
  // 256-bit patterns
10310  let Predicates = [HasVLX] in {
10311  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10312            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  // The f64 load must be wrapped in a v2f64 scalar_to_vector, as in the
  // 128-bit and 512-bit groups; a v2i64 wrapper around loadf64 never matches.
10313  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10314            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10315  def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10316            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10317
10318  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10319            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10320  def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
10321            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10322
10323  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10324            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  // Same v2f64 requirement as the BDZ256rm pattern above.
10325  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10326            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10327  def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
10328            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10329  }
10330  // 512-bit patterns
10331  let Predicates = [HasAVX512] in {
10332  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10333            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10334  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10335            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10336  def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
10337            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10338  }
10339}
10340
// Instantiate the load-folding patterns for the sign- and zero-extending
// instruction families.
10341defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10342defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10343
10344// Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10345// ext+trunc aggressively making it impossible to legalize the DAG to this
10346// pattern directly.
// Both patterns widen the v16i16 source to v16i32 with VPMOVZXWDZ (register or
// memory form) and then narrow to v16i8 with VPMOVDBZrr.
10347let Predicates = [HasAVX512, NoBWI] in {
10348def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10349         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10350def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
10351         (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10352}
10353
10354//===----------------------------------------------------------------------===//
10355// GATHER - SCATTER Operations
10356
10357// FIXME: Improve scheduling of gather/scatter instructions.
// Defines the single "rm" gather instruction for one vector type.
//   opc, OpcodeStr - opcode byte and mnemonic stem (_.Suffix is appended).
//   _              - vector type info for the destination.
//   memop          - memory operand class for $src2.
//   MaskRC         - mask register class; defaults to _.KRCWM.
// Constraints: $dst is early-clobber and tied to $src1, and the mask input is
// tied to the $mask_wb output. The def has no selection pattern and is marked
// mayLoad with no other side effects.
10358multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10359                         X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10360  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10361      ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
10362  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10363            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10364            !strconcat(OpcodeStr#_.Suffix,
10365            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10366            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10367            Sched<[WriteLoad, WriteVecMaskedGatherWriteback]>;
10368}
10369
// Gather instantiations for the type family passed in `_` (used below with
// the 64-bit element infos). For each vector width this defines a "D" variant
// (opcode dopc, mnemonic suffix "d") and a "Q" variant (opcode qopc, suffix
// "q"); they differ in the memory operand class. All defs carry VEX_W.
// The 512-bit forms are unconditional here; 256/128-bit forms need HasVLX.
//   SUFF - infix placed in the record name between D/Q and the Z suffix.
10370multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10371                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10372  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
10373                                      vy512xmem>, EVEX_V512, VEX_W;
10374  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
10375                                      vz512mem>, EVEX_V512, VEX_W;
10376let Predicates = [HasVLX] in {
10377  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10378                              vx256xmem>, EVEX_V256, VEX_W;
10379  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
10380                              vy256xmem>, EVEX_V256, VEX_W;
10381  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10382                              vx128xmem>, EVEX_V128, VEX_W;
10383  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10384                              vx128xmem>, EVEX_V128, VEX_W;
10385}
10386}
10387
// Gather instantiations for the type family passed in `_` (used below with
// the 32-bit element infos). Same D/Q naming as avx512_gather_q_pd, but
// without VEX_W.
// The "Q" variants use a narrower data type info than their EVEX vector
// length: info256 with EVEX_V512, info128 with EVEX_V256 and EVEX_V128.
// The Z128 "Q" form also overrides the mask class with VK2WM instead of the
// default _.KRCWM.
// The 512-bit forms are unconditional here; 256/128-bit forms need HasVLX.
10388multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10389                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10390  defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10391                                       EVEX_V512;
10392  defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10393                                       EVEX_V512;
10394let Predicates = [HasVLX] in {
10395  defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
10396                                          vy256xmem>, EVEX_V256;
10397  defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10398                                          vy128xmem>, EVEX_V256;
10399  defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
10400                                          vx128xmem>, EVEX_V128;
10401  defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
10402                                          vx64xmem, VK2WM>, EVEX_V128;
10403}
10404}
10405
10406
// Floating-point gathers: opcodes 0x92 (D variants) / 0x93 (Q variants),
// instantiated for the f64 ("PD") and f32 ("PS") type families.
10407defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10408               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10409
// Integer gathers: opcodes 0x90 (D variants) / 0x91 (Q variants), instantiated
// for the i64 ("Q") and i32 ("D") type families.
10410defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10411                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
10412
// Defines the single "mr" scatter instruction for one vector type.
//   opc, OpcodeStr - opcode byte and mnemonic stem (_.Suffix is appended).
//   _              - vector type info for the stored source register.
//   memop          - memory operand class for $dst.
//   MaskRC         - mask register class; defaults to _.KRCWM.
// The mask input is tied to the $mask_wb output. The def has no selection
// pattern and is marked mayStore with no other side effects.
10413multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10414                          X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
10415
10416let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
10417    hasSideEffects = 0 in
10418
10419  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10420            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10421            !strconcat(OpcodeStr#_.Suffix,
10422            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10423            []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10424            Sched<[WriteStore]>;
10425}
10426
// Scatter instantiations for the type family passed in `_` (used below with
// the 64-bit element infos). Mirrors avx512_gather_q_pd: a "D" variant
// (opcode dopc, suffix "d") and a "Q" variant (opcode qopc, suffix "q") per
// vector width, all with VEX_W.
// The 512-bit forms are unconditional here; 256/128-bit forms need HasVLX.
10427multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10428                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10429  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
10430                                      vy512xmem>, EVEX_V512, VEX_W;
10431  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
10432                                      vz512mem>, EVEX_V512, VEX_W;
10433let Predicates = [HasVLX] in {
10434  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10435                              vx256xmem>, EVEX_V256, VEX_W;
10436  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
10437                              vy256xmem>, EVEX_V256, VEX_W;
10438  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10439                              vx128xmem>, EVEX_V128, VEX_W;
10440  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10441                              vx128xmem>, EVEX_V128, VEX_W;
10442}
10443}
10444
// Scatter instantiations for the type family passed in `_` (used below with
// the 32-bit element infos). Mirrors avx512_gather_d_ps: no VEX_W, and the
// "Q" variants use a narrower data type info than their EVEX vector length
// (info256 with EVEX_V512, info128 with EVEX_V256 and EVEX_V128).
// The Z128 "Q" form overrides the mask class with VK2WM.
// The 512-bit forms are unconditional here; 256/128-bit forms need HasVLX.
10445multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10446                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10447  defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
10448                                       EVEX_V512;
10449  defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
10450                                       EVEX_V512;
10451let Predicates = [HasVLX] in {
10452  defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
10453                                          vy256xmem>, EVEX_V256;
10454  defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10455                                          vy128xmem>, EVEX_V256;
10456  defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
10457                                          vx128xmem>, EVEX_V128;
10458  defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
10459                                          vx64xmem, VK2WM>, EVEX_V128;
10460}
10461}
10462
// Floating-point scatters: opcodes 0xA2 (D variants) / 0xA3 (Q variants),
// instantiated for the f64 ("PD") and f32 ("PS") type families.
10463defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10464               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10465
// Integer scatters: opcodes 0xA0 (D variants) / 0xA1 (Q variants),
// instantiated for the i64 ("Q") and i32 ("D") type families.
10466defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10467                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
10468
10469// prefetch
// Defines the single "m" form of a gather/scatter prefetch instruction.
//   opc, F    - opcode byte and instruction format (the MRMNm form selects
//               the specific prefetch flavour at the instantiation sites).
//   OpcodeStr - full mnemonic.
//   KRC       - mask register class for $mask.
//   memop     - memory operand class for $src.
// The def has no outputs and no selection pattern, requires HasPFI, and is
// marked both mayLoad and mayStore.
10470multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10471                       RegisterClass KRC, X86MemOperand memop> {
10472  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10473  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10474            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10475            EVEX, EVEX_K, Sched<[WriteLoad]>;
10476}
10477
// Prefetch instantiations. All are EVEX_V512. Opcode 0xC6 is used for the
// "D" forms and 0xC7 for the "Q" forms; the "PD" forms add VEX_W. The format
// selects the flavour: MRM1m = gather PF0, MRM2m = gather PF1,
// MRM5m = scatter PF0, MRM6m = scatter PF1.

// Gather prefetch, PF0 flavour.
10478defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10479                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10480
10481defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10482                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10483
10484defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10485                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10486
10487defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10488                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10489
// Gather prefetch, PF1 flavour.
10490defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10491                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10492
10493defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10494                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10495
10496defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10497                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10498
10499defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10500                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10501
// Scatter prefetch, PF0 flavour.
10502defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10503                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10504
10505defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10506                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10507
10508defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10509                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10510
10511defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10512                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10513
// Scatter prefetch, PF1 flavour.
10514defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10515                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10516
10517defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10518                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10519
10520defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10521                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10522
10523defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10524                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10525
// Mask-to-vector conversion for one vector width: the "rr" def takes a mask
// register (Vec.KRC) and produces a vector register (Vec.RC); its pattern is
// a sext of the mask to Vec.VT. The mnemonic is OpcodeStr with Vec.Suffix
// appended.
10526multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
10527def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10528                  !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10529                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10530                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
10531}
10532
// Instantiates cvt_by_vec_width for the 512-, 256- and 128-bit members of
// VTInfo. The 512-bit form requires `prd`; the narrower forms require `prd`
// and HasVLX.
10533multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10534                                 string OpcodeStr, Predicate prd> {
10535let Predicates = [prd] in
10536  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
10537
10538  let Predicates = [prd, HasVLX] in {
10539    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
10540    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
10541  }
10542}
10543
// Mask-to-vector moves. Byte/word forms use opcode 0x28 and require HasBWI;
// dword/qword forms use opcode 0x38 and require HasDQI. The word and qword
// forms add VEX_W. The shared "vpmovm2" stem gets its element letter from the
// type info's Suffix inside cvt_by_vec_width.
10544defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10545defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
10546defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10547defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
10548
// Vector-to-mask conversion for one vector width: the "rr" def takes a vector
// register (_.RC) and produces a mask register (_.KRC). Its pattern matches
// X86pcmpgtm with an all-zeros first operand and the source vector as the
// second operand.
10549multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10550    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10551                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10552                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10553                        EVEX, Sched<[WriteMove]>;
10554}
10555
10556// Use 512bit version to implement 128/256 bit in case NoVLX.
//   ExtendInfo - type info of the wide vector the source is inserted into.
//   _          - type info of the narrow source vector.
//   Name       - instruction name prefix; Name#"Zrr" is the wide instruction.
// The narrow source is placed into an IMPLICIT_DEF wide register with
// INSERT_SUBREG at _.SubRegIdx, the wide instruction is applied, and the
// result is copied to the narrow mask class _.KRC.
10557multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10558                                           X86VectorVTInfo _,
10559                                           string Name> {
10560
10561  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10562            (_.KVT (COPY_TO_REGCLASS
10563                     (!cast<Instruction>(Name#"Zrr")
10564                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10565                                      _.RC:$src, _.SubRegIdx)),
10566                   _.KRC))>;
10567}
10568
// Instantiates the vector-to-mask instruction for all three widths of VTInfo.
//   Z         - 512-bit instruction, requires `prd`.
//   Z256/Z128 - native narrow instructions, require `prd` and HasVLX.
//   Z256_Alt/Z128_Alt - patterns used under `prd` and NoVLX that implement the
//               narrow operation with the 512-bit instruction (NAME#"Zrr").
10569multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10570                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10571  let Predicates = [prd] in
10572    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10573                                            EVEX_V512;
10574
10575  let Predicates = [prd, HasVLX] in {
10576    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10577                                              EVEX_V256;
10578    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10579                                               EVEX_V128;
10580  }
10581  let Predicates = [prd, NoVLX] in {
10582    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10583    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
10584  }
10585}
10586
// Vector-to-mask moves. Byte/word forms use opcode 0x29 and require HasBWI;
// dword/qword forms use opcode 0x39 and require HasDQI. The word and qword
// forms add VEX_W.
10587defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10588                                              avx512vl_i8_info, HasBWI>;
10589defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10590                                              avx512vl_i16_info, HasBWI>, VEX_W;
10591defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10592                                              avx512vl_i32_info, HasDQI>;
10593defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10594                                              avx512vl_i64_info, HasDQI>, VEX_W;
10595
10596// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10597// is available, but BWI is not. We can't handle this in lowering because
10598// a target independent DAG combine likes to combine sext and trunc.
// Each pattern first expands the mask to dword elements with VPMOVM2D and then
// narrows the result with VPMOVDB (to bytes) or VPMOVDW (to words).
10599let Predicates = [HasDQI, NoBWI] in {
10600  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10601            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10602  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10603            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10604}
10605
// Same approach for v8i1 -> v8i16 using the 256-bit instructions, which
// additionally requires HasVLX.
10606let Predicates = [HasDQI, NoBWI, HasVLX] in {
10607  def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10608            (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10609}
10610
10611//===----------------------------------------------------------------------===//
10612// AVX-512 - COMPRESS and EXPAND
10613//
10614
// Instruction definitions for a compress operation at one vector width.
//   rr  - register form built with AVX512_maskable; uses null_frag, so it
//         carries no selection pattern of its own.
//   mr  - unmasked store form; no pattern, marked mayStore without other side
//         effects.
//   mrk - masked store form (EVEX_K); no pattern.
// NOTE(review): the brace-less `let mayStore = 1, hasSideEffects = 0 in`
// applies only to `mr`; `mrk` sets neither flag explicitly here.
10615multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10616                                 string OpcodeStr, X86FoldableSchedWrite sched> {
10617  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10618              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10619              (null_frag)>, AVX5128IBase,
10620              Sched<[sched]>;
10621
10622  let mayStore = 1, hasSideEffects = 0 in
10623  def mr : AVX5128I<opc, MRMDestMem, (outs),
10624              (ins _.MemOp:$dst, _.RC:$src),
10625              OpcodeStr # "\t{$src, $dst|$dst, $src}",
10626              []>, EVEX_CD8<_.EltSize, CD8VT1>,
10627              Sched<[sched.Folded]>;
10628
10629  def mrk : AVX5128I<opc, MRMDestMem, (outs),
10630              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10631              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10632              []>,
10633              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10634              Sched<[sched.Folded]>;
10635}
10636
// Selection patterns for the compress instructions defined by
// compress_by_vec_width_common. Instructions are looked up by name as
// Name # _.ZSuffix # <form>.
//   X86mCompressingStore              -> masked store form (mrk).
//   X86compress with a passthru $src0 -> merge-masked register form (rrk).
//   X86compress with all-zeros        -> zero-masked register form (rrkz).
10637multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10638  def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10639            (!cast<Instruction>(Name#_.ZSuffix#mrk)
10640                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
10641
10642  def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10643            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10644                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10645  def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10646            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10647                            _.KRCWM:$mask, _.RC:$src)>;
10648}
10649
// Instantiates the compress instructions and their selection patterns for the
// 512-, 256- and 128-bit members of VTInfo.
//   Pred - feature predicate for the family; defaults to HasAVX512. The
//          256/128-bit forms additionally require HasVLX.
10650multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10651                                 X86FoldableSchedWrite sched,
10652                                 AVX512VLVectorVTInfo VTInfo,
10653                                 Predicate Pred = HasAVX512> {
10654  let Predicates = [Pred] in
10655  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10656           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10657
10658  let Predicates = [Pred, HasVLX] in {
10659    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10660                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10661    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10662                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10663  }
10664}
10665
10666// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Integer compress uses opcode 0x8B and floating-point compress uses 0x8A; the
// 64-bit element forms add VEX_W. All four are marked NotMemoryFoldable and
// use the default HasAVX512 predicate.
10667defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10668                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10669defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10670                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10671defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10672                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10673defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10674                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
10675
10676// expand
// Defines the register-source (rr) and memory-source (rm) expand instructions
// for a single vector type `_`. Both are built through AVX512_maskable with
// null_frag as the pattern, i.e. no selection pattern is attached here; the
// patterns that select the masked variants are in
// expand_by_vec_width_lowering.
//   opc       - 8-bit opcode.
//   _         - vector type info (register class, memory operand, EltSize).
//   OpcodeStr - assembly mnemonic.
//   sched     - scheduling class; rm uses its Folded/ReadAfterFold members.
10677multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10678                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  // Register source.
10679  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10680              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10681              (null_frag)>, AVX5128IBase,
10682              Sched<[sched]>;
10683
  // Memory source; EVEX_CD8 is parameterised by the element size with CD8VT1.
10684  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10685              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10686              (null_frag)>,
10687            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10688            Sched<[sched.Folded, sched.ReadAfterFold]>;
10689}
10690
// Selection patterns for the masked expand instructions of vector type `_`.
// The target instruction is looked up by name as Name # _.ZSuffix # variant,
// so Name must be the prefix under which expand_by_vec_width was instantiated.
//   X86mExpandingLoad, passthru undef      -> rmkz
//   X86mExpandingLoad, passthru all-zeros  -> rmkz
//   X86mExpandingLoad, passthru register   -> rmk
//   X86expand,         passthru register   -> rrk
//   X86expand,         passthru all-zeros  -> rrkz
10691multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10692
  // Expanding load with an undef passthru: the zero-masking form is used.
10693  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10694            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10695                                        _.KRCWM:$mask, addr:$src)>;
10696
  // Expanding load with an all-zeros passthru.
10697  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10698            (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10699                                        _.KRCWM:$mask, addr:$src)>;
10700
  // Expanding load merging into an existing register value $src0.
10701  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10702                                               (_.VT _.RC:$src0))),
10703            (!cast<Instruction>(Name#_.ZSuffix#rmk)
10704                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10705
  // Register-to-register expand, merge-masked and zero-masked.
10706  def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10707            (!cast<Instruction>(Name#_.ZSuffix#rrk)
10708                            _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10709  def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10710            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10711                            _.KRCWM:$mask, _.RC:$src)>;
10712}
10713
// Instantiates one expand instruction family for each vector width in VTInfo
// (Z = info512, Z256 = info256, Z128 = info128), pairing the instruction
// definitions (expand_by_vec_width) with their selection patterns
// (expand_by_vec_width_lowering).
//   opc       - 8-bit opcode.
//   OpcodeStr - assembly mnemonic.
//   sched     - scheduling class.
//   VTInfo    - supplies the per-width X86VectorVTInfo records.
//   Pred      - base predicate, HasAVX512 unless overridden.
// NAME is forwarded so the lowering patterns can !cast the instruction names.
10714multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10715                               X86FoldableSchedWrite sched,
10716                               AVX512VLVectorVTInfo VTInfo,
10717                               Predicate Pred = HasAVX512> {
  // 512-bit form: guarded by Pred only.
10718  let Predicates = [Pred] in
10719  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10720           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10721
  // 256-bit and 128-bit forms: additionally guarded by HasVLX.
10722  let Predicates = [Pred, HasVLX] in {
10723    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10724                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10725    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10726                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10727  }
10728}
10729
// Expand instantiations. The integer forms (D/Q) use opcode 0x89 and the
// floating-point forms (PS/PD) use opcode 0x88; the 64-bit element variants
// additionally carry VEX_W. All use the default predicate of
// expand_by_elt_width (HasAVX512).
10730// FIXME: Is there a better scheduler class for VPEXPAND?
10731defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10732                                      avx512vl_i32_info>, EVEX;
10733defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10734                                      avx512vl_i64_info>, EVEX, VEX_W;
10735defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10736                                      avx512vl_f32_info>, EVEX;
10737defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10738                                      avx512vl_f64_info>, EVEX, VEX_W;
10739
10740//handle instruction  reg_vec1 = op(reg_vec,imm)
10741//                               op(mem_vec,imm)
10742//                               op(broadcast(eltVt),imm)
10743//all instructions are created with FROUND_CURRENT
// Parameters:
//   opc        - 8-bit opcode.
//   OpcodeStr  - mnemonic stem; _.Suffix is appended to form the mnemonic.
//   OpNode     - node used for the unmasked pattern.
//   MaskOpNode - node used for the masked patterns (AVX512_maskable_split
//                takes the two patterns separately).
//   sched      - scheduling class; memory forms use Folded/ReadAfterFold.
//   _          - vector type info.
// All three forms are marked as using MXCSR and as possibly raising FP
// exceptions.
10744multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10745                                      SDPatternOperator OpNode,
10746                                      SDPatternOperator MaskOpNode,
10747                                      X86FoldableSchedWrite sched,
10748                                      X86VectorVTInfo _> {
10749  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Register source.
10750  defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10751                      (ins _.RC:$src1, i32u8imm:$src2),
10752                      OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10753                      (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10754                      (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10755                      Sched<[sched]>;
  // Full-vector memory source.
10756  defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10757                    (ins _.MemOp:$src1, i32u8imm:$src2),
10758                    OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10759                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10760                            (i32 timm:$src2)),
10761                    (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10762                                (i32 timm:$src2))>,
10763                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Scalar memory source broadcast to the vector (EVEX_B); the asm operand
  // gets _.BroadcastStr appended.
10764  defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10765                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10766                    OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10767                    "${src1}"#_.BroadcastStr#", $src2",
10768                    (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10769                            (i32 timm:$src2)),
10770                    (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10771                                (i32 timm:$src2))>, EVEX_B,
10772                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10773  }
10774}
10775
10776//handle instruction  reg_vec1 = op(reg_vec,imm),{sae}
// Register-only {sae} form of a unary packed FP operation with an immediate.
// The definition is tagged EVEX_B and prints "{sae}" in both asm syntaxes.
// It is marked as using MXCSR; unlike avx512_unary_fp_packed_imm, this
// multiclass does not set mayRaiseFPException.
//   opc       - 8-bit opcode.
//   OpcodeStr - mnemonic stem; _.Suffix is appended to form the mnemonic.
//   OpNode    - node matched by the pattern (the SAE flavour of the op).
//   sched     - scheduling class.
//   _         - vector type info.
10777multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10778                                          SDNode OpNode, X86FoldableSchedWrite sched,
10779                                          X86VectorVTInfo _> {
10780  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10781  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10782                      (ins _.RC:$src1, i32u8imm:$src2),
10783                      OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10784                      "$src1, {sae}, $src2",
10785                      (OpNode (_.VT _.RC:$src1),
10786                              (i32 timm:$src2))>,
10787                      EVEX_B, Sched<[sched]>;
10788}
10789
// Instantiates a unary packed FP-with-immediate operation at all three vector
// widths of `_`.
//   OpcodeStr  - mnemonic stem.
//   _          - supplies info512/info256/info128.
//   opc        - 8-bit opcode.
//   OpNode     - node for the unmasked patterns.
//   MaskOpNode - node for the masked patterns.
//   OpNodeSAE  - node for the {sae} register form.
//   sched      - per-width scheduling classes (ZMM/YMM/XMM members).
//   prd        - base predicate.
// Only the 512-bit instantiation (Z) also receives the {sae} form; Z128 and
// Z256 get the non-SAE forms only and additionally require HasVLX.
10790multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10791            AVX512VLVectorVTInfo _, bits<8> opc, SDPatternOperator OpNode,
10792            SDPatternOperator MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10793            Predicate prd>{
10794  let Predicates = [prd] in {
10795    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10796                                           sched.ZMM, _.info512>,
10797                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10798                                               sched.ZMM, _.info512>, EVEX_V512;
10799  }
10800  let Predicates = [prd, HasVLX] in {
10801    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10802                                           sched.XMM, _.info128>, EVEX_V128;
10803    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10804                                           sched.YMM, _.info256>, EVEX_V256;
10805  }
10806}
10807
10808//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10809//                               op(reg_vec2,mem_vec,imm)
10810//                               op(reg_vec2,broadcast(eltVt),imm)
10811//all instructions are created with FROUND_CURRENT
// Parameters:
//   opc       - 8-bit opcode.
//   OpcodeStr - assembly mnemonic (used as-is, no suffix appended).
//   OpNode    - node matched by all three patterns.
//   sched     - scheduling class; memory forms use Folded/ReadAfterFold.
//   _         - vector type info.
// All three forms are marked as using MXCSR and as possibly raising FP
// exceptions.
10812multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10813                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10814  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Second source in a register.
10815  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10816                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10817                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10818                      (OpNode (_.VT _.RC:$src1),
10819                              (_.VT _.RC:$src2),
10820                              (i32 timm:$src3))>,
10821                      Sched<[sched]>;
  // Second source loaded as a full vector from memory.
10822  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10823                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10824                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10825                    (OpNode (_.VT _.RC:$src1),
10826                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
10827                            (i32 timm:$src3))>,
10828                    Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Second source is a scalar in memory broadcast to the vector (EVEX_B).
10829  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10830                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10831                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10832                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10833                    (OpNode (_.VT _.RC:$src1),
10834                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10835                            (i32 timm:$src3))>, EVEX_B,
10836                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10837  }
10838}
10839
10840//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10841//                               op(reg_vec2,mem_vec,imm)
// The destination and source vector types may differ: the sources use
// SrcInfo (register class, VT, load fragment, memory operand) while the
// result and the masking use DestInfo. The immediate is matched as i8.
//   opc       - 8-bit opcode.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - node matched by both patterns.
//   sched     - scheduling class; rmi uses Folded/ReadAfterFold.
//   DestInfo  - vector type info of the result.
//   SrcInfo   - vector type info of the two sources.
10842multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10843                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10844                              X86VectorVTInfo SrcInfo>{
10845  let ExeDomain = DestInfo.ExeDomain in {
  // Second source in a register.
10846  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10847                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10848                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10849                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10850                               (SrcInfo.VT SrcInfo.RC:$src2),
10851                               (i8 timm:$src3)))>,
10852                  Sched<[sched]>;
  // Second source loaded from memory.
10853  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10854                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10855                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10856                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10857                             (SrcInfo.VT (bitconvert
10858                                                (SrcInfo.LdFrag addr:$src2))),
10859                             (i8 timm:$src3)))>,
10860                Sched<[sched.Folded, sched.ReadAfterFold]>;
10861  }
10862}
10863
10864//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10865//                               op(reg_vec2,mem_vec,imm)
10866//                               op(reg_vec2,broadcast(eltVt),imm)
// Inherits the rri/rmi forms from avx512_3Op_rm_imm8 (with the same type info
// `_` used for both destination and sources) and adds the broadcast-memory
// form rmbi.
//   opc       - 8-bit opcode.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - node matched by the patterns.
//   sched     - scheduling class.
//   _         - vector type info.
10867multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10868                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10869  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10870
  // Second source is a scalar in memory broadcast to the vector (EVEX_B).
10871  let ExeDomain = _.ExeDomain in
10872  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10873                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10874                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10875                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
10876                    (OpNode (_.VT _.RC:$src1),
10877                            (_.VT (_.BroadcastLdFrag addr:$src2)),
10878                            (i8 timm:$src3))>, EVEX_B,
10879                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10880}
10881
10882//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm)
10883//                                      op(reg_vec2,mem_scalar,imm)
// Built on AVX512_maskable_scalar. Both forms are marked as using MXCSR and
// as possibly raising FP exceptions.
//   opc       - 8-bit opcode.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - node matched by both patterns.
//   sched     - scheduling class; rmi uses Folded/ReadAfterFold.
//   _         - type info (scalar-in-vector register class and fragments).
10884multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10885                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10886  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  // Second source in a register.
10887  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10888                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10889                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10890                      (OpNode (_.VT _.RC:$src1),
10891                              (_.VT _.RC:$src2),
10892                              (i32 timm:$src3))>,
10893                      Sched<[sched]>;
  // Second source is a scalar in memory, matched via _.ScalarIntMemFrags.
10894  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10895                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10896                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10897                    (OpNode (_.VT _.RC:$src1),
10898                            (_.ScalarIntMemFrags addr:$src2),
10899                            (i32 timm:$src3))>,
10900                    Sched<[sched.Folded, sched.ReadAfterFold]>;
10901  }
10902}
10903
10904//handle instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only {sae} form of a binary packed FP operation with an immediate.
// The definition is tagged EVEX_B and prints "{sae}" in both asm syntaxes.
// It is marked as using MXCSR; unlike avx512_fp_packed_imm, this multiclass
// does not set mayRaiseFPException.
//   opc       - 8-bit opcode.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - node matched by the pattern (the SAE flavour of the op).
//   sched     - scheduling class.
//   _         - vector type info.
10905multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10906                                    SDNode OpNode, X86FoldableSchedWrite sched,
10907                                    X86VectorVTInfo _> {
10908  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10909  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10910                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10911                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10912                      "$src1, $src2, {sae}, $src3",
10913                      (OpNode (_.VT _.RC:$src1),
10914                              (_.VT _.RC:$src2),
10915                              (i32 timm:$src3))>,
10916                      EVEX_B, Sched<[sched]>;
10917}
10918
10919//handle scalar instruction  reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only {sae} form of a binary scalar FP operation with an immediate,
// built on AVX512_maskable_scalar and tagged EVEX_B. It is marked as using
// MXCSR; unlike avx512_fp_scalar_imm, this multiclass does not set
// mayRaiseFPException.
// Note the defm below is spelled with an explicit NAME# prefix, whereas the
// sibling multiclasses in this section write the bare suffix.
//   opc       - 8-bit opcode.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - node matched by the pattern (the SAE flavour of the op).
//   sched     - scheduling class.
//   _         - type info.
10920multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10921                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10922  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10923  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10924                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10925                      OpcodeStr, "$src3, {sae}, $src2, $src1",
10926                      "$src1, $src2, {sae}, $src3",
10927                      (OpNode (_.VT _.RC:$src1),
10928                              (_.VT _.RC:$src2),
10929                              (i32 timm:$src3))>,
10930                      EVEX_B, Sched<[sched]>;
10931}
10932
// Instantiates a binary packed FP-with-immediate operation at all three
// vector widths of `_`.
//   OpcodeStr - assembly mnemonic.
//   _         - supplies info512/info256/info128.
//   opc       - 8-bit opcode.
//   OpNode    - node for the regular forms.
//   OpNodeSAE - node for the {sae} register form.
//   sched     - per-width scheduling classes (ZMM/YMM/XMM members).
//   prd       - base predicate.
// Only the 512-bit instantiation (Z) also receives the {sae} form; Z128 and
// Z256 get the regular forms only and additionally require HasVLX.
10933multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10934            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10935            SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10936  let Predicates = [prd] in {
10937    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10938                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10939                                  EVEX_V512;
10940
10941  }
10942  let Predicates = [prd, HasVLX] in {
10943    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10944                                  EVEX_V128;
10945    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10946                                  EVEX_V256;
10947  }
10948}
10949
// Instantiates avx512_3Op_rm_imm8 (register and memory forms, no broadcast
// form) at all three vector widths. Every instantiation is tagged
// AVX512AIi8Base and EVEX_4V in addition to its vector-length class.
//   opc      - 8-bit opcode.
//   OpNode   - node matched by the patterns.
//   OpStr    - assembly mnemonic.
//   sched    - per-width scheduling classes (ZMM/YMM/XMM members).
//   DestInfo - per-width type info of the result.
//   SrcInfo  - per-width type info of the sources.
//   Pred     - base predicate, HasBWI unless overridden; the 128/256-bit
//              forms additionally require HasVLX.
10950multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10951                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10952                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10953  let Predicates = [Pred] in {
10954    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10955                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10956  }
10957  let Predicates = [Pred, HasVLX] in {
10958    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10959                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10960    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10961                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
10962  }
10963}
10964
// Instantiates avx512_3Op_imm8 (register, memory and broadcast forms) at all
// three vector widths of `_`. Only the vector-length class is attached here;
// any further encoding classes are left to the instantiation site.
//   OpcodeStr - assembly mnemonic.
//   _         - supplies info512/info256/info128.
//   opc       - 8-bit opcode.
//   OpNode    - node matched by the patterns.
//   sched     - per-width scheduling classes (ZMM/YMM/XMM members).
//   Pred      - base predicate, HasAVX512 unless overridden; the 128/256-bit
//               forms additionally require HasVLX.
10965multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10966                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10967                                  Predicate Pred = HasAVX512> {
10968  let Predicates = [Pred] in {
10969    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10970                                EVEX_V512;
10971  }
10972  let Predicates = [Pred, HasVLX] in {
10973    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10974                                EVEX_V128;
10975    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
10976                                EVEX_V256;
10977  }
10978}
10979
// Instantiates a scalar FP-with-immediate operation as a single "Z" family
// that combines the regular forms (avx512_fp_scalar_imm) with the {sae}
// register form (avx512_fp_sae_scalar_imm). Both use the XMM member of
// `sched`.
//   OpcodeStr - assembly mnemonic.
//   _         - type info for the scalar operation.
//   opc       - 8-bit opcode.
//   OpNode    - node for the regular forms.
//   OpNodeSAE - node for the {sae} form.
//   sched     - scheduling classes; only sched.XMM is used.
//   prd       - predicate guarding the instantiation.
10980multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10981                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10982                  SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10983  let Predicates = [prd] in {
10984     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10985              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
10986  }
10987}
10988
// Instantiates a unary packed FP-with-immediate operation for the f16 (PH),
// f32 (PS) and f64 (PD) element types.
//   OpcodeStr  - mnemonic stem.
//   opcPs      - opcode used for the PH and PS variants.
//   opcPd      - opcode used for the PD variant.
//   OpNode     - node for the unmasked patterns.
//   MaskOpNode - node for the masked patterns.
//   OpNodeSAE  - node for the {sae} form.
//   sched      - per-width scheduling classes.
//   prd        - predicate for the PS and PD variants.
// The PH variant ignores `prd` and is always guarded by HasFP16; it also uses
// AVX512PSIi8Base with TA where PS/PD use AVX512AIi8Base. PD additionally
// carries VEX_W.
10989multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10990                    bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
10991                    SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
10992                    X86SchedWriteWidths sched, Predicate prd>{
10993  defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
10994                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
10995                            AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
10996  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10997                            opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10998                            AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
10999  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
11000                            opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
11001                            AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
11002}
11003
// Unary packed FP-with-immediate instantiations (PH/PS/PD each).
// VREDUCE and VGETMANT share one opcode for PS and PD (0x56 and 0x26), while
// VRNDSCALE uses 0x08 for PS and 0x09 for PD. VRNDSCALE is also the only one
// whose unmasked node (X86any_VRndScale) differs from its masked node
// (X86VRndScale). The PS/PD variants of VREDUCE require HasDQI; the others
// require HasAVX512.
11004defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
11005                              X86VReduce, X86VReduce, X86VReduceSAE,
11006                              SchedWriteFRnd, HasDQI>;
11007defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
11008                              X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
11009                              SchedWriteFRnd, HasAVX512>;
11010defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
11011                              X86VGetMant, X86VGetMant, X86VGetMantSAE,
11012                              SchedWriteFRnd, HasAVX512>;
11013
// Packed VRANGE: opcode 0x50 for both element types, guarded by HasDQI.
// The f64 variant carries VEX_W and EVEX_CD8<64, ...>; the f32 variant uses
// EVEX_CD8<32, ...>.
11014defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
11015                                                0x50, X86VRange, X86VRangeSAE,
11016                                                SchedWriteFAdd, HasDQI>,
11017      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11018defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
11019                                                0x50, X86VRange, X86VRangeSAE,
11020                                                SchedWriteFAdd, HasDQI>,
11021      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11022
// Scalar VRANGE: opcode 0x51 for both element types, guarded by HasDQI and
// tagged VEX_LIG. The f64 variant carries VEX_W.
11023defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
11024      f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11025      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11026defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
11027      0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
11028      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11029
// Scalar VREDUCE: opcode 0x57 for all three element types, tagged VEX_LIG.
// SD/SS are guarded by HasDQI and use AVX512AIi8Base; SH is guarded by
// HasFP16 and uses AVX512PSIi8Base with TA. SD additionally carries VEX_W.
11030defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
11031      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11032      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11033defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
11034      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
11035      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11036defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
11037      0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
11038      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11039
// Scalar VGETMANT: opcode 0x27 for all three element types, tagged VEX_LIG.
// SD/SS are guarded by HasAVX512 and use AVX512AIi8Base; SH is guarded by
// HasFP16 and uses AVX512PSIi8Base with TA. SD additionally carries VEX_W.
11040defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
11041      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11042      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11043defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
11044      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
11045      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11046defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
11047      0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
11048      AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
11049
// Register, memory and broadcast forms of a 128-bit-lane shuffle with an
// 8-bit immediate, for one vector type `_`.
// Every pattern matches X86Shuf128 producing CastInfo.VT and bitconverts the
// result to _.VT, so the instruction's own element type (`_`) may differ from
// the type the shuffle node is expressed in (CastInfo).
//   opc          - 8-bit opcode.
//   OpcodeStr    - assembly mnemonic.
//   sched        - scheduling class; memory forms use Folded/ReadAfterFold.
//   _            - type info of the instruction (operands, masking, broadcast).
//   CastInfo     - type info the X86Shuf128 node is matched in.
//   EVEX2VEXOvrd - name stem passed to EVEX2VEXOverride, with "rr"/"rm"
//                  appended for the rri/rmi forms. The rmbi form gets no
//                  override.
11050multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
11051                                          X86FoldableSchedWrite sched,
11052                                          X86VectorVTInfo _,
11053                                          X86VectorVTInfo CastInfo,
11054                                          string EVEX2VEXOvrd> {
11055  let ExeDomain = _.ExeDomain in {
  // Second source in a register.
11056  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11057                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11058                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11059                  (_.VT (bitconvert
11060                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
11061                                                  (i8 timm:$src3)))))>,
11062                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Second source loaded from memory using CastInfo's load fragment.
11063  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11064                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11065                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11066                (_.VT
11067                 (bitconvert
11068                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
11069                                           (CastInfo.LdFrag addr:$src2),
11070                                           (i8 timm:$src3)))))>,
11071                Sched<[sched.Folded, sched.ReadAfterFold]>,
11072                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  // Second source is a broadcast load; note the broadcast fragment comes from
  // `_`, not from CastInfo.
11073  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11074                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11075                    OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11076                    "$src1, ${src2}"#_.BroadcastStr#", $src3",
11077                    (_.VT
11078                     (bitconvert
11079                      (CastInfo.VT
11080                       (X86Shuf128 _.RC:$src1,
11081                                   (_.BroadcastLdFrag addr:$src2),
11082                                   (i8 timm:$src3)))))>, EVEX_B,
11083                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11084  }
11085}
11086
// Instantiates avx512_shuff_packed_128_common for the 512-bit (Z) and 256-bit
// (Z256) widths; no 128-bit variant is defined here.
//   OpcodeStr    - assembly mnemonic.
//   sched        - scheduling class (the same one is used for both widths).
//   _            - per-width type info of the instruction.
//   CastInfo     - per-width type info the shuffle node is matched in.
//   opc          - 8-bit opcode.
//   EVEX2VEXOvrd - override name stem, applied to the 256-bit form only; the
//                  512-bit form passes an empty string.
11087multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
11088                                   AVX512VLVectorVTInfo _,
11089                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
11090                                   string EVEX2VEXOvrd>{
11091  let Predicates = [HasAVX512] in
11092  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11093                                          _.info512, CastInfo.info512, "">, EVEX_V512;
11094
11095  let Predicates = [HasAVX512, HasVLX] in
11096  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11097                                             _.info256, CastInfo.info256,
11098                                             EVEX2VEXOvrd>, EVEX_V256;
11099}
11100
// 128-bit-lane shuffle instantiations. The FP forms use opcode 0x23 with the
// "VPERM2F128" override stem, the integer forms use opcode 0x43 with
// "VPERM2I128". The 32-bit element forms are matched through the
// corresponding 64-bit element type info (CastInfo); the 64-bit element forms
// additionally carry VEX_W. All four use WriteFShuffle256.
11101defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
11102      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11103defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
11104      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11105defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11106      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11107defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11108      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11109
// Register, memory and broadcast forms of an element-wise align (X86VAlign)
// with an 8-bit immediate, for one vector type `_`.
//   opc       - 8-bit opcode.
//   OpcodeStr - assembly mnemonic.
//   sched     - scheduling class; memory forms use Folded/ReadAfterFold.
//   _         - vector type info.
// The rri and rmi forms carry an EVEX2VEXOverride naming VPALIGNRrri and
// VPALIGNRrmi respectively; the rmbi form has no override.
11110multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11111                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11112  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
11113  // instantiation of this class.
11114  let ExeDomain = _.ExeDomain in {
  // Second source in a register.
11115  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11116                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11117                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11118                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
11119                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  // Second source loaded as a full vector from memory.
11120  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11121                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11122                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11123                (_.VT (X86VAlign _.RC:$src1,
11124                                 (bitconvert (_.LdFrag addr:$src2)),
11125                                 (i8 timm:$src3)))>,
11126                Sched<[sched.Folded, sched.ReadAfterFold]>,
11127                EVEX2VEXOverride<"VPALIGNRrmi">;
11128
  // Second source is a scalar in memory broadcast to the vector (EVEX_B).
11129  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11130                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11131                   OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
11132                   "$src1, ${src2}"#_.BroadcastStr#", $src3",
11133                   (X86VAlign _.RC:$src1,
11134                              (_.VT (_.BroadcastLdFrag addr:$src2)),
11135                              (i8 timm:$src3))>, EVEX_B,
11136                   Sched<[sched.Folded, sched.ReadAfterFold]>;
11137  }
11138}
11139
// Instantiates avx512_valign at all three vector widths of `_`, always with
// opcode 0x03 and tagged AVX512AIi8Base and EVEX_4V.
//   OpcodeStr - assembly mnemonic.
//   sched     - per-width scheduling classes (ZMM/YMM/XMM members).
//   _         - supplies info512/info256/info128.
// The 128/256-bit forms additionally require HasVLX. For the 256-bit form the
// EVEX2VEXOverride set inside avx512_valign is reset to unset (?).
11140multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11141                                AVX512VLVectorVTInfo _> {
11142  let Predicates = [HasAVX512] in {
11143    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11144                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
11145  }
11146  let Predicates = [HasAVX512, HasVLX] in {
11147    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11148                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
11149    // We can't really override the 256-bit version so change it back to unset.
11150    let EVEX2VEXOverride = ? in
11151    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11152                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
11153  }
11154}
11155
// Element-wise align instantiations for 32-bit (VALIGND) and 64-bit (VALIGNQ)
// integer elements; VALIGNQ additionally carries VEX_W.
11156defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11157                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11158defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11159                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11160                                   VEX_W;
11161
// Byte-wise align: opcode 0x0F on i8 vectors for both destination and
// sources, using the default predicate of avx512_common_3Op_rm_imm8 (HasBWI).
11162defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11163                                         SchedWriteShuffle, avx512vl_i8_info,
11164                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11165
11166// Fragments to help convert valignq into masked valignd. Or valignq/valignd
11167// into vpalignr.
// Each transform rescales the immediate by a constant factor and returns it
// as an i8 immediate. Judging by the names, the factor is the ratio between
// the source and destination element sizes (qword->dword = 2, qword->byte = 8,
// dword->byte = 4).
// NOTE(review): the product is passed to getI8Imm without a range check here;
// presumably callers only use these where the scaled value fits - confirm.
11168def ValignqImm32XForm : SDNodeXForm<timm, [{
11169  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11170}]>;
11171def ValignqImm8XForm : SDNodeXForm<timm, [{
11172  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11173}]>;
11174def ValigndImm8XForm : SDNodeXForm<timm, [{
11175  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
11176}]>;
11177
// Patterns that select a masked instruction of element type `To` for a
// masked select wrapped around an align node expressed in element type
// `From`. Each pattern matches
//   vselect_mask(mask, bitconvert(OpNode(src1, src2, imm) : From.VT), passthru)
// and emits the `To` instruction with the immediate rewritten by ImmXForm.
//   OpcodeStr - despite the name, this is the *instruction record* name stem:
//               it is concatenated with "rrik"/"rrikz"/"rmik"/"rmikz" and
//               resolved with !cast<Instruction>.
//   OpNode    - the align node matched in the From type.
//   From      - type info of the node being matched.
//   To        - type info of the instruction being emitted (mask, passthru).
//   ImmXForm  - SDNodeXForm applied to the immediate.
11178multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11179                                        X86VectorVTInfo From, X86VectorVTInfo To,
11180                                        SDNodeXForm ImmXForm> {
  // Register source, merge-masking into $src0.
11181  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11182                                 (bitconvert
11183                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11184                                                   timm:$src3))),
11185                                 To.RC:$src0)),
11186            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11187                                                  To.RC:$src1, To.RC:$src2,
11188                                                  (ImmXForm timm:$src3))>;
11189
  // Register source, zero-masking.
11190  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11191                                 (bitconvert
11192                                  (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11193                                                   timm:$src3))),
11194                                 To.ImmAllZerosV)),
11195            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11196                                                   To.RC:$src1, To.RC:$src2,
11197                                                   (ImmXForm timm:$src3))>;
11198
  // Memory source (loaded with From's load fragment), merge-masking.
11199  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11200                                 (bitconvert
11201                                  (From.VT (OpNode From.RC:$src1,
11202                                                   (From.LdFrag addr:$src2),
11203                                           timm:$src3))),
11204                                 To.RC:$src0)),
11205            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11206                                                  To.RC:$src1, addr:$src2,
11207                                                  (ImmXForm timm:$src3))>;
11208
  // Memory source, zero-masking.
11209  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11210                                 (bitconvert
11211                                  (From.VT (OpNode From.RC:$src1,
11212                                                   (From.LdFrag addr:$src2),
11213                                           timm:$src3))),
11214                                 To.ImmAllZerosV)),
11215            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11216                                                   To.RC:$src1, addr:$src2,
11217                                                   (ImmXForm timm:$src3))>;
11218}
11219
// Broadcast-load extension of avx512_vpalign_mask_lowering.
// Inherits every pattern of the base multiclass and adds three patterns whose
// second source is a To.BroadcastLdFrag load wrapped in a bitconvert:
// unmasked ("rmbi"), merge-masked ("rmbik") and zero-masked ("rmbikz").
// Each output rewrites the immediate with ImmXForm.
11220multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11221                                           X86VectorVTInfo From,
11222                                           X86VectorVTInfo To,
11223                                           SDNodeXForm ImmXForm> :
11224      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked: the node is typed From.VT but the broadcast load is typed To.VT.
11225  def : Pat<(From.VT (OpNode From.RC:$src1,
11226                             (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
11227                             timm:$src3)),
11228            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11229                                                  (ImmXForm timm:$src3))>;
11230
  // Merge-masked: lanes not selected by $mask come from $src0.
11231  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11232                                 (bitconvert
11233                                  (From.VT (OpNode From.RC:$src1,
11234                                           (bitconvert
11235                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11236                                           timm:$src3))),
11237                                 To.RC:$src0)),
11238            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11239                                                   To.RC:$src1, addr:$src2,
11240                                                   (ImmXForm timm:$src3))>;
11241
  // Zero-masked: lanes not selected by $mask come from To.ImmAllZerosV.
11242  def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
11243                                 (bitconvert
11244                                  (From.VT (OpNode From.RC:$src1,
11245                                           (bitconvert
11246                                            (To.VT (To.BroadcastLdFrag addr:$src2))),
11247                                           timm:$src3))),
11248                                 To.ImmAllZerosV)),
11249            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11250                                                    To.RC:$src1, addr:$src2,
11251                                                    (ImmXForm timm:$src3))>;
11252}
11253
// Instantiations of the masked VALIGN lowering patterns. In each defm the
// first vector info is the type the X86VAlign node is written in (From) and
// the second is the type of the selected instruction and of the mask (To).
11254let Predicates = [HasAVX512] in {
11255  // For 512-bit we lower to the widest element type we can. So we only need
11256  // to handle converting valignq to valignd.
11257  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11258                                         v16i32_info, ValignqImm32XForm>;
11259}
11260
11261let Predicates = [HasVLX] in {
11262  // For 128-bit we lower to the widest element type we can. So we only need
11263  // to handle converting valignq to valignd.
11264  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11265                                         v4i32x_info, ValignqImm32XForm>;
11266  // For 256-bit we lower to the widest element type we can. So we only need
11267  // to handle converting valignq to valignd.
11268  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11269                                         v8i32x_info, ValignqImm32XForm>;
11270}
11271
11272let Predicates = [HasVLX, HasBWI] in {
11273  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  // NOTE(review): only the 128-bit (VPALIGNRZ128) patterns are instantiated
  // below, and only through the non-broadcast multiclass.
11274  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11275                                      v16i8x_info, ValignqImm8XForm>;
11276  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11277                                      v16i8x_info, ValigndImm8XForm>;
11278}
11279
// VDBPSADBW (opcode 0x42), instantiated with both the i16 and the i8 vector
// info sets and marked NotEVEX2VEXConvertible.
// NOTE(review): presumably i16 is the result type and i8 the source type;
// confirm against avx512_common_3Op_rm_imm8.
11280defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11281                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11282                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
11283
// Unary vector operation with a register form (rr) and a full-vector memory
// form (rm). Both are defined through AVX512_maskable in the execution domain
// taken from the vector info `_`. The rm form matches a _.LdFrag load (through
// a bitconvert) and is scheduled with sched.Folded.
11284multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11285                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11286  let ExeDomain = _.ExeDomain in {
11287  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11288                    (ins _.RC:$src1), OpcodeStr,
11289                    "$src1", "$src1",
11290                    (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
11291                    Sched<[sched]>;
11292
11293  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11294                  (ins _.MemOp:$src1), OpcodeStr,
11295                  "$src1", "$src1",
11296                  (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
11297            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11298            Sched<[sched.Folded]>;
11299  }
11300}
11301
// avx512_unary_rm plus a broadcast-memory form (rmb): the source is a scalar
// memory operand matched with _.BroadcastLdFrag, the assembly operand has
// _.BroadcastStr appended, and the encoding sets EVEX_B.
11302multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11303                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11304           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11305  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11306                  (ins _.ScalarMemOp:$src1), OpcodeStr,
11307                  "${src1}"#_.BroadcastStr,
11308                  "${src1}"#_.BroadcastStr,
11309                  (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
11310             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11311             Sched<[sched.Folded]>;
11312}
11313
// Instantiates avx512_unary_rm at all three vector lengths: Z (info512) under
// [prd], and Z256 / Z128 (info256 / info128) under [prd, HasVLX].
11314multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11315                              X86SchedWriteWidths sched,
11316                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11317  let Predicates = [prd] in
11318    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11319                             EVEX_V512;
11320
11321  let Predicates = [prd, HasVLX] in {
11322    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11323                              EVEX_V256;
11324    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11325                              EVEX_V128;
11326  }
11327}
11328
// Same as avx512_unary_rm_vl but built on avx512_unary_rmb, so every vector
// length also gets the broadcast-memory (rmb) form.
11329multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11330                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11331                               Predicate prd> {
11332  let Predicates = [prd] in
11333    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11334                              EVEX_V512;
11335
11336  let Predicates = [prd, HasVLX] in {
11337    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11338                                 EVEX_V256;
11339    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
11340                                 EVEX_V128;
11341  }
11342}
11343
// Dword (D) and qword (Q) element variants of a unary operation, both with
// broadcast forms. The mnemonic gets a "d" / "q" suffix; only Q adds VEX_W.
11344multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11345                                 SDNode OpNode, X86SchedWriteWidths sched,
11346                                 Predicate prd> {
11347  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11348                               avx512vl_i64_info, prd>, VEX_W;
11349  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11350                               avx512vl_i32_info, prd>;
11351}
11352
// Byte (B) and word (W) element variants of a unary operation. These use
// avx512_unary_rm_vl, so no broadcast form is defined; both are VEX_WIG.
11353multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11354                                 SDNode OpNode, X86SchedWriteWidths sched,
11355                                 Predicate prd> {
11356  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11357                              avx512vl_i16_info, prd>, VEX_WIG;
11358  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11359                              avx512vl_i8_info, prd>, VEX_WIG;
11360}
11361
// All four integer element sizes of a unary operation: D/Q are gated on
// HasAVX512 and B/W on HasBWI. Opcodes are passed in b, w, d, q order.
11362multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11363                                  bits<8> opc_d, bits<8> opc_q,
11364                                  string OpcodeStr, SDNode OpNode,
11365                                  X86SchedWriteWidths sched> {
11366  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11367                                    HasAVX512>,
11368              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11369                                    HasBWI>;
11370}
11371
// VPABS{B,W,D,Q}: opcodes 0x1C (b), 0x1D (w), 0x1E (d), 0x1F (q), matching the
// generic `abs` node.
11372defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
11373                                    SchedWriteVecALU>;
11374
11375// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Only the i64 element types are handled here: the source is inserted into an
// undefined v8i64, VPABSQZrr is applied, and the original-width subregister is
// extracted from the result.
11376let Predicates = [HasAVX512, NoVLX] in {
11377  def : Pat<(v4i64 (abs VR256X:$src)),
11378            (EXTRACT_SUBREG
11379                (VPABSQZrr
11380                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11381             sub_ymm)>;
11382  def : Pat<(v2i64 (abs VR128X:$src)),
11383            (EXTRACT_SUBREG
11384                (VPABSQZrr
11385                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11386             sub_xmm)>;
11387}
11388
11389// Use the 512-bit version to implement the 128/256-bit operation.
// Active under [prd, NoVLX]. For each of info256 and info128 the source is
// inserted into an undefined info512 vector, the instruction InstrStr#"Zrr" is
// applied, and the original-width subregister is extracted from the result.
11390multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11391                                 AVX512VLVectorVTInfo _, Predicate prd> {
11392  let Predicates = [prd, NoVLX] in {
11393    def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
11394              (EXTRACT_SUBREG
11395                (!cast<Instruction>(InstrStr # "Zrr")
11396                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11397                                 _.info256.RC:$src1,
11398                                 _.info256.SubRegIdx)),
11399              _.info256.SubRegIdx)>;
11400
11401    def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
11402              (EXTRACT_SUBREG
11403                (!cast<Instruction>(InstrStr # "Zrr")
11404                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11405                                 _.info128.RC:$src1,
11406                                 _.info128.SubRegIdx)),
11407              _.info128.SubRegIdx)>;
11408  }
11409}
11410
// VPLZCNT{D,Q}: both element sizes are given opcode 0x44; the Q form differs
// by the VEX_W added in avx512_unary_rm_vl_dq. Gated on HasCDI.
11411defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11412                                        SchedWriteVecIMul, HasCDI>;
11413
11414// FIXME: Is there a better scheduler class for VPCONFLICT?
// VPCONFLICT{D,Q}: both element sizes are given opcode 0xC4. Gated on HasCDI.
11415defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11416                                        SchedWriteVecALU, HasCDI>;
11417
11418// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11419defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11420defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11421
11422//===---------------------------------------------------------------------===//
11423// Counts number of ones - VPOPCNTD and VPOPCNTQ
11424//===---------------------------------------------------------------------===//
11425
11426// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
// VPOPCNT{D,Q}: both element sizes are given opcode 0x55, matching `ctpop`.
// Gated on HasVPOPCNTDQ.
11427defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11428                                     SchedWriteVecALU, HasVPOPCNTDQ>;
11429
// VPOPCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11430defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11431defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11432
11433//===---------------------------------------------------------------------===//
11434// Replicate Single FP - MOVSHDUP and MOVSLDUP
11435//===---------------------------------------------------------------------===//
11436
// Thin wrapper that instantiates avx512_unary_rm_vl for the f32 vector types
// under HasAVX512 and applies the XS prefix class.
11437multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11438                            X86SchedWriteWidths sched> {
11439  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11440                                      avx512vl_f32_info, HasAVX512>, XS;
11441}
11442
// VMOVSHDUP (opcode 0x16) and VMOVSLDUP (opcode 0x12), matching the
// X86Movshdup / X86Movsldup nodes.
11443defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11444                                  SchedWriteFShuffle>;
11445defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11446                                  SchedWriteFShuffle>;
11447
11448//===----------------------------------------------------------------------===//
11449// AVX-512 - MOVDDUP
11450//===----------------------------------------------------------------------===//
11451
// 128-bit MOVDDUP, expressed as a broadcast rather than with X86Movddup: the
// rr form matches X86VBroadcast of the source register and the rm form matches
// a _.BroadcastLdFrag load from a scalar memory operand.
11452multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
11453                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11454  let ExeDomain = _.ExeDomain in {
11455  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11456                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
11457                   (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
11458                   Sched<[sched]>;
11459  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11460                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11461                 (_.VT (_.BroadcastLdFrag addr:$src))>,
11462                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11463                 Sched<[sched.Folded]>;
11464  }
11465}
11466
// MOVDDUP at all three vector lengths. Z and Z256 use avx512_unary_rm with
// X86Movddup; Z128 uses avx512_movddup_128 (broadcast form). Z256 and Z128
// require [HasAVX512, HasVLX].
// NOTE(review): Z is defined without a Predicates list in this multiclass, so
// it takes whatever predicate is in effect where it is instantiated; confirm.
11467multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
11468                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11469  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11470                           VTInfo.info512>, EVEX_V512;
11471
11472  let Predicates = [HasAVX512, HasVLX] in {
11473    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11474                                VTInfo.info256>, EVEX_V256;
11475    defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
11476                                   VTInfo.info128>, EVEX_V128;
11477  }
11478}
11479
// Thin wrapper that instantiates avx512_movddup_common for the f64 vector
// types and applies XD and VEX_W.
11480multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
11481                          X86SchedWriteWidths sched> {
11482  defm NAME:      avx512_movddup_common<opc, OpcodeStr, sched,
11483                                        avx512vl_f64_info>, XD, VEX_W;
11484}
11485
// VMOVDDUP, opcode 0x12.
11486defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
11487
// Select VMOVDDUPZ128 for a v2f64 broadcast of a scalar f64: the FR64X value
// is first copied into VR128X. Covers the unmasked, merge-masked (rrk) and
// zero-masked (rrkz) forms.
11488let Predicates = [HasVLX] in {
11489def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11490          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11491
11492def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11493                        (v2f64 VR128X:$src0)),
11494          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11495                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11496def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11497                        immAllZerosV),
11498          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11499}
11500
11501//===----------------------------------------------------------------------===//
11502// AVX-512 - Unpack Instructions
11503//===----------------------------------------------------------------------===//
11504
// FP unpack high/low. Uses is overridden to the empty register list and
// mayRaiseFPException to 0 for these two instantiations.
// NOTE(review): presumably this overrides defaults that avx512_fp_binop_p
// sets for FP arithmetic; confirm against that multiclass.
// NOTE(review): VUNPCKH passes two extra trailing arguments (0, 1) that
// VUNPCKL leaves at their defaults; confirm this asymmetry is intended.
11505let Uses = []<Register>, mayRaiseFPException = 0 in {
11506defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
11507                                 SchedWriteFShuffleSizes, 0, 1>;
11508defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
11509                                 SchedWriteFShuffleSizes>;
11510}
11511
// Integer unpack low/high. Byte and word element forms are gated on HasBWI.
11512defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11513                                       SchedWriteShuffle, HasBWI>;
11514defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11515                                       SchedWriteShuffle, HasBWI>;
11516defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11517                                       SchedWriteShuffle, HasBWI>;
11518defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11519                                       SchedWriteShuffle, HasBWI>;
11520
// Dword and qword element forms are gated on HasAVX512.
11521defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11522                                       SchedWriteShuffle, HasAVX512>;
11523defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11524                                       SchedWriteShuffle, HasAVX512>;
11525defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11526                                        SchedWriteShuffle, HasAVX512>;
11527defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11528                                        SchedWriteShuffle, HasAVX512>;
11529
11530//===----------------------------------------------------------------------===//
11531// AVX-512 - Extract & Insert Integer Instructions
11532//===----------------------------------------------------------------------===//
11533
// Memory-destination form (mr) shared by the byte and word element extracts.
// Matches a store of the OpNode result truncated to _.EltVT; the element
// index is the u8imm operand $src2.
11534multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11535                                                            X86VectorVTInfo _> {
11536  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11537              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11538              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11539              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), timm:$src2))),
11540                       addr:$dst)]>,
11541              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11542}
11543
// Byte element extract (HasBWI). The rr form (opcode 0x14, MRMDestReg) writes
// the X86pextrb result to a GR32orGR64 register; the memory form comes from
// avx512_extract_elt_bw_m with the same opcode.
11544multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11545  let Predicates = [HasBWI] in {
11546    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11547                  (ins _.RC:$src1, u8imm:$src2),
11548                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11549                  [(set GR32orGR64:$dst,
11550                        (X86pextrb (_.VT _.RC:$src1), timm:$src2))]>,
11551                  EVEX, TAPD, Sched<[WriteVecExtract]>;
11552
11553    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
11554  }
11555}
11556
// Word element extract (HasBWI). Two register encodings are defined:
//   rr     - opcode 0xC5, MRMSrcReg, PD; carries the X86pextrw pattern.
//   rr_REV - opcode 0x15, MRMDestReg, TAPD; no pattern, isCodeGenOnly with
//            ForceDisassemble, linked to rr through FoldGenData.
// The memory form comes from avx512_extract_elt_bw_m with opcode 0x15.
11557multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11558  let Predicates = [HasBWI] in {
11559    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11560                  (ins _.RC:$src1, u8imm:$src2),
11561                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11562                  [(set GR32orGR64:$dst,
11563                        (X86pextrw (_.VT _.RC:$src1), timm:$src2))]>,
11564                  EVEX, PD, Sched<[WriteVecExtract]>;
11565
11566    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11567    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11568                   (ins _.RC:$src1, u8imm:$src2),
11569                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11570                   EVEX, TAPD, FoldGenData<NAME#rr>,
11571                   Sched<[WriteVecExtract]>;
11572
11573    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
11574  }
11575}
11576
// Dword/qword element extract (HasDQI), opcode 0x16. The rr form writes the
// generic `extractelt` result to a GRC register; the mr form stores it to a
// scalar memory operand.
// Note: the index is matched with `imm` here, whereas the byte/word extracts
// in this file match X86pextrb / X86pextrw with `timm`.
11577multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11578                                                            RegisterClass GRC> {
11579  let Predicates = [HasDQI] in {
11580    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11581                  (ins _.RC:$src1, u8imm:$src2),
11582                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11583                  [(set GRC:$dst,
11584                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11585                  EVEX, TAPD, Sched<[WriteVecExtract]>;
11586
11587    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11588                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11589                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11590                [(store (extractelt (_.VT _.RC:$src1),
11591                                    imm:$src2),addr:$dst)]>,
11592                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
11593                Sched<[WriteVecExtractSt]>;
11594  }
11595}
11596
// EVEX element-extract instructions on 128-bit vectors. B/W are VEX_WIG; D
// and Q share opcode 0x16 (see avx512_extract_elt_dq) and Q adds VEX_W.
11597defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
11598defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
11599defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11600defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
11601
// Memory-source form (rm) shared by all element inserts. The inserted value
// is loaded with LdFrag from a scalar memory operand; `immoperator` selects
// how the index immediate is matched (callers in this file pass timm or imm).
11602multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11603                                            X86VectorVTInfo _, PatFrag LdFrag,
11604                                            SDPatternOperator immoperator> {
11605  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11606      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
11607      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11608      [(set _.RC:$dst,
11609          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), immoperator:$src3)))]>,
11610      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11611}
11612
// Byte/word element insert (HasBWI). The rr form takes the inserted value
// from a GR32orGR64 register; the rm form comes from avx512_insert_elt_m
// with the index matched as timm.
11613multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11614                                            X86VectorVTInfo _, PatFrag LdFrag> {
11615  let Predicates = [HasBWI] in {
11616    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11617        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11618        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11619        [(set _.RC:$dst,
11620            (OpNode _.RC:$src1, GR32orGR64:$src2, timm:$src3))]>, EVEX_4V,
11621        Sched<[WriteVecInsert]>;
11622
11623    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag, timm>;
11624  }
11625}
11626
// Dword/qword element insert (HasDQI), matching the generic `insertelt` node.
// The rr form takes the inserted value from a GRC register; the rm form comes
// from avx512_insert_elt_m using _.ScalarLdFrag.
// Note: the index is matched with `imm` here, whereas the byte/word inserts
// in this file use `timm`.
11627multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11628                                         X86VectorVTInfo _, RegisterClass GRC> {
11629  let Predicates = [HasDQI] in {
11630    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11631        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11632        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11633        [(set _.RC:$dst,
11634            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11635        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11636
11637    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11638                                    _.ScalarLdFrag, imm>, TAPD;
11639  }
11640}
11641
// EVEX element-insert instructions on 128-bit vectors. B/W load the inserted
// value with extloadi8 / extloadi16 and are VEX_WIG; D and Q share opcode
// 0x22 and Q adds VEX_W.
11642defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11643                                     extloadi8>, TAPD, VEX_WIG;
11644defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11645                                     extloadi16>, PD, VEX_WIG;
11646defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11647defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11648
11649//===----------------------------------------------------------------------===//
11650// VSHUFPS - VSHUFPD Operations
11651//===----------------------------------------------------------------------===//
11652
// VSHUFPS/VSHUFPD wrapper: instantiates avx512_common_3Op_imm8 with opcode
// 0xC6 and X86Shufp for the given FP vector info set. The EVEX_CD8 element
// size is taken from the 512-bit info.
11653multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
11654  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11655                                    SchedWriteFShuffle>,
11656                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11657                                    AVX512AIi8Base, EVEX_4V;
11658}
11659
// f32 form uses the PS prefix class; f64 form uses PD plus VEX_W.
11660defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
11661defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;
11662
11663//===----------------------------------------------------------------------===//
11664// AVX-512 - Byte shift Left/Right
11665//===----------------------------------------------------------------------===//
11666
// Byte-shift by immediate with a register source (ri) and a memory source
// (mi). Both take a u8imm shift count matched as (i8 timm). The instruction
// formats for the two forms are supplied by the caller (MRMr / MRMm).
11667multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11668                               Format MRMm, string OpcodeStr,
11669                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11670  def ri : AVX512<opc, MRMr,
11671             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11672             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11673             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11674             Sched<[sched]>;
11675  def mi : AVX512<opc, MRMm,
11676           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11677           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11678           [(set _.RC:$dst,(_.VT (OpNode
11679                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11680                                 (i8 timm:$src2))))]>,
11681           Sched<[sched.Folded, sched.ReadAfterFold]>;
11682}
11683
// Instantiates avx512_shift_packed on the i8 vector types at all three
// lengths: Z under [prd], Z256 / Z128 under [prd, HasVLX].
11684multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11685                                   Format MRMm, string OpcodeStr,
11686                                   X86SchedWriteWidths sched, Predicate prd>{
11687  let Predicates = [prd] in
11688    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11689                                 sched.ZMM, v64i8_info>, EVEX_V512;
11690  let Predicates = [prd, HasVLX] in {
11691    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11692                                    sched.YMM, v32i8x_info>, EVEX_V256;
11693    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11694                                    sched.XMM, v16i8x_info>, EVEX_V128;
11695  }
11696}
// VPSLLDQ and VPSRLDQ share opcode 0x73; they are distinguished by the
// instruction format (MRM7r/MRM7m versus MRM3r/MRM3m). Both require HasBWI.
11697defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11698                                       SchedWriteShuffle, HasBWI>,
11699                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11700defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11701                                       SchedWriteShuffle, HasBWI>,
11702                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11703
// Two-source operation whose result type (_dst) differs from its source type
// (_src). Defines a register-register form (rr, marked commutable) and a
// register-memory form (rm) that folds a _src.LdFrag load as the second source.
11704multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11705                                string OpcodeStr, X86FoldableSchedWrite sched,
11706                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11707  let isCommutable = 1 in
11708  def rr : AVX512BI<opc, MRMSrcReg,
11709             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11710             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11711             [(set _dst.RC:$dst,(_dst.VT
11712                                (OpNode (_src.VT _src.RC:$src1),
11713                                        (_src.VT _src.RC:$src2))))]>,
11714             Sched<[sched]>;
11715  def rm : AVX512BI<opc, MRMSrcMem,
11716           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11717           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11718           [(set _dst.RC:$dst,(_dst.VT
11719                              (OpNode (_src.VT _src.RC:$src1),
11720                              (_src.VT (bitconvert
11721                                        (_src.LdFrag addr:$src2))))))]>,
11722           Sched<[sched.Folded, sched.ReadAfterFold]>;
11723}
11724
// Instantiates avx512_psadbw_packed at all three vector lengths with i64
// result vectors and i8 source vectors: Z under [prd], Z256 / Z128 under
// [prd, HasVLX].
11725multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11726                                    string OpcodeStr, X86SchedWriteWidths sched,
11727                                    Predicate prd> {
11728  let Predicates = [prd] in
11729    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11730                                  v8i64_info, v64i8_info>, EVEX_V512;
11731  let Predicates = [prd, HasVLX] in {
11732    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11733                                     v4i64x_info, v32i8x_info>, EVEX_V256;
11734    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11735                                     v2i64x_info, v16i8x_info>, EVEX_V128;
11736  }
11737}
11738
// VPSADBW, opcode 0xf6, matching X86psadbw; requires HasBWI.
11739defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11740                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11741
11742// Transforms to swizzle an immediate to enable better matching when
11743// memory operand isn't in the right place.
11744def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11745  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
  // Bits selected by mask 0xa5 (0, 2, 5, 7) keep their position; this is
  // consistent with an index of (op0 << 2) | (op1 << 1) | op2.
11746  uint8_t Imm = N->getZExtValue();
11747  // Swap bits 1/4 and 3/6.
11748  uint8_t NewImm = Imm & 0xa5;
11749  if (Imm & 0x02) NewImm |= 0x10;
11750  if (Imm & 0x10) NewImm |= 0x02;
11751  if (Imm & 0x08) NewImm |= 0x40;
11752  if (Imm & 0x40) NewImm |= 0x08;
11753  return getI8Imm(NewImm, SDLoc(N));
11754}]>;
11755def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11756  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11757  uint8_t Imm = N->getZExtValue();
11758  // Swap bits 2/4 and 3/5.
11759  uint8_t NewImm = Imm & 0xc3;
11760  if (Imm & 0x04) NewImm |= 0x10;
11761  if (Imm & 0x10) NewImm |= 0x04;
11762  if (Imm & 0x08) NewImm |= 0x20;
11763  if (Imm & 0x20) NewImm |= 0x08;
11764  return getI8Imm(NewImm, SDLoc(N));
11765}]>;
11766def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11767  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11768  uint8_t Imm = N->getZExtValue();
11769  // Swap bits 1/2 and 5/6.
11770  uint8_t NewImm = Imm & 0x99;
11771  if (Imm & 0x02) NewImm |= 0x04;
11772  if (Imm & 0x04) NewImm |= 0x02;
11773  if (Imm & 0x20) NewImm |= 0x40;
11774  if (Imm & 0x40) NewImm |= 0x20;
11775  return getI8Imm(NewImm, SDLoc(N));
11776}]>;
11777def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11778  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
11779  uint8_t Imm = N->getZExtValue();
11780  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11781  uint8_t NewImm = Imm & 0x81;
11782  if (Imm & 0x02) NewImm |= 0x04;
11783  if (Imm & 0x04) NewImm |= 0x10;
11784  if (Imm & 0x08) NewImm |= 0x40;
11785  if (Imm & 0x10) NewImm |= 0x02;
11786  if (Imm & 0x20) NewImm |= 0x08;
11787  if (Imm & 0x40) NewImm |= 0x20;
11788  return getI8Imm(NewImm, SDLoc(N));
11789}]>;
11790def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11791  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11792  uint8_t Imm = N->getZExtValue();
11793  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11794  uint8_t NewImm = Imm & 0x81;
11795  if (Imm & 0x02) NewImm |= 0x10;
11796  if (Imm & 0x04) NewImm |= 0x02;
11797  if (Imm & 0x08) NewImm |= 0x20;
11798  if (Imm & 0x10) NewImm |= 0x04;
11799  if (Imm & 0x20) NewImm |= 0x40;
11800  if (Imm & 0x40) NewImm |= 0x08;
11801  return getI8Imm(NewImm, SDLoc(N));
11802}]>;
11803
// Defines the VPTERNLOG instruction forms for one vector type `_`:
//   rri  - all operands in registers,
//   rmi  - $src3 is a full-vector memory operand,
//   rmbi - $src3 is a scalar memory operand used as a broadcast (EVEX_B),
// each built through AVX512_maskable_3src with $src1 tied to $dst. `Name` is
// the instruction name prefix used to look up the masked variants
// (Name # _.ZSuffix # "rrik" / "rmik" / "rmikz" / "rmbik" / "rmbikz").
//
// The trailing anonymous patterns match masked nodes whose operands are not
// already in instruction order. They reorder the operands and rewrite the
// truth-table immediate with a VPTERNLOGxyz_imm8 transform so that the emitted
// instruction computes the same function. The immediate is indexed by
// (DEST << 2) | (SRC1 << 1) | SRC2 (see the VPTERNLOGD/Q ISA description).
11804multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11805                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
11806                          string Name>{
11807  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11808  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11809                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11810                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11811                      (OpNode (_.VT _.RC:$src1),
11812                              (_.VT _.RC:$src2),
11813                              (_.VT _.RC:$src3),
11814                              (i8 timm:$src4)), 1, 1>,
11815                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11816  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11817                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11818                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11819                    (OpNode (_.VT _.RC:$src1),
11820                            (_.VT _.RC:$src2),
11821                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
11822                            (i8 timm:$src4)), 1, 0>,
11823                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11824                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11825  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11826                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11827                    OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11828                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
11829                    (OpNode (_.VT _.RC:$src1),
11830                            (_.VT _.RC:$src2),
11831                            (_.VT (_.BroadcastLdFrag addr:$src3)),
11832                            (i8 timm:$src4)), 1, 0>, EVEX_B,
11833                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11834                    Sched<[sched.Folded, sched.ReadAfterFold]>;
11835  }// Constraints = "$src1 = $dst"
11836
11837  // Additional patterns for matching passthru operand in other positions.
11838  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11839                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11840                   _.RC:$src1)),
11841            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11842             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11843  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11844                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11845                   _.RC:$src1)),
11846            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11847             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11848
11849  // Additional patterns for matching zero masking with loads in other
11850  // positions.
11851  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11852                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11853                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11854                   _.ImmAllZerosV)),
11855            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11856             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11857  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11858                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11859                    _.RC:$src2, (i8 timm:$src4)),
11860                   _.ImmAllZerosV)),
11861            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11862             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11863
11864  // Additional patterns for matching masked loads with different
11865  // operand orders.
11866  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11867                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11868                    _.RC:$src2, (i8 timm:$src4)),
11869                   _.RC:$src1)),
11870            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11871             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11872  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11873                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11874                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11875                   _.RC:$src1)),
11876            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11877             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11878  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11879                   (OpNode _.RC:$src2, _.RC:$src1,
11880                    (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11881                   _.RC:$src1)),
11882            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11883             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // Cyclic cases. The transform name gives the instruction operand order in
  // terms of the node's operand positions. The node (src2, src3, src1) is
  // emitted as (src1, src2, src3), i.e. node operands 3, 1, 2, so it needs
  // VPTERNLOG312_imm8; the node (src3, src1, src2) is emitted as node operands
  // 2, 3, 1 and needs VPTERNLOG231_imm8. Example for node (src2, src3, src1):
  // immediate 0xF0 ("first node operand", i.e. $src2) must become 0xCC, which
  // VPTERNLOG312_imm8 produces; VPTERNLOG231_imm8 would give 0xAA ($src3).
11884  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11885                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11886                    _.RC:$src1, (i8 timm:$src4)),
11887                   _.RC:$src1)),
11888            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11889             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11890  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11891                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11892                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11893                   _.RC:$src1)),
11894            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11895             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11896
11897  // Additional patterns for matching zero masking with broadcasts in other
11898  // positions.
11899  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11900                   (OpNode (_.BroadcastLdFrag addr:$src3),
11901                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11902                   _.ImmAllZerosV)),
11903            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11904             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11905             (VPTERNLOG321_imm8 timm:$src4))>;
11906  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11907                   (OpNode _.RC:$src1,
11908                    (_.BroadcastLdFrag addr:$src3),
11909                    _.RC:$src2, (i8 timm:$src4)),
11910                   _.ImmAllZerosV)),
11911            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11912             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11913             (VPTERNLOG132_imm8 timm:$src4))>;
11914
11915  // Additional patterns for matching masked broadcasts with different
11916  // operand orders.
11917  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11918                   (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11919                    _.RC:$src2, (i8 timm:$src4)),
11920                   _.RC:$src1)),
11921            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11922             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11923  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11924                   (OpNode (_.BroadcastLdFrag addr:$src3),
11925                    _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11926                   _.RC:$src1)),
11927            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11928             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11929  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11930                   (OpNode _.RC:$src2, _.RC:$src1,
11931                    (_.BroadcastLdFrag addr:$src3),
11932                    (i8 timm:$src4)), _.RC:$src1)),
11933            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11934             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
  // Cyclic cases: same reasoning as for the masked-load patterns. The node
  // (src2, src3, src1) needs VPTERNLOG312_imm8 and the node (src3, src1, src2)
  // needs VPTERNLOG231_imm8.
11935  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11936                   (OpNode _.RC:$src2,
11937                    (_.BroadcastLdFrag addr:$src3),
11938                    _.RC:$src1, (i8 timm:$src4)),
11939                   _.RC:$src1)),
11940            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11941             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11942  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11943                   (OpNode (_.BroadcastLdFrag addr:$src3),
11944                    _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11945                   _.RC:$src1)),
11946            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11947             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11948}
11949
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) for all three
// vector widths of `_`: the 512-bit form under HasAVX512, and the 128/256-bit
// forms under HasAVX512 + HasVLX. Each width uses the matching member of
// `sched` and passes NAME so the inner patterns can find the masked variants.
11950multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11951                                 AVX512VLVectorVTInfo _> {
11952  let Predicates = [HasAVX512] in
11953    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11954                               _.info512, NAME>, EVEX_V512;
11955  let Predicates = [HasAVX512, HasVLX] in {
11956    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11957                               _.info128, NAME>, EVEX_V128;
11958    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11959                               _.info256, NAME>, EVEX_V256;
11960  }
11961}
11962
// VPTERNLOGD uses the i32 element vector infos; VPTERNLOGQ uses the i64 infos
// and additionally sets VEX_W. Both use the SchedWriteVecALU scheduling group.
11963defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11964                                        avx512vl_i32_info>;
11965defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11966                                        avx512vl_i64_info>, VEX_W;
11967
11968// Patterns to implement vnot using vpternlog instead of creating all ones
11969// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11970// so that the result is only dependent on src0. But we use the same source
11971// for all operands to prevent a false dependency.
11972// TODO: We should maybe have a more generalized algorithm for folding to
11973// vpternlog.
// 512-bit case: every integer element type (i8/i16/i32/i64) selects the same
// VPTERNLOGQZrri instruction with $src used for all three operands.
11974let Predicates = [HasAVX512] in {
11975  def : Pat<(v64i8 (vnot VR512:$src)),
11976            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11977  def : Pat<(v32i16 (vnot VR512:$src)),
11978            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11979  def : Pat<(v16i32 (vnot VR512:$src)),
11980            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11981  def : Pat<(v8i64 (vnot VR512:$src)),
11982            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11983}
11984
// vnot for 128- and 256-bit vectors when VLX is not available. Each pattern
// inserts the source into an undefined (IMPLICIT_DEF) v8i64 register via
// INSERT_SUBREG, runs the 512-bit VPTERNLOGQZrri with immediate 15 on it, and
// extracts the original-width subregister (sub_xmm / sub_ymm) from the result.
// The inserted source is repeated for all three operands.
11985let Predicates = [HasAVX512, NoVLX] in {
11986  def : Pat<(v16i8 (vnot VR128X:$src)),
11987            (EXTRACT_SUBREG
11988             (VPTERNLOGQZrri
11989              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11990              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11991              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11992              (i8 15)), sub_xmm)>;
11993  def : Pat<(v8i16 (vnot VR128X:$src)),
11994            (EXTRACT_SUBREG
11995             (VPTERNLOGQZrri
11996              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11997              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11998              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11999              (i8 15)), sub_xmm)>;
12000  def : Pat<(v4i32 (vnot VR128X:$src)),
12001            (EXTRACT_SUBREG
12002             (VPTERNLOGQZrri
12003              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12004              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12005              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12006              (i8 15)), sub_xmm)>;
12007  def : Pat<(v2i64 (vnot VR128X:$src)),
12008            (EXTRACT_SUBREG
12009             (VPTERNLOGQZrri
12010              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12011              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12012              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12013              (i8 15)), sub_xmm)>;
12014
  // 256-bit sources: same scheme through sub_ymm.
12015  def : Pat<(v32i8 (vnot VR256X:$src)),
12016            (EXTRACT_SUBREG
12017             (VPTERNLOGQZrri
12018              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12019              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12020              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12021              (i8 15)), sub_ymm)>;
12022  def : Pat<(v16i16 (vnot VR256X:$src)),
12023            (EXTRACT_SUBREG
12024             (VPTERNLOGQZrri
12025              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12026              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12027              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12028              (i8 15)), sub_ymm)>;
12029  def : Pat<(v8i32 (vnot VR256X:$src)),
12030            (EXTRACT_SUBREG
12031             (VPTERNLOGQZrri
12032              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12033              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12034              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12035              (i8 15)), sub_ymm)>;
12036  def : Pat<(v4i64 (vnot VR256X:$src)),
12037            (EXTRACT_SUBREG
12038             (VPTERNLOGQZrri
12039              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12040              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12041              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12042              (i8 15)), sub_ymm)>;
12043}
12044
// vnot for 128- and 256-bit vectors when VLX is available: select the native
// width VPTERNLOGQZ128rri / VPTERNLOGQZ256rri directly, with $src used for all
// three operands and immediate 15, for every integer element type.
12045let Predicates = [HasVLX] in {
12046  def : Pat<(v16i8 (vnot VR128X:$src)),
12047            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12048  def : Pat<(v8i16 (vnot VR128X:$src)),
12049            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12050  def : Pat<(v4i32 (vnot VR128X:$src)),
12051            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12052  def : Pat<(v2i64 (vnot VR128X:$src)),
12053            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12054
12055  def : Pat<(v32i8 (vnot VR256X:$src)),
12056            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12057  def : Pat<(v16i16 (vnot VR256X:$src)),
12058            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12059  def : Pat<(v8i32 (vnot VR256X:$src)),
12060            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12061  def : Pat<(v4i64 (vnot VR256X:$src)),
12062            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12063}
12064
12065//===----------------------------------------------------------------------===//
12066// AVX-512 - FixupImm
12067//===----------------------------------------------------------------------===//
12068
// Packed VFIXUPIMM forms for vector type `_`, selected from X86VFixupimm:
//   rri  - register $src3,
//   rmi  - full-vector memory $src3 (loaded with TblVT.LdFrag),
//   rmbi - scalar memory $src3 used as a broadcast (EVEX_B).
// $src1 is tied to $dst. All forms read MXCSR and are marked
// mayRaiseFPException. `TblVT` supplies the value type of the third operand
// (presumably the integer fix-up table -- confirm against the ISA reference).
12069multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
12070                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12071                                  X86VectorVTInfo TblVT>{
12072  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
12073      Uses = [MXCSR], mayRaiseFPException = 1 in {
12074    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12075                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12076                         OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12077                        (X86VFixupimm (_.VT _.RC:$src1),
12078                                      (_.VT _.RC:$src2),
12079                                      (TblVT.VT _.RC:$src3),
12080                                      (i32 timm:$src4))>, Sched<[sched]>;
12081    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12082                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
12083                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12084                      (X86VFixupimm (_.VT _.RC:$src1),
12085                                    (_.VT _.RC:$src2),
12086                                    (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
12087                                    (i32 timm:$src4))>,
12088                      Sched<[sched.Folded, sched.ReadAfterFold]>;
12089    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12090                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12091                    OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
12092                    "$src2, ${src3}"#_.BroadcastStr#", $src4",
12093                      (X86VFixupimm (_.VT _.RC:$src1),
12094                                    (_.VT _.RC:$src2),
12095                                    (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
12096                                    (i32 timm:$src4))>,
12097                    EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
12098  } // Constraints = "$src1 = $dst"
12099}
12100
// Extends avx512_fixupimm_packed with the register-only `rrib` form, selected
// from X86VFixupimmSAE and printed with "{sae}" (EVEX_B set). Unlike the
// inherited forms, this one is declared with Uses = [MXCSR] only and does not
// set mayRaiseFPException.
12101multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
12102                                      X86FoldableSchedWrite sched,
12103                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>
12104  : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
12105let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
12106  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12107                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12108                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12109                      "$src2, $src3, {sae}, $src4",
12110                      (X86VFixupimmSAE (_.VT _.RC:$src1),
12111                                       (_.VT _.RC:$src2),
12112                                       (TblVT.VT _.RC:$src3),
12113                                       (i32 timm:$src4))>,
12114                      EVEX_B, Sched<[sched]>;
12115  }
12116}
12117
// Scalar VFIXUPIMM forms for type `_` (all under HasAVX512, $src1 tied to
// $dst):
//   rri  - register $src3, selected from X86VFixupimms, marked SIMD_EXC,
//   rrib - register $src3 with "{sae}" (EVEX_B), selected from
//          X86VFixupimmSAEs; reads MXCSR,
//   rmi  - scalar memory $src3 (scalar_to_vector of a scalar load), marked
//          SIMD_EXC.
// `_src3VT` supplies the value type and register class of the third operand.
12118multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
12119                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
12120                                  X86VectorVTInfo _src3VT> {
12121  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
12122      ExeDomain = _.ExeDomain in {
12123    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12124                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12125                      OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12126                      (X86VFixupimms (_.VT _.RC:$src1),
12127                                     (_.VT _.RC:$src2),
12128                                     (_src3VT.VT _src3VT.RC:$src3),
12129                                     (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
    // rrib is a register-register form (MRMSrcReg, no memory operand), so it
    // uses the plain scheduling class like rri and like the packed rrib form,
    // not the load-folded class.
12130    let Uses = [MXCSR] in
12131    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
12132                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
12133                      OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
12134                      "$src2, $src3, {sae}, $src4",
12135                      (X86VFixupimmSAEs (_.VT _.RC:$src1),
12136                                        (_.VT _.RC:$src2),
12137                                        (_src3VT.VT _src3VT.RC:$src3),
12138                                        (i32 timm:$src4))>,
12139                      EVEX_B, Sched<[sched]>;
12140    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
12141                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
12142                     OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
12143                     (X86VFixupimms (_.VT _.RC:$src1),
12144                                    (_.VT _.RC:$src2),
12145                                    (_src3VT.VT (scalar_to_vector
12146                                              (_src3VT.ScalarLdFrag addr:$src3))),
12147                                    (i32 timm:$src4))>,
12148                     Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
12149  }
12150}
12151
// Instantiates the packed VFIXUPIMM forms (opcode 0x54, mnemonic prefix
// "vfixupimm") for all three widths. The 512-bit definition uses the _sae
// variant under HasAVX512; the 128/256-bit definitions use the plain variant
// under HasAVX512 + HasVLX. `_Vec` gives the data vector infos and `_Tbl` the
// infos passed as TblVT.
12152multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
12153                                      AVX512VLVectorVTInfo _Vec,
12154                                      AVX512VLVectorVTInfo _Tbl> {
12155  let Predicates = [HasAVX512] in
12156    defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
12157                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
12158                                EVEX_4V, EVEX_V512;
12159  let Predicates = [HasAVX512, HasVLX] in {
12160    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
12161                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
12162                            EVEX_4V, EVEX_V128;
12163    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
12164                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
12165                            EVEX_4V, EVEX_V256;
12166  }
12167}
12168
// Scalar VFIXUPIMM (opcode 0x55): the SS form pairs f32x_info with a v4i32
// third operand, the SD form pairs f64x_info with a v2i64 third operand and
// sets VEX_W. Both use the scalar member of SchedWriteFAdd.
12169defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12170                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12171                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
12172defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
12173                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12174                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
// Packed VFIXUPIMM: PS pairs the f32 vector infos with i32 infos, PD pairs the
// f64 vector infos with i64 infos and sets VEX_W.
12175defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12176                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12177defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12178                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
12179
12180// Patterns used to select SSE scalar fp arithmetic instructions from
12181// either:
12182//
12183// (1) a scalar fp operation followed by a blend
12184//
12185// The effect is that the backend no longer emits unnecessary vector
12186// insert instructions immediately after SSE scalar fp instructions
12187// like addss or mulss.
12188//
12189// For example, given the following code:
12190//   __m128 foo(__m128 A, __m128 B) {
12191//     A[0] += B[0];
12192//     return A;
12193//   }
12194//
12195// Previously we generated:
12196//   addss %xmm0, %xmm1
12197//   movss %xmm1, %xmm0
12198//
12199// We now generate:
12200//   addss %xmm1, %xmm0
12201//
12202// (2) a vector packed single/double fp operation followed by a vector insert
12203//
12204// The effect is that the backend converts the packed fp instruction
12205// followed by a vector insert into a single SSE scalar fp instruction.
12206//
12207// For example, given the following code:
12208//   __m128 foo(__m128 A, __m128 B) {
12209//     __m128 C = A + B;
12210//     return (__m128) {C[0], A[1], A[2], A[3]};
12211//   }
12212//
12213// Previously we generated:
12214//   addps %xmm0, %xmm1
12215//   movss %xmm1, %xmm0
12216//
12217// We now generate:
12218//   addss %xmm1, %xmm0
12219
12220// TODO: Some canonicalization in lowering would simplify the number of
12221// patterns we have to try to match.
//
// Selection patterns that fold "extract element 0, scalar op, re-insert via
// MoveNode" into the "V" # OpcPrefix # "Z*_Int" instructions.
//   Op        - operator matched in the unmasked patterns.
//   MaskedOp  - operator matched inside X86selects_mask in the masked patterns.
//   OpcPrefix - instruction name stem, e.g. "ADDSS".
//   MoveNode  - node that inserts the scalar result (X86Movss/Movsd/Movsh).
//   _         - 128-bit vector type info of the result.
//   ZeroFP    - zero constant leaf used to recognise zero masking.
//   prd       - predicate guarding the patterns. Defaults to HasAVX512; pass
//               the feature that defines the selected instructions when it is
//               stricter (HasFP16 for the *SH instructions).
12222multiclass AVX512_scalar_math_fp_patterns<SDPatternOperator Op, SDNode MaskedOp,
12223                                          string OpcPrefix, SDNode MoveNode,
12224                                          X86VectorVTInfo _, PatLeaf ZeroFP,
                                          Predicate prd = HasAVX512> {
12225  let Predicates = [prd] in {
12226    // extracted scalar math op with insert via movss
12227    def : Pat<(MoveNode
12228               (_.VT VR128X:$dst),
12229               (_.VT (scalar_to_vector
12230                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12231                          _.FRC:$src)))),
12232              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12233               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12234    def : Pat<(MoveNode
12235               (_.VT VR128X:$dst),
12236               (_.VT (scalar_to_vector
12237                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12238                          (_.ScalarLdFrag addr:$src))))),
12239              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12240
12241    // extracted masked scalar math op with insert via movss
    // ($src0 is the passthru value used where the mask bit is clear)
12242    def : Pat<(MoveNode (_.VT VR128X:$src1),
12243               (scalar_to_vector
12244                (X86selects_mask VK1WM:$mask,
12245                            (MaskedOp (_.EltVT
12246                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12247                                      _.FRC:$src2),
12248                            _.FRC:$src0))),
12249              (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12250               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12251               VK1WM:$mask, _.VT:$src1,
12252               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12253    def : Pat<(MoveNode (_.VT VR128X:$src1),
12254               (scalar_to_vector
12255                (X86selects_mask VK1WM:$mask,
12256                            (MaskedOp (_.EltVT
12257                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12258                                      (_.ScalarLdFrag addr:$src2)),
12259                            _.FRC:$src0))),
12260              (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12261               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12262               VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12263
12264    // extracted zero-masked scalar math op with insert via movss
12265    def : Pat<(MoveNode (_.VT VR128X:$src1),
12266               (scalar_to_vector
12267                (X86selects_mask VK1WM:$mask,
12268                            (MaskedOp (_.EltVT
12269                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12270                                      _.FRC:$src2), (_.EltVT ZeroFP)))),
12271      (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12272          VK1WM:$mask, _.VT:$src1,
12273          (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12274    def : Pat<(MoveNode (_.VT VR128X:$src1),
12275               (scalar_to_vector
12276                (X86selects_mask VK1WM:$mask,
12277                            (MaskedOp (_.EltVT
12278                                       (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12279                                      (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12280      (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
12281  }
12282}
12283
12284defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12285defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12286defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12287defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12288
12289defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12290defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12291defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12292defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
12293
// The half-precision patterns select FP16 instructions, so guard them with
// HasFP16 rather than the default HasAVX512.
12294defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
12295defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
12296defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
12297defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
12298
// Selection pattern that folds "extract element 0 of $src, apply OpNode,
// insert into $dst via Move" into the "V" # OpcPrefix # "Zr_Int" instruction.
//   OpNode    - unary operator to match.
//   OpcPrefix - instruction name stem, e.g. "SQRTSS".
//   Move      - node that inserts the scalar result (X86Movss/Movsd/Movsh).
//   _         - 128-bit vector type info.
//   prd       - predicate guarding the pattern. Defaults to HasAVX512; pass
//               the feature that defines the selected instruction when it is
//               stricter (HasFP16 for SQRTSH).
12299multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
12300                                             SDNode Move, X86VectorVTInfo _,
                                             Predicate prd = HasAVX512> {
12301  let Predicates = [prd] in {
12302    def : Pat<(_.VT (Move _.VT:$dst,
12303                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12304              (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12305  }
12306}
12307
12308defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12309defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
// SQRTSH is an FP16 instruction, so guard its pattern with HasFP16.
12310defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info, HasFP16>;
12311
12312//===----------------------------------------------------------------------===//
12313// AES instructions
12314//===----------------------------------------------------------------------===//
12315
// EVEX-encoded VAES instruction definitions for one opcode, built from
// AESI_binop_rm_int. `IntPrefix` is the base intrinsic name: the 128-bit form
// uses it unchanged, the 256- and 512-bit forms append "_256" / "_512".
// The 128/256-bit forms require HasVLX + HasVAES; the 512-bit form requires
// HasAVX512 + HasVAES.
12316multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12317  let Predicates = [HasVLX, HasVAES] in {
12318    defm Z128 : AESI_binop_rm_int<Op, OpStr,
12319                                  !cast<Intrinsic>(IntPrefix),
12320                                  loadv2i64, 0, VR128X, i128mem>,
12321                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
12322    defm Z256 : AESI_binop_rm_int<Op, OpStr,
12323                                  !cast<Intrinsic>(IntPrefix#"_256"),
12324                                  loadv4i64, 0, VR256X, i256mem>,
12325                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
12326    }
12327    let Predicates = [HasAVX512, HasVAES] in
12328    defm Z    : AESI_binop_rm_int<Op, OpStr,
12329                                  !cast<Intrinsic>(IntPrefix#"_512"),
12330                                  loadv8i64, 0, VR512, i512mem>,
12331                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
12332}
12333
// The four AES round instructions, opcodes 0xDC-0xDF, each tied to the
// corresponding int_x86_aesni_* intrinsic family.
12334defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12335defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12336defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12337defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12338
12339//===----------------------------------------------------------------------===//
12340// PCLMUL instructions - Carry less multiplication
12341//===----------------------------------------------------------------------===//
12342
// EVEX-encoded VPCLMULQDQ definitions built from the vpclmulqdq multiclass.
// The 512-bit form requires HasAVX512 + HasVPCLMULQDQ; the 128/256-bit forms
// require HasVLX + HasVPCLMULQDQ. Each width passes its own register class,
// memory operand, load fragment and intrinsic.
12343let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12344defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12345                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
12346
12347let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12348defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12349                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
12350
12351defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12352                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12353                                EVEX_CD8<64, CD8VF>, VEX_WIG;
12354}
12355
12356// Aliases
// One vpclmulqdq_aliases instantiation per EVEX width, keyed by the
// instruction name prefix defined above.
12357defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12358defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12359defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12360
12361//===----------------------------------------------------------------------===//
12362// VBMI2
12363//===----------------------------------------------------------------------===//
12364
// Three-source VBMI2 variable-shift forms for vector type `VTI`, with $src1
// tied to $dst:
//   r - register $src3,
//   m - full-vector memory $src3 (loaded with VTI.LdFrag).
// Both are built through AVX512_maskable_3src and select OpNode applied to
// ($src1, $src2, $src3).
12365multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12366                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12367  let Constraints = "$src1 = $dst",
12368      ExeDomain   = VTI.ExeDomain in {
12369    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12370                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12371                "$src3, $src2", "$src2, $src3",
12372                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12373                T8PD, EVEX_4V, Sched<[sched]>;
12374    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12375                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12376                "$src3, $src2", "$src2, $src3",
12377                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12378                        (VTI.VT (VTI.LdFrag addr:$src3))))>,
12379                T8PD, EVEX_4V,
12380                Sched<[sched.Folded, sched.ReadAfterFold]>;
12381  }
12382}
12383
// Extends VBMI2_shift_var_rm (r and m forms) with a broadcast memory form
// (mb). In the mb form $src3 is a scalar memory operand matched through
// VTI.BroadcastLdFrag, the assembly operand gets VTI.BroadcastStr appended,
// and EVEX_B is set. Parameters are the same as for VBMI2_shift_var_rm.
12384multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12385                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12386         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12387  let Constraints = "$src1 = $dst",
12388      ExeDomain   = VTI.ExeDomain in
12389  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12390              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12391              "${src3}"#VTI.BroadcastStr#", $src2",
12392              "$src2, ${src3}"#VTI.BroadcastStr,
12393              (OpNode VTI.RC:$src1, VTI.RC:$src2,
12394               (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12395              T8PD, EVEX_4V, EVEX_B,
12396              Sched<[sched.Folded, sched.ReadAfterFold]>;
12397}
12398
// Instantiates VBMI2_shift_var_rm (no broadcast form) for the 512-, 256- and
// 128-bit members of VTI, using the matching ZMM/YMM/XMM scheduling class.
// Z requires HasVBMI2; Z256 and Z128 additionally require HasVLX.
12399multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12400                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12401  let Predicates = [HasVBMI2] in
12402  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12403                                   EVEX_V512;
12404  let Predicates = [HasVBMI2, HasVLX] in {
12405    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12406                                   EVEX_V256;
12407    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12408                                   EVEX_V128;
12409  }
12410}
12411
// Same width fan-out as VBMI2_shift_var_rm_common, but built on
// VBMI2_shift_var_rmb so each width also gets the broadcast (mb) form.
// Z requires HasVBMI2; Z256 and Z128 additionally require HasVLX.
12412multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12413                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12414  let Predicates = [HasVBMI2] in
12415  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12416                                    EVEX_V512;
12417  let Predicates = [HasVBMI2, HasVLX] in {
12418    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12419                                    EVEX_V256;
12420    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
12421                                    EVEX_V128;
12422  }
12423}
// Variable-count concat-and-shift for word, dword and qword elements.
//   wOp    - opcode for the W (i16) variant.
//   dqOp   - opcode shared by the D (i32) and Q (i64) variants.
//   Prefix - mnemonic stem; "w"/"d"/"q" is appended per variant.
// W is built without a broadcast form (rm_common); D and Q include one
// (rmb_common). W and Q set VEX_W, D does not.
12424multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12425                           SDNode OpNode, X86SchedWriteWidths sched> {
12426  defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12427             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
12428  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12429             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12430  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12431             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
12432}
12433
// Immediate-count concat-and-shift for word, dword and qword elements.
//   wOp    - opcode for the W (i16) variant.
//   dqOp   - opcode shared by the D (i32) and Q (i64) variants.
//   Prefix - mnemonic stem; "w"/"d"/"q" is appended per variant.
// W goes through avx512_common_3Op_rm_imm8, while D and Q go through
// avx512_common_3Op_imm8 and add AVX512AIi8Base and EVEX_4V explicitly.
// NOTE(review): the two helper multiclasses take their arguments in different
// orders; both are defined outside this section.
12434multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12435                           SDNode OpNode, X86SchedWriteWidths sched> {
12436  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12437             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12438             VEX_W, EVEX_CD8<16, CD8VF>;
12439  defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12440             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12441  defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12442             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
12443}
12444
12445// Concat & Shift
// The *V definitions take a variable count (VBMI2_shift_var); the others take
// an immediate count (VBMI2_shift_imm). Each left/right pair reuses the same
// opcode values for the variable and immediate forms. All four are scheduled
// as SchedWriteVecIMul.
12446defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12447defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12448defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12449defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
12450
12451// Compress
// Byte and word compress share opcode 0x63; the word form adds VEX_W.
// Both are marked NotMemoryFoldable and gated on HasVBMI2.
12452defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12453                                         avx512vl_i8_info, HasVBMI2>, EVEX,
12454                                         NotMemoryFoldable;
12455defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12456                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
12457                                          NotMemoryFoldable;
12458// Expand
// Byte and word expand share opcode 0x62; the word form adds VEX_W.
12459defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12460                                      avx512vl_i8_info, HasVBMI2>, EVEX;
12461defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12462                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
12463
12464//===----------------------------------------------------------------------===//
12465// VNNI
12466//===----------------------------------------------------------------------===//
12467
// Register (r), full-vector memory (m) and broadcast memory (mb) forms of a
// VNNI instruction for one vector type VTI. $src1 is tied to $dst.
//   Op, OpStr, OpNode - opcode, mnemonic and matched SDNode.
//   sched             - scheduling class; memory forms use the folded variants.
//   IsCommutable      - forwarded twice to AVX512_maskable_3src, and only for
//                       the register form; m and mb leave those arguments at
//                       their defaults.
// NOTE(review): what the two trailing AVX512_maskable_3src arguments mean is
// defined outside this section - confirm there.
12468let Constraints = "$src1 = $dst" in
12469multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12470                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12471                    bit IsCommutable> {
12472  let ExeDomain = VTI.ExeDomain in {
12473  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12474                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12475                                   "$src3, $src2", "$src2, $src3",
12476                                   (VTI.VT (OpNode VTI.RC:$src1,
12477                                            VTI.RC:$src2, VTI.RC:$src3)),
12478                                   IsCommutable, IsCommutable>,
12479                                   EVEX_4V, T8PD, Sched<[sched]>;
  // Memory form: $src3 is a full-vector load through VTI.LdFrag.
12480  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12481                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12482                                   "$src3, $src2", "$src2, $src3",
12483                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12484                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
12485                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12486                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  // Broadcast form: $src3 is a scalar memory operand matched through
  // VTI.BroadcastLdFrag; EVEX_B is set.
12487  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12488                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12489                                   OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12490                                   "$src2, ${src3}"#VTI.BroadcastStr,
12491                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
12492                                    (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12493                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12494                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
12495  }
12496}
12497
// Instantiates VNNI_rmb at 512, 256 and 128 bits. The vector type is fixed to
// 32-bit integer elements (v16i32_info / v8i32x_info / v4i32x_info).
// Z requires HasVNNI; Z256 and Z128 additionally require HasVLX.
12498multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12499                       X86SchedWriteWidths sched, bit IsCommutable> {
12500  let Predicates = [HasVNNI] in
12501  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12502                           IsCommutable>, EVEX_V512;
12503  let Predicates = [HasVNNI, HasVLX] in {
12504    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12505                           IsCommutable>, EVEX_V256;
12506    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12507                           IsCommutable>, EVEX_V128;
12508  }
12509}
12510
12511// FIXME: Is there a better scheduler class for VPDP?
// The last argument is IsCommutable: 0 for the VPDPBUSD* forms and 1 for the
// VPDPWSSD* forms.
12512defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12513defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12514defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12515defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
12516
12517// Patterns to match VPDPWSSD from existing instructions/intrinsics.
// Each pattern rewrites (add $src1, (X86vpmaddwd_su $src2, $src3)) into the
// VPDPWSSD instruction of the same width, with a register and a folded-load
// variant per width.
// NOTE(review): X86vpmaddwd_su is defined outside this section; presumably a
// single-use-restricted fragment like the other *_su fragments - confirm.
12518let Predicates = [HasVNNI] in {
12519  def : Pat<(v16i32 (add VR512:$src1,
12520                         (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12521            (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12522  def : Pat<(v16i32 (add VR512:$src1,
12523                         (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12524            (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12525}
// 256- and 128-bit variants additionally require HasVLX.
12526let Predicates = [HasVNNI,HasVLX] in {
12527  def : Pat<(v8i32 (add VR256X:$src1,
12528                        (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12529            (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12530  def : Pat<(v8i32 (add VR256X:$src1,
12531                        (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12532            (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12533  def : Pat<(v4i32 (add VR128X:$src1,
12534                        (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12535            (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12536  def : Pat<(v4i32 (add VR128X:$src1,
12537                        (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12538            (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12539}
12540
12541//===----------------------------------------------------------------------===//
12542// Bit Algorithms
12543//===----------------------------------------------------------------------===//
12544
12545// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte and word population count, both matched from the generic ctpop node and
// gated on HasBITALG. They share opcode 0x54; the word form adds VEX_W.
12546defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12547                                   avx512vl_i8_info, HasBITALG>;
12548defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12549                                   avx512vl_i16_info, HasBITALG>, VEX_W;
12550
// Additional lowering patterns; the instruction is referenced by name string.
12551defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12552defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
12553
// Single-use variant of X86Vpshufbitqmb: matches the same two-operand node,
// but only when the node has exactly one use (predicate N->hasOneUse()).
12554def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12555                                 (X86Vpshufbitqmb node:$src1, node:$src2), [{
12556  return N->hasOneUse();
12557}]>;
12558
// Register (rr) and memory (rm) forms of VPSHUFBITQMB (opcode 0x8F) for one
// vector type VTI. The result is written to the mask register class VTI.KRC.
// AVX512_maskable_cmp is given two patterns per form: one using the plain
// X86Vpshufbitqmb node and one using the single-use X86Vpshufbitqmb_su
// fragment.
// NOTE(review): presumably the plain pattern is for the unmasked instruction
// and the _su pattern for the masked one - confirm against the definition of
// AVX512_maskable_cmp.
12559multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12560  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12561                                (ins VTI.RC:$src1, VTI.RC:$src2),
12562                                "vpshufbitqmb",
12563                                "$src2, $src1", "$src1, $src2",
12564                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12565                                (VTI.VT VTI.RC:$src2)),
12566                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12567                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12568                                Sched<[sched]>;
  // Memory form: $src2 is a full-vector load through VTI.LdFrag.
12569  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12570                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
12571                                "vpshufbitqmb",
12572                                "$src2, $src1", "$src1, $src2",
12573                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12574                                (VTI.VT (VTI.LdFrag addr:$src2))),
12575                                (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12576                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
12577                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12578                                Sched<[sched.Folded, sched.ReadAfterFold]>;
12579}
12580
// Instantiates VPSHUFBITQMB_rm at 512, 256 and 128 bits.
// Z requires HasBITALG; Z256 and Z128 additionally require HasVLX.
12581multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12582  let Predicates = [HasBITALG] in
12583  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12584  let Predicates = [HasBITALG, HasVLX] in {
12585    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12586    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12587  }
12588}
12589
12590// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// Defined on byte-element vectors (avx512vl_i8_info) with SchedWriteVecIMul.
12591defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12592
12593//===----------------------------------------------------------------------===//
12594// GFNI
12595//===----------------------------------------------------------------------===//
12596
// EVEX forms of a GFNI byte-wise binary operation at 512, 256 and 128 bits,
// built on avx512_binop_rm with byte-element vector infos.
// Z requires HasGFNI, HasAVX512 and HasBWI; Z256 and Z128 require HasGFNI,
// HasVLX and HasBWI.
// NOTE(review): the trailing `1` is presumably avx512_binop_rm's IsCommutable
// argument - confirm against its definition.
12597multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12598                                   X86SchedWriteWidths sched> {
12599  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12600  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12601                                EVEX_V512;
12602  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12603    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12604                                EVEX_V256;
12605    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12606                                EVEX_V128;
12607  }
12608}
12609
// VGF2P8MULB: opcode 0xCF, matched from X86GF2P8mulb and scheduled as
// SchedWriteVecALU.
12610defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12611                                          SchedWriteVecALU>,
12612                                          EVEX_CD8<8, CD8VF>, T8PD;
12613
// GFNI affine-transform instruction forms for one width. Inherits the forms
// produced by avx512_3Op_rm_imm8 and adds a broadcast memory form (rmbi).
//   VTI     - vector type of the operation and result.
//   BcstVTI - vector type used for the broadcast load; the 64-bit broadcast
//             (X86VBroadcastld64) is produced as BcstVTI.VT and bitconverted
//             for use as the second operand of OpNode.
// The broadcast decoration in the asm string comes from BcstVTI, not VTI.
12614multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12615                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12616                                      X86VectorVTInfo BcstVTI>
12617           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12618  let ExeDomain = VTI.ExeDomain in
12619  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12620                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12621                OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12622                "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12623                (OpNode (VTI.VT VTI.RC:$src1),
12624                 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12625                 (i8 timm:$src3))>, EVEX_B,
12626                 Sched<[sched.Folded, sched.ReadAfterFold]>;
12627}
12628
// Instantiates GF2P8AFFINE_avx512_rmb_imm at 512, 256 and 128 bits, pairing
// each byte-element vector info with the i64-element info of the same width
// for the broadcast form.
// Z requires HasGFNI, HasAVX512 and HasBWI; Z256 and Z128 require HasGFNI,
// HasVLX and HasBWI.
12629multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12630                                     X86SchedWriteWidths sched> {
12631  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12632  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12633                                           v64i8_info, v8i64_info>, EVEX_V512;
12634  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12635    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12636                                           v32i8x_info, v4i64x_info>, EVEX_V256;
12637    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12638                                           v16i8x_info, v2i64x_info>, EVEX_V128;
12639  }
12640}
12641
// The two affine instructions differ only in opcode (0xCF vs 0xCE), mnemonic
// and matched SDNode; both set VEX_W and use AVX512AIi8Base.
12642defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12643                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
12644                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12645defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12646                         X86GF2P8affineqb, SchedWriteVecIMul>,
12647                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12648
12649
12650//===----------------------------------------------------------------------===//
12651// AVX5124FMAPS
12652//===----------------------------------------------------------------------===//
12653
// AVX5124FMAPS instructions. They are defined with empty pattern lists ([]),
// so they carry no selection patterns here. Only memory-source forms exist;
// the memory operand is f128mem in every case. All are marked as loading,
// reading MXCSR and possibly raising FP exceptions, with $src1 tied to $dst.
// The packed (PS) forms operate on VR512 with EVEX_V512 and CD8VQ.
12654let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12655    Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12656defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12657                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12658                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
12659                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12660                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12661
12662defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12663                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12664                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12665                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12666                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12667
// Scalar (SS) forms operate on VR128X with VEX_LIG and CD8VF.
12668defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12669                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
12670                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
12671                    []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12672                    Sched<[SchedWriteFMA.Scl.Folded]>;
12673
12674defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12675                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12676                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12677                     []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12678                     Sched<[SchedWriteFMA.Scl.Folded]>;
12679}
12680
12681//===----------------------------------------------------------------------===//
12682// AVX5124VNNIW
12683//===----------------------------------------------------------------------===//
12684
// AVX5124VNNIW instructions. They are defined with empty pattern lists ([]),
// so they carry no selection patterns here. Only 512-bit memory-source forms
// exist, with an f128mem operand and $src1 tied to $dst.
// NOTE(review): these integer instructions reuse the SchedWriteFMA scheduling
// class - confirm this is intentional.
12685let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12686    Constraints = "$src1 = $dst" in {
12687defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12688                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12689                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12690                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12691                    Sched<[SchedWriteFMA.ZMM.Folded]>;
12692
12693defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12694                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12695                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12696                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12697                     Sched<[SchedWriteFMA.ZMM.Folded]>;
12698}
12699
// Pseudo instructions that store and load a VK16PAIR mask-register pair
// to/from memory. Both have empty pattern lists and no side effects beyond
// the store/load itself.
// NOTE(review): how these pseudos are created and expanded is not visible in
// this section.
12700let hasSideEffects = 0 in {
12701  let mayStore = 1, SchedRW = [WriteFStoreX] in
12702  def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12703  let mayLoad = 1, SchedRW = [WriteFLoadX] in
12704  def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12705}
12706
12707//===----------------------------------------------------------------------===//
12708// VP2INTERSECT
12709//===----------------------------------------------------------------------===//
12710
// Register (rr), full-vector memory (rm) and broadcast memory (rmb) forms of
// VP2INTERSECT (opcode 0x68) for one vector type `_`.
// The result is a mask-register pair, written to the operand class _.KRPC.
// X86VectorVTInfo leaves KRPC unset for types with more than 16 elements.
// The mnemonic is "vp2intersect" followed by _.Suffix.
12711multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12712  def rr : I<0x68, MRMSrcReg,
12713                  (outs _.KRPC:$dst),
12714                  (ins _.RC:$src1, _.RC:$src2),
12715                  !strconcat("vp2intersect", _.Suffix,
12716                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12717                  [(set _.KRPC:$dst, (X86vp2intersect
12718                            _.RC:$src1, (_.VT _.RC:$src2)))]>,
12719                  EVEX_4V, T8XD, Sched<[sched]>;
12720
  // Memory form: $src2 is a full-vector load through _.LdFrag, bitconverted
  // to _.VT.
12721  def rm : I<0x68, MRMSrcMem,
12722                  (outs _.KRPC:$dst),
12723                  (ins  _.RC:$src1, _.MemOp:$src2),
12724                  !strconcat("vp2intersect", _.Suffix,
12725                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12726                  [(set _.KRPC:$dst, (X86vp2intersect
12727                            _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12728                  EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12729                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12730
  // Broadcast form: $src2 is a scalar memory operand matched through
  // _.BroadcastLdFrag; _.BroadcastStr is appended to it in both asm syntaxes.
12731  def rmb : I<0x68, MRMSrcMem,
12732                  (outs _.KRPC:$dst),
12733                  (ins _.RC:$src1, _.ScalarMemOp:$src2),
12734                  !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12735                             ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12736                  [(set _.KRPC:$dst, (X86vp2intersect
12737                             _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12738                  EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12739                  Sched<[sched.Folded, sched.ReadAfterFold]>;
12740}
12741
// Instantiates avx512_vp2intersect_modes at 512, 256 and 128 bits.
// Z requires HasAVX512 and HasVP2INTERSECT; Z256 and Z128 additionally
// require HasVLX.
12742multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12743  let Predicates  = [HasAVX512, HasVP2INTERSECT] in
12744    defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12745
12746  let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12747    defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12748    defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12749  }
12750}
12751
// Dword and qword variants; the qword variant adds VEX_W.
12752defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12753defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
12754
// Binary operation whose source and destination vector types differ,
// instantiated at 512, 256 and 128 bits through avx512_binop_rm2.
//   _SrcVTInfo / _DstVTInfo - source and destination type families.
//   prd                     - feature predicate; the 256/128-bit forms also
//                             require HasVLX.
//   IsCommutable            - forwarded to avx512_binop_rm2 (default 0).
// EVEX_CD8<32, CD8VF> is hard-coded for every width.
// NOTE(review): the source info is passed to avx512_binop_rm2 a second time as
// its third type argument; presumably the broadcast type - confirm against its
// definition.
12755multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12756                             X86SchedWriteWidths sched,
12757                             AVX512VLVectorVTInfo _SrcVTInfo,
12758                             AVX512VLVectorVTInfo _DstVTInfo,
12759                             SDNode OpNode, Predicate prd,
12760                             bit IsCommutable = 0> {
12761  let Predicates = [prd] in
12762    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12763                                   _SrcVTInfo.info512, _DstVTInfo.info512,
12764                                   _SrcVTInfo.info512, IsCommutable>,
12765                                   EVEX_V512, EVEX_CD8<32, CD8VF>;
12766  let Predicates = [HasVLX, prd] in {
12767    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12768                                      _SrcVTInfo.info256, _DstVTInfo.info256,
12769                                      _SrcVTInfo.info256, IsCommutable>,
12770                                     EVEX_V256, EVEX_CD8<32, CD8VF>;
12771    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12772                                      _SrcVTInfo.info128, _DstVTInfo.info128,
12773                                      _SrcVTInfo.info128, IsCommutable>,
12774                                      EVEX_V128, EVEX_CD8<32, CD8VF>;
12775  }
12776}
12777
// VCVTNE2PS2BF16: opcode 0x72, gated on HasBF16 and non-commutable. The
// sources are f32 vectors and the destination is an i16 vector type, which is
// how the BF16 results are represented here.
12778let ExeDomain = SSEPackedSingle in
12779defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12780                                        SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12781                                        avx512vl_f32_info, avx512vl_i16_info,
12782                                        X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12783
12784// Truncate Float to BFloat16
// Defines the 512-, 128- and 256-bit forms through avx512_vcvt_fp. Every form
// clears Uses and sets mayRaiseFPException = 0.
// The 128-bit form passes null_frag for both nodes, so it gets no selection
// patterns from avx512_vcvt_fp; it also passes VK4WM as its mask class. Its
// patterns are written out separately (see the VCVTNEPS2BF16Z128 patterns
// that follow the VCVTNEPS2BF16 definition).
12785multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12786                             X86SchedWriteWidths sched> {
12787  let ExeDomain = SSEPackedSingle in {
12788  let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12789    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12790                            X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12791  }
12792  let Predicates = [HasBF16, HasVLX] in {
12793    let Uses = []<Register>, mayRaiseFPException = 0 in {
12794    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12795                               null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12796                               VK4WM>, EVEX_V128;
12797    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12798                               X86cvtneps2bf16, X86cvtneps2bf16,
12799                               sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12800    }
12801  } // Predicates = [HasBF16, HasVLX]
12802  } // ExeDomain = SSEPackedSingle
12803
  // Assembler aliases that accept the mnemonic with an explicit "x" or "y"
  // suffix. Both the 128- and 256-bit forms write a VR128X destination. The
  // memory-operand aliases are restricted to the "intel" asm variant.
12804  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12805                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12806                  VR128X:$src), 0>;
12807  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12808                  (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12809                  f128mem:$src), 0, "intel">;
12810  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12811                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12812                  VR256X:$src), 0>;
12813  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12814                  (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12815                  f256mem:$src), 0, "intel">;
12816}
12817
// VCVTNEPS2BF16: opcode 0x72 with T8XS. This is the same opcode value as
// VCVTNE2PS2BF16, which uses T8XD instead.
12818defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12819                                       SchedWriteCvtPD2PS>, T8XS,
12820                                       EVEX_CD8<32, CD8VF>;
12821
// Hand-written selection patterns for the 128-bit VCVTNEPS2BF16 forms.
// Three source kinds are covered in turn: register, full load and 32-bit
// broadcast load. Each has an unmasked pattern (X86cvtneps2bf16), a
// merge-masked pattern (X86mcvtneps2bf16 with a $src0 pass-through) and a
// zero-masked pattern (X86mcvtneps2bf16 with an all-zeros pass-through).
// The mask is a VK4WM operand while the result type is v8i16.
12822let Predicates = [HasBF16, HasVLX] in {
12823  // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12824  // patterns have been disabled with null_frag.
12825  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12826            (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12827  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12828                              VK4WM:$mask),
12829            (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12830  def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12831                              VK4WM:$mask),
12832            (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12833
  // Full-vector load source.
12834  def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12835            (VCVTNEPS2BF16Z128rm addr:$src)>;
12836  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12837                              VK4WM:$mask),
12838            (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12839  def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12840                              VK4WM:$mask),
12841            (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12842
  // 32-bit broadcast load source.
12843  def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12844                                     (X86VBroadcastld32 addr:$src)))),
12845            (VCVTNEPS2BF16Z128rmb addr:$src)>;
12846  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12847                              (v8i16 VR128X:$src0), VK4WM:$mask),
12848            (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12849  def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12850                              v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12851            (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
12852}
12853
// Register (r), full-vector memory (m) and broadcast memory (mb) forms of a
// BF16 dot-product instruction for one width. $src1 is tied to $dst.
//   _     - vector type of the accumulator and result ($src1 / $dst).
//   src_v - vector type of the two multiplicand operands ($src2, $src3).
// In the mb form the broadcast decoration in the asm string is taken from `_`
// while the operand and load fragment come from src_v.
// NOTE(review): this assumes `_` and src_v have the same BroadcastStr -
// confirm for every instantiation.
12854let Constraints = "$src1 = $dst" in {
12855multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12856                              X86FoldableSchedWrite sched,
12857                              X86VectorVTInfo _, X86VectorVTInfo src_v> {
12858  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12859                           (ins src_v.RC:$src2, src_v.RC:$src3),
12860                           OpcodeStr, "$src3, $src2", "$src2, $src3",
12861                           (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12862                           EVEX_4V, Sched<[sched]>;
12863
12864  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12865                               (ins src_v.RC:$src2, src_v.MemOp:$src3),
12866                               OpcodeStr, "$src3, $src2", "$src2, $src3",
12867                               (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12868                               (src_v.LdFrag addr:$src3)))>, EVEX_4V,
12869                               Sched<[sched.Folded, sched.ReadAfterFold]>;
12870
12871  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12872                  (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
12873                  OpcodeStr,
12874                  !strconcat("${src3}", _.BroadcastStr,", $src2"),
12875                  !strconcat("$src2, ${src3}", _.BroadcastStr),
12876                  (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12877                  (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12878                  EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12879
12880}
12881} // Constraints = "$src1 = $dst"
12882
// Instantiates avx512_dpbf16ps_rm at 512, 256 and 128 bits, pairing each
// member of `_` with the same-width member of src_v.
// Z requires prd; Z256 and Z128 additionally require HasVLX.
12883multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12884                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12885                                 AVX512VLVectorVTInfo src_v, Predicate prd> {
12886  let Predicates = [prd] in {
12887    defm Z    : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12888                                   src_v.info512>, EVEX_V512;
12889  }
12890  let Predicates = [HasVLX, prd] in {
12891    defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12892                                   src_v.info256>, EVEX_V256;
12893    defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12894                                   src_v.info128>, EVEX_V128;
12895  }
12896}
12897
// VDPBF16PS: opcode 0x52, gated on HasBF16. The accumulator and result use
// the f32 vector family; the multiplicand operands use the i32 vector family.
12898let ExeDomain = SSEPackedSingle in
12899defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12900                                       avx512vl_f32_info, avx512vl_i32_info,
12901                                       HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
12902
12903//===----------------------------------------------------------------------===//
12904// AVX512FP16
12905//===----------------------------------------------------------------------===//
12906
12907let Predicates = [HasFP16] in {
12908// Move word (r/m16) to packed word
12909def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
12910                      "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
12911def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
12912                      "vmovw\t{$src, $dst|$dst, $src}",
12913                      [(set VR128X:$dst,
12914                        (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
12915                      T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
12916
12917def : Pat<(f16 (bitconvert GR16:$src)),
12918          (f16 (COPY_TO_REGCLASS
12919                (VMOVW2SHrr
12920                 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
12921                FR16X))>;
12922def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
12923          (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
12924def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
12925          (VMOVW2SHrr GR32:$src)>;
12926// FIXME: We should really find a way to improve these patterns.
12927def : Pat<(v8i32 (X86vzmovl
12928                  (insert_subvector undef,
12929                                    (v4i32 (scalar_to_vector
12930                                            (and GR32:$src, 0xffff))),
12931                                    (iPTR 0)))),
12932          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12933def : Pat<(v16i32 (X86vzmovl
12934                   (insert_subvector undef,
12935                                     (v4i32 (scalar_to_vector
12936                                             (and GR32:$src, 0xffff))),
12937                                     (iPTR 0)))),
12938          (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
12939
12940def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
12941          (VMOVW2SHrr GR32:$src)>;
12942
12943// AVX 128-bit movw instruction write zeros in the high 128-bit part.
12944def : Pat<(v8i16 (X86vzload16 addr:$src)),
12945          (VMOVWrm addr:$src)>;
12946def : Pat<(v16i16 (X86vzload16 addr:$src)),
12947          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12948
12949// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
12950def : Pat<(v32i16 (X86vzload16 addr:$src)),
12951          (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
12952
12953def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
12954          (VMOVWrm addr:$src)>;
12955def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
12956          (VMOVWrm addr:$src)>;
12957def : Pat<(v8i32 (X86vzmovl
12958                  (insert_subvector undef,
12959                                    (v4i32 (scalar_to_vector
12960                                            (i32 (zextloadi16 addr:$src)))),
12961                                    (iPTR 0)))),
12962          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12963def : Pat<(v16i32 (X86vzmovl
12964                   (insert_subvector undef,
12965                                     (v4i32 (scalar_to_vector
12966                                             (i32 (zextloadi16 addr:$src)))),
12967                                     (iPTR 0)))),
12968          (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
12969
12970// Move word from xmm register to r/m16
12971def VMOVSH2Wrr  : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
12972                       "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
12973def VMOVWmr  : AVX512<0x7E, MRMDestMem, (outs),
12974                       (ins i16mem:$dst, VR128X:$src),
12975                       "vmovw\t{$src, $dst|$dst, $src}",
12976                       [(store (i16 (extractelt (v8i16 VR128X:$src),
12977                                     (iPTR 0))), addr:$dst)]>,
12978                       T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
12979
12980def : Pat<(i16 (bitconvert FR16X:$src)),
12981          (i16 (EXTRACT_SUBREG
12982                (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
12983                sub_16bit))>;
12984def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
12985          (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
12986}
12987
12988// Allow "vmovw" to use GR64
// VEX_W variants of the two register forms (same opcodes 0x6E and 0x7E) that
// take or produce a GR64 operand. Both have empty pattern lists, and
// hasSideEffects is set to 0 explicitly for them.
// NOTE(review): unlike VMOVW2SHrr / VMOVSH2Wrr, these two defs are not wrapped
// in `let Predicates = [HasFP16]` - confirm that this is intentional.
12989let hasSideEffects = 0 in {
12990  def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
12991                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
12992  def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
12993                     "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
12994}
12995
12996// Convert 16-bit float to i16/u16
// Emits three width variants (Z, Z128, Z256) from avx512_vcvt_fp, forwarding
// opc, OpcodeStr, OpNode and MaskOpNode and picking the matching
// info512/info128/info256 member of _Dst and _Src plus sched.ZMM/XMM/YMM.
// Only the 512-bit variant also inherits avx512_vcvt_fp_rc (which receives
// OpNodeRnd). Z requires HasFP16; Z128 and Z256 require HasFP16 and HasVLX.
// This multiclass is also instantiated with f16 as _Dst and i16 as _Src for
// the vcvtw2ph / vcvtuw2ph definitions in this file.
12997multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
12998                          SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
12999                          AVX512VLVectorVTInfo _Dst,
13000                          AVX512VLVectorVTInfo _Src,
13001                          X86SchedWriteWidths sched> {
13002  let Predicates = [HasFP16] in {
13003    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13004                            OpNode, MaskOpNode, sched.ZMM>,
13005             avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
13006                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13007  }
13008  let Predicates = [HasFP16, HasVLX] in {
13009    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13010                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13011    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13012                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13013  }
13014}
13015
13016// Convert 16-bit float to i16/u16 truncate
// Same layout as avx512_cvtph2w (Z under HasFP16, Z128/Z256 under HasFP16 and
// HasVLX, all built from avx512_vcvt_fp), except that the 512-bit variant
// inherits avx512_vcvt_fp_sae instead of avx512_vcvt_fp_rc, passing OpNodeRnd
// to it.
13017multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13018                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13019                           AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
13020                           X86SchedWriteWidths sched> {
13021  let Predicates = [HasFP16] in {
13022    defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
13023                            OpNode, MaskOpNode, sched.ZMM>,
13024             avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
13025                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13026  }
13027  let Predicates = [HasFP16, HasVLX] in {
13028    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
13029                               OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
13030    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
13031                               OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
13032  }
13033}
13034
// Packed half <-> 16-bit integer conversion instructions. All six use
// SchedWriteCvtPD2DQ and EVEX_CD8<16, CD8VF>; they are told apart by opcode
// (0x7D, or 0x7C for the truncating forms) together with the T_MAP5* class.
// The two *2PH definitions reuse avx512_cvtph2w with avx512vl_f16_info as the
// destination info and avx512vl_i16_info as the source info.
13035defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
13036                                X86cvtp2UIntRnd, avx512vl_i16_info,
13037                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13038                                T_MAP5PS, EVEX_CD8<16, CD8VF>;
13039defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
13040                                X86VUintToFpRnd, avx512vl_f16_info,
13041                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13042                                T_MAP5XD, EVEX_CD8<16, CD8VF>;
13043defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
13044                                X86cvttp2si, X86cvttp2siSAE,
13045                                avx512vl_i16_info, avx512vl_f16_info,
13046                                SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
13047defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
13048                                X86cvttp2ui, X86cvttp2uiSAE,
13049                                avx512vl_i16_info, avx512vl_f16_info,
13050                                SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
13051defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
13052                                X86cvtp2IntRnd, avx512vl_i16_info,
13053                                avx512vl_f16_info, SchedWriteCvtPD2DQ>,
13054                                T_MAP5PD, EVEX_CD8<16, CD8VF>;
13055defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
13056                                X86VSintToFpRnd, avx512vl_f16_info,
13057                                avx512vl_i16_info, SchedWriteCvtPD2DQ>,
13058                                T_MAP5XS, EVEX_CD8<16, CD8VF>;
13059
13060// Convert Half to Signed/Unsigned Doubleword
// Type infos are fixed here rather than passed in: v16f16x -> v16i32 for Z,
// v8f16x -> v8i32x for Z256 and v8f16x -> v4i32x for Z128. Z requires HasFP16
// and also inherits avx512_vcvt_fp_rc (with OpNodeRnd); Z128/Z256 require
// HasFP16 and HasVLX.
13061multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13062                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13063                           X86SchedWriteWidths sched> {
13064  let Predicates = [HasFP16] in {
13065    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13066                            MaskOpNode, sched.ZMM>,
13067             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
13068                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13069  }
13070  let Predicates = [HasFP16, HasVLX] in {
    // The destination has only 4 elements while the source info is
    // v8f16x_info, so the broadcast string ("{1to4}") and the memory operand
    // (f64mem) are given explicitly.
13071    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13072                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13073    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13074                               MaskOpNode, sched.YMM>, EVEX_V256;
13075  }
13076}
13077
13078// Convert Half to Signed/Unsigned Doubleword with truncation
// Uses the same fixed type infos and predicates as avx512_cvtph2dq. The only
// structural difference is that the 512-bit variant inherits
// avx512_vcvt_fp_sae (with OpNodeRnd) instead of avx512_vcvt_fp_rc.
13079multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13080                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13081                            X86SchedWriteWidths sched> {
13082  let Predicates = [HasFP16] in {
13083    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
13084                            MaskOpNode, sched.ZMM>,
13085             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
13086                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13087  }
13088  let Predicates = [HasFP16, HasVLX] in {
    // The destination has only 4 elements while the source info is
    // v8f16x_info, so the broadcast string ("{1to4}") and the memory operand
    // (f64mem) are given explicitly.
13089    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
13090                               MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
13091    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
13092                               MaskOpNode, sched.YMM>, EVEX_V256;
13093  }
13094}
13095
13096
// Packed half -> 32-bit integer conversion instructions. All four use
// SchedWriteCvtPS2DQ and EVEX_CD8<16, CD8VH>. The signed forms share opcode
// 0x5B and differ in T_MAP5PD vs T_MAP5XS; the unsigned forms both use
// T_MAP5PS and differ in opcode (0x79 vs 0x78).
13097defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
13098                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13099                                 EVEX_CD8<16, CD8VH>;
13100defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
13101                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
13102                                 EVEX_CD8<16, CD8VH>;
13103
13104defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
13105                                X86cvttp2si, X86cvttp2siSAE,
13106                                SchedWriteCvtPS2DQ>, T_MAP5XS,
13107                                EVEX_CD8<16, CD8VH>;
13108
13109defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
13110                                 X86cvttp2ui, X86cvttp2uiSAE,
13111                                 SchedWriteCvtPS2DQ>, T_MAP5PS,
13112                                 EVEX_CD8<16, CD8VH>;
13113
13114// Convert Half to Signed/Unsigned Quadword
// Every width uses v8f16x_info as the source info; the destination is
// v8i64 (Z), v4i64x (Z256) or v2i64x (Z128). Z requires HasFP16 and also
// inherits avx512_vcvt_fp_rc (with OpNodeRnd); Z128/Z256 require HasFP16 and
// HasVLX and pass explicit broadcast strings and memory operands.
13115multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13116                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13117                           X86SchedWriteWidths sched> {
13118  let Predicates = [HasFP16] in {
13119    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13120                            MaskOpNode, sched.ZMM>,
13121             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
13122                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13123  }
13124  let Predicates = [HasFP16, HasVLX] in {
13125    // Explicitly specified broadcast string, since we take only 2 elements
13126    // from v8f16x_info source
13127    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13128                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
13129                               EVEX_V128;
13130    // Explicitly specified broadcast string, since we take only 4 elements
13131    // from v8f16x_info source
13132    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13133                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
13134                               EVEX_V256;
13135  }
13136}
13137
13138// Convert Half to Signed/Unsigned Quadword with truncation
// Uses the same type infos, predicates, broadcast strings and memory operands
// as avx512_cvtph2qq. The only structural difference is that the 512-bit
// variant inherits avx512_vcvt_fp_sae (with OpNodeRnd) instead of
// avx512_vcvt_fp_rc.
13139multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13140                            SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13141                            X86SchedWriteWidths sched> {
13142  let Predicates = [HasFP16] in {
13143    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
13144                            MaskOpNode, sched.ZMM>,
13145             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
13146                                OpNodeRnd, sched.ZMM>, EVEX_V512;
13147  }
13148  let Predicates = [HasFP16, HasVLX] in {
13149    // Explicitly specified broadcast string, since we take only 2 elements
13150    // from v8f16x_info source
13151    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
13152                               MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
13153    // Explicitly specified broadcast string, since we take only 4 elements
13154    // from v8f16x_info source
13155    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
13156                               MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
13157  }
13158}
13159
// Packed half -> 64-bit integer conversion instructions. All four use
// SchedWriteCvtPS2DQ, T_MAP5PD and EVEX_CD8<16, CD8VQ>, and are told apart
// by opcode alone: 0x7B, 0x79, 0x7A and 0x78.
13160defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
13161                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13162                                 EVEX_CD8<16, CD8VQ>;
13163
13164defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
13165                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
13166                                 EVEX_CD8<16, CD8VQ>;
13167
13168defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
13169                                 X86cvttp2si, X86cvttp2siSAE,
13170                                 SchedWriteCvtPS2DQ>, T_MAP5PD,
13171                                 EVEX_CD8<16, CD8VQ>;
13172
13173defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
13174                                 X86cvttp2ui, X86cvttp2uiSAE,
13175                                 SchedWriteCvtPS2DQ>, T_MAP5PD,
13176                                 EVEX_CD8<16, CD8VQ>;
13177
13178// Convert Signed/Unsigned Quadword to Half
// All three widths produce v8f16x_info; the source is v8i64 (Z), v4i64x
// (Z256) or v2i64x (Z128). The 128/256-bit variants pass null_frag for both
// OpNode and MaskOpNode, supply VK2WM / VK4WM as the mask class, and are
// marked NotEVEX2VEXConvertible. The InstAlias records after the
// instruction definitions accept the explicitly suffixed mnemonic
// (OpcodeStr # "x"/"y"/"z") for the register and broadcast-memory forms and
// are restricted to the "att" assembler variant.
13179multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
13180                           SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
13181                           X86SchedWriteWidths sched> {
13182  // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
13183  // 512 memory forms of these instructions in Asm Parser. They have the same
13184  // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
13185  // due to the same reason.
13186  let Predicates = [HasFP16] in {
13187    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
13188                            MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
13189             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
13190                               OpNodeRnd, sched.ZMM>, EVEX_V512;
13191  }
13192  let Predicates = [HasFP16, HasVLX] in {
13193    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
13194                               null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
13195                               i128mem, VK2WM>,
13196                               EVEX_V128, NotEVEX2VEXConvertible;
13197    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
13198                               null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
13199                               i256mem, VK4WM>,
13200                               EVEX_V256, NotEVEX2VEXConvertible;
13201  }
13202
  // "x"-suffixed aliases for the 128-bit forms: rr, rrk, rrkz, then the
  // {1to2} broadcast forms rmb, rmbk, rmbkz.
13203  def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
13204                  (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
13205                  VR128X:$src), 0, "att">;
13206  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
13207                  (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
13208                  VK2WM:$mask, VR128X:$src), 0, "att">;
13209  def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
13210                  (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
13211                  VK2WM:$mask, VR128X:$src), 0, "att">;
13212  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
13213                  (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
13214                  i64mem:$src), 0, "att">;
13215  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
13216                  "$dst {${mask}}, ${src}{1to2}}",
13217                  (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
13218                  VK2WM:$mask, i64mem:$src), 0, "att">;
13219  def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
13220                  "$dst {${mask}} {z}, ${src}{1to2}}",
13221                  (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
13222                  VK2WM:$mask, i64mem:$src), 0, "att">;
13223
  // "y"-suffixed aliases for the 256-bit forms: rr, rrk, rrkz, then the
  // {1to4} broadcast forms rmb, rmbk, rmbkz.
13224  def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
13225                  (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
13226                  VR256X:$src), 0, "att">;
13227  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
13228                  "$dst {${mask}}, $src}",
13229                  (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
13230                  VK4WM:$mask, VR256X:$src), 0, "att">;
13231  def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
13232                  "$dst {${mask}} {z}, $src}",
13233                  (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
13234                  VK4WM:$mask, VR256X:$src), 0, "att">;
13235  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
13236                  (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
13237                  i64mem:$src), 0, "att">;
13238  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
13239                  "$dst {${mask}}, ${src}{1to4}}",
13240                  (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
13241                  VK4WM:$mask, i64mem:$src), 0, "att">;
13242  def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
13243                  "$dst {${mask}} {z}, ${src}{1to4}}",
13244                  (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
13245                  VK4WM:$mask, i64mem:$src), 0, "att">;
13246
  // "z"-suffixed aliases for the 512-bit forms: rr, rrk, rrkz, then the
  // {1to8} broadcast forms rmb, rmbk, rmbkz.
13247  def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
13248                  (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
13249                  VR512:$src), 0, "att">;
13250  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
13251                  "$dst {${mask}}, $src}",
13252                  (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
13253                  VK8WM:$mask, VR512:$src), 0, "att">;
13254  def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
13255                  "$dst {${mask}} {z}, $src}",
13256                  (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
13257                  VK8WM:$mask, VR512:$src), 0, "att">;
13258  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
13259                  (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
13260                  i64mem:$src), 0, "att">;
13261  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
13262                  "$dst {${mask}}, ${src}{1to8}}",
13263                  (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
13264                  VK8WM:$mask, i64mem:$src), 0, "att">;
13265  def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
13266                  "$dst {${mask}} {z}, ${src}{1to8}}",
13267                  (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
13268                  VK8WM:$mask, i64mem:$src), 0, "att">;
13269}
13270
// Packed 64-bit integer -> half conversion instructions, signed (opcode 0x5B,
// T_MAP5PS) and unsigned (opcode 0x7A, T_MAP5XD). Both are VEX_W, use
// SchedWriteCvtDQ2PS and EVEX_CD8<64, CD8VF>.
13271defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
13272                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
13273                            EVEX_CD8<64, CD8VF>;
13274
13275defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
13276                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
13277                            EVEX_CD8<64, CD8VF>;
13278
13279// Convert half to signed/unsigned int 32/64
// Each conversion is instantiated twice from f16x_info: once with i32x_info
// (asm suffix "{l}") and once with i64x_info (asm suffix "{q}", plus VEX_W).
// All eight definitions pass HasFP16 and use WriteCvtSS2I, T_MAP5XS and
// EVEX_CD8<16, CD8VT1>. The first four go through avx512_cvt_s_int_round;
// the truncating "vcvtt*" forms go through avx512_cvt_s_all.
13280defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
13281                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
13282                                   T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13283defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
13284                                   X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
13285                                   T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13286defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
13287                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
13288                                   T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13289defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
13290                                   X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
13291                                   T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13292
13293defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
13294                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13295                        "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13296defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
13297                        any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
13298                        "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13299defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
13300                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13301                        "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
13302defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
13303                        any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
13304                        "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
13305
// Scalar integer -> half conversions (vcvtsi2sh / vcvtusi2sh) for 32-bit and
// 64-bit sources, together with their assembler aliases and selection
// patterns. Everything in this block is predicated on HasFP16.
13306let Predicates = [HasFP16] in {
  // Four instantiations of avx512_vcvtsi_common into v8f16x_info: signed
  // (0x2A) and unsigned (0x7B), each for GR32/i32mem ("l") and GR64/i64mem
  // ("q", with VEX_W).
13307  defm VCVTSI2SHZ  : avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
13308                                   v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
13309                                   T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13310  defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A,  X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
13311                                   v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
13312                                   T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
13313  defm VCVTUSI2SHZ   : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
13314                                    v8f16x_info, i32mem, loadi32,
13315                                    "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
13316  defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B,  X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
13317                                    v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
13318                                    T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
  // The unsuffixed mnemonics with a memory operand are accepted as the 32-bit
  // (i32mem) rm_Int forms; both aliases are limited to the "att" variant.
13319  def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13320              (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13321
13322  def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
13323              (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
13324
13325
  // Plain scalar conversions to f16. The first instruction operand is left
  // undefined (IMPLICIT_DEF); the second is the loaded or register integer.
13326  def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
13327            (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13328  def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
13329            (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13330
13331  def : Pat<(f16 (any_sint_to_fp GR32:$src)),
13332            (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13333  def : Pat<(f16 (any_sint_to_fp GR64:$src)),
13334            (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13335
13336  def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
13337            (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13338  def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
13339            (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
13340
13341  def : Pat<(f16 (any_uint_to_fp GR32:$src)),
13342            (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
13343  def : Pat<(f16 (any_uint_to_fp GR64:$src)),
13344            (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
13345
13346  // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
13347  // which produce unnecessary vmovsh instructions
  // Each pattern folds X86Movsh($dst, scalar_to_vector(convert(src))) into
  // the matching *_Int instruction, with $dst as its first operand. The eight
  // patterns cover signed/unsigned x GR64/GR32 x register/load sources.
13348  def : Pat<(v8f16 (X86Movsh
13349                     (v8f16 VR128X:$dst),
13350                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
13351            (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13352
13353  def : Pat<(v8f16 (X86Movsh
13354                     (v8f16 VR128X:$dst),
13355                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
13356            (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13357
13358  def : Pat<(v8f16 (X86Movsh
13359                     (v8f16 VR128X:$dst),
13360                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
13361            (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13362
13363  def : Pat<(v8f16 (X86Movsh
13364                     (v8f16 VR128X:$dst),
13365                     (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
13366            (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13367
13368  def : Pat<(v8f16 (X86Movsh
13369                     (v8f16 VR128X:$dst),
13370                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
13371            (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
13372
13373  def : Pat<(v8f16 (X86Movsh
13374                     (v8f16 VR128X:$dst),
13375                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
13376            (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
13377
13378  def : Pat<(v8f16 (X86Movsh
13379                     (v8f16 VR128X:$dst),
13380                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
13381            (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
13382
13383  def : Pat<(v8f16 (X86Movsh
13384                     (v8f16 VR128X:$dst),
13385                     (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
13386            (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
13387} // Predicates = [HasFP16]
13388
13389let Predicates = [HasFP16, HasVLX] in {
13390  // Special patterns to allow use of X86VMSintToFP for masking. Instruction
13391  // patterns have been disabled with null_frag.
13392  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
13393            (VCVTQQ2PHZ256rr VR256X:$src)>;
13394  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13395                           VK4WM:$mask),
13396            (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13397  def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13398                           VK4WM:$mask),
13399            (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13400
13401  def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
13402            (VCVTQQ2PHZ256rm addr:$src)>;
13403  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13404                           VK4WM:$mask),
13405            (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13406  def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13407                           VK4WM:$mask),
13408            (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13409
13410  def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13411            (VCVTQQ2PHZ256rmb addr:$src)>;
13412  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13413                           (v8f16 VR128X:$src0), VK4WM:$mask),
13414            (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13415  def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13416                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13417            (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13418
13419  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
13420            (VCVTQQ2PHZ128rr VR128X:$src)>;
13421  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13422                           VK2WM:$mask),
13423            (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13424  def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13425                           VK2WM:$mask),
13426            (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13427
13428  def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
13429            (VCVTQQ2PHZ128rm addr:$src)>;
13430  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13431                           VK2WM:$mask),
13432            (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13433  def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13434                           VK2WM:$mask),
13435            (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13436
13437  def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13438            (VCVTQQ2PHZ128rmb addr:$src)>;
13439  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13440                           (v8f16 VR128X:$src0), VK2WM:$mask),
13441            (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13442  def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13443                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13444            (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13445
13446  // Special patterns to allow use of X86VMUintToFP for masking. Instruction
13447  // patterns have been disabled with null_frag.
  //
  // 256-bit source group (v4i64 -> v8f16, result in VR128X). Each source kind
  // gets three patterns:
  //   * X86any_VUintToFP (no mask)               -> <inst>
  //   * X86VMUintToFP with passthru $src0        -> <inst>k
  //   * X86VMUintToFP with ImmAllZerosV passthru -> <inst>kz
  // The mask register class is VK4WM, matching the four i64 source elements.

  // Register source: VCVTUQQ2PHZ256rr*.
13448  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
13449            (VCVTUQQ2PHZ256rr VR256X:$src)>;
13450  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
13451                           VK4WM:$mask),
13452            (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
13453  def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
13454                           VK4WM:$mask),
13455            (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
13456
  // Full-vector load source (loadv4i64): VCVTUQQ2PHZ256rm*.
13457  def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
13458            (VCVTUQQ2PHZ256rm addr:$src)>;
13459  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
13460                           VK4WM:$mask),
13461            (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13462  def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
13463                           VK4WM:$mask),
13464            (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
13465
  // Broadcast-load source (X86VBroadcastld64): VCVTUQQ2PHZ256rmb*.
13466  def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
13467            (VCVTUQQ2PHZ256rmb addr:$src)>;
13468  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13469                           (v8f16 VR128X:$src0), VK4WM:$mask),
13470            (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
13471  def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
13472                           v8f16x_info.ImmAllZerosV, VK4WM:$mask),
13473            (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
13474
// 128-bit source group (v2i64 -> v8f16) for X86any_VUintToFP / X86VMUintToFP.
// Each source kind (register, full-vector load, broadcast load) gets three
// patterns: unmasked -> <inst>, masked with passthru $src0 -> <inst>k, and
// masked with ImmAllZerosV passthru -> <inst>kz. The mask register class is
// VK2WM, matching the two i64 source elements.

// Register source: VCVTUQQ2PHZ128rr*.
13475  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
13476            (VCVTUQQ2PHZ128rr VR128X:$src)>;
13477  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
13478                           VK2WM:$mask),
13479            (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
13480  def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
13481                           VK2WM:$mask),
13482            (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
13483
  // Full-vector load source (loadv2i64): VCVTUQQ2PHZ128rm*.
13484  def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
13485            (VCVTUQQ2PHZ128rm addr:$src)>;
13486  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
13487                           VK2WM:$mask),
13488            (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13489  def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
13490                           VK2WM:$mask),
13491            (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
13492
  // Broadcast-load source (X86VBroadcastld64): VCVTUQQ2PHZ128rmb*.
13493  def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
13494            (VCVTUQQ2PHZ128rmb addr:$src)>;
13495  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13496                           (v8f16 VR128X:$src0), VK2WM:$mask),
13497            (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
13498  def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
13499                           v8f16x_info.ImmAllZerosV, VK2WM:$mask),
13500            (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
13501}
13502
// avx512_cfmaop_rm - non-rounding forms of a three-source packed operation
// whose accumulator is tied to the destination.
//
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SDNode matched by the selection patterns.
//   _            - X86VectorVTInfo supplying RC, VT, MemOp, ScalarMemOp,
//                  LdFrag, BroadcastLdFrag and BroadcastStr.
//   IsCommutable - forwarded to the register form only.
//
// The enclosing Constraints tie $src1 to $dst and mark $dst earlyclobber.
// $src1 therefore does not appear in the (ins ...) lists below, but it is the
// LAST operand handed to OpNode in every pattern.
// NOTE(review): earlyclobber presumably keeps $dst from being allocated to
// the same register as $src2/$src3 - confirm against the ISA reference.
// NOTE(review): the two operand strings are given in reversed order,
// presumably AT&T then Intel syntax - see AVX512_maskable_3src (defined
// elsewhere in this file).
13503let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
13504  multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
    // Register form: both explicit sources are vector registers.
13505    defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13506            (ins _.RC:$src2, _.RC:$src3),
13507            OpcodeStr, "$src3, $src2", "$src2, $src3",
13508            (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
13509
    // Memory form: $src3 is a full-width memory operand read through
    // _.LdFrag. No IsCommutable argument is passed here.
13510    defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13511            (ins _.RC:$src2, _.MemOp:$src3),
13512            OpcodeStr, "$src3, $src2", "$src2, $src3",
13513            (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
13514
    // Broadcast-memory form: $src3 is a scalar memory operand read through
    // _.BroadcastLdFrag; _.BroadcastStr is appended to the memory operand in
    // both assembly strings and EVEX_B is set.
13515    defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
13516            (ins _.RC:$src2, _.ScalarMemOp:$src3),
13517            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
13518            (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
13519  }
13520} // Constraints = "@earlyclobber $dst, $src1 = $dst"
13521
// avx512_cfmaop_round - register-only form of the three-source operation that
// takes an explicit rounding-control operand.
//
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   OpNode    - SDNode matched by the pattern; it receives the rounding mode
//               as a fourth (i32 timm) operand after $src2, $src3, $src1.
//   _         - X86VectorVTInfo supplying RC and VT.
//
// As in avx512_cfmaop_rm, $src1 is tied to $dst and $dst is earlyclobber.
// The $rc operand has class AVX512RC and the record is tagged EVEX_B and
// EVEX_RC in addition to EVEX_4V.
13522multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
13523                                 X86VectorVTInfo _> {
13524  let Constraints = "@earlyclobber $dst, $src1 = $dst" in
13525  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
13526          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
13527          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
13528          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
13529          EVEX_4V, EVEX_B, EVEX_RC;
13530}
13531
13532
// avx512_cfmaop_common - instantiates the three-source packed operation at
// all three vector lengths.
//
//   opc          - opcode byte shared by every variant.
//   OpcodeStr    - mnemonic.
//   OpNode       - SDNode for the non-rounding forms (avx512_cfmaop_rm).
//   OpNodeRnd    - SDNode for the rounding form (avx512_cfmaop_round).
//   IsCommutable - forwarded to avx512_cfmaop_rm.
//
// Z (512-bit) requires HasFP16 and is the only length that also gets the
// rounding-control form. Z256 and Z128 additionally require HasVLX and only
// get the avx512_cfmaop_rm forms. Scheduling classes are WriteFMAZ, WriteFMAY
// and WriteFMAX respectively.
// NOTE(review): the f32 type infos (v16f32/v8f32x/v4f32x) are used here even
// though the predicate is HasFP16; presumably each 32-bit lane carries one
// pair of FP16 values - confirm against the ISA reference.
13533multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
13534  let Predicates = [HasFP16] in {
13535    defm Z    : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
13536                avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
13537                      EVEX_V512, Sched<[WriteFMAZ]>;
13538  }
13539  let Predicates = [HasVLX, HasFP16] in {
13540    defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
13541    defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
13542  }
13543}
13544
// avx512_cfmulop_common - instantiates a two-source packed operation at all
// three vector lengths by reusing the generic avx512_fp_packed and
// avx512_fp_round_packed multiclasses (both defined elsewhere in this file).
//
//   opc          - opcode byte shared by every variant.
//   OpcodeStr    - mnemonic.
//   OpNode       - SDNode passed to avx512_fp_packed.
//   MaskOpNode   - SDNode passed to avx512_fp_packed as its mask node.
//   OpNodeRnd    - SDNode for the rounding form (512-bit only).
//   IsCommutable - passed twice in a row to avx512_fp_packed.
//
// Every instantiation passes "@earlyclobber $dst" as a constraint string.
// Z (512-bit) requires HasFP16 and also gets the rounding form; Z256 and Z128
// additionally require HasVLX. Scheduling classes are WriteFMAZ, WriteFMAY
// and WriteFMAX respectively.
// NOTE(review): the meaning of the trailing `"", "@earlyclobber $dst", 0`
// arguments depends on avx512_fp_packed's parameter list, which is not visible
// here - check its definition before changing them.
13545multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13546                                 SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
13547  let Predicates = [HasFP16] in {
13548    defm Z    : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
13549                                 WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
13550                avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
13551                                       "", "@earlyclobber $dst">, EVEX_V512;
13552  }
13553  let Predicates = [HasVLX, HasFP16] in {
13554    defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
13555                                 WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
13556    defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
13557                                 WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
13558  }
13559}
13560
13561
// Packed instantiations. All four are declared to read MXCSR and use
// EVEX_CD8<32, CD8VF>.
//
// Each VF* / VFC* pair shares one opcode byte (0x56 for the three-source
// forms, 0xD6 for the two-source forms) and is told apart by the prefix
// mix-in: T_MAP6XS for VF*, T_MAP6XD for VFC*. The VF* variants pass
// IsCommutable = 1 and the VFC* variants pass 0.
13562let Uses = [MXCSR] in {
13563  defm VFMADDCPH  : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
13564                                    T_MAP6XS, EVEX_CD8<32, CD8VF>;
13565  defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
13566                                    T_MAP6XD, EVEX_CD8<32, CD8VF>;
13567
  // For the two-source forms the same node is passed as both OpNode and
  // MaskOpNode.
13568  defm VFMULCPH  : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
13569                                         x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
13570  defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
13571                                         x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
13572}
13573
13574
// avx512_cfmaop_sh_common - scalar counterpart of the three-source operation.
// All forms operate on VR128X with v4f32x_info and require HasFP16.
//
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SDNode for the r and m forms.
//   OpNodeRnd    - SDNode for the rb form; it takes the rounding mode as an
//                  extra (i32 timm) operand.
//   IsCommutable - forwarded to the register form only.
//
// $src1 is tied to $dst and $dst is earlyclobber, so $src1 is absent from the
// (ins ...) lists but is the last vector operand of each pattern.
13575multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
13576                                   bit IsCommutable> {
13577  let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
    // Register form.
13578    defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13579                        (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
13580                        "$src3, $src2", "$src2, $src3",
13581                        (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
13582                        Sched<[WriteFMAX]>;
    // Memory form: $src3 is an ssmem operand matched through sse_load_f32;
    // scheduled with the folded-load variants of WriteFMAX.
13583    defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
13584                        (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
13585                        "$src3, $src2", "$src2, $src3",
13586                        (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
13587                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    // Register form with explicit rounding control ($rc), tagged EVEX_B and
    // EVEX_RC.
13588    defm rb : AVX512_maskable_3src<opc,  MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
13589                        (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
13590                        "$rc, $src3, $src2", "$src2, $src3, $rc",
13591                        (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
13592                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13593  }
13594}
13595
// avx512_cfmbinop_sh_common - scalar counterpart of the two-source operation.
// All forms are built with AVX512_maskable on f32x_info, operate on VR128X,
// pass X86selects as the select node and "@earlyclobber $dst" as the
// constraint, and require HasFP16.
//
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SDNode for the rr and rm forms.
//   OpNodeRnd    - SDNode for the rrb form; it takes the rounding mode as an
//                  extra (i32 timm) operand.
//   IsCommutable - passed three times to the rr form; the rm and rrb forms
//                  hard-code 0 for all three of those arguments.
13596multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
13597                                     SDNode OpNodeRnd, bit IsCommutable> {
13598  let Predicates = [HasFP16] in {
    // Register-register form.
13599    defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13600                        (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
13601                        "$src2, $src1", "$src1, $src2",
13602                        (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
13603                        IsCommutable, IsCommutable, IsCommutable,
13604                        X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
    // Register-memory form: $src2 is an ssmem operand matched through
    // sse_load_f32; scheduled with the folded-load variants of WriteFMAX.
13605    defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
13606                        (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
13607                        "$src2, $src1", "$src1, $src2",
13608                        (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
13609                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13610                        Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
    // Register-register form with explicit rounding control ($rc), tagged
    // EVEX_B and EVEX_RC.
13611    defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
13612                        (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
13613                        "$rc, $src2, $src1", "$src1, $src2, $rc",
13614                        (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
13615                        0, 0, 0, X86selects, "@earlyclobber $dst">,
13616                        EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
13617  }
13618}
13619
// Scalar instantiations. All four are declared to read MXCSR and use
// EVEX_CD8<32, CD8VT1>, EVEX_V128 and EVEX_4V.
//
// Each VF* / VFC* pair shares one opcode byte (0x57 for the three-source
// forms, 0xD7 for the two-source forms) and is told apart by the prefix
// mix-in: T_MAP6XS for VF*, T_MAP6XD for VFC*. The VF* variants pass
// IsCommutable = 1 and the VFC* variants pass 0.
// NOTE(review): the two-source forms carry VEX_LIG while the three-source
// forms do not - confirm that the difference is intentional.
13620let Uses = [MXCSR] in {
13621  defm VFMADDCSHZ  : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
13622                                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13623  defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
13624                                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
13625
13626  defm VFMULCSHZ  : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
13627                                    T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13628  defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
13629                                    T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
13630}
13631